From c3c5eae6ac69d9f7d4618407aa6c8ac5edc2267c Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 19 Feb 2019 19:59:28 +0000 Subject: migration: Fix cancel state During a cancelled migration there's a race where the fd can go into an error state before we get back around the migration loop and migration_detect_error transitions from cancelling->failed. Check for cancelled/cancelling and don't change the state. Red Hat bug: https://bugzilla.redhat.com/show_bug.cgi?id=1608649 Fixes: b23c2ade250 Signed-off-by: Dr. David Alan Gilbert Message-Id: <20190219195928.12289-1-dgilbert@redhat.com> Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Juan Quintela --- migration/migration.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index c39d3054ec..e44f77af02 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2911,6 +2911,13 @@ static MigThrError postcopy_pause(MigrationState *s) static MigThrError migration_detect_error(MigrationState *s) { int ret; + int state = s->state; + + if (state == MIGRATION_STATUS_CANCELLING || + state == MIGRATION_STATUS_CANCELLED) { + /* End the migration, but don't set the state to failed */ + return MIG_THR_ERR_FATAL; + } /* Try to detect any file errors */ ret = qemu_file_get_error(s->to_dst_file); @@ -2920,7 +2927,7 @@ static MigThrError migration_detect_error(MigrationState *s) return MIG_THR_ERR_NONE; } - if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) { + if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) { /* * For postcopy, we allow the network to be down for a * while. After that, it can be continued by a @@ -2932,7 +2939,7 @@ static MigThrError migration_detect_error(MigrationState *s) * For precopy (or postcopy with error outside IO), we fail * with no time. */ - migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); + migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED); trace_migration_thread_file_err(); /* Time to stop the migration, now. */ -- cgit v1.2.3 From cf75e2684938413f0bbe95f5a4b7db5c845e42c8 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 14 Feb 2019 18:53:51 +0000 Subject: migration/rdma: Fix qemu_rdma_cleanup null check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the migration fails before the channel is open (e.g. a bad address) we end up in the cleanup with rdma->channel==NULL. Spotted by Coverity: CID 1398634 Fixes: fbbaacab2758cb3f32a0 Signed-off-by: Dr. David Alan Gilbert Message-Id: <20190214185351.5927-1-dgilbert@redhat.com> Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Philippe Mathieu-Daudé --- migration/rdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/migration/rdma.c b/migration/rdma.c index 54a3c11540..9fa3b176eb 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -2321,7 +2321,9 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) rdma->connected = false; } - qemu_set_fd_handler(rdma->channel->fd, NULL, NULL, NULL); + if (rdma->channel) { + qemu_set_fd_handler(rdma->channel->fd, NULL, NULL, NULL); + } g_free(rdma->dest_blocks); rdma->dest_blocks = NULL; -- cgit v1.2.3 From 892ae715b6bc8107fccaa3caeb2a5bd4f6d2cb37 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 27 Feb 2019 16:49:00 +0000 Subject: migration: Cleanup during exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we cleanup the migration object as we exit main after the main_loop finishes; however if there's a migration running things get messy and we can end up with the migration thread still trying to access freed structures. We now take a ref to the object around the migration thread itself, so the act of dropping the ref during exit doesn't cause us to lose the state until the thread quits. Cancelling the migration during migration also tries to get the thread to quit. We do this a bit earlier; so hopefully migration gets out of the way before all the devices etc are freed. Signed-off-by: Dr. David Alan Gilbert Tested-by: Alex Bennée Message-Id: <20190227164900.16378-1-dgilbert@redhat.com> Reviewed-by: Juan Quintela Reviewed-by: Alex Bennée Signed-off-by: Dr. David Alan Gilbert --- include/migration/misc.h | 2 +- migration/migration.c | 10 +++++++++- vl.c | 7 ++++++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/include/migration/misc.h b/include/migration/misc.h index 0471e04d1f..6f9df74436 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -36,7 +36,7 @@ void dump_vmstate_json_to_file(FILE *out_fp); /* migration/migration.c */ void migration_object_init(void); -void migration_object_finalize(void); +void migration_shutdown(void); void qemu_start_incoming_migration(const char *uri, Error **errp); bool migration_is_idle(void); void add_migration_state_change_notifier(Notifier *notify); diff --git a/migration/migration.c b/migration/migration.c index e44f77af02..d45561f9b8 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -126,6 +126,7 @@ static bool migration_object_check(MigrationState *ms, Error **errp); static int migration_maybe_pause(MigrationState *s, int *current_active_state, int new_state); +static void migrate_fd_cancel(MigrationState *s); void migration_object_init(void) { @@ -167,8 +168,13 @@ void migration_object_init(void) } } -void migration_object_finalize(void) +void migration_shutdown(void) { + /* + * Cancel the current migration - that will (eventually) + * stop the migration using this structure + */ + migrate_fd_cancel(current_migration); object_unref(OBJECT(current_migration)); } @@ -3134,6 +3140,7 @@ static void *migration_thread(void *opaque) rcu_register_thread(); + object_ref(OBJECT(s)); s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); qemu_savevm_state_header(s->to_dst_file); @@ -3230,6 +3237,7 @@ static void *migration_thread(void *opaque) trace_migration_thread_after_loop(); migration_iteration_finish(s); + object_unref(OBJECT(s)); rcu_unregister_thread(); return NULL; } diff --git a/vl.c b/vl.c index fd0d51320d..5be8cf4f11 100644 --- a/vl.c +++ b/vl.c @@ -4579,6 +4579,12 @@ int main(int argc, char **argv, char **envp) gdbserver_cleanup(); + /* + * cleaning up the migration object cancels any existing migration + * try to do this early so that it also stops using devices. + */ + migration_shutdown(); + /* No more vcpu or device emulation activity beyond this point */ vm_shutdown(); @@ -4594,7 +4600,6 @@ int main(int argc, char **argv, char **envp) monitor_cleanup(); qemu_chr_cleanup(); user_creatable_cleanup(); - migration_object_finalize(); /* TODO: unref root container, check all devices are ok */ return 0; -- cgit v1.2.3 From 9589e7630139700581eff3ba9ddfdfbe99362440 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Mon, 4 Mar 2019 20:49:23 +0200 Subject: migration/rdma: clang compilation fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Configuring QEMU with: ../configure --cc=clang --enable-rdma Leads to compilation error: CC migration/rdma.o CC migration/block.o qemu/migration/rdma.c:3615:58: error: taking address of packed member 'rkey' of class or structure 'RDMARegisterResult' may result in an unaligned pointer value [-Werror,-Waddress-of-packed-member] (uintptr_t)host_addr, NULL, ®_result->rkey, ^~~~~~~~~~~~~~~~ Fix it by using a temp local variable. Signed-off-by: Marcel Apfelbaum Message-Id: <20190304184923.24215-1-marcel.apfelbaum@gmail.com> Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Philippe Mathieu-Daudé --- migration/rdma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/migration/rdma.c b/migration/rdma.c index 9fa3b176eb..d5251cd820 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -3613,13 +3613,16 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque) } chunk_start = ram_chunk_start(block, chunk); chunk_end = ram_chunk_end(block, chunk + reg->chunks); + /* avoid "-Waddress-of-packed-member" warning */ + uint32_t tmp_rkey = 0; if (qemu_rdma_register_and_get_keys(rdma, block, - (uintptr_t)host_addr, NULL, ®_result->rkey, + (uintptr_t)host_addr, NULL, &tmp_rkey, chunk, chunk_start, chunk_end)) { error_report("cannot get rkey"); ret = -EINVAL; goto out; } + reg_result->rkey = tmp_rkey; reg_result->host_addr = (uintptr_t)block->local_host_addr; -- cgit v1.2.3 From 754cb9c0ebcf2164107baf39e75517e459887622 Mon Sep 17 00:00:00 2001 From: Yury Kotov Date: Fri, 15 Feb 2019 20:45:44 +0300 Subject: exec: Change RAMBlockIterFunc definition Currently, qemu_ram_foreach_* calls RAMBlockIterFunc with many block-specific arguments. But often iter func needs RAMBlock*. This refactoring is needed for fast access to RAMBlock flags from qemu_ram_foreach_block's callback. The only way to achieve this now is to call qemu_ram_block_from_host (which also enumerates blocks). So, this patch reduces complexity of qemu_ram_foreach_block() -> cb() -> qemu_ram_block_from_host() from O(n^2) to O(n). Fix RAMBlockIterFunc definition and add some functions to read RAMBlock* fields witch were passed. Signed-off-by: Yury Kotov Message-Id: <20190215174548.2630-2-yury-kotov@yandex-team.ru> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- exec.c | 21 +++++++++++++++++---- include/exec/cpu-common.h | 6 ++++-- migration/postcopy-ram.c | 36 +++++++++++++++++++++--------------- migration/rdma.c | 7 +++++-- stubs/ram-block.c | 15 +++++++++++++++ util/vfio-helpers.c | 6 +++--- 6 files changed, 65 insertions(+), 26 deletions(-) diff --git a/exec.c b/exec.c index 518064530b..bf71462929 100644 --- a/exec.c +++ b/exec.c @@ -1972,6 +1972,21 @@ const char *qemu_ram_get_idstr(RAMBlock *rb) return rb->idstr; } +void *qemu_ram_get_host_addr(RAMBlock *rb) +{ + return rb->host; +} + +ram_addr_t qemu_ram_get_offset(RAMBlock *rb) +{ + return rb->offset; +} + +ram_addr_t qemu_ram_get_used_length(RAMBlock *rb) +{ + return rb->used_length; +} + bool qemu_ram_is_shared(RAMBlock *rb) { return rb->flags & RAM_SHARED; @@ -3961,8 +3976,7 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque) rcu_read_lock(); RAMBLOCK_FOREACH(block) { - ret = func(block->idstr, block->host, block->offset, - block->used_length, opaque); + ret = func(block, opaque); if (ret) { break; } @@ -3981,8 +3995,7 @@ int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque) if (!qemu_ram_is_migratable(block)) { continue; } - ret = func(block->idstr, block->host, block->offset, - block->used_length, opaque); + ret = func(block, opaque); if (ret) { break; } diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 63ec1f9b37..9cd1f94bc5 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -72,6 +72,9 @@ ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host); void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev); void qemu_ram_unset_idstr(RAMBlock *block); const char *qemu_ram_get_idstr(RAMBlock *rb); +void *qemu_ram_get_host_addr(RAMBlock *rb); +ram_addr_t qemu_ram_get_offset(RAMBlock *rb); +ram_addr_t qemu_ram_get_used_length(RAMBlock *rb); bool qemu_ram_is_shared(RAMBlock *rb); bool qemu_ram_is_uf_zeroable(RAMBlock *rb); void qemu_ram_set_uf_zeroable(RAMBlock *rb); @@ -116,8 +119,7 @@ void cpu_flush_icache_range(hwaddr start, hwaddr len); extern struct MemoryRegion io_mem_rom; extern struct MemoryRegion io_mem_notdirty; -typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr, - ram_addr_t offset, ram_addr_t length, void *opaque); +typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index fa09dba534..b098816221 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -319,10 +319,10 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) /* Callback from postcopy_ram_supported_by_host block iterator. */ -static int test_ramblock_postcopiable(const char *block_name, void *host_addr, - ram_addr_t offset, ram_addr_t length, void *opaque) +static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque) { - RAMBlock *rb = qemu_ram_block_by_name(block_name); + const char *block_name = qemu_ram_get_idstr(rb); + ram_addr_t length = qemu_ram_get_used_length(rb); size_t pagesize = qemu_ram_pagesize(rb); if (length % pagesize) { @@ -443,9 +443,12 @@ out: * must be done right at the start prior to pre-copy. * opaque should be the MIS. */ -static int init_range(const char *block_name, void *host_addr, - ram_addr_t offset, ram_addr_t length, void *opaque) +static int init_range(RAMBlock *rb, void *opaque) { + const char *block_name = qemu_ram_get_idstr(rb); + void *host_addr = qemu_ram_get_host_addr(rb); + ram_addr_t offset = qemu_ram_get_offset(rb); + ram_addr_t length = qemu_ram_get_used_length(rb); trace_postcopy_init_range(block_name, host_addr, offset, length); /* @@ -465,9 +468,12 @@ static int init_range(const char *block_name, void *host_addr, * At the end of migration, undo the effects of init_range * opaque should be the MIS. */ -static int cleanup_range(const char *block_name, void *host_addr, - ram_addr_t offset, ram_addr_t length, void *opaque) +static int cleanup_range(RAMBlock *rb, void *opaque) { + const char *block_name = qemu_ram_get_idstr(rb); + void *host_addr = qemu_ram_get_host_addr(rb); + ram_addr_t offset = qemu_ram_get_offset(rb); + ram_addr_t length = qemu_ram_get_used_length(rb); MigrationIncomingState *mis = opaque; struct uffdio_range range_struct; trace_postcopy_cleanup_range(block_name, host_addr, offset, length); @@ -586,9 +592,12 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) /* * Disable huge pages on an area */ -static int nhp_range(const char *block_name, void *host_addr, - ram_addr_t offset, ram_addr_t length, void *opaque) +static int nhp_range(RAMBlock *rb, void *opaque) { + const char *block_name = qemu_ram_get_idstr(rb); + void *host_addr = qemu_ram_get_host_addr(rb); + ram_addr_t offset = qemu_ram_get_offset(rb); + ram_addr_t length = qemu_ram_get_used_length(rb); trace_postcopy_nhp_range(block_name, host_addr, offset, length); /* @@ -626,15 +635,13 @@ int postcopy_ram_prepare_discard(MigrationIncomingState *mis) * opaque: MigrationIncomingState pointer * Returns 0 on success */ -static int ram_block_enable_notify(const char *block_name, void *host_addr, - ram_addr_t offset, ram_addr_t length, - void *opaque) +static int ram_block_enable_notify(RAMBlock *rb, void *opaque) { MigrationIncomingState *mis = opaque; struct uffdio_register reg_struct; - reg_struct.range.start = (uintptr_t)host_addr; - reg_struct.range.len = length; + reg_struct.range.start = (uintptr_t)qemu_ram_get_host_addr(rb); + reg_struct.range.len = qemu_ram_get_used_length(rb); reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; /* Now tell our userfault_fd that it's responsible for this area */ @@ -647,7 +654,6 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr, return -1; } if (reg_struct.ioctls & ((__u64)1 << _UFFDIO_ZEROPAGE)) { - RAMBlock *rb = qemu_ram_block_by_name(block_name); qemu_ram_set_uf_zeroable(rb); } diff --git a/migration/rdma.c b/migration/rdma.c index d5251cd820..b0115e4de5 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -624,9 +624,12 @@ static int rdma_add_block(RDMAContext *rdma, const char *block_name, * in advanced before the migration starts. This tells us where the RAM blocks * are so that we can register them individually. */ -static int qemu_rdma_init_one_block(const char *block_name, void *host_addr, - ram_addr_t block_offset, ram_addr_t length, void *opaque) +static int qemu_rdma_init_one_block(RAMBlock *rb, void *opaque) { + const char *block_name = qemu_ram_get_idstr(rb); + void *host_addr = qemu_ram_get_host_addr(rb); + ram_addr_t block_offset = qemu_ram_get_offset(rb); + ram_addr_t length = qemu_ram_get_used_length(rb); return rdma_add_block(opaque, block_name, host_addr, block_offset, length); } diff --git a/stubs/ram-block.c b/stubs/ram-block.c index cfa5d8678f..73c0a3ee08 100644 --- a/stubs/ram-block.c +++ b/stubs/ram-block.c @@ -2,6 +2,21 @@ #include "exec/ramlist.h" #include "exec/cpu-common.h" +void *qemu_ram_get_host_addr(RAMBlock *rb) +{ + return 0; +} + +ram_addr_t qemu_ram_get_offset(RAMBlock *rb) +{ + return 0; +} + +ram_addr_t qemu_ram_get_used_length(RAMBlock *rb) +{ + return 0; +} + void ram_block_notifier_add(RAMBlockNotifier *n) { } diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c index 342d4a2285..2367fe8f7f 100644 --- a/util/vfio-helpers.c +++ b/util/vfio-helpers.c @@ -391,10 +391,10 @@ static void qemu_vfio_ram_block_removed(RAMBlockNotifier *n, } } -static int qemu_vfio_init_ramblock(const char *block_name, void *host_addr, - ram_addr_t offset, ram_addr_t length, - void *opaque) +static int qemu_vfio_init_ramblock(RAMBlock *rb, void *opaque) { + void *host_addr = qemu_ram_get_host_addr(rb); + ram_addr_t length = qemu_ram_get_used_length(rb); int ret; QEMUVFIOState *s = opaque; -- cgit v1.2.3 From 18269069c31089c3b42ef78ddffe1ee5799e2708 Mon Sep 17 00:00:00 2001 From: Yury Kotov Date: Fri, 15 Feb 2019 20:45:45 +0300 Subject: migration: Introduce ignore-shared capability We want to use local migration to update QEMU for running guests. In this case we don't need to migrate shared (file backed) RAM. So, add a capability to ignore such blocks during live migration. Signed-off-by: Yury Kotov Message-Id: <20190215174548.2630-3-yury-kotov@yandex-team.ru> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 14 ++++++++++++++ migration/migration.h | 1 + qapi/migration.json | 5 ++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index d45561f9b8..e823fd8b91 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -995,6 +995,11 @@ static bool migrate_caps_check(bool *cap_list, error_setg(errp, "Postcopy is not supported"); return false; } + + if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { + error_setg(errp, "Postcopy is not compatible with ignore-shared"); + return false; + } } return true; @@ -2074,6 +2079,15 @@ bool migrate_dirty_bitmaps(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; } +bool migrate_ignore_shared(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; +} + bool migrate_use_events(void) { MigrationState *s; diff --git a/migration/migration.h b/migration/migration.h index c99154dea2..443051adb0 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -265,6 +265,7 @@ bool migrate_release_ram(void); bool migrate_postcopy_ram(void); bool migrate_zero_blocks(void); bool migrate_dirty_bitmaps(void); +bool migrate_ignore_shared(void); bool migrate_auto_converge(void); bool migrate_use_multifd(void); diff --git a/qapi/migration.json b/qapi/migration.json index 1fd7bbea9b..eab87340b2 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -409,13 +409,16 @@ # devices (and thus take locks) immediately at the end of migration. # (since 3.0) # +# @x-ignore-shared: If enabled, QEMU will not migrate shared memory (since 4.0) +# # Since: 1.2 ## { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram', 'block', 'return-path', 'pause-before-switchover', 'x-multifd', - 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate' ] } + 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', + 'x-ignore-shared' ] } ## # @MigrationCapabilityStatus: -- cgit v1.2.3 From fbd162e629aaf8a7e464af44d2f73d06b26428ad Mon Sep 17 00:00:00 2001 From: Yury Kotov Date: Fri, 15 Feb 2019 20:45:46 +0300 Subject: migration: Add an ability to ignore shared RAM blocks If ignore-shared capability is set then skip shared RAMBlocks during the RAM migration. Also, move qemu_ram_foreach_migratable_block (and rename) to the migration code, because it requires access to the migration capabilities. Signed-off-by: Yury Kotov Message-Id: <20190215174548.2630-4-yury-kotov@yandex-team.ru> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- exec.c | 19 -------- include/exec/cpu-common.h | 1 - migration/migration.h | 4 +- migration/postcopy-ram.c | 12 ++--- migration/ram.c | 110 +++++++++++++++++++++++++++++++++++----------- migration/rdma.c | 2 +- 6 files changed, 94 insertions(+), 54 deletions(-) diff --git a/exec.c b/exec.c index bf71462929..1d4f3784d6 100644 --- a/exec.c +++ b/exec.c @@ -3985,25 +3985,6 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque) return ret; } -int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque) -{ - RAMBlock *block; - int ret = 0; - - rcu_read_lock(); - RAMBLOCK_FOREACH(block) { - if (!qemu_ram_is_migratable(block)) { - continue; - } - ret = func(block, opaque); - if (ret) { - break; - } - } - rcu_read_unlock(); - return ret; -} - /* * Unmap pages of memory from start to start+length such that * they a) read as 0, b) Trigger whatever fault mechanism diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 9cd1f94bc5..cef8b88a2a 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -122,7 +122,6 @@ extern struct MemoryRegion io_mem_notdirty; typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); -int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque); int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); #endif diff --git a/migration/migration.h b/migration/migration.h index 443051adb0..81c261941d 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -306,8 +306,10 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); void dirty_bitmap_mig_before_vm_start(void); void init_dirty_bitmap_incoming_migration(void); +int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); + #define qemu_ram_foreach_block \ - #warning "Use qemu_ram_foreach_block_migratable in migration code" + #warning "Use foreach_not_ignored_block in migration code" void migration_make_urgent_request(void); void migration_consume_urgent_request(void); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index b098816221..e2aa57a701 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -374,7 +374,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) } /* We don't support postcopy with shared RAM yet */ - if (qemu_ram_foreach_migratable_block(test_ramblock_postcopiable, NULL)) { + if (foreach_not_ignored_block(test_ramblock_postcopiable, NULL)) { goto out; } @@ -508,7 +508,7 @@ static int cleanup_range(RAMBlock *rb, void *opaque) */ int postcopy_ram_incoming_init(MigrationIncomingState *mis) { - if (qemu_ram_foreach_migratable_block(init_range, NULL)) { + if (foreach_not_ignored_block(init_range, NULL)) { return -1; } @@ -550,7 +550,7 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) return -1; } - if (qemu_ram_foreach_migratable_block(cleanup_range, mis)) { + if (foreach_not_ignored_block(cleanup_range, mis)) { return -1; } @@ -617,7 +617,7 @@ static int nhp_range(RAMBlock *rb, void *opaque) */ int postcopy_ram_prepare_discard(MigrationIncomingState *mis) { - if (qemu_ram_foreach_migratable_block(nhp_range, mis)) { + if (foreach_not_ignored_block(nhp_range, mis)) { return -1; } @@ -628,7 +628,7 @@ int postcopy_ram_prepare_discard(MigrationIncomingState *mis) /* * Mark the given area of RAM as requiring notification to unwritten areas - * Used as a callback on qemu_ram_foreach_migratable_block. + * Used as a callback on foreach_not_ignored_block. * host_addr: Base of area to mark * offset: Offset in the whole ram arena * length: Length of the section @@ -1122,7 +1122,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) mis->have_fault_thread = true; /* Mark so that we get notified of accesses to unwritten areas */ - if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) { + if (foreach_not_ignored_block(ram_block_enable_notify, mis)) { error_report("ram_block_enable_notify failed"); return -1; } diff --git a/migration/ram.c b/migration/ram.c index 59191c1ed2..01315edd66 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -159,18 +159,44 @@ out: return ret; } +static bool ramblock_is_ignored(RAMBlock *block) +{ + return !qemu_ram_is_migratable(block) || + (migrate_ignore_shared() && qemu_ram_is_shared(block)); +} + /* Should be holding either ram_list.mutex, or the RCU lock. */ +#define RAMBLOCK_FOREACH_NOT_IGNORED(block) \ + INTERNAL_RAMBLOCK_FOREACH(block) \ + if (ramblock_is_ignored(block)) {} else + #define RAMBLOCK_FOREACH_MIGRATABLE(block) \ INTERNAL_RAMBLOCK_FOREACH(block) \ if (!qemu_ram_is_migratable(block)) {} else #undef RAMBLOCK_FOREACH +int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque) +{ + RAMBlock *block; + int ret = 0; + + rcu_read_lock(); + RAMBLOCK_FOREACH_NOT_IGNORED(block) { + ret = func(block, opaque); + if (ret) { + break; + } + } + rcu_read_unlock(); + return ret; +} + static void ramblock_recv_map_init(void) { RAMBlock *rb; - RAMBLOCK_FOREACH_MIGRATABLE(rb) { + RAMBLOCK_FOREACH_NOT_IGNORED(rb) { assert(!rb->receivedmap); rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits()); } @@ -1545,7 +1571,7 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, unsigned long *bitmap = rb->bmap; unsigned long next; - if (!qemu_ram_is_migratable(rb)) { + if (ramblock_is_ignored(rb)) { return size; } @@ -1594,7 +1620,7 @@ uint64_t ram_pagesize_summary(void) RAMBlock *block; uint64_t summary = 0; - RAMBLOCK_FOREACH_MIGRATABLE(block) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { summary |= block->page_size; } @@ -1664,7 +1690,7 @@ static void migration_bitmap_sync(RAMState *rs) qemu_mutex_lock(&rs->bitmap_mutex); rcu_read_lock(); - RAMBLOCK_FOREACH_MIGRATABLE(block) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { migration_bitmap_sync_range(rs, block, 0, block->used_length); } ram_counters.remaining = ram_bytes_remaining(); @@ -2388,7 +2414,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, size_t pagesize_bits = qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS; - if (!qemu_ram_is_migratable(pss->block)) { + if (ramblock_is_ignored(pss->block)) { error_report("block %s should not be migrated !", pss->block->idstr); return 0; } @@ -2486,19 +2512,30 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) } } -uint64_t ram_bytes_total(void) +static uint64_t ram_bytes_total_common(bool count_ignored) { RAMBlock *block; uint64_t total = 0; rcu_read_lock(); - RAMBLOCK_FOREACH_MIGRATABLE(block) { - total += block->used_length; + if (count_ignored) { + RAMBLOCK_FOREACH_MIGRATABLE(block) { + total += block->used_length; + } + } else { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { + total += block->used_length; + } } rcu_read_unlock(); return total; } +uint64_t ram_bytes_total(void) +{ + return ram_bytes_total_common(false); +} + static void xbzrle_load_setup(void) { XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE); @@ -2547,7 +2584,7 @@ static void ram_save_cleanup(void *opaque) */ memory_global_dirty_log_stop(); - RAMBLOCK_FOREACH_MIGRATABLE(block) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { g_free(block->bmap); block->bmap = NULL; g_free(block->unsentmap); @@ -2610,7 +2647,7 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms) { struct RAMBlock *block; - RAMBLOCK_FOREACH_MIGRATABLE(block) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { unsigned long *bitmap = block->bmap; unsigned long range = block->used_length >> TARGET_PAGE_BITS; unsigned long run_start = find_next_zero_bit(bitmap, range, 0); @@ -2688,7 +2725,7 @@ static int postcopy_each_ram_send_discard(MigrationState *ms) struct RAMBlock *block; int ret; - RAMBLOCK_FOREACH_MIGRATABLE(block) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { PostcopyDiscardState *pds = postcopy_discard_send_init(ms, block->idstr); @@ -2896,7 +2933,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) rs->last_sent_block = NULL; rs->last_page = 0; - RAMBLOCK_FOREACH_MIGRATABLE(block) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { unsigned long pages = block->used_length >> TARGET_PAGE_BITS; unsigned long *bitmap = block->bmap; unsigned long *unsentmap = block->unsentmap; @@ -3062,7 +3099,7 @@ static void ram_list_init_bitmaps(void) /* Skip setting bitmap if there is no RAM */ if (ram_bytes_total()) { - RAMBLOCK_FOREACH_MIGRATABLE(block) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { pages = block->max_length >> TARGET_PAGE_BITS; block->bmap = bitmap_new(pages); bitmap_set(block->bmap, 0, pages); @@ -3117,7 +3154,7 @@ static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out) * about dirty page logging as well. */ - RAMBLOCK_FOREACH_MIGRATABLE(block) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { pages += bitmap_count_one(block->bmap, block->used_length >> TARGET_PAGE_BITS); } @@ -3176,7 +3213,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) rcu_read_lock(); - qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); + qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE); RAMBLOCK_FOREACH_MIGRATABLE(block) { qemu_put_byte(f, strlen(block->idstr)); @@ -3185,6 +3222,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque) if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) { qemu_put_be64(f, block->page_size); } + if (migrate_ignore_shared()) { + qemu_put_be64(f, block->mr->addr); + qemu_put_byte(f, ramblock_is_ignored(block) ? 1 : 0); + } } rcu_read_unlock(); @@ -3443,7 +3484,7 @@ static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags) return NULL; } - if (!qemu_ram_is_migratable(block)) { + if (ramblock_is_ignored(block)) { error_report("block %s should not be migrated !", id); return NULL; } @@ -3698,7 +3739,7 @@ int colo_init_ram_cache(void) RAMBlock *block; rcu_read_lock(); - RAMBLOCK_FOREACH_MIGRATABLE(block) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { block->colo_cache = qemu_anon_ram_alloc(block->used_length, NULL, false); @@ -3719,7 +3760,7 @@ int colo_init_ram_cache(void) if (ram_bytes_total()) { RAMBlock *block; - RAMBLOCK_FOREACH_MIGRATABLE(block) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { unsigned long pages = block->max_length >> TARGET_PAGE_BITS; block->bmap = bitmap_new(pages); @@ -3734,7 +3775,7 @@ int colo_init_ram_cache(void) out_locked: - RAMBLOCK_FOREACH_MIGRATABLE(block) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { if (block->colo_cache) { qemu_anon_ram_free(block->colo_cache, block->used_length); block->colo_cache = NULL; @@ -3751,14 +3792,14 @@ void colo_release_ram_cache(void) RAMBlock *block; memory_global_dirty_log_stop(); - RAMBLOCK_FOREACH_MIGRATABLE(block) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { g_free(block->bmap); block->bmap = NULL; } rcu_read_lock(); - RAMBLOCK_FOREACH_MIGRATABLE(block) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { if (block->colo_cache) { qemu_anon_ram_free(block->colo_cache, block->used_length); block->colo_cache = NULL; @@ -3794,7 +3835,7 @@ static int ram_load_cleanup(void *opaque) { RAMBlock *rb; - RAMBLOCK_FOREACH_MIGRATABLE(rb) { + RAMBLOCK_FOREACH_NOT_IGNORED(rb) { if (ramblock_is_pmem(rb)) { pmem_persist(rb->host, rb->used_length); } @@ -3803,7 +3844,7 @@ static int ram_load_cleanup(void *opaque) xbzrle_load_cleanup(); compress_threads_load_cleanup(); - RAMBLOCK_FOREACH_MIGRATABLE(rb) { + RAMBLOCK_FOREACH_NOT_IGNORED(rb) { g_free(rb->receivedmap); rb->receivedmap = NULL; } @@ -4003,7 +4044,7 @@ static void colo_flush_ram_cache(void) memory_global_dirty_log_sync(); rcu_read_lock(); - RAMBLOCK_FOREACH_MIGRATABLE(block) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { migration_bitmap_sync_range(ram_state, block, 0, block->used_length); } rcu_read_unlock(); @@ -4146,6 +4187,23 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) ret = -EINVAL; } } + if (migrate_ignore_shared()) { + hwaddr addr = qemu_get_be64(f); + bool ignored = qemu_get_byte(f); + if (ignored != ramblock_is_ignored(block)) { + error_report("RAM block %s should %s be migrated", + id, ignored ? "" : "not"); + ret = -EINVAL; + } + if (ramblock_is_ignored(block) && + block->mr->addr != addr) { + error_report("Mismatched GPAs for block %s " + "%" PRId64 "!= %" PRId64, + id, (uint64_t)addr, + (uint64_t)block->mr->addr); + ret = -EINVAL; + } + } ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, block->idstr); } else { @@ -4216,7 +4274,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) static bool ram_has_postcopy(void *opaque) { RAMBlock *rb; - RAMBLOCK_FOREACH_MIGRATABLE(rb) { + RAMBLOCK_FOREACH_NOT_IGNORED(rb) { if (ramblock_is_pmem(rb)) { info_report("Block: %s, host: %p is a nvdimm memory, postcopy" "is not supported now!", rb->idstr, rb->host); @@ -4236,7 +4294,7 @@ static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs) trace_ram_dirty_bitmap_sync_start(); - RAMBLOCK_FOREACH_MIGRATABLE(block) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { qemu_savevm_send_recv_bitmap(file, block->idstr); trace_ram_dirty_bitmap_request(block->idstr); ramblock_count++; diff --git a/migration/rdma.c b/migration/rdma.c index b0115e4de5..63c118af09 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -644,7 +644,7 @@ static int qemu_rdma_init_ram_blocks(RDMAContext *rdma) assert(rdma->blockmap == NULL); memset(local, 0, sizeof *local); - qemu_ram_foreach_migratable_block(qemu_rdma_init_one_block, rdma); + foreach_not_ignored_block(qemu_rdma_init_one_block, rdma); trace_qemu_rdma_init_ram_blocks(local->nb_blocks); rdma->dest_blocks = g_new0(RDMADestBlock, rdma->local_ram_blocks.nb_blocks); -- cgit v1.2.3 From 660a9b681256b07261048c5e4a569eadadd5753b Mon Sep 17 00:00:00 2001 From: Yury Kotov Date: Fri, 15 Feb 2019 20:45:47 +0300 Subject: tests/migration-test: Add a test for ignore-shared capability Signed-off-by: Yury Kotov Message-Id: <20190215174548.2630-5-yury-kotov@yandex-team.ru> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert dgilbert: Disabled the test for now, not happy on aarch64 --- tests/migration-test.c | 134 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 109 insertions(+), 25 deletions(-) diff --git a/tests/migration-test.c b/tests/migration-test.c index 8352612364..336b6e20cd 100644 --- a/tests/migration-test.c +++ b/tests/migration-test.c @@ -215,10 +215,10 @@ static gchar *migrate_query_status(QTestState *who) * events suddenly appearing confuse the qmp()/hmp() responses. */ -static uint64_t get_migration_pass(QTestState *who) +static int64_t read_ram_property_int(QTestState *who, const char *property) { QDict *rsp_return, *rsp_ram; - uint64_t result; + int64_t result; rsp_return = migrate_query(who); if (!qdict_haskey(rsp_return, "ram")) { @@ -226,12 +226,17 @@ static uint64_t get_migration_pass(QTestState *who) result = 0; } else { rsp_ram = qdict_get_qdict(rsp_return, "ram"); - result = qdict_get_try_int(rsp_ram, "dirty-sync-count", 0); + result = qdict_get_try_int(rsp_ram, property, 0); } qobject_unref(rsp_return); return result; } +static uint64_t get_migration_pass(QTestState *who) +{ + return read_ram_property_int(who, "dirty-sync-count"); +} + static void read_blocktime(QTestState *who) { QDict *rsp_return; @@ -332,6 +337,13 @@ static void cleanup(const char *filename) g_free(path); } +static char *get_shmem_opts(const char *mem_size, const char *shmem_path) +{ + return g_strdup_printf("-object memory-backend-file,id=mem0,size=%s" + ",mem-path=%s,share=on -numa node,memdev=mem0", + mem_size, shmem_path); +} + static void migrate_check_parameter(QTestState *who, const char *parameter, long long value) { @@ -430,73 +442,95 @@ static void migrate_postcopy_start(QTestState *from, QTestState *to) } static int test_migrate_start(QTestState **from, QTestState **to, - const char *uri, bool hide_stderr) + const char *uri, bool hide_stderr, + bool use_shmem) { gchar *cmd_src, *cmd_dst; - char *bootpath = g_strdup_printf("%s/bootsect", tmpfs); + char *bootpath = NULL; + char *extra_opts = NULL; + char *shmem_path = NULL; const char *arch = qtest_get_arch(); const char *accel = "kvm:tcg"; - got_stop = false; + if (use_shmem) { + if (!g_file_test("/dev/shm", G_FILE_TEST_IS_DIR)) { + g_test_skip("/dev/shm is not supported"); + return -1; + } + shmem_path = g_strdup_printf("/dev/shm/qemu-%d", getpid()); + } + got_stop = false; + bootpath = g_strdup_printf("%s/bootsect", tmpfs); if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { init_bootfile(bootpath, x86_bootsect); + extra_opts = use_shmem ? get_shmem_opts("150M", shmem_path) : NULL; cmd_src = g_strdup_printf("-machine accel=%s -m 150M" " -name source,debug-threads=on" " -serial file:%s/src_serial" - " -drive file=%s,format=raw", - accel, tmpfs, bootpath); + " -drive file=%s,format=raw %s", + accel, tmpfs, bootpath, + extra_opts ? extra_opts : ""); cmd_dst = g_strdup_printf("-machine accel=%s -m 150M" " -name target,debug-threads=on" " -serial file:%s/dest_serial" " -drive file=%s,format=raw" - " -incoming %s", - accel, tmpfs, bootpath, uri); + " -incoming %s %s", + accel, tmpfs, bootpath, uri, + extra_opts ? extra_opts : ""); start_address = X86_TEST_MEM_START; end_address = X86_TEST_MEM_END; } else if (g_str_equal(arch, "s390x")) { init_bootfile_s390x(bootpath); + extra_opts = use_shmem ? get_shmem_opts("128M", shmem_path) : NULL; cmd_src = g_strdup_printf("-machine accel=%s -m 128M" " -name source,debug-threads=on" - " -serial file:%s/src_serial -bios %s", - accel, tmpfs, bootpath); + " -serial file:%s/src_serial -bios %s %s", + accel, tmpfs, bootpath, + extra_opts ? extra_opts : ""); cmd_dst = g_strdup_printf("-machine accel=%s -m 128M" " -name target,debug-threads=on" " -serial file:%s/dest_serial -bios %s" - " -incoming %s", - accel, tmpfs, bootpath, uri); + " -incoming %s %s", + accel, tmpfs, bootpath, uri, + extra_opts ? extra_opts : ""); start_address = S390_TEST_MEM_START; end_address = S390_TEST_MEM_END; } else if (strcmp(arch, "ppc64") == 0) { + extra_opts = use_shmem ? get_shmem_opts("256M", shmem_path) : NULL; cmd_src = g_strdup_printf("-machine accel=%s -m 256M -nodefaults" " -name source,debug-threads=on" " -serial file:%s/src_serial" " -prom-env 'use-nvramrc?=true' -prom-env " "'nvramrc=hex .\" _\" begin %x %x " "do i c@ 1 + i c! 1000 +loop .\" B\" 0 " - "until'", accel, tmpfs, end_address, - start_address); + "until' %s", accel, tmpfs, end_address, + start_address, extra_opts ? extra_opts : ""); cmd_dst = g_strdup_printf("-machine accel=%s -m 256M" " -name target,debug-threads=on" " -serial file:%s/dest_serial" - " -incoming %s", - accel, tmpfs, uri); + " -incoming %s %s", + accel, tmpfs, uri, + extra_opts ? extra_opts : ""); start_address = PPC_TEST_MEM_START; end_address = PPC_TEST_MEM_END; } else if (strcmp(arch, "aarch64") == 0) { init_bootfile(bootpath, aarch64_kernel); + extra_opts = use_shmem ? get_shmem_opts("150M", shmem_path) : NULL; cmd_src = g_strdup_printf("-machine virt,accel=%s,gic-version=max " "-name vmsource,debug-threads=on -cpu max " "-m 150M -serial file:%s/src_serial " - "-kernel %s ", - accel, tmpfs, bootpath); + "-kernel %s %s", + accel, tmpfs, bootpath, + extra_opts ? extra_opts : ""); cmd_dst = g_strdup_printf("-machine virt,accel=%s,gic-version=max " "-name vmdest,debug-threads=on -cpu max " "-m 150M -serial file:%s/dest_serial " "-kernel %s " - "-incoming %s ", - accel, tmpfs, bootpath, uri); + "-incoming %s %s", + accel, tmpfs, bootpath, uri, + extra_opts ? extra_opts : ""); start_address = ARM_TEST_MEM_START; end_address = ARM_TEST_MEM_END; @@ -507,6 +541,7 @@ static int test_migrate_start(QTestState **from, QTestState **to, } g_free(bootpath); + g_free(extra_opts); if (hide_stderr) { gchar *tmp; @@ -524,6 +559,16 @@ static int test_migrate_start(QTestState **from, QTestState **to, *to = qtest_init(cmd_dst); g_free(cmd_dst); + + /* + * Remove shmem file immediately to avoid memory leak in test failed case. + * It's valid becase QEMU has already opened this file + */ + if (use_shmem) { + unlink(shmem_path); + g_free(shmem_path); + } + return 0; } @@ -603,7 +648,7 @@ static int migrate_postcopy_prepare(QTestState **from_ptr, char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); QTestState *from, *to; - if (test_migrate_start(&from, &to, uri, hide_error)) { + if (test_migrate_start(&from, &to, uri, hide_error, false)) { return -1; } @@ -720,7 +765,7 @@ static void test_baddest(void) char *status; bool failed; - if (test_migrate_start(&from, &to, "tcp:0:0", true)) { + if (test_migrate_start(&from, &to, "tcp:0:0", true, false)) { return; } migrate(from, "tcp:0:0", "{}"); @@ -745,7 +790,7 @@ static void test_precopy_unix(void) char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); QTestState *from, *to; - if (test_migrate_start(&from, &to, uri, false)) { + if (test_migrate_start(&from, &to, uri, false, false)) { return; } @@ -781,6 +826,44 @@ static void test_precopy_unix(void) g_free(uri); } +#if 0 +/* Currently upset on aarch64 TCG */ +static void test_ignore_shared(void) +{ + char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + QTestState *from, *to; + + if (test_migrate_start(&from, &to, uri, false, true)) { + return; + } + + migrate_set_capability(from, "x-ignore-shared", true); + migrate_set_capability(to, "x-ignore-shared", true); + + /* Wait for the first serial output from the source */ + wait_for_serial("src_serial"); + + migrate(from, uri, "{}"); + + wait_for_migration_pass(from); + + if (!got_stop) { + qtest_qmp_eventwait(from, "STOP"); + } + + qtest_qmp_eventwait(to, "RESUME"); + + wait_for_serial("dest_serial"); + wait_for_migration_complete(from); + + /* Check whether shared RAM has been really skipped */ + g_assert_cmpint(read_ram_property_int(from, "transferred"), <, 1024 * 1024); + + test_migrate_end(from, to, true); + g_free(uri); +} +#endif + int main(int argc, char **argv) { char template[] = "/tmp/migration-test-XXXXXX"; @@ -832,6 +915,7 @@ int main(int argc, char **argv) qtest_add_func("/migration/deprecated", test_deprecated); qtest_add_func("/migration/bad_dest", test_baddest); qtest_add_func("/migration/precopy/unix", test_precopy_unix); + /* qtest_add_func("/migration/ignore_shared", test_ignore_shared); */ ret = g_test_run(); -- cgit v1.2.3 From 6cafc8e4ddea32cb12346393f433e6cab346398c Mon Sep 17 00:00:00 2001 From: Yury Kotov Date: Fri, 15 Feb 2019 20:45:48 +0300 Subject: migration: Add capabilities validation Currently we don't check which capabilities set in the source QEMU. We just expect that the target QEMU has the same enabled capabilities. Add explicit validation for capabilities to make sure that the target VM has them too. This is enabled for only new capabilities to keep compatibily. Signed-off-by: Yury Kotov Message-Id: <20190215174548.2630-6-yury-kotov@yandex-team.ru> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert dgilbert: Manual merge --- migration/savevm.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/migration/savevm.c b/migration/savevm.c index b3868f7fb5..013098581f 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -57,6 +57,7 @@ #include "sysemu/replay.h" #include "qjson.h" #include "migration/colo.h" +#include "qemu/bitmap.h" #include "net/announce.h" const unsigned int postcopy_ram_discard_version = 0; @@ -249,6 +250,8 @@ typedef struct SaveState { uint32_t len; const char *name; uint32_t target_page_bits; + uint32_t caps_count; + MigrationCapability *capabilities; } SaveState; static SaveState savevm_state = { @@ -256,15 +259,51 @@ static SaveState savevm_state = { .global_section_id = 0, }; +static bool should_validate_capability(int capability) +{ + assert(capability >= 0 && capability < MIGRATION_CAPABILITY__MAX); + /* Validate only new capabilities to keep compatibility. */ + switch (capability) { + case MIGRATION_CAPABILITY_X_IGNORE_SHARED: + return true; + default: + return false; + } +} + +static uint32_t get_validatable_capabilities_count(void) +{ + MigrationState *s = migrate_get_current(); + uint32_t result = 0; + int i; + for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { + if (should_validate_capability(i) && s->enabled_capabilities[i]) { + result++; + } + } + return result; +} + static int configuration_pre_save(void *opaque) { SaveState *state = opaque; const char *current_name = MACHINE_GET_CLASS(current_machine)->name; + MigrationState *s = migrate_get_current(); + int i, j; state->len = strlen(current_name); state->name = current_name; state->target_page_bits = qemu_target_page_bits(); + state->caps_count = get_validatable_capabilities_count(); + state->capabilities = g_renew(MigrationCapability, state->capabilities, + state->caps_count); + for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) { + if (should_validate_capability(i) && s->enabled_capabilities[i]) { + state->capabilities[j++] = i; + } + } + return 0; } @@ -280,6 +319,40 @@ static int configuration_pre_load(void *opaque) return 0; } +static bool configuration_validate_capabilities(SaveState *state) +{ + bool ret = true; + MigrationState *s = migrate_get_current(); + unsigned long *source_caps_bm; + int i; + + source_caps_bm = bitmap_new(MIGRATION_CAPABILITY__MAX); + for (i = 0; i < state->caps_count; i++) { + MigrationCapability capability = state->capabilities[i]; + set_bit(capability, source_caps_bm); + } + + for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { + bool source_state, target_state; + if (!should_validate_capability(i)) { + continue; + } + source_state = test_bit(i, source_caps_bm); + target_state = s->enabled_capabilities[i]; + if (source_state != target_state) { + error_report("Capability %s is %s, but received capability is %s", + MigrationCapability_str(i), + target_state ? "on" : "off", + source_state ? "on" : "off"); + ret = false; + /* Don't break here to report all failed capabilities */ + } + } + + g_free(source_caps_bm); + return ret; +} + static int configuration_post_load(void *opaque, int version_id) { SaveState *state = opaque; @@ -297,9 +370,53 @@ static int configuration_post_load(void *opaque, int version_id) return -EINVAL; } + if (!configuration_validate_capabilities(state)) { + return -EINVAL; + } + return 0; } +static int get_capability(QEMUFile *f, void *pv, size_t size, + const VMStateField *field) +{ + MigrationCapability *capability = pv; + char capability_str[UINT8_MAX + 1]; + uint8_t len; + int i; + + len = qemu_get_byte(f); + qemu_get_buffer(f, (uint8_t *)capability_str, len); + capability_str[len] = '\0'; + for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { + if (!strcmp(MigrationCapability_str(i), capability_str)) { + *capability = i; + return 0; + } + } + error_report("Received unknown capability %s", capability_str); + return -EINVAL; +} + +static int put_capability(QEMUFile *f, void *pv, size_t size, + const VMStateField *field, QJSON *vmdesc) +{ + MigrationCapability *capability = pv; + const char *capability_str = MigrationCapability_str(*capability); + size_t len = strlen(capability_str); + assert(len <= UINT8_MAX); + + qemu_put_byte(f, len); + qemu_put_buffer(f, (uint8_t *)capability_str, len); + return 0; +} + +static const VMStateInfo vmstate_info_capability = { + .name = "capability", + .get = get_capability, + .put = put_capability, +}; + /* The target-page-bits subsection is present only if the * target page size is not the same as the default (ie the * minimum page size for a variable-page-size guest CPU). @@ -324,6 +441,25 @@ static const VMStateDescription vmstate_target_page_bits = { } }; +static bool vmstate_capabilites_needed(void *opaque) +{ + return get_validatable_capabilities_count() > 0; +} + +static const VMStateDescription vmstate_capabilites = { + .name = "configuration/capabilities", + .version_id = 1, + .minimum_version_id = 1, + .needed = vmstate_capabilites_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT32_V(caps_count, SaveState, 1), + VMSTATE_VARRAY_UINT32_ALLOC(capabilities, SaveState, caps_count, 1, + vmstate_info_capability, + MigrationCapability), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_configuration = { .name = "configuration", .version_id = 1, @@ -337,6 +473,7 @@ static const VMStateDescription vmstate_configuration = { }, .subsections = (const VMStateDescription*[]) { &vmstate_target_page_bits, + &vmstate_capabilites, NULL } }; -- cgit v1.2.3 From cdf842299de873663e992a6b5db8914c406bba6f Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Wed, 27 Feb 2019 11:51:26 +0100 Subject: tests: Add migration xbzrle test Reviewed-by: Peter Xu Signed-off-by: Juan Quintela Message-Id: <20190227105128.1655-2-quintela@redhat.com> Signed-off-by: Dr. David Alan Gilbert dgilbert: Fixup for class with Yury's series --- tests/migration-test.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tests/migration-test.c b/tests/migration-test.c index 336b6e20cd..4d2302079d 100644 --- a/tests/migration-test.c +++ b/tests/migration-test.c @@ -629,6 +629,17 @@ static void deprecated_set_speed(QTestState *who, long long value) migrate_check_parameter(who, "max-bandwidth", value); } +static void deprecated_set_cache_size(QTestState *who, long long value) +{ + QDict *rsp; + + rsp = qtest_qmp(who, "{ 'execute': 'migrate-set-cache-size'," + "'arguments': { 'value': %lld } }", value); + g_assert(qdict_haskey(rsp, "return")); + qobject_unref(rsp); + migrate_check_parameter(who, "xbzrle-cache-size", value); +} + static void test_deprecated(void) { QTestState *from; @@ -637,6 +648,7 @@ static void test_deprecated(void) deprecated_set_downtime(from, 0.12345); deprecated_set_speed(from, 12345); + deprecated_set_cache_size(from, 4096); qtest_quit(from); } @@ -864,6 +876,57 @@ static void test_ignore_shared(void) } #endif +static void test_xbzrle(const char *uri) +{ + QTestState *from, *to; + + if (test_migrate_start(&from, &to, uri, false, false)) { + return; + } + + /* + * We want to pick a speed slow enough that the test completes + * quickly, but that it doesn't complete precopy even on a slow + * machine, so also set the downtime. + */ + /* 1 ms should make it not converge*/ + migrate_set_parameter(from, "downtime-limit", 1); + /* 1GB/s */ + migrate_set_parameter(from, "max-bandwidth", 1000000000); + + migrate_set_parameter(from, "xbzrle-cache-size", 33554432); + + migrate_set_capability(from, "xbzrle", "true"); + migrate_set_capability(to, "xbzrle", "true"); + /* Wait for the first serial output from the source */ + wait_for_serial("src_serial"); + + migrate(from, uri, "{}"); + + wait_for_migration_pass(from); + + /* 300ms should converge */ + migrate_set_parameter(from, "downtime-limit", 300); + + if (!got_stop) { + qtest_qmp_eventwait(from, "STOP"); + } + qtest_qmp_eventwait(to, "RESUME"); + + wait_for_serial("dest_serial"); + wait_for_migration_complete(from); + + test_migrate_end(from, to, true); +} + +static void test_xbzrle_unix(void) +{ + char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + + test_xbzrle(uri); + g_free(uri); +} + int main(int argc, char **argv) { char template[] = "/tmp/migration-test-XXXXXX"; @@ -916,6 +979,7 @@ int main(int argc, char **argv) qtest_add_func("/migration/bad_dest", test_baddest); qtest_add_func("/migration/precopy/unix", test_precopy_unix); /* qtest_add_func("/migration/ignore_shared", test_ignore_shared); */ + qtest_add_func("/migration/xbzrle/unix", test_xbzrle_unix); ret = g_test_run(); -- cgit v1.2.3 From 9aca82ba3108d15529a2e0f5b0996cac309a7216 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Wed, 27 Feb 2019 11:51:27 +0100 Subject: migration: Create socket-address parameter It will be used to store the uri parameters. We want this only for tcp, so we don't set it for other uris. We need it to know what port is migration running. Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Juan Quintela Signed-off-by: Dr. David Alan Gilbert dgilbert: Removed DummyStruct as suggested by Eric & Markus -- --- hmp.c | 33 +++++++++++++++++++++++++++++++++ migration/migration.c | 24 ++++++++++++++++++++++++ migration/migration.h | 4 ++++ migration/socket.c | 11 +++++++++++ qapi/migration.json | 6 +++++- 5 files changed, 77 insertions(+), 1 deletion(-) diff --git a/hmp.c b/hmp.c index 5f13b16e24..c2ad3f8251 100644 --- a/hmp.c +++ b/hmp.c @@ -166,6 +166,27 @@ void hmp_info_mice(Monitor *mon, const QDict *qdict) qapi_free_MouseInfoList(mice_list); } +static char *SocketAddress_to_str(SocketAddress *addr) +{ + switch (addr->type) { + case SOCKET_ADDRESS_TYPE_INET: + return g_strdup_printf("tcp:%s:%s", + addr->u.inet.host, + addr->u.inet.port); + case SOCKET_ADDRESS_TYPE_UNIX: + return g_strdup_printf("unix:%s", + addr->u.q_unix.path); + case SOCKET_ADDRESS_TYPE_FD: + return g_strdup_printf("fd:%s", addr->u.fd.str); + case SOCKET_ADDRESS_TYPE_VSOCK: + return g_strdup_printf("tcp:%s:%s", + addr->u.vsock.cid, + addr->u.vsock.port); + default: + return g_strdup("unknown address type"); + } +} + void hmp_info_migrate(Monitor *mon, const QDict *qdict) { MigrationInfo *info; @@ -306,6 +327,18 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) g_free(str); visit_free(v); } + if (info->has_socket_address) { + SocketAddressList *addr; + + monitor_printf(mon, "socket address: [\n"); + + for (addr = info->socket_address; addr; addr = addr->next) { + char *s = SocketAddress_to_str(addr->value); + monitor_printf(mon, "\t%s\n", s); + g_free(s); + } + monitor_printf(mon, "]\n"); + } qapi_free_MigrationInfo(info); qapi_free_MigrationCapabilityStatusList(caps); } diff --git a/migration/migration.c b/migration/migration.c index e823fd8b91..952d29243e 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -31,6 +31,8 @@ #include "migration/vmstate.h" #include "block/block.h" #include "qapi/error.h" +#include "qapi/clone-visitor.h" +#include "qapi/qapi-visit-sockets.h" #include "qapi/qapi-commands-migration.h" #include "qapi/qapi-events-migration.h" #include "qapi/qmp/qerror.h" @@ -213,6 +215,11 @@ void migration_incoming_state_destroy(void) } qemu_event_reset(&mis->main_thread_load_event); + + if (mis->socket_address_list) { + qapi_free_SocketAddressList(mis->socket_address_list); + mis->socket_address_list = NULL; + } } static void migrate_generate_event(int new_state) @@ -328,6 +335,17 @@ void migration_incoming_enable_colo(void) migration_colo_enabled = true; } +void migrate_add_address(SocketAddress *address) +{ + MigrationIncomingState *mis = migration_incoming_get_current(); + SocketAddressList *addrs; + + addrs = g_new0(SocketAddressList, 1); + addrs->next = mis->socket_address_list; + mis->socket_address_list = addrs; + addrs->value = QAPI_CLONE(SocketAddress, address); +} + void qemu_start_incoming_migration(const char *uri, Error **errp) { const char *p; @@ -1009,6 +1027,12 @@ static void fill_destination_migration_info(MigrationInfo *info) { MigrationIncomingState *mis = migration_incoming_get_current(); + if (mis->socket_address_list) { + info->has_socket_address = true; + info->socket_address = + QAPI_CLONE(SocketAddressList, mis->socket_address_list); + } + switch (mis->state) { case MIGRATION_STATUS_NONE: return; diff --git a/migration/migration.h b/migration/migration.h index 81c261941d..99e99e56bd 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -84,6 +84,9 @@ struct MigrationIncomingState { bool postcopy_recover_triggered; QemuSemaphore postcopy_pause_sem_dst; QemuSemaphore postcopy_pause_sem_fault; + + /* List of listening socket addresses */ + SocketAddressList *socket_address_list; }; MigrationIncomingState *migration_incoming_get_current(void); @@ -305,6 +308,7 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); void dirty_bitmap_mig_before_vm_start(void); void init_dirty_bitmap_incoming_migration(void); +void migrate_add_address(SocketAddress *address); int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); diff --git a/migration/socket.c b/migration/socket.c index f4c8174400..239527fb1f 100644 --- a/migration/socket.c +++ b/migration/socket.c @@ -15,6 +15,7 @@ */ #include "qemu/osdep.h" +#include "qemu/cutils.h" #include "qemu-common.h" #include "qemu/error-report.h" @@ -177,6 +178,7 @@ static void socket_start_incoming_migration(SocketAddress *saddr, Error **errp) { QIONetListener *listener = qio_net_listener_new(); + size_t i; qio_net_listener_set_name(listener, "migration-socket-listener"); @@ -189,6 +191,15 @@ static void socket_start_incoming_migration(SocketAddress *saddr, socket_accept_incoming_migration, NULL, NULL, g_main_context_get_thread_default()); + + for (i = 0; i < listener->nsioc; i++) { + SocketAddress *address = + qio_channel_socket_get_local_address(listener->sioc[i], errp); + if (!address) { + return; + } + migrate_add_address(address); + } } void tcp_start_incoming_migration(const char *host_port, Error **errp) diff --git a/qapi/migration.json b/qapi/migration.json index eab87340b2..6bd7fd3f1a 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -6,6 +6,7 @@ ## { 'include': 'common.json' } +{ 'include': 'sockets.json' } ## # @MigrationStats: @@ -199,6 +200,8 @@ # @compression: migration compression statistics, only returned if compression # feature is on and status is 'active' or 'completed' (Since 3.1) # +# @socket-address: Only used for tcp, to know what the real port is (Since 4.0) +# # Since: 0.14.0 ## { 'struct': 'MigrationInfo', @@ -213,7 +216,8 @@ '*error-desc': 'str', '*postcopy-blocktime' : 'uint32', '*postcopy-vcpu-blocktime': ['uint32'], - '*compression': 'CompressionStats'} } + '*compression': 'CompressionStats', + '*socket-address': ['SocketAddress'] } } ## # @query-migrate: -- cgit v1.2.3 From 609d3844076ebcfc507fb8a8ddaf2cb7d7c6bc48 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Wed, 27 Feb 2019 11:51:28 +0100 Subject: tests: Add basic migration precopy tcp test Not sharing code from precopy/unix because we have to read back the tcp parameter. Signed-off-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Thomas Huth Message-Id: <20190227105128.1655-4-quintela@redhat.com> Signed-off-by: Dr. David Alan Gilbert dgilbert: Fixup for clash with Yury's --- tests/migration-test.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 105 insertions(+), 5 deletions(-) diff --git a/tests/migration-test.c b/tests/migration-test.c index 4d2302079d..e3617ceaca 100644 --- a/tests/migration-test.c +++ b/tests/migration-test.c @@ -20,6 +20,9 @@ #include "qemu/sockets.h" #include "chardev/char.h" #include "sysemu/sysemu.h" +#include "qapi/qapi-visit-sockets.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qobject-output-visitor.h" #include "migration/migration-test.h" @@ -344,15 +347,68 @@ static char *get_shmem_opts(const char *mem_size, const char *shmem_path) mem_size, shmem_path); } +static char *SocketAddress_to_str(SocketAddress *addr) +{ + switch (addr->type) { + case SOCKET_ADDRESS_TYPE_INET: + return g_strdup_printf("tcp:%s:%s", + addr->u.inet.host, + addr->u.inet.port); + case SOCKET_ADDRESS_TYPE_UNIX: + return g_strdup_printf("unix:%s", + addr->u.q_unix.path); + case SOCKET_ADDRESS_TYPE_FD: + return g_strdup_printf("fd:%s", addr->u.fd.str); + case SOCKET_ADDRESS_TYPE_VSOCK: + return g_strdup_printf("tcp:%s:%s", + addr->u.vsock.cid, + addr->u.vsock.port); + default: + return g_strdup("unknown address type"); + } +} + +static char *migrate_get_socket_address(QTestState *who, const char *parameter) +{ + QDict *rsp; + char *result; + Error *local_err = NULL; + SocketAddressList *addrs; + Visitor *iv = NULL; + QObject *object; + + rsp = migrate_query(who); + object = qdict_get(rsp, parameter); + + iv = qobject_input_visitor_new(object); + visit_type_SocketAddressList(iv, NULL, &addrs, &local_err); + + /* we are only using a single address */ + result = g_strdup_printf("%s", SocketAddress_to_str(addrs->value)); + + qapi_free_SocketAddressList(addrs); + qobject_unref(rsp); + return result; +} + +static long long migrate_get_parameter(QTestState *who, const char *parameter) +{ + QDict *rsp; + long long result; + + rsp = wait_command(who, "{ 'execute': 'query-migrate-parameters' }"); + result = qdict_get_int(rsp, parameter); + qobject_unref(rsp); + return result; +} + static void migrate_check_parameter(QTestState *who, const char *parameter, long long value) { - QDict *rsp_return; + long long result; - rsp_return = wait_command(who, - "{ 'execute': 'query-migrate-parameters' }"); - g_assert_cmpint(qdict_get_int(rsp_return, parameter), ==, value); - qobject_unref(rsp_return); + result = migrate_get_parameter(who, parameter); + g_assert_cmpint(result, ==, value); } static void migrate_set_parameter(QTestState *who, const char *parameter, @@ -927,6 +983,49 @@ static void test_xbzrle_unix(void) g_free(uri); } +static void test_precopy_tcp(void) +{ + char *uri; + QTestState *from, *to; + + if (test_migrate_start(&from, &to, "tcp:127.0.0.1:0", false, false)) { + return; + } + + /* + * We want to pick a speed slow enough that the test completes + * quickly, but that it doesn't complete precopy even on a slow + * machine, so also set the downtime. + */ + /* 1 ms should make it not converge*/ + migrate_set_parameter(from, "downtime-limit", 1); + /* 1GB/s */ + migrate_set_parameter(from, "max-bandwidth", 1000000000); + + /* Wait for the first serial output from the source */ + wait_for_serial("src_serial"); + + uri = migrate_get_socket_address(to, "socket-address"); + + migrate(from, uri, "{}"); + + wait_for_migration_pass(from); + + /* 300ms should converge */ + migrate_set_parameter(from, "downtime-limit", 300); + + if (!got_stop) { + qtest_qmp_eventwait(from, "STOP"); + } + qtest_qmp_eventwait(to, "RESUME"); + + wait_for_serial("dest_serial"); + wait_for_migration_complete(from); + + test_migrate_end(from, to, true); + g_free(uri); +} + int main(int argc, char **argv) { char template[] = "/tmp/migration-test-XXXXXX"; @@ -978,6 +1077,7 @@ int main(int argc, char **argv) qtest_add_func("/migration/deprecated", test_deprecated); qtest_add_func("/migration/bad_dest", test_baddest); qtest_add_func("/migration/precopy/unix", test_precopy_unix); + qtest_add_func("/migration/precopy/tcp", test_precopy_tcp); /* qtest_add_func("/migration/ignore_shared", test_ignore_shared); */ qtest_add_func("/migration/xbzrle/unix", test_xbzrle_unix); -- cgit v1.2.3 From e7c91368d24637f077b457187f8dff4767dfadae Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 11 Dec 2018 16:24:47 +0800 Subject: bitmap: fix bitmap_count_one BITMAP_LAST_WORD_MASK(nbits) returns 0xffffffff when "nbits=0", which makes bitmap_count_one fail to handle the "nbits=0" case. It appears to be preferred to remain BITMAP_LAST_WORD_MASK identical to the kernel implementation that it is ported from. So this patch fixes bitmap_count_one to handle the nbits=0 case. Inital Discussion Link: https://www.mail-archive.com/qemu-devel@nongnu.org/msg554316.html Signed-off-by: Wei Wang CC: Juan Quintela CC: Dr. David Alan Gilbert CC: Peter Xu Message-Id: <1544516693-5395-2-git-send-email-wei.w.wang@intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- include/qemu/bitmap.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h index 509eeddece..679f1bdce4 100644 --- a/include/qemu/bitmap.h +++ b/include/qemu/bitmap.h @@ -221,6 +221,10 @@ static inline int bitmap_intersects(const unsigned long *src1, static inline long bitmap_count_one(const unsigned long *bitmap, long nbits) { + if (unlikely(!nbits)) { + return 0; + } + if (small_nbits(nbits)) { return ctpopl(*bitmap & BITMAP_LAST_WORD_MASK(nbits)); } else { -- cgit v1.2.3 From 94960256ae0bc346de49e2ef3808778e8468caf5 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 11 Dec 2018 16:24:48 +0800 Subject: bitmap: bitmap_count_one_with_offset Count the number of 1s in a bitmap starting from an offset. Signed-off-by: Wei Wang CC: Dr. David Alan Gilbert CC: Juan Quintela CC: Michael S. Tsirkin Reviewed-by: Dr. David Alan Gilbert Message-Id: <1544516693-5395-3-git-send-email-wei.w.wang@intel.com> Signed-off-by: Dr. David Alan Gilbert --- include/qemu/bitmap.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h index 679f1bdce4..5c313346b9 100644 --- a/include/qemu/bitmap.h +++ b/include/qemu/bitmap.h @@ -232,6 +232,19 @@ static inline long bitmap_count_one(const unsigned long *bitmap, long nbits) } } +static inline long bitmap_count_one_with_offset(const unsigned long *bitmap, + long offset, long nbits) +{ + long aligned_offset = QEMU_ALIGN_DOWN(offset, BITS_PER_LONG); + long redundant_bits = offset - aligned_offset; + long bits_to_count = nbits + redundant_bits; + const unsigned long *bitmap_start = bitmap + + aligned_offset / BITS_PER_LONG; + + return bitmap_count_one(bitmap_start, bits_to_count) - + bitmap_count_one(bitmap_start, redundant_bits); +} + void bitmap_set(unsigned long *map, long i, long len); void bitmap_set_atomic(unsigned long *map, long i, long len); void bitmap_clear(unsigned long *map, long start, long nr); -- cgit v1.2.3 From 386a907b37a9321bc5d699bc37104d6ffba1b34d Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 11 Dec 2018 16:24:49 +0800 Subject: migration: use bitmap_mutex in migration_bitmap_clear_dirty The bitmap mutex is used to synchronize threads to update the dirty bitmap and the migration_dirty_pages counter. For example, the free page optimization clears bits of free pages from the bitmap in an iothread context. This patch makes migration_bitmap_clear_dirty update the bitmap and counter under the mutex. Signed-off-by: Wei Wang CC: Dr. David Alan Gilbert CC: Juan Quintela CC: Michael S. Tsirkin CC: Peter Xu Reviewed-by: Peter Xu Message-Id: <1544516693-5395-4-git-send-email-wei.w.wang@intel.com> Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/migration/ram.c b/migration/ram.c index 01315edd66..0747873ca9 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -342,7 +342,7 @@ struct RAMState { uint64_t target_page_count; /* number of dirty bits in the bitmap */ uint64_t migration_dirty_pages; - /* protects modification of the bitmap */ + /* Protects modification of the bitmap and migration dirty pages */ QemuMutex bitmap_mutex; /* The RAMBlock used in the last src_page_requests */ RAMBlock *last_req_rb; @@ -1590,11 +1590,14 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, { bool ret; + qemu_mutex_lock(&rs->bitmap_mutex); ret = test_and_clear_bit(page, rb->bmap); if (ret) { rs->migration_dirty_pages--; } + qemu_mutex_unlock(&rs->bitmap_mutex); + return ret; } -- cgit v1.2.3 From 6bcb05fc4211d2bd88fe73c65e93602428c51e5b Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 11 Dec 2018 16:24:50 +0800 Subject: migration: API to clear bits of guest free pages from the dirty bitmap This patch adds an API to clear bits corresponding to guest free pages from the dirty bitmap. Spilt the free page block if it crosses the QEMU RAMBlock boundary. Signed-off-by: Wei Wang CC: Dr. David Alan Gilbert CC: Juan Quintela CC: Michael S. Tsirkin CC: Peter Xu Reviewed-by: Peter Xu Message-Id: <1544516693-5395-5-git-send-email-wei.w.wang@intel.com> Signed-off-by: Dr. David Alan Gilbert --- include/migration/misc.h | 2 ++ migration/ram.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/include/migration/misc.h b/include/migration/misc.h index 6f9df74436..81ee347e35 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -14,12 +14,14 @@ #ifndef MIGRATION_MISC_H #define MIGRATION_MISC_H +#include "exec/cpu-common.h" #include "qemu/notify.h" #include "qapi/qapi-types-net.h" /* migration/ram.c */ void ram_mig_init(void); +void qemu_guest_free_page_hint(void *addr, size_t len); /* migration/block.c */ diff --git a/migration/ram.c b/migration/ram.c index 0747873ca9..32c0dbb98a 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3181,6 +3181,53 @@ static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out) trace_ram_state_resume_prepare(pages); } +/* + * This function clears bits of the free pages reported by the caller from the + * migration dirty bitmap. @addr is the host address corresponding to the + * start of the continuous guest free pages, and @len is the total bytes of + * those pages. + */ +void qemu_guest_free_page_hint(void *addr, size_t len) +{ + RAMBlock *block; + ram_addr_t offset; + size_t used_len, start, npages; + MigrationState *s = migrate_get_current(); + + /* This function is currently expected to be used during live migration */ + if (!migration_is_setup_or_active(s->state)) { + return; + } + + for (; len > 0; len -= used_len, addr += used_len) { + block = qemu_ram_block_from_host(addr, false, &offset); + if (unlikely(!block || offset >= block->used_length)) { + /* + * The implementation might not support RAMBlock resize during + * live migration, but it could happen in theory with future + * updates. So we add a check here to capture that case. + */ + error_report_once("%s unexpected error", __func__); + return; + } + + if (len <= block->used_length - offset) { + used_len = len; + } else { + used_len = block->used_length - offset; + } + + start = offset >> TARGET_PAGE_BITS; + npages = used_len >> TARGET_PAGE_BITS; + + qemu_mutex_lock(&ram_state->bitmap_mutex); + ram_state->migration_dirty_pages -= + bitmap_count_one_with_offset(block->bmap, start, npages); + bitmap_clear(block->bmap, start, npages); + qemu_mutex_unlock(&ram_state->bitmap_mutex); + } +} + /* * Each of ram_save_setup, ram_save_iterate and ram_save_complete has * long-running RCU critical section. When rcu-reclaims in the code -- cgit v1.2.3 From bd2270608fa0112108aafcba89b87282c68db741 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 11 Dec 2018 16:24:51 +0800 Subject: migration/ram.c: add a notifier chain for precopy This patch adds a notifier chain for the memory precopy. This enables various precopy optimizations to be invoked at specific places. Signed-off-by: Wei Wang CC: Dr. David Alan Gilbert CC: Juan Quintela CC: Michael S. Tsirkin CC: Peter Xu Reviewed-by: Peter Xu Message-Id: <1544516693-5395-6-git-send-email-wei.w.wang@intel.com> Signed-off-by: Dr. David Alan Gilbert --- include/migration/misc.h | 19 ++++++++++++++++++ migration/ram.c | 51 +++++++++++++++++++++++++++++++++++++++++++++--- migration/savevm.c | 15 ++++++++++++++ vl.c | 1 + 4 files changed, 83 insertions(+), 3 deletions(-) diff --git a/include/migration/misc.h b/include/migration/misc.h index 81ee347e35..dc46d3354f 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -20,6 +20,25 @@ /* migration/ram.c */ +typedef enum PrecopyNotifyReason { + PRECOPY_NOTIFY_SETUP = 0, + PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC = 1, + PRECOPY_NOTIFY_AFTER_BITMAP_SYNC = 2, + PRECOPY_NOTIFY_COMPLETE = 3, + PRECOPY_NOTIFY_CLEANUP = 4, + PRECOPY_NOTIFY_MAX = 5, +} PrecopyNotifyReason; + +typedef struct PrecopyNotifyData { + enum PrecopyNotifyReason reason; + Error **errp; +} PrecopyNotifyData; + +void precopy_infrastructure_init(void); +void precopy_add_notifier(NotifierWithReturn *n); +void precopy_remove_notifier(NotifierWithReturn *n); +int precopy_notify(PrecopyNotifyReason reason, Error **errp); + void ram_mig_init(void); void qemu_guest_free_page_hint(void *addr, size_t len); diff --git a/migration/ram.c b/migration/ram.c index 32c0dbb98a..bee4fb3fd4 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -354,6 +354,32 @@ typedef struct RAMState RAMState; static RAMState *ram_state; +static NotifierWithReturnList precopy_notifier_list; + +void precopy_infrastructure_init(void) +{ + notifier_with_return_list_init(&precopy_notifier_list); +} + +void precopy_add_notifier(NotifierWithReturn *n) +{ + notifier_with_return_list_add(&precopy_notifier_list, n); +} + +void precopy_remove_notifier(NotifierWithReturn *n) +{ + notifier_with_return_remove(n); +} + +int precopy_notify(PrecopyNotifyReason reason, Error **errp) +{ + PrecopyNotifyData pnd; + pnd.reason = reason; + pnd.errp = errp; + + return notifier_with_return_list_notify(&precopy_notifier_list, &pnd); +} + uint64_t ram_bytes_remaining(void) { return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) : @@ -1741,6 +1767,25 @@ static void migration_bitmap_sync(RAMState *rs) } } +static void migration_bitmap_sync_precopy(RAMState *rs) +{ + Error *local_err = NULL; + + /* + * The current notifier usage is just an optimization to migration, so we + * don't stop the normal migration process in the error case. + */ + if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) { + error_report_err(local_err); + } + + migration_bitmap_sync(rs); + + if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) { + error_report_err(local_err); + } +} + /** * save_zero_page_to_file: send the zero page to the file * @@ -3123,7 +3168,7 @@ static void ram_init_bitmaps(RAMState *rs) ram_list_init_bitmaps(); memory_global_dirty_log_start(); - migration_bitmap_sync(rs); + migration_bitmap_sync_precopy(rs); rcu_read_unlock(); qemu_mutex_unlock_ramlist(); @@ -3403,7 +3448,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) rcu_read_lock(); if (!migration_in_postcopy()) { - migration_bitmap_sync(rs); + migration_bitmap_sync_precopy(rs); } ram_control_before_iterate(f, RAM_CONTROL_FINISH); @@ -3452,7 +3497,7 @@ static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, remaining_size < max_size) { qemu_mutex_lock_iothread(); rcu_read_lock(); - migration_bitmap_sync(rs); + migration_bitmap_sync_precopy(rs); rcu_read_unlock(); qemu_mutex_unlock_iothread(); remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; diff --git a/migration/savevm.c b/migration/savevm.c index 013098581f..1415001d1c 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1088,6 +1088,7 @@ void qemu_savevm_state_header(QEMUFile *f) void qemu_savevm_state_setup(QEMUFile *f) { SaveStateEntry *se; + Error *local_err = NULL; int ret; trace_savevm_state_setup(); @@ -1109,6 +1110,10 @@ void qemu_savevm_state_setup(QEMUFile *f) break; } } + + if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) { + error_report_err(local_err); + } } int qemu_savevm_state_resume_prepare(MigrationState *s) @@ -1251,6 +1256,11 @@ int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only, SaveStateEntry *se; int ret; bool in_postcopy = migration_in_postcopy(); + Error *local_err = NULL; + + if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) { + error_report_err(local_err); + } trace_savevm_state_complete_precopy(); @@ -1383,6 +1393,11 @@ void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size, void qemu_savevm_state_cleanup(void) { SaveStateEntry *se; + Error *local_err = NULL; + + if (precopy_notify(PRECOPY_NOTIFY_CLEANUP, &local_err)) { + error_report_err(local_err); + } trace_savevm_state_cleanup(); QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { diff --git a/vl.c b/vl.c index 5be8cf4f11..4c5cc0d8ad 100644 --- a/vl.c +++ b/vl.c @@ -3039,6 +3039,7 @@ int main(int argc, char **argv, char **envp) module_call_init(MODULE_INIT_OPTS); runstate_init(); + precopy_infrastructure_init(); postcopy_infrastructure_init(); monitor_init_globals(); -- cgit v1.2.3 From 6eeb63f740874150d7b5921541948c29b920a21d Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 11 Dec 2018 16:24:52 +0800 Subject: migration/ram.c: add the free page optimization enable flag This patch adds the free page optimization enable flag, and a function to set this flag. When the free page optimization is enabled, not all the pages are needed to be sent in the bulk stage. Why using a new flag, instead of directly disabling ram_bulk_stage when the optimization is running? Thanks for Peter Xu's reminder that disabling ram_bulk_stage will affect the use of compression. Please see save_page_use_compression. When xbzrle and compression are used, if free page optimizaion causes the ram_bulk_stage to be disabled, save_page_use_compression will return false, which disables the use of compression. That is, if free page optimization avoids the sending of half of the guest pages, the other half of pages loses the benefits of compression in the meantime. Using a new flag to let migration_bitmap_find_dirty skip the free pages in the bulk stage will avoid the above issue. Signed-off-by: Wei Wang CC: Dr. David Alan Gilbert CC: Juan Quintela CC: Michael S. Tsirkin CC: Peter Xu Message-Id: <1544516693-5395-7-git-send-email-wei.w.wang@intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- include/migration/misc.h | 1 + migration/ram.c | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/include/migration/misc.h b/include/migration/misc.h index dc46d3354f..5cdbabd094 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -38,6 +38,7 @@ void precopy_infrastructure_init(void); void precopy_add_notifier(NotifierWithReturn *n); void precopy_remove_notifier(NotifierWithReturn *n); int precopy_notify(PrecopyNotifyReason reason, Error **errp); +void precopy_enable_free_page_optimization(void); void ram_mig_init(void); void qemu_guest_free_page_hint(void *addr, size_t len); diff --git a/migration/ram.c b/migration/ram.c index bee4fb3fd4..35bd6213e9 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -316,6 +316,8 @@ struct RAMState { uint32_t last_version; /* We are in the first round */ bool ram_bulk_stage; + /* The free page optimization is enabled */ + bool fpo_enabled; /* How many times we have dirty too many pages */ int dirty_rate_high_cnt; /* these variables are used for bitmap sync */ @@ -380,6 +382,15 @@ int precopy_notify(PrecopyNotifyReason reason, Error **errp) return notifier_with_return_list_notify(&precopy_notifier_list, &pnd); } +void precopy_enable_free_page_optimization(void) +{ + if (!ram_state) { + return; + } + + ram_state->fpo_enabled = true; +} + uint64_t ram_bytes_remaining(void) { return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) : @@ -1601,7 +1612,11 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, return size; } - if (rs->ram_bulk_stage && start > 0) { + /* + * When the free page optimization is enabled, we need to check the bitmap + * to send the non-free pages rather than all the pages in the bulk stage. + */ + if (!rs->fpo_enabled && rs->ram_bulk_stage && start > 0) { next = start + 1; } else { next = find_next_bit(bitmap, size, start); @@ -2651,6 +2666,7 @@ static void ram_state_reset(RAMState *rs) rs->last_page = 0; rs->last_version = ram_list.version; rs->ram_bulk_stage = true; + rs->fpo_enabled = false; } #define MAX_WAIT 50 /* ms, half buffered_file limit */ -- cgit v1.2.3 From c13c4153f76db23cac06a12044bf4dd346764059 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 11 Dec 2018 16:24:53 +0800 Subject: virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT The new feature enables the virtio-balloon device to receive hints of guest free pages from the free page vq. A notifier is registered to the migration precopy notifier chain. The notifier calls free_page_start after the migration thread syncs the dirty bitmap, so that the free page optimization starts to clear bits of free pages from the bitmap. It calls the free_page_stop before the migration thread syncs the bitmap, which is the end of the current round of ram save. The free_page_stop is also called to stop the optimization in the case when there is an error occurred in the process of ram saving. Note: balloon will report pages which were free at the time of this call. As the reporting happens asynchronously, dirty bit logging must be enabled before this free_page_start call is made. Guest reporting must be disabled before the migration dirty bitmap is synchronized. Signed-off-by: Wei Wang CC: Michael S. Tsirkin CC: Dr. David Alan Gilbert CC: Juan Quintela CC: Peter Xu Message-Id: <1544516693-5395-8-git-send-email-wei.w.wang@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Dr. David Alan Gilbert dgilbert: Dropped kernel header update, fixed up CMD_ID_* name change --- hw/virtio/virtio-balloon.c | 263 +++++++++++++++++++++++++++++++++++++ include/hw/virtio/virtio-balloon.h | 28 +++- 2 files changed, 290 insertions(+), 1 deletion(-) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index d3f2913a85..e3a65940ef 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -27,6 +27,7 @@ #include "qapi/visitor.h" #include "trace.h" #include "qemu/error-report.h" +#include "migration/misc.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" @@ -378,6 +379,184 @@ out: } } +static void virtio_balloon_handle_free_page_vq(VirtIODevice *vdev, + VirtQueue *vq) +{ + VirtIOBalloon *s = VIRTIO_BALLOON(vdev); + qemu_bh_schedule(s->free_page_bh); +} + +static bool get_free_page_hints(VirtIOBalloon *dev) +{ + VirtQueueElement *elem; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtQueue *vq = dev->free_page_vq; + + while (dev->block_iothread) { + qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock); + } + + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!elem) { + return false; + } + + if (elem->out_num) { + uint32_t id; + size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0, + &id, sizeof(id)); + virtqueue_push(vq, elem, size); + g_free(elem); + + virtio_tswap32s(vdev, &id); + if (unlikely(size != sizeof(id))) { + virtio_error(vdev, "received an incorrect cmd id"); + return false; + } + if (id == dev->free_page_report_cmd_id) { + dev->free_page_report_status = FREE_PAGE_REPORT_S_START; + } else { + /* + * Stop the optimization only when it has started. This + * avoids a stale stop sign for the previous command. + */ + if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) { + dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP; + } + } + } + + if (elem->in_num) { + if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) { + qemu_guest_free_page_hint(elem->in_sg[0].iov_base, + elem->in_sg[0].iov_len); + } + virtqueue_push(vq, elem, 1); + g_free(elem); + } + + return true; +} + +static void virtio_ballloon_get_free_page_hints(void *opaque) +{ + VirtIOBalloon *dev = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtQueue *vq = dev->free_page_vq; + bool continue_to_get_hints; + + do { + qemu_mutex_lock(&dev->free_page_lock); + virtio_queue_set_notification(vq, 0); + continue_to_get_hints = get_free_page_hints(dev); + qemu_mutex_unlock(&dev->free_page_lock); + virtio_notify(vdev, vq); + /* + * Start to poll the vq once the reporting started. Otherwise, continue + * only when there are entries on the vq, which need to be given back. + */ + } while (continue_to_get_hints || + dev->free_page_report_status == FREE_PAGE_REPORT_S_START); + virtio_queue_set_notification(vq, 1); +} + +static bool virtio_balloon_free_page_support(void *opaque) +{ + VirtIOBalloon *s = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(s); + + return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT); +} + +static void virtio_balloon_free_page_start(VirtIOBalloon *s) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(s); + + /* For the stop and copy phase, we don't need to start the optimization */ + if (!vdev->vm_running) { + return; + } + + if (s->free_page_report_cmd_id == UINT_MAX) { + s->free_page_report_cmd_id = + VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; + } else { + s->free_page_report_cmd_id++; + } + + s->free_page_report_status = FREE_PAGE_REPORT_S_REQUESTED; + virtio_notify_config(vdev); +} + +static void virtio_balloon_free_page_stop(VirtIOBalloon *s) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(s); + + if (s->free_page_report_status != FREE_PAGE_REPORT_S_STOP) { + /* + * The lock also guarantees us that the + * virtio_ballloon_get_free_page_hints exits after the + * free_page_report_status is set to S_STOP. + */ + qemu_mutex_lock(&s->free_page_lock); + /* + * The guest hasn't done the reporting, so host sends a notification + * to the guest to actively stop the reporting. + */ + s->free_page_report_status = FREE_PAGE_REPORT_S_STOP; + qemu_mutex_unlock(&s->free_page_lock); + virtio_notify_config(vdev); + } +} + +static void virtio_balloon_free_page_done(VirtIOBalloon *s) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(s); + + s->free_page_report_status = FREE_PAGE_REPORT_S_DONE; + virtio_notify_config(vdev); +} + +static int +virtio_balloon_free_page_report_notify(NotifierWithReturn *n, void *data) +{ + VirtIOBalloon *dev = container_of(n, VirtIOBalloon, + free_page_report_notify); + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + PrecopyNotifyData *pnd = data; + + if (!virtio_balloon_free_page_support(dev)) { + /* + * This is an optimization provided to migration, so just return 0 to + * have the normal migration process not affected when this feature is + * not supported. + */ + return 0; + } + + switch (pnd->reason) { + case PRECOPY_NOTIFY_SETUP: + precopy_enable_free_page_optimization(); + break; + case PRECOPY_NOTIFY_COMPLETE: + case PRECOPY_NOTIFY_CLEANUP: + case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC: + virtio_balloon_free_page_stop(dev); + break; + case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC: + if (vdev->vm_running) { + virtio_balloon_free_page_start(dev); + } else { + virtio_balloon_free_page_done(dev); + } + break; + default: + virtio_error(vdev, "%s: %d reason unknown", __func__, pnd->reason); + } + + return 0; +} + static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) { VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); @@ -386,6 +565,17 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) config.num_pages = cpu_to_le32(dev->num_pages); config.actual = cpu_to_le32(dev->actual); + if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) { + config.free_page_report_cmd_id = + cpu_to_le32(dev->free_page_report_cmd_id); + } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) { + config.free_page_report_cmd_id = + cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP); + } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) { + config.free_page_report_cmd_id = + cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE); + } + trace_virtio_balloon_get_config(config.num_pages, config.actual); memcpy(config_data, &config, sizeof(struct virtio_balloon_config)); } @@ -446,6 +636,7 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f, VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); f |= dev->host_features; virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ); + return f; } @@ -482,6 +673,18 @@ static int virtio_balloon_post_load_device(void *opaque, int version_id) return 0; } +static const VMStateDescription vmstate_virtio_balloon_free_page_report = { + .name = "virtio-balloon-device/free-page-report", + .version_id = 1, + .minimum_version_id = 1, + .needed = virtio_balloon_free_page_support, + .fields = (VMStateField[]) { + VMSTATE_UINT32(free_page_report_cmd_id, VirtIOBalloon), + VMSTATE_UINT32(free_page_report_status, VirtIOBalloon), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_virtio_balloon_device = { .name = "virtio-balloon-device", .version_id = 1, @@ -492,6 +695,10 @@ static const VMStateDescription vmstate_virtio_balloon_device = { VMSTATE_UINT32(actual, VirtIOBalloon), VMSTATE_END_OF_LIST() }, + .subsections = (const VMStateDescription * []) { + &vmstate_virtio_balloon_free_page_report, + NULL + } }; static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) @@ -516,6 +723,29 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output); s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats); + if (virtio_has_feature(s->host_features, + VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { + s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE, + virtio_balloon_handle_free_page_vq); + s->free_page_report_status = FREE_PAGE_REPORT_S_STOP; + s->free_page_report_cmd_id = + VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; + s->free_page_report_notify.notify = + virtio_balloon_free_page_report_notify; + precopy_add_notifier(&s->free_page_report_notify); + if (s->iothread) { + object_ref(OBJECT(s->iothread)); + s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), + virtio_ballloon_get_free_page_hints, s); + qemu_mutex_init(&s->free_page_lock); + qemu_cond_init(&s->free_page_cond); + s->block_iothread = false; + } else { + /* Simply disable this feature if the iothread wasn't created. */ + s->host_features &= ~(1 << VIRTIO_BALLOON_F_FREE_PAGE_HINT); + virtio_error(vdev, "iothread is missing"); + } + } reset_stats(s); } @@ -524,6 +754,11 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOBalloon *s = VIRTIO_BALLOON(dev); + if (virtio_balloon_free_page_support(s)) { + qemu_bh_delete(s->free_page_bh); + virtio_balloon_free_page_stop(s); + precopy_remove_notifier(&s->free_page_report_notify); + } balloon_stats_destroy_timer(s); qemu_remove_balloon_handler(s); virtio_cleanup(vdev); @@ -533,6 +768,10 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev) { VirtIOBalloon *s = VIRTIO_BALLOON(vdev); + if (virtio_balloon_free_page_support(s)) { + virtio_balloon_free_page_stop(s); + } + if (s->stats_vq_elem != NULL) { virtqueue_unpop(s->svq, s->stats_vq_elem, 0); g_free(s->stats_vq_elem); @@ -550,6 +789,26 @@ static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) * was stopped */ virtio_balloon_receive_stats(vdev, s->svq); } + + if (virtio_balloon_free_page_support(s)) { + /* + * The VM is woken up and the iothread was blocked, so signal it to + * continue. + */ + if (vdev->vm_running && s->block_iothread) { + qemu_mutex_lock(&s->free_page_lock); + s->block_iothread = false; + qemu_cond_signal(&s->free_page_cond); + qemu_mutex_unlock(&s->free_page_lock); + } + + /* The VM is stopped, block the iothread. */ + if (!vdev->vm_running) { + qemu_mutex_lock(&s->free_page_lock); + s->block_iothread = true; + qemu_mutex_unlock(&s->free_page_lock); + } + } } static void virtio_balloon_instance_init(Object *obj) @@ -578,6 +837,10 @@ static const VMStateDescription vmstate_virtio_balloon = { static Property virtio_balloon_properties[] = { DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features, VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false), + DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features, + VIRTIO_BALLOON_F_FREE_PAGE_HINT, false), + DEFINE_PROP_LINK("iothread", VirtIOBalloon, iothread, TYPE_IOTHREAD, + IOThread *), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h index 99dcd6d105..1afafb12f6 100644 --- a/include/hw/virtio/virtio-balloon.h +++ b/include/hw/virtio/virtio-balloon.h @@ -17,11 +17,14 @@ #include "standard-headers/linux/virtio_balloon.h" #include "hw/virtio/virtio.h" +#include "sysemu/iothread.h" #define TYPE_VIRTIO_BALLOON "virtio-balloon-device" #define VIRTIO_BALLOON(obj) \ OBJECT_CHECK(VirtIOBalloon, (obj), TYPE_VIRTIO_BALLOON) +#define VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN 0x80000000 + typedef struct virtio_balloon_stat VirtIOBalloonStat; typedef struct virtio_balloon_stat_modern { @@ -32,15 +35,38 @@ typedef struct virtio_balloon_stat_modern { typedef struct PartiallyBalloonedPage PartiallyBalloonedPage; +enum virtio_balloon_free_page_report_status { + FREE_PAGE_REPORT_S_STOP = 0, + FREE_PAGE_REPORT_S_REQUESTED = 1, + FREE_PAGE_REPORT_S_START = 2, + FREE_PAGE_REPORT_S_DONE = 3, +}; + typedef struct VirtIOBalloon { VirtIODevice parent_obj; - VirtQueue *ivq, *dvq, *svq; + VirtQueue *ivq, *dvq, *svq, *free_page_vq; + uint32_t free_page_report_status; uint32_t num_pages; uint32_t actual; + uint32_t free_page_report_cmd_id; uint64_t stats[VIRTIO_BALLOON_S_NR]; VirtQueueElement *stats_vq_elem; size_t stats_vq_offset; QEMUTimer *stats_timer; + IOThread *iothread; + QEMUBH *free_page_bh; + /* + * Lock to synchronize threads to access the free page reporting related + * fields (e.g. free_page_report_status). + */ + QemuMutex free_page_lock; + QemuCond free_page_cond; + /* + * Set to block iothread to continue reading free page hints as the VM is + * stopped. + */ + bool block_iothread; + NotifierWithReturn free_page_report_notify; int64_t stats_last_update; int64_t stats_poll_interval; uint32_t host_features; -- cgit v1.2.3 From b8b5734b092baf3c53f3f117595704ffe9469a75 Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Sun, 3 Mar 2019 22:50:15 +0800 Subject: Migration/colo.c: Fix double close bug when occur COLO failover In migration_incoming_state_destroy(void) will check the mis->to_src_file to double close the mis->to_src_file when occur COLO failover. Signed-off-by: Zhang Chen Reviewed-by: Dr. David Alan Gilbert Message-Id: <20190303145021.2962-2-chen.zhang@intel.com> Signed-off-by: Dr. David Alan Gilbert --- migration/colo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/migration/colo.c b/migration/colo.c index 398b239d1c..a916dc178c 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -872,6 +872,7 @@ out: /* Must be called after failover BH is completed */ if (mis->to_src_file) { qemu_fclose(mis->to_src_file); + mis->to_src_file = NULL; } migration_incoming_disable_colo(); -- cgit v1.2.3 From db00972922c875ce354a102c94699fe40612c90d Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Sun, 3 Mar 2019 22:50:17 +0800 Subject: Migration/colo.c: Make COLO node running after failover Delay to close COLO for auto start VM after failover. Signed-off-by: Zhang Chen Reviewed-by: Dr. David Alan Gilbert Message-Id: <20190303145021.2962-4-chen.zhang@intel.com> Signed-off-by: Dr. David Alan Gilbert --- migration/colo.c | 1 - migration/migration.c | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/migration/colo.c b/migration/colo.c index a916dc178c..5ba610dc01 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -874,7 +874,6 @@ out: qemu_fclose(mis->to_src_file); mis->to_src_file = NULL; } - migration_incoming_disable_colo(); rcu_unregister_thread(); return NULL; diff --git a/migration/migration.c b/migration/migration.c index 952d29243e..df6fd8e0e5 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -417,6 +417,9 @@ static void process_incoming_migration_bh(void *opaque) } else { runstate_set(RUN_STATE_PAUSED); } + } else if (migration_incoming_colo_enabled()) { + migration_incoming_disable_colo(); + vm_start(); } else { runstate_set(global_state_get_runstate()); } -- cgit v1.2.3 From b5922fc5891261153f1a0f20e814c620aabeb6ac Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Sun, 3 Mar 2019 22:50:19 +0800 Subject: qapi/migration.json: Remove a variable that doesn't exist in example Remove the "active" variable in example for query-colo-status. It is a doc bug from commit f56c0065 Signed-off-by: Zhang Chen Reviewed-by: Eric Blake Message-Id: <20190303145021.2962-6-chen.zhang@intel.com> Signed-off-by: Dr. David Alan Gilbert --- qapi/migration.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qapi/migration.json b/qapi/migration.json index 6bd7fd3f1a..5684733754 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -1393,7 +1393,7 @@ # Example: # # -> { "execute": "query-colo-status" } -# <- { "return": { "mode": "primary", "active": true, "reason": "request" } } +# <- { "return": { "mode": "primary", "reason": "request" } } # # Since: 3.1 ## -- cgit v1.2.3