From f548222c24342ca74689de7794f9006b43f86a54 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 3 May 2018 16:06:11 +0800 Subject: migration: introduce decompress-error-check QEMU 3.0 enables strict check for compression & decompression to make the migration more robust, that depends on the source to fix the internal design which triggers the unexpected error conditions To make it work for migrating old version QEMU to 2.13 QEMU, we introduce this parameter to disable the error check on the destination which is the default behavior of the machine type which is older than 2.13, alternately, the strict check can be enabled explicitly as followings: -M pc-q35-2.11 -global migration.decompress-error-check=true Signed-off-by: Xiao Guangrong Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/migration.c | 4 ++++ migration/migration.h | 7 +++++++ migration/ram.c | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'migration') diff --git a/migration/migration.c b/migration/migration.c index 05aec2c905..a5384865ff 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2971,6 +2971,8 @@ void migration_global_dump(Monitor *mon) ms->send_configuration ? "on" : "off"); monitor_printf(mon, "send-section-footer: %s\n", ms->send_section_footer ? "on" : "off"); + monitor_printf(mon, "decompress-error-check: %s\n", + ms->decompress_error_check ? "on" : "off"); } #define DEFINE_PROP_MIG_CAP(name, x) \ @@ -2984,6 +2986,8 @@ static Property migration_properties[] = { send_configuration, true), DEFINE_PROP_BOOL("send-section-footer", MigrationState, send_section_footer, true), + DEFINE_PROP_BOOL("decompress-error-check", MigrationState, + decompress_error_check, true), /* Migration parameters */ DEFINE_PROP_UINT8("x-compress-level", MigrationState, diff --git a/migration/migration.h b/migration/migration.h index 8f0c82159b..5af57d616c 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -212,6 +212,13 @@ struct MigrationState /* Needed by postcopy-pause state */ QemuSemaphore postcopy_pause_sem; QemuSemaphore postcopy_pause_rp_sem; + /* + * Whether we abort the migration if decompression errors are + * detected at the destination. It is left at false for qemu + * older than 3.0, since only newer qemu sends streams that + * do not trigger spurious decompression errors. + */ + bool decompress_error_check; }; void migrate_set_state(int *state, int old_state, int new_state); diff --git a/migration/ram.c b/migration/ram.c index c53e8369a3..090187ca04 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2881,7 +2881,7 @@ static void *do_data_decompress(void *opaque) ret = qemu_uncompress_data(¶m->stream, des, pagesize, param->compbuf, len); - if (ret < 0) { + if (ret < 0 && migrate_get_current()->decompress_error_check) { error_report("decompress data failed"); qemu_file_set_error(decomp_file, ret); } -- cgit v1.2.3 From b895de502717b83b4e5f089df617cb23530c4d2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 14 May 2018 08:57:00 +0200 Subject: migration: discard non-migratable RAMBlocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the POWER9 processor, the XIVE interrupt controller can control interrupt sources using MMIO to trigger events, to EOI or to turn off the sources. Priority management and interrupt acknowledgment is also controlled by MMIO in the presenter sub-engine. 
These MMIO regions are exposed to guests in QEMU with a set of 'ram device' memory mappings, similarly to VFIO, and the VMAs are populated dynamically with the appropriate pages using a fault handler. But, these regions are an issue for migration. We need to discard the associated RAMBlocks from the RAM state on the source VM and let the destination VM rebuild the memory mappings on the new host in the post_load() operation just before resuming the system. To achieve this goal, the following introduces a new RAMBlock flag RAM_MIGRATABLE which is updated in the vmstate_register_ram() and vmstate_unregister_ram() routines. This flag is then used by the migration to identify RAMBlocks to discard on the source. Some checks are also performed on the destination to make sure nothing invalid was sent. This change impacts the boston, malta and jazz mips boards for which migration compatibility is broken. Signed-off-by: CĂ©dric Le Goater Reviewed-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Juan Quintela --- migration/postcopy-ram.c | 12 ++++++------ migration/ram.c | 46 ++++++++++++++++++++++++++++++++++------------ migration/savevm.c | 2 ++ 3 files changed, 42 insertions(+), 18 deletions(-) (limited to 'migration') diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 658b750a8e..48e51556a7 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -374,7 +374,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) } /* We don't support postcopy with shared RAM yet */ - if (qemu_ram_foreach_block(test_ramblock_postcopiable, NULL)) { + if (qemu_ram_foreach_migratable_block(test_ramblock_postcopiable, NULL)) { goto out; } @@ -502,7 +502,7 @@ static int cleanup_range(const char *block_name, void *host_addr, */ int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) { - if (qemu_ram_foreach_block(init_range, NULL)) { + if (qemu_ram_foreach_migratable_block(init_range, NULL)) { return -1; } @@ -524,7 +524,7 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) return -1; } - if (qemu_ram_foreach_block(cleanup_range, mis)) { + if (qemu_ram_foreach_migratable_block(cleanup_range, mis)) { return -1; } /* Let the fault thread quit */ @@ -593,7 +593,7 @@ static int nhp_range(const char *block_name, void *host_addr, */ int postcopy_ram_prepare_discard(MigrationIncomingState *mis) { - if (qemu_ram_foreach_block(nhp_range, mis)) { + if (qemu_ram_foreach_migratable_block(nhp_range, mis)) { return -1; } @@ -604,7 +604,7 @@ int postcopy_ram_prepare_discard(MigrationIncomingState *mis) /* * Mark the given area of RAM as requiring notification to unwritten areas - * Used as a callback on qemu_ram_foreach_block. + * Used as a callback on qemu_ram_foreach_migratable_block. * host_addr: Base of area to mark * offset: Offset in the whole ram arena * length: Length of the section @@ -1099,7 +1099,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) mis->have_fault_thread = true; /* Mark so that we get notified of accesses to unwritten areas */ - if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) { + if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) { return -1; } diff --git a/migration/ram.c b/migration/ram.c index 090187ca04..290b281446 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -157,11 +157,16 @@ out: return ret; } +/* Should be holding either ram_list.mutex, or the RCU lock. 
*/ +#define RAMBLOCK_FOREACH_MIGRATABLE(block) \ + RAMBLOCK_FOREACH(block) \ + if (!qemu_ram_is_migratable(block)) {} else + static void ramblock_recv_map_init(void) { RAMBlock *rb; - RAMBLOCK_FOREACH(rb) { + RAMBLOCK_FOREACH_MIGRATABLE(rb) { assert(!rb->receivedmap); rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits()); } @@ -1078,6 +1083,10 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, unsigned long *bitmap = rb->bmap; unsigned long next; + if (!qemu_ram_is_migratable(rb)) { + return size; + } + if (rs->ram_bulk_stage && start > 0) { next = start + 1; } else { @@ -1123,7 +1132,7 @@ uint64_t ram_pagesize_summary(void) RAMBlock *block; uint64_t summary = 0; - RAMBLOCK_FOREACH(block) { + RAMBLOCK_FOREACH_MIGRATABLE(block) { summary |= block->page_size; } @@ -1147,7 +1156,7 @@ static void migration_bitmap_sync(RAMState *rs) qemu_mutex_lock(&rs->bitmap_mutex); rcu_read_lock(); - RAMBLOCK_FOREACH(block) { + RAMBLOCK_FOREACH_MIGRATABLE(block) { migration_bitmap_sync_range(rs, block, 0, block->used_length); } rcu_read_unlock(); @@ -1786,6 +1795,11 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, size_t pagesize_bits = qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS; + if (!qemu_ram_is_migratable(pss->block)) { + error_report("block %s should not be migrated !", pss->block->idstr); + return 0; + } + do { /* Check the pages is dirty and if it is send it */ if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { @@ -1884,7 +1898,7 @@ uint64_t ram_bytes_total(void) uint64_t total = 0; rcu_read_lock(); - RAMBLOCK_FOREACH(block) { + RAMBLOCK_FOREACH_MIGRATABLE(block) { total += block->used_length; } rcu_read_unlock(); @@ -1939,7 +1953,7 @@ static void ram_save_cleanup(void *opaque) */ memory_global_dirty_log_stop(); - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + RAMBLOCK_FOREACH_MIGRATABLE(block) { g_free(block->bmap); block->bmap = NULL; g_free(block->unsentmap); @@ -2002,7 +2016,7 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms) { struct RAMBlock *block; - RAMBLOCK_FOREACH(block) { + RAMBLOCK_FOREACH_MIGRATABLE(block) { unsigned long *bitmap = block->bmap; unsigned long range = block->used_length >> TARGET_PAGE_BITS; unsigned long run_start = find_next_zero_bit(bitmap, range, 0); @@ -2080,7 +2094,7 @@ static int postcopy_each_ram_send_discard(MigrationState *ms) struct RAMBlock *block; int ret; - RAMBLOCK_FOREACH(block) { + RAMBLOCK_FOREACH_MIGRATABLE(block) { PostcopyDiscardState *pds = postcopy_discard_send_init(ms, block->idstr); @@ -2288,7 +2302,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) rs->last_sent_block = NULL; rs->last_page = 0; - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + RAMBLOCK_FOREACH_MIGRATABLE(block) { unsigned long pages = block->used_length >> TARGET_PAGE_BITS; unsigned long *bitmap = block->bmap; unsigned long *unsentmap = block->unsentmap; @@ -2447,7 +2461,7 @@ static void ram_list_init_bitmaps(void) /* Skip setting bitmap if there is no RAM */ if (ram_bytes_total()) { - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + RAMBLOCK_FOREACH_MIGRATABLE(block) { pages = block->max_length >> TARGET_PAGE_BITS; block->bmap = bitmap_new(pages); bitmap_set(block->bmap, 0, pages); @@ -2563,7 +2577,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); - RAMBLOCK_FOREACH(block) { + RAMBLOCK_FOREACH_MIGRATABLE(block) { qemu_put_byte(f, strlen(block->idstr)); qemu_put_buffer(f, (uint8_t 
*)block->idstr, strlen(block->idstr)); qemu_put_be64(f, block->used_length); @@ -2807,6 +2821,11 @@ static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags) return NULL; } + if (!qemu_ram_is_migratable(block)) { + error_report("block %s should not be migrated !", id); + return NULL; + } + return block; } @@ -3049,7 +3068,7 @@ static int ram_load_cleanup(void *opaque) xbzrle_load_cleanup(); compress_threads_load_cleanup(); - RAMBLOCK_FOREACH(rb) { + RAMBLOCK_FOREACH_MIGRATABLE(rb) { g_free(rb->receivedmap); rb->receivedmap = NULL; } @@ -3311,7 +3330,10 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) length = qemu_get_be64(f); block = qemu_ram_block_by_name(id); - if (block) { + if (block && !qemu_ram_is_migratable(block)) { + error_report("block %s should not be migrated !", id); + ret = -EINVAL; + } else if (block) { if (length != block->used_length) { Error *local_err = NULL; diff --git a/migration/savevm.c b/migration/savevm.c index 4251125831..a68a9ac635 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2688,11 +2688,13 @@ void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev) { qemu_ram_set_idstr(mr->ram_block, memory_region_name(mr), dev); + qemu_ram_set_migratable(mr->ram_block); } void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev) { qemu_ram_unset_idstr(mr->ram_block); + qemu_ram_unset_migratable(mr->ram_block); } void vmstate_register_ram_global(MemoryRegion *mr) -- cgit v1.2.3 From 0f073f44df109ea0910d67caede70dec95956ff6 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 16 Apr 2018 18:09:30 +0100 Subject: migration: Don't activate block devices if using -S Activating the block devices causes the locks to be taken on the backing file. If we're running with -S and the destination libvirt hasn't started the destination with 'cont', it's expecting the locks are still untaken. Don't activate the block devices if we're not going to autostart the VM; 'cont' already will do that anyway. This change is tied to the new migration capability 'late-block-activate' that defaults to off, keeping the old behaviour by default. bz: https://bugzilla.redhat.com/show_bug.cgi?id=1560854 Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/migration.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) (limited to 'migration') diff --git a/migration/migration.c b/migration/migration.c index a5384865ff..1e99ec9b7e 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -202,6 +202,16 @@ static void migrate_generate_event(int new_state) } } +static bool migrate_late_block_activate(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->enabled_capabilities[ + MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; +} + /* * Called on -incoming with a defer: uri. * The migration can be started later after any parameters have been @@ -311,13 +321,23 @@ static void process_incoming_migration_bh(void *opaque) Error *local_err = NULL; MigrationIncomingState *mis = opaque; - /* Make sure all file formats flush their mutable metadata. - * If we get an error here, just don't restart the VM yet. */ - bdrv_invalidate_cache_all(&local_err); - if (local_err) { - error_report_err(local_err); - local_err = NULL; - autostart = false; + /* If capability late_block_activate is set: + * Only fire up the block code now if we're going to restart the + * VM, else 'cont' will do it. 
+ * This causes file locking to happen; so we don't want it to happen + * unless we really are starting the VM. + */ + if (!migrate_late_block_activate() || + (autostart && (!global_state_received() || + global_state_get_runstate() == RUN_STATE_RUNNING))) { + /* Make sure all file formats flush their mutable metadata. + * If we get an error here, just don't restart the VM yet. */ + bdrv_invalidate_cache_all(&local_err); + if (local_err) { + error_report_err(local_err); + local_err = NULL; + autostart = false; + } } /* -- cgit v1.2.3 From f38f6d4155c0c5a3e96d81183362a40e2cc09b4c Mon Sep 17 00:00:00 2001 From: Lidong Chen Date: Wed, 30 May 2018 17:43:27 +0800 Subject: migration: remove unnecessary variables len in QIOChannelRDMA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because qio_channel_rdma_writev and qio_channel_rdma_readv maybe invoked by different threads concurrently, this patch removes unnecessary variables len in QIOChannelRDMA and use local variable instead. Signed-off-by: Lidong Chen Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Daniel P. BerrangĂ© Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Signed-off-by: Lidong Chen --- migration/rdma.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'migration') diff --git a/migration/rdma.c b/migration/rdma.c index 7d233b0820..60779221b1 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -400,7 +400,6 @@ struct QIOChannelRDMA { QIOChannel parent; RDMAContext *rdma; QEMUFile *file; - size_t len; bool blocking; /* XXX we don't actually honour this yet */ }; @@ -2608,6 +2607,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, int ret; ssize_t done = 0; size_t i; + size_t len = 0; CHECK_ERROR_STATE(); @@ -2627,10 +2627,10 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, while (remaining) { RDMAControlHeader head; - rioc->len = MIN(remaining, RDMA_SEND_INCREMENT); - remaining -= rioc->len; + len = MIN(remaining, RDMA_SEND_INCREMENT); + remaining -= len; - head.len = rioc->len; + head.len = len; head.type = RDMA_CONTROL_QEMU_FILE; ret = qemu_rdma_exchange_send(rdma, &head, data, NULL, NULL, NULL); @@ -2640,8 +2640,8 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, return ret; } - data += rioc->len; - done += rioc->len; + data += len; + done += len; } } @@ -2736,8 +2736,7 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, } } } - rioc->len = done; - return rioc->len; + return done; } /* -- cgit v1.2.3 From c5e76115ccb4979cec795a8ae38becd07c2fde9f Mon Sep 17 00:00:00 2001 From: Lidong Chen Date: Wed, 30 May 2018 17:43:31 +0800 Subject: migration: not wait RDMA_CM_EVENT_DISCONNECTED event after rdma_disconnect When cancel migration during RDMA precopy, the source qemu main thread hangs sometime. 
The backtrace is: (gdb) bt #0 0x00007f249eabd43d in write () from /lib64/libpthread.so.0 #1 0x00007f24a1ce98e4 in rdma_get_cm_event (channel=0x4675d10, event=0x7ffe2f643dd0) at src/cma.c:2189 #2 0x00000000007b6166 in qemu_rdma_cleanup (rdma=0x6784000) at migration/rdma.c:2296 #3 0x00000000007b7cae in qio_channel_rdma_close (ioc=0x3bfcc30, errp=0x0) at migration/rdma.c:2999 #4 0x00000000008db60e in qio_channel_close (ioc=0x3bfcc30, errp=0x0) at io/channel.c:273 #5 0x00000000007a8765 in channel_close (opaque=0x3bfcc30) at migration/qemu-file-channel.c:98 #6 0x00000000007a71f9 in qemu_fclose (f=0x527c000) at migration/qemu-file.c:334 #7 0x0000000000795b96 in migrate_fd_cleanup (opaque=0x3b46280) at migration/migration.c:1162 #8 0x000000000093a71b in aio_bh_call (bh=0x3db7a20) at util/async.c:90 #9 0x000000000093a7b2 in aio_bh_poll (ctx=0x3b121c0) at util/async.c:118 #10 0x000000000093f2ad in aio_dispatch (ctx=0x3b121c0) at util/aio-posix.c:436 #11 0x000000000093ab41 in aio_ctx_dispatch (source=0x3b121c0, callback=0x0, user_data=0x0) at util/async.c:261 #12 0x00007f249f73c7aa in g_main_context_dispatch () from /lib64/libglib-2.0.so.0 #13 0x000000000093dc5e in glib_pollfds_poll () at util/main-loop.c:215 #14 0x000000000093dd4e in os_host_main_loop_wait (timeout=28000000) at util/main-loop.c:263 #15 0x000000000093de05 in main_loop_wait (nonblocking=0) at util/main-loop.c:522 #16 0x00000000005bc6a5 in main_loop () at vl.c:1944 #17 0x00000000005c39b5 in main (argc=56, argv=0x7ffe2f6443f8, envp=0x3ad0030) at vl.c:4752 It does not get the RDMA_CM_EVENT_DISCONNECTED event after rdma_disconnect sometime. According to IB Spec once active side send DREQ message, it should wait for DREP message and only once it arrived it should trigger a DISCONNECT event. DREP message can be dropped due to network issues. For that case the spec defines a DREP_timeout state in the CM state machine, if the DREP is dropped we should get a timeout and a TIMEWAIT_EXIT event will be trigger. Unfortunately the current kernel CM implementation doesn't include the DREP_timeout state and in above scenario we will not get DISCONNECT or TIMEWAIT_EXIT events. So it should not invoke rdma_get_cm_event which may hang forever, and the event channel is also destroyed in qemu_rdma_cleanup. Signed-off-by: Lidong Chen Reviewed-by: Juan Quintela Reviewed-by: Dr. 
David Alan Gilbert Signed-off-by: Juan Quintela --- migration/rdma.c | 12 ++---------- migration/trace-events | 1 - 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'migration') diff --git a/migration/rdma.c b/migration/rdma.c index 60779221b1..05aee3d591 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -2267,8 +2267,7 @@ static int qemu_rdma_write(QEMUFile *f, RDMAContext *rdma, static void qemu_rdma_cleanup(RDMAContext *rdma) { - struct rdma_cm_event *cm_event; - int ret, idx; + int idx; if (rdma->cm_id && rdma->connected) { if ((rdma->error_state || @@ -2282,14 +2281,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) qemu_rdma_post_send_control(rdma, NULL, &head); } - ret = rdma_disconnect(rdma->cm_id); - if (!ret) { - trace_qemu_rdma_cleanup_waiting_for_disconnect(); - ret = rdma_get_cm_event(rdma->channel, &cm_event); - if (!ret) { - rdma_ack_cm_event(cm_event); - } - } + rdma_disconnect(rdma->cm_id); trace_qemu_rdma_cleanup_disconnect(); rdma->connected = false; } diff --git a/migration/trace-events b/migration/trace-events index 3c798ddd11..4a768eaaeb 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -146,7 +146,6 @@ qemu_rdma_accept_pin_state(bool pin) "%d" qemu_rdma_accept_pin_verbsc(void *verbs) "Verbs context after listen: %p" qemu_rdma_block_for_wrid_miss(const char *wcompstr, int wcomp, const char *gcompstr, uint64_t req) "A Wanted wrid %s (%d) but got %s (%" PRIu64 ")" qemu_rdma_cleanup_disconnect(void) "" -qemu_rdma_cleanup_waiting_for_disconnect(void) "" qemu_rdma_close(void) "" qemu_rdma_connect_pin_all_requested(void) "" qemu_rdma_connect_pin_all_outcome(bool pin) "%d" -- cgit v1.2.3
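For readers unfamiliar with librdmacm, the hang described in the last patch occurs because rdma_get_cm_event() blocks reading from the CM event channel's file descriptor until an event arrives; if the DREP is dropped and the kernel never raises a DISCONNECTED or TIMEWAIT_EXIT event, the call never returns. The sketch below is an illustration only, not part of the patch: it shows how a caller could bound such a wait by polling the channel's fd first, assuming the standard struct rdma_event_channel layout from <rdma/rdma_cma.h>.

#include <errno.h>
#include <poll.h>
#include <rdma/rdma_cma.h>

/*
 * Illustrative helper (not QEMU code): wait at most timeout_ms for a CM
 * event instead of blocking forever in rdma_get_cm_event().
 * Returns 0 and fills *event on success, -ETIMEDOUT if nothing arrived,
 * or a negative errno on failure.
 */
static int wait_cm_event_timeout(struct rdma_event_channel *channel,
                                 struct rdma_cm_event **event,
                                 int timeout_ms)
{
    struct pollfd pfd = {
        .fd = channel->fd,      /* the event channel is an ordinary readable fd */
        .events = POLLIN,
    };
    int ret = poll(&pfd, 1, timeout_ms);

    if (ret == 0) {
        return -ETIMEDOUT;      /* e.g. DREP lost and no TIMEWAIT_EXIT raised */
    }
    if (ret < 0) {
        return -errno;
    }
    /* An event is pending, so this call will not block. */
    return rdma_get_cm_event(channel, event) ? -errno : 0;
}

The patch itself takes a simpler route: since qemu_rdma_cleanup() destroys the event channel right after disconnecting, it stops waiting for the DISCONNECTED event altogether rather than waiting with a timeout.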