diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2019-09-26 14:23:58 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2019-09-26 14:23:58 +0100 |
commit | eb13d1cf4a0478fc29f80abfbac8209479325f35 (patch) | |
tree | e556d4854ffdf592440878cd8b862c3752f2759e | |
parent | d4e536f336d3d26c9fafa2a2549aaa0b014f5b6b (diff) | |
parent | 3748fef9b95a9bc1602f3c4ed2a329d8ef47e63c (diff) |
Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20190925a' into staging
Migration pull 2019-09-25
me: test fixes (should stop hangs in postcopy tests).
me: An RDMA cleanup hang fix
Wei: Tidy-ups around postcopy
Marc-Andre: mem leak fix
# gpg: Signature made Wed 25 Sep 2019 15:59:41 BST
# gpg: using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full]
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7
* remotes/dgilbert/tags/pull-migration-20190925a:
migration/postcopy: Recognise the recovery states as 'in_postcopy'
tests/migration/postcopy: trim migration bandwidth
tests/migration: Fail on unexpected migration states
migration/rdma.c: Swap synchronize_rcu for call_rcu
migration/rdma: Don't moan about disconnects at the end
migration: remove sent parameter in get_queued_page_not_dirty
migration/postcopy: unsentmap is not necessary for postcopy
migration/postcopy: not necessary to do discard when canonicalizing bitmap
migration: fix vmdesc leak on vmstate_save() error
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | include/exec/ram_addr.h | 6 | ||||
-rw-r--r-- | migration/migration.c | 9 | ||||
-rw-r--r-- | migration/qjson.h | 2 | ||||
-rw-r--r-- | migration/ram.c | 94 | ||||
-rw-r--r-- | migration/rdma.c | 51 | ||||
-rw-r--r-- | migration/savevm.c | 3 | ||||
-rw-r--r-- | migration/trace-events | 2 | ||||
-rw-r--r-- | tests/migration-test.c | 25 |
8 files changed, 80 insertions, 112 deletions
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index a327a80cfe..e96e621de5 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -44,12 +44,6 @@ struct RAMBlock { size_t page_size; /* dirty bitmap used during migration */ unsigned long *bmap; - /* bitmap of pages that haven't been sent even once - * only maintained and used in postcopy at the moment - * where it's used to send the dirtymap at the start - * of the postcopy phase - */ - unsigned long *unsentmap; /* bitmap of already received pages in postcopy */ unsigned long *receivedmap; diff --git a/migration/migration.c b/migration/migration.c index 01863a95f5..5f7e4d15e9 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1659,7 +1659,14 @@ bool migration_in_postcopy(void) { MigrationState *s = migrate_get_current(); - return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); + switch (s->state) { + case MIGRATION_STATUS_POSTCOPY_ACTIVE: + case MIGRATION_STATUS_POSTCOPY_PAUSED: + case MIGRATION_STATUS_POSTCOPY_RECOVER: + return true; + default: + return false; + } } bool migration_in_postcopy_after_devices(MigrationState *s) diff --git a/migration/qjson.h b/migration/qjson.h index 41664f2d71..1786bb5864 100644 --- a/migration/qjson.h +++ b/migration/qjson.h @@ -24,4 +24,6 @@ void json_start_object(QJSON *json, const char *name); const char *qjson_get_str(QJSON *json); void qjson_finish(QJSON *json); +G_DEFINE_AUTOPTR_CLEANUP_FUNC(QJSON, qjson_destroy) + #endif /* QEMU_QJSON_H */ diff --git a/migration/ram.c b/migration/ram.c index 01df326767..22423f08cd 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2348,7 +2348,7 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) dirty = test_bit(page, block->bmap); if (!dirty) { trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset, - page, test_bit(page, block->unsentmap)); + page); } else { trace_get_queued_page(block->idstr, (uint64_t)offset, page); } @@ -2619,10 +2619,6 @@ static int 
ram_save_host_page(RAMState *rs, PageSearchStatus *pss, } pages += tmppages; - if (pss->block->unsentmap) { - clear_bit(pss->page, pss->block->unsentmap); - } - pss->page++; } while ((pss->page & (pagesize_bits - 1)) && offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); @@ -2776,8 +2772,6 @@ static void ram_save_cleanup(void *opaque) block->clear_bmap = NULL; g_free(block->bmap); block->bmap = NULL; - g_free(block->unsentmap); - block->unsentmap = NULL; } xbzrle_cleanup(); @@ -2857,8 +2851,6 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms) * Returns zero on success * * Callback from postcopy_each_ram_send_discard for each RAMBlock - * Note: At this point the 'unsentmap' is the processed bitmap combined - * with the dirtymap; so a '1' means it's either dirty or unsent. * * @ms: current migration state * @block: RAMBlock to discard @@ -2867,17 +2859,17 @@ static int postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block) { unsigned long end = block->used_length >> TARGET_PAGE_BITS; unsigned long current; - unsigned long *unsentmap = block->unsentmap; + unsigned long *bitmap = block->bmap; for (current = 0; current < end; ) { - unsigned long one = find_next_bit(unsentmap, end, current); + unsigned long one = find_next_bit(bitmap, end, current); unsigned long zero, discard_length; if (one >= end) { break; } - zero = find_next_zero_bit(unsentmap, end, one + 1); + zero = find_next_zero_bit(bitmap, end, one + 1); if (zero >= end) { discard_length = end - one; @@ -2928,7 +2920,7 @@ static int postcopy_each_ram_send_discard(MigrationState *ms) } /** - * postcopy_chunk_hostpages_pass: canocalize bitmap in hostpages + * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages * * Helper for postcopy_chunk_hostpages; it's called twice to * canonicalize the two bitmaps, that are similar, but one is @@ -2938,16 +2930,12 @@ static int postcopy_each_ram_send_discard(MigrationState *ms) * clean, not a mix. 
This function canonicalizes the bitmaps. * * @ms: current migration state - * @unsent_pass: if true we need to canonicalize partially unsent host pages - * otherwise we need to canonicalize partially dirty host pages * @block: block that contains the page we want to canonicalize */ -static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, - RAMBlock *block) +static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block) { RAMState *rs = ram_state; unsigned long *bitmap = block->bmap; - unsigned long *unsentmap = block->unsentmap; unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE; unsigned long pages = block->used_length >> TARGET_PAGE_BITS; unsigned long run_start; @@ -2957,13 +2945,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, return; } - if (unsent_pass) { - /* Find a sent page */ - run_start = find_next_zero_bit(unsentmap, pages, 0); - } else { - /* Find a dirty page */ - run_start = find_next_bit(bitmap, pages, 0); - } + /* Find a dirty page */ + run_start = find_next_bit(bitmap, pages, 0); while (run_start < pages) { @@ -2973,11 +2956,7 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, */ if (QEMU_IS_ALIGNED(run_start, host_ratio)) { /* Find the end of this run */ - if (unsent_pass) { - run_start = find_next_bit(unsentmap, pages, run_start + 1); - } else { - run_start = find_next_zero_bit(bitmap, pages, run_start + 1); - } + run_start = find_next_zero_bit(bitmap, pages, run_start + 1); /* * If the end isn't at the start of a host page, then the * run doesn't finish at the end of a host page @@ -2991,24 +2970,9 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, host_ratio); run_start = QEMU_ALIGN_UP(run_start, host_ratio); - /* Tell the destination to discard this page */ - if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) { - /* For the unsent_pass we: - * discard partially sent pages - * For the 
!unsent_pass (dirty) we: - * discard partially dirty pages that were sent - * (any partially sent pages were already discarded - * by the previous unsent_pass) - */ - postcopy_discard_send_range(ms, fixup_start_addr, host_ratio); - } - /* Clean up the bitmap */ for (page = fixup_start_addr; page < fixup_start_addr + host_ratio; page++) { - /* All pages in this host page are now not sent */ - set_bit(page, unsentmap); - /* * Remark them as dirty, updating the count for any pages * that weren't previously dirty. @@ -3017,13 +2981,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, } } - if (unsent_pass) { - /* Find the next sent page for the next iteration */ - run_start = find_next_zero_bit(unsentmap, pages, run_start); - } else { - /* Find the next dirty page for the next iteration */ - run_start = find_next_bit(bitmap, pages, run_start); - } + /* Find the next dirty page for the next iteration */ + run_start = find_next_bit(bitmap, pages, run_start); } } @@ -3045,13 +3004,10 @@ static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block) { postcopy_discard_send_init(ms, block->idstr); - /* First pass: Discard all partially sent host pages */ - postcopy_chunk_hostpages_pass(ms, true, block); /* - * Second pass: Ensure that all partially dirty host pages are made - * fully dirty. + * Ensure that all partially dirty host pages are made fully dirty. */ - postcopy_chunk_hostpages_pass(ms, false, block); + postcopy_chunk_hostpages_pass(ms, block); postcopy_discard_send_finish(ms); return 0; @@ -3089,19 +3045,6 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) rs->last_page = 0; RAMBLOCK_FOREACH_NOT_IGNORED(block) { - unsigned long pages = block->used_length >> TARGET_PAGE_BITS; - unsigned long *bitmap = block->bmap; - unsigned long *unsentmap = block->unsentmap; - - if (!unsentmap) { - /* We don't have a safe way to resize the sentmap, so - * if the bitmap was resized it will be NULL at this - * point. 
- */ - error_report("migration ram resized during precopy phase"); - rcu_read_unlock(); - return -EINVAL; - } /* Deal with TPS != HPS and huge pages */ ret = postcopy_chunk_hostpages(ms, block); if (ret) { @@ -3109,12 +3052,9 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) return ret; } - /* - * Update the unsentmap to be unsentmap = unsentmap | dirty - */ - bitmap_or(unsentmap, unsentmap, bitmap, pages); #ifdef DEBUG_POSTCOPY - ram_debug_dump_bitmap(unsentmap, true, pages); + ram_debug_dump_bitmap(block->bmap, true, + block->used_length >> TARGET_PAGE_BITS); #endif } trace_ram_postcopy_send_discard_bitmap(); @@ -3282,10 +3222,6 @@ static void ram_list_init_bitmaps(void) bitmap_set(block->bmap, 0, pages); block->clear_bmap_shift = shift; block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift)); - if (migrate_postcopy_ram()) { - block->unsentmap = bitmap_new(pages); - bitmap_set(block->unsentmap, 0, pages); - } } } } diff --git a/migration/rdma.c b/migration/rdma.c index 78e6b72bac..4c74e88a37 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -3017,11 +3017,35 @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc, } } +struct rdma_close_rcu { + struct rcu_head rcu; + RDMAContext *rdmain; + RDMAContext *rdmaout; +}; + +/* callback from qio_channel_rdma_close via call_rcu */ +static void qio_channel_rdma_close_rcu(struct rdma_close_rcu *rcu) +{ + if (rcu->rdmain) { + qemu_rdma_cleanup(rcu->rdmain); + } + + if (rcu->rdmaout) { + qemu_rdma_cleanup(rcu->rdmaout); + } + + g_free(rcu->rdmain); + g_free(rcu->rdmaout); + g_free(rcu); +} + static int qio_channel_rdma_close(QIOChannel *ioc, Error **errp) { QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); RDMAContext *rdmain, *rdmaout; + struct rdma_close_rcu *rcu = g_new(struct rdma_close_rcu, 1); + trace_qemu_rdma_close(); rdmain = rioc->rdmain; @@ -3034,18 +3058,9 @@ static int qio_channel_rdma_close(QIOChannel *ioc, atomic_rcu_set(&rioc->rdmaout, NULL); } - synchronize_rcu(); - - if (rdmain) 
{ - qemu_rdma_cleanup(rdmain); - } - - if (rdmaout) { - qemu_rdma_cleanup(rdmaout); - } - - g_free(rdmain); - g_free(rdmaout); + rcu->rdmain = rdmain; + rcu->rdmaout = rdmaout; + call_rcu(rcu, qio_channel_rdma_close_rcu, rcu); return 0; } @@ -3253,10 +3268,14 @@ static void rdma_cm_poll_handler(void *opaque) if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED || cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { - error_report("receive cm event, cm event is %d", cm_event->event); - rdma->error_state = -EPIPE; - if (rdma->return_path) { - rdma->return_path->error_state = -EPIPE; + if (!rdma->error_state && + migration_incoming_get_current()->state != + MIGRATION_STATUS_COMPLETED) { + error_report("receive cm event, cm event is %d", cm_event->event); + rdma->error_state = -EPIPE; + if (rdma->return_path) { + rdma->return_path->error_state = -EPIPE; + } } if (mis->migration_incoming_co) { diff --git a/migration/savevm.c b/migration/savevm.c index ee06f91d42..bb9462a54d 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1314,7 +1314,7 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, bool in_postcopy, bool inactivate_disks) { - QJSON *vmdesc; + g_autoptr(QJSON) vmdesc = NULL; int vmdesc_len; SaveStateEntry *se; int ret; @@ -1375,7 +1375,6 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, qemu_put_be32(f, vmdesc_len); qemu_put_buffer(f, (uint8_t *)qjson_get_str(vmdesc), vmdesc_len); } - qjson_destroy(vmdesc); return 0; } diff --git a/migration/trace-events b/migration/trace-events index 00ffcd5930..858d415d56 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -76,7 +76,7 @@ qemu_file_fclose(void) "" # ram.c get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx" -get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs, int sent) "%s/0x%" PRIx64 " page_abs=0x%lx (sent=%d)" +get_queued_page_not_dirty(const char 
*block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx" migration_bitmap_sync_start(void) "" migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64 migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx" diff --git a/tests/migration-test.c b/tests/migration-test.c index 258aa064d4..221a33d083 100644 --- a/tests/migration-test.c +++ b/tests/migration-test.c @@ -255,15 +255,19 @@ static void read_blocktime(QTestState *who) } static void wait_for_migration_status(QTestState *who, - const char *goal) + const char *goal, + const char **ungoals) { while (true) { bool completed; char *status; + const char **ungoal; status = migrate_query_status(who); completed = strcmp(status, goal) == 0; - g_assert_cmpstr(status, !=, "failed"); + for (ungoal = ungoals; *ungoal; ungoal++) { + g_assert_cmpstr(status, !=, *ungoal); + } g_free(status); if (completed) { return; @@ -274,7 +278,8 @@ static void wait_for_migration_status(QTestState *who, static void wait_for_migration_complete(QTestState *who) { - wait_for_migration_status(who, "completed"); + wait_for_migration_status(who, "completed", + (const char * []) { "failed", NULL }); } static void wait_for_migration_pass(QTestState *who) @@ -748,7 +753,7 @@ static int migrate_postcopy_prepare(QTestState **from_ptr, * quickly, but that it doesn't complete precopy even on a slow * machine, so also set the downtime. 
*/ - migrate_set_parameter_int(from, "max-bandwidth", 100000000); + migrate_set_parameter_int(from, "max-bandwidth", 30000000); migrate_set_parameter_int(from, "downtime-limit", 1); /* Wait for the first serial output from the source */ @@ -809,7 +814,9 @@ static void test_postcopy_recovery(void) * Wait until postcopy is really started; we can only run the * migrate-pause command during a postcopy */ - wait_for_migration_status(from, "postcopy-active"); + wait_for_migration_status(from, "postcopy-active", + (const char * []) { "failed", + "completed", NULL }); /* * Manually stop the postcopy migration. This emulates a network @@ -822,7 +829,9 @@ static void test_postcopy_recovery(void) * migrate-recover command can only succeed if destination machine * is in the paused state */ - wait_for_migration_status(to, "postcopy-paused"); + wait_for_migration_status(to, "postcopy-paused", + (const char * []) { "failed", "active", + "completed", NULL }); /* * Create a new socket to emulate a new channel that is different @@ -836,7 +845,9 @@ static void test_postcopy_recovery(void) * Try to rebuild the migration channel using the resume flag and * the newly created channel */ - wait_for_migration_status(from, "postcopy-paused"); + wait_for_migration_status(from, "postcopy-paused", + (const char * []) { "failed", "active", + "completed", NULL }); migrate(from, uri, "{'resume': true}"); g_free(uri); |