diff options
author | Ivan Ren <renyime@gmail.com> | 2019-06-25 21:18:17 +0800 |
---|---|---|
committer | Juan Quintela <quintela@redhat.com> | 2019-07-24 14:46:51 +0200 |
commit | 713f762a316348b00f5a3713b5314c88ab0a5852 (patch) | |
tree | ef75edf4f63d222e255d2986186f39216015a260 /migration | |
parent | bf8b024372bf8abf5a9f40bfa65eeefad23ff988 (diff) |
migration: fix migrate_cancel leads live_migration thread endless loop
When we 'migrate_cancel' a multifd migration, live_migration thread may
go into endless loop in multifd_send_pages functions.
Reproduce steps:
(qemu) migrate_set_capability multifd on
(qemu) migrate -d url
(qemu) [wait a while]
(qemu) migrate_cancel
Then may get live_migration 100% cpu usage in following stack:
pthread_mutex_lock
qemu_mutex_lock_impl
multifd_send_pages
multifd_queue_page
ram_save_multifd_page
ram_save_target_page
ram_save_host_page
ram_find_and_save_block
ram_find_and_save_block
ram_save_iterate
qemu_savevm_state_iterate
migration_iteration_run
migration_thread
qemu_thread_start
start_thread
clone
Signed-off-by: Ivan Ren <ivanren@tencent.com>
Message-Id: <1561468699-9819-2-git-send-email-ivanren@tencent.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Diffstat (limited to 'migration')
-rw-r--r-- | migration/ram.c | 36 |
1 files changed, 29 insertions, 7 deletions
diff --git a/migration/ram.c b/migration/ram.c index 2b0774c2bf..52a2d498e4 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -920,7 +920,7 @@ struct { * false. */ -static void multifd_send_pages(void) +static int multifd_send_pages(void) { int i; static int next_channel; @@ -933,6 +933,11 @@ static void multifd_send_pages(void) p = &multifd_send_state->params[i]; qemu_mutex_lock(&p->mutex); + if (p->quit) { + error_report("%s: channel %d has already quit!", __func__, i); + qemu_mutex_unlock(&p->mutex); + return -1; + } if (!p->pending_job) { p->pending_job++; next_channel = (i + 1) % migrate_multifd_channels(); @@ -951,9 +956,11 @@ static void multifd_send_pages(void) ram_counters.transferred += transferred;; qemu_mutex_unlock(&p->mutex); qemu_sem_post(&p->sem); + + return 1; } -static void multifd_queue_page(RAMBlock *block, ram_addr_t offset) +static int multifd_queue_page(RAMBlock *block, ram_addr_t offset) { MultiFDPages_t *pages = multifd_send_state->pages; @@ -968,15 +975,19 @@ static void multifd_queue_page(RAMBlock *block, ram_addr_t offset) pages->used++; if (pages->used < pages->allocated) { - return; + return 1; } } - multifd_send_pages(); + if (multifd_send_pages() < 0) { + return -1; + } if (pages->block != block) { - multifd_queue_page(block, offset); + return multifd_queue_page(block, offset); } + + return 1; } static void multifd_send_terminate_threads(Error *err) @@ -1049,7 +1060,10 @@ static void multifd_send_sync_main(void) return; } if (multifd_send_state->pages->used) { - multifd_send_pages(); + if (multifd_send_pages() < 0) { + error_report("%s: multifd_send_pages fail", __func__); + return; + } } for (i = 0; i < migrate_multifd_channels(); i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; @@ -1058,6 +1072,12 @@ static void multifd_send_sync_main(void) qemu_mutex_lock(&p->mutex); + if (p->quit) { + error_report("%s: channel %d has already quit", __func__, i); + qemu_mutex_unlock(&p->mutex); + return; + } + p->packet_num = multifd_send_state->packet_num++; p->flags |= MULTIFD_FLAG_SYNC; p->pending_job++; @@ -2033,7 +2053,9 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) static int ram_save_multifd_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) { - multifd_queue_page(block, offset); + if (multifd_queue_page(block, offset) < 0) { + return -1; + } ram_counters.normal++; return 1; |