diff options
author | Peter Xu <peterx@redhat.com> | 2022-07-07 14:55:06 -0400 |
---|---|---|
committer | Dr. David Alan Gilbert <dgilbert@redhat.com> | 2022-07-20 12:15:08 +0100 |
commit | 60bb3c5871a7f7b7cfff5d0a30a035e30cce8e42 (patch) | |
tree | ce2a2d6915b9ddb44bd1c2a976fa6e57f1a091bb /migration/savevm.c | |
parent | c01b16edf6a22f28c2a943652c82d18fccc527b7 (diff) |
migration: Postcopy recover with preempt enabled
To allow postcopy recovery, the ram fast load (preempt-only) dest QEMU thread
needs similar fault-tolerance handling. When ram_load_postcopy() fails,
instead of exiting, the thread halts on a semaphore, ready to be
kicked again when recovery is detected.
A mutex is introduced to make sure there are no concurrent operations on the
socket. To keep it simple, the fast ram load thread holds the mutex during
its whole procedure, and only releases it while it is paused. When there are
network failures during postcopy, the main loading thread takes that mutex
and safely releases the fast-path socket.
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20220707185506.27257-1-peterx@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Diffstat (limited to 'migration/savevm.c')
-rw-r--r-- | migration/savevm.c | 26 |
1 files changed, 24 insertions, 2 deletions
diff --git a/migration/savevm.c b/migration/savevm.c index e3af03cb9b..48e85c052c 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2117,6 +2117,13 @@ static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis) */ qemu_sem_post(&mis->postcopy_pause_sem_fault); + if (migrate_postcopy_preempt()) { + /* The channel should already be setup again; make sure of it */ + assert(mis->postcopy_qemufile_dst); + /* Kick the fast ram load thread too */ + qemu_sem_post(&mis->postcopy_pause_sem_fast_load); + } + return 0; } @@ -2562,6 +2569,21 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) mis->to_src_file = NULL; qemu_mutex_unlock(&mis->rp_mutex); + /* + * NOTE: this must happen before reset the PostcopyTmpPages below, + * otherwise it's racy to reset those fields when the fast load thread + * can be accessing it in parallel. + */ + if (mis->postcopy_qemufile_dst) { + qemu_file_shutdown(mis->postcopy_qemufile_dst); + /* Take the mutex to make sure the fast ram load thread halted */ + qemu_mutex_lock(&mis->postcopy_prio_thread_mutex); + migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst); + qemu_fclose(mis->postcopy_qemufile_dst); + mis->postcopy_qemufile_dst = NULL; + qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex); + } + migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, MIGRATION_STATUS_POSTCOPY_PAUSED); @@ -2599,8 +2621,8 @@ retry: while (true) { section_type = qemu_get_byte(f); - if (qemu_file_get_error(f)) { - ret = qemu_file_get_error(f); + ret = qemu_file_get_error_obj_any(f, mis->postcopy_qemufile_dst, NULL); + if (ret) { break; } |