aboutsummaryrefslogtreecommitdiff
path: root/migration/postcopy-ram.c
diff options
context:
space:
mode:
authorPeter Xu <peterx@redhat.com>2022-07-07 14:55:06 -0400
committerDr. David Alan Gilbert <dgilbert@redhat.com>2022-07-20 12:15:08 +0100
commit60bb3c5871a7f7b7cfff5d0a30a035e30cce8e42 (patch)
treece2a2d6915b9ddb44bd1c2a976fa6e57f1a091bb /migration/postcopy-ram.c
parentc01b16edf6a22f28c2a943652c82d18fccc527b7 (diff)
migration: Postcopy recover with preempt enabled
To allow postcopy recovery, the ram fast load (preempt-only) dest QEMU thread needs similar handling on fault tolerance. When ram_load_postcopy() fails, instead of stopping the thread it halts with a semaphore, preparing to be kicked again when recovery is detected. A mutex is introduced to make sure there's no concurrent operation upon the socket. To make it simple, the fast ram load thread will take the mutex during its whole procedure, and only release it if it's paused. The fast-path socket will be properly released by the main loading thread safely when there's network failures during postcopy with that mutex held. Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> Signed-off-by: Peter Xu <peterx@redhat.com> Message-Id: <20220707185506.27257-1-peterx@redhat.com> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Diffstat (limited to 'migration/postcopy-ram.c')
-rw-r--r--migration/postcopy-ram.c25
1 files changed, 23 insertions, 2 deletions
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index a3561410fe..84f7b1526e 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -1580,6 +1580,15 @@ int postcopy_preempt_setup(MigrationState *s, Error **errp)
return 0;
}
+static void postcopy_pause_ram_fast_load(MigrationIncomingState *mis)
+{
+ trace_postcopy_pause_fast_load();
+ qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex);
+ qemu_sem_wait(&mis->postcopy_pause_sem_fast_load);
+ qemu_mutex_lock(&mis->postcopy_prio_thread_mutex);
+ trace_postcopy_pause_fast_load_continued();
+}
+
void *postcopy_preempt_thread(void *opaque)
{
MigrationIncomingState *mis = opaque;
@@ -1592,11 +1601,23 @@ void *postcopy_preempt_thread(void *opaque)
qemu_sem_post(&mis->thread_sync_sem);
/* Sending RAM_SAVE_FLAG_EOS to terminate this thread */
- ret = ram_load_postcopy(mis->postcopy_qemufile_dst, RAM_CHANNEL_POSTCOPY);
+ qemu_mutex_lock(&mis->postcopy_prio_thread_mutex);
+ while (1) {
+ ret = ram_load_postcopy(mis->postcopy_qemufile_dst,
+ RAM_CHANNEL_POSTCOPY);
+ /* If error happened, go into recovery routine */
+ if (ret) {
+ postcopy_pause_ram_fast_load(mis);
+ } else {
+ /* We're done */
+ break;
+ }
+ }
+ qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex);
rcu_unregister_thread();
trace_postcopy_preempt_thread_exit();
- return ret == 0 ? NULL : (void *)-1;
+ return NULL;
}