aboutsummaryrefslogtreecommitdiff
path: root/migration/colo.c
diff options
context:
space:
mode:
Diffstat (limited to 'migration/colo.c')
-rw-r--r--migration/colo.c212
1 files changed, 201 insertions, 11 deletions
diff --git a/migration/colo.c b/migration/colo.c
index 88936f5962..956ac236b7 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -25,8 +25,16 @@
#include "qemu/error-report.h"
#include "migration/failover.h"
#include "replication.h"
+#include "net/colo-compare.h"
+#include "net/colo.h"
+#include "block/block.h"
+#include "qapi/qapi-events-migration.h"
+#include "qapi/qmp/qerror.h"
+#include "sysemu/cpus.h"
+#include "net/filter.h"
static bool vmstate_loading;
+static Notifier packets_compare_notifier;
#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
@@ -53,6 +61,7 @@ static void secondary_vm_do_failover(void)
{
int old_state;
MigrationIncomingState *mis = migration_incoming_get_current();
+ Error *local_err = NULL;
/* Can not do failover during the process of VM's loading VMstate, Or
* it will break the secondary VM.
@@ -70,6 +79,17 @@ static void secondary_vm_do_failover(void)
migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
MIGRATION_STATUS_COMPLETED);
+ replication_stop_all(true, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ }
+
+ /* Notify all filters of all NIC to do checkpoint */
+ colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ }
+
if (!autostart) {
error_report("\"-S\" qemu option will be ignored in secondary side");
/* recover runstate to normal migration finish state */
@@ -107,9 +127,15 @@ static void primary_vm_do_failover(void)
{
MigrationState *s = migrate_get_current();
int old_state;
+ Error *local_err = NULL;
migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
MIGRATION_STATUS_COMPLETED);
+ /*
+ * kick COLO thread which might wait at
+ * qemu_sem_wait(&s->colo_checkpoint_sem).
+ */
+ colo_checkpoint_notify(migrate_get_current());
/*
* Wake up COLO thread which may blocked in recv() or send(),
@@ -130,10 +156,28 @@ static void primary_vm_do_failover(void)
FailoverStatus_str(old_state));
return;
}
+
+ replication_stop_all(true, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ local_err = NULL;
+ }
+
/* Notify COLO thread that failover work is finished */
qemu_sem_post(&s->colo_exit_sem);
}
+COLOMode get_colo_mode(void)
+{
+ if (migration_in_colo_state()) {
+ return COLO_MODE_PRIMARY;
+ } else if (migration_incoming_in_colo_state()) {
+ return COLO_MODE_SECONDARY;
+ } else {
+ return COLO_MODE_NONE;
+ }
+}
+
void colo_do_failover(MigrationState *s)
{
/* Make sure VM stopped while failover happened. */
@@ -207,6 +251,26 @@ void qmp_xen_colo_do_checkpoint(Error **errp)
#endif
}
+COLOStatus *qmp_query_colo_status(Error **errp)
+{
+ COLOStatus *s = g_new0(COLOStatus, 1);
+
+ s->mode = get_colo_mode();
+
+ switch (failover_get_state()) {
+ case FAILOVER_STATUS_NONE:
+ s->reason = COLO_EXIT_REASON_NONE;
+ break;
+ case FAILOVER_STATUS_REQUIRE:
+ s->reason = COLO_EXIT_REASON_REQUEST;
+ break;
+ default:
+ s->reason = COLO_EXIT_REASON_ERROR;
+ }
+
+ return s;
+}
+
static void colo_send_message(QEMUFile *f, COLOMessage msg,
Error **errp)
{
@@ -343,21 +407,42 @@ static int colo_do_checkpoint_transaction(MigrationState *s,
goto out;
}
+ colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
+ if (local_err) {
+ goto out;
+ }
+
/* Disable block migration */
migrate_set_block_enabled(false, &local_err);
- qemu_savevm_state_header(fb);
- qemu_savevm_state_setup(fb);
qemu_mutex_lock_iothread();
- qemu_savevm_state_complete_precopy(fb, false, false);
- qemu_mutex_unlock_iothread();
-
- qemu_fflush(fb);
+ replication_do_checkpoint_all(&local_err);
+ if (local_err) {
+ qemu_mutex_unlock_iothread();
+ goto out;
+ }
colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
if (local_err) {
+ qemu_mutex_unlock_iothread();
+ goto out;
+ }
+ /* Note: device state is saved into buffer */
+ ret = qemu_save_device_state(fb);
+
+ qemu_mutex_unlock_iothread();
+ if (ret < 0) {
goto out;
}
/*
+ * Only save VM's live state, which not including device state.
+ * TODO: We may need a timeout mechanism to prevent COLO process
+ * to be blocked here.
+ */
+ qemu_savevm_live_state(s->to_dst_file);
+
+ qemu_fflush(fb);
+
+ /*
* We need the size of the VMstate data in Secondary side,
* With which we can decide how much data should be read.
*/
@@ -400,6 +485,11 @@ out:
return ret;
}
+static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
+{
+ colo_checkpoint_notify(data);
+}
+
static void colo_process_checkpoint(MigrationState *s)
{
QIOChannelBuffer *bioc;
@@ -416,6 +506,9 @@ static void colo_process_checkpoint(MigrationState *s)
goto out;
}
+ packets_compare_notifier.notify = colo_compare_notify_checkpoint;
+ colo_compare_register_notifier(&packets_compare_notifier);
+
/*
* Wait for Secondary finish loading VM states and enter COLO
* restore.
@@ -430,6 +523,12 @@ static void colo_process_checkpoint(MigrationState *s)
object_unref(OBJECT(bioc));
qemu_mutex_lock_iothread();
+ replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
+ if (local_err) {
+ qemu_mutex_unlock_iothread();
+ goto out;
+ }
+
vm_start();
qemu_mutex_unlock_iothread();
trace_colo_vm_state_change("stop", "run");
@@ -445,6 +544,9 @@ static void colo_process_checkpoint(MigrationState *s)
qemu_sem_wait(&s->colo_checkpoint_sem);
+ if (s->state != MIGRATION_STATUS_COLO) {
+ goto out;
+ }
ret = colo_do_checkpoint_transaction(s, bioc, fb);
if (ret < 0) {
goto out;
@@ -461,11 +563,38 @@ out:
qemu_fclose(fb);
}
- timer_del(s->colo_delay_timer);
+ /*
+ * There are only two reasons we can get here, some error happened
+ * or the user triggered failover.
+ */
+ switch (failover_get_state()) {
+ case FAILOVER_STATUS_NONE:
+ qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
+ COLO_EXIT_REASON_ERROR);
+ break;
+ case FAILOVER_STATUS_REQUIRE:
+ qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
+ COLO_EXIT_REASON_REQUEST);
+ break;
+ default:
+ abort();
+ }
/* Hope this not to be too long to wait here */
qemu_sem_wait(&s->colo_exit_sem);
qemu_sem_destroy(&s->colo_exit_sem);
+
+ /*
+ * It is safe to unregister notifier after failover finished.
+ * Besides, colo_delay_timer and colo_checkpoint_sem can't be
+ * released befor unregister notifier, or there will be use-after-free
+ * error.
+ */
+ colo_compare_unregister_notifier(&packets_compare_notifier);
+ timer_del(s->colo_delay_timer);
+ timer_free(s->colo_delay_timer);
+ qemu_sem_destroy(&s->colo_checkpoint_sem);
+
/*
* Must be called after failover BH is completed,
* Or the failover BH may shutdown the wrong fd that
@@ -533,6 +662,7 @@ void *colo_process_incoming_thread(void *opaque)
uint64_t total_size;
uint64_t value;
Error *local_err = NULL;
+ int ret;
rcu_register_thread();
qemu_sem_init(&mis->colo_incoming_sem, 0);
@@ -559,6 +689,16 @@ void *colo_process_incoming_thread(void *opaque)
fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
object_unref(OBJECT(bioc));
+ qemu_mutex_lock_iothread();
+ replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
+ if (local_err) {
+ qemu_mutex_unlock_iothread();
+ goto out;
+ }
+ vm_start();
+ trace_colo_vm_state_change("stop", "run");
+ qemu_mutex_unlock_iothread();
+
colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
&local_err);
if (local_err) {
@@ -578,6 +718,11 @@ void *colo_process_incoming_thread(void *opaque)
goto out;
}
+ qemu_mutex_lock_iothread();
+ vm_stop_force_state(RUN_STATE_COLO);
+ trace_colo_vm_state_change("run", "stop");
+ qemu_mutex_unlock_iothread();
+
/* FIXME: This is unnecessary for periodic checkpoint mode */
colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
&local_err);
@@ -591,6 +736,16 @@ void *colo_process_incoming_thread(void *opaque)
goto out;
}
+ qemu_mutex_lock_iothread();
+ cpu_synchronize_all_pre_loadvm();
+ ret = qemu_loadvm_state_main(mis->from_src_file, mis);
+ qemu_mutex_unlock_iothread();
+
+ if (ret < 0) {
+ error_report("Load VM's live state (ram) error");
+ goto out;
+ }
+
value = colo_receive_message_value(mis->from_src_file,
COLO_MESSAGE_VMSTATE_SIZE, &local_err);
if (local_err) {
@@ -622,15 +777,37 @@ void *colo_process_incoming_thread(void *opaque)
}
qemu_mutex_lock_iothread();
- qemu_system_reset(SHUTDOWN_CAUSE_NONE);
vmstate_loading = true;
- if (qemu_loadvm_state(fb) < 0) {
- error_report("COLO: loadvm failed");
+ ret = qemu_load_device_state(fb);
+ if (ret < 0) {
+ error_report("COLO: load device state failed");
+ qemu_mutex_unlock_iothread();
+ goto out;
+ }
+
+ replication_get_error_all(&local_err);
+ if (local_err) {
+ qemu_mutex_unlock_iothread();
+ goto out;
+ }
+ /* discard colo disk buffer */
+ replication_do_checkpoint_all(&local_err);
+ if (local_err) {
+ qemu_mutex_unlock_iothread();
+ goto out;
+ }
+
+ /* Notify all filters of all NIC to do checkpoint */
+ colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err);
+
+ if (local_err) {
qemu_mutex_unlock_iothread();
goto out;
}
vmstate_loading = false;
+ vm_start();
+ trace_colo_vm_state_change("stop", "run");
qemu_mutex_unlock_iothread();
if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
@@ -654,6 +831,19 @@ out:
error_report_err(local_err);
}
+ switch (failover_get_state()) {
+ case FAILOVER_STATUS_NONE:
+ qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
+ COLO_EXIT_REASON_ERROR);
+ break;
+ case FAILOVER_STATUS_REQUIRE:
+ qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
+ COLO_EXIT_REASON_REQUEST);
+ break;
+ default:
+ abort();
+ }
+
if (fb) {
qemu_fclose(fb);
}
@@ -665,7 +855,7 @@ out:
if (mis->to_src_file) {
qemu_fclose(mis->to_src_file);
}
- migration_incoming_exit_colo();
+ migration_incoming_disable_colo();
rcu_unregister_thread();
return NULL;