aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-01-20 10:41:27 +0000
committerPeter Maydell <peter.maydell@linaro.org>2020-01-20 10:41:27 +0000
commit26deea00260139fc5f323c3bf9db82a5d470538a (patch)
tree921d6fe8b18c6b2021ea5f8f27af52c46f3e240f
parent7fb38daf256bd1bcbcb5ea556422283d0d55a1b1 (diff)
parentddac5cb2d95774cd019bfaf93c54ffd921095fea (diff)
Merge remote-tracking branch 'remotes/juanquintela/tags/migration-pull-pull-request' into staging
Migration pull request # gpg: Signature made Mon 20 Jan 2020 10:29:53 GMT # gpg: using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723 # gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full] # gpg: aka "Juan Quintela <quintela@trasno.org>" [full] # Primary key fingerprint: 1899 FF8E DEBF 58CC EE03 4B82 F487 EF18 5872 D723 * remotes/juanquintela/tags/migration-pull-pull-request: (29 commits) multifd: Be consistent about using uint64_t migration: Support QLIST migration apic: Use 32bit APIC ID for migration instance ID migration: Change SaveStateEntry.instance_id into uint32_t migration: Define VMSTATE_INSTANCE_ID_ANY Bug #1829242 correction. migration/multifd: fix destroyed mutex access in terminating multifd threads migration/multifd: fix nullptr access in terminating multifd threads migration/multifd: not use multifd during postcopy migration/multifd: clean pages after filling packet migration/postcopy: enable compress during postcopy migration/postcopy: enable random order target page arrival migration/postcopy: set all_zero to true on the first target page migration/postcopy: count target page number to decide the place_needed migration/postcopy: wait for decompress thread in precopy migration/postcopy: reduce memset when it is zero page and matches_target_page_size migration/ram: Yield periodically to the main loop migration: savevm_state_handler_insert: constant-time element insertion migration: add savevm_state_handler_remove() misc: use QEMU_IS_ALIGNED ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--backends/dbus-vmstate.c3
-rw-r--r--exec.c4
-rw-r--r--hw/arm/stellaris.c2
-rw-r--r--hw/core/qdev.c3
-rw-r--r--hw/display/ads7846.c2
-rw-r--r--hw/i2c/core.c2
-rw-r--r--hw/input/stellaris_input.c3
-rw-r--r--hw/intc/apic_common.c7
-rw-r--r--hw/misc/max111x.c3
-rw-r--r--hw/net/eepro100.c3
-rw-r--r--hw/pci/pci.c2
-rw-r--r--hw/ppc/spapr.c2
-rw-r--r--hw/timer/arm_timer.c2
-rw-r--r--hw/tpm/tpm_emulator.c3
-rw-r--r--include/migration/register.h2
-rw-r--r--include/migration/vmstate.h25
-rw-r--r--include/qemu/queue.h39
-rw-r--r--migration/migration.c72
-rw-r--r--migration/migration.h1
-rw-r--r--migration/ram.c196
-rw-r--r--migration/savevm.c61
-rw-r--r--migration/trace-events9
-rw-r--r--migration/vmstate-types.c70
-rw-r--r--stubs/vmstate.c2
-rw-r--r--tests/qtest/migration-test.c93
-rw-r--r--tests/test-vmstate.c170
-rw-r--r--vl.c10
27 files changed, 659 insertions, 132 deletions
diff --git a/backends/dbus-vmstate.c b/backends/dbus-vmstate.c
index 56b482a7d6..cc594a722e 100644
--- a/backends/dbus-vmstate.c
+++ b/backends/dbus-vmstate.c
@@ -412,7 +412,8 @@ dbus_vmstate_complete(UserCreatable *uc, Error **errp)
return;
}
- if (vmstate_register(VMSTATE_IF(self), -1, &dbus_vmstate, self) < 0) {
+ if (vmstate_register(VMSTATE_IF(self), VMSTATE_INSTANCE_ID_ANY,
+ &dbus_vmstate, self) < 0) {
error_setg(errp, "Failed to register vmstate");
}
}
diff --git a/exec.c b/exec.c
index 0f6b087f57..67e520d18e 100644
--- a/exec.c
+++ b/exec.c
@@ -3895,7 +3895,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
uint8_t *host_startaddr = rb->host + start;
- if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
+ if (!QEMU_PTR_IS_ALIGNED(host_startaddr, rb->page_size)) {
error_report("ram_block_discard_range: Unaligned start address: %p",
host_startaddr);
goto err;
@@ -3903,7 +3903,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
if ((start + length) <= rb->used_length) {
bool need_madvise, need_fallocate;
- if (length & (rb->page_size - 1)) {
+ if (!QEMU_IS_ALIGNED(length, rb->page_size)) {
error_report("ram_block_discard_range: Unaligned length: %zx",
length);
goto err;
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
index b198066b54..bb025e0bd0 100644
--- a/hw/arm/stellaris.c
+++ b/hw/arm/stellaris.c
@@ -708,7 +708,7 @@ static int stellaris_sys_init(uint32_t base, qemu_irq irq,
memory_region_init_io(&s->iomem, NULL, &ssys_ops, s, "ssys", 0x00001000);
memory_region_add_subregion(get_system_memory(), base, &s->iomem);
ssys_reset(s);
- vmstate_register(NULL, -1, &vmstate_stellaris_sys, s);
+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_stellaris_sys, s);
return 0;
}
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 9f1753f5cf..58e87d336d 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -879,7 +879,8 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
if (qdev_get_vmsd(dev)) {
if (vmstate_register_with_alias_id(VMSTATE_IF(dev),
- -1, qdev_get_vmsd(dev), dev,
+ VMSTATE_INSTANCE_ID_ANY,
+ qdev_get_vmsd(dev), dev,
dev->instance_id_alias,
dev->alias_required_for_version,
&local_err) < 0) {
diff --git a/hw/display/ads7846.c b/hw/display/ads7846.c
index c12272ae72..9228b40b1a 100644
--- a/hw/display/ads7846.c
+++ b/hw/display/ads7846.c
@@ -154,7 +154,7 @@ static void ads7846_realize(SSISlave *d, Error **errp)
ads7846_int_update(s);
- vmstate_register(NULL, -1, &vmstate_ads7846, s);
+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_ads7846, s);
}
static void ads7846_class_init(ObjectClass *klass, void *data)
diff --git a/hw/i2c/core.c b/hw/i2c/core.c
index 92cd489069..d770035ba0 100644
--- a/hw/i2c/core.c
+++ b/hw/i2c/core.c
@@ -61,7 +61,7 @@ I2CBus *i2c_init_bus(DeviceState *parent, const char *name)
bus = I2C_BUS(qbus_create(TYPE_I2C_BUS, parent, name));
QLIST_INIT(&bus->current_devs);
- vmstate_register(NULL, -1, &vmstate_i2c_bus, bus);
+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_i2c_bus, bus);
return bus;
}
diff --git a/hw/input/stellaris_input.c b/hw/input/stellaris_input.c
index 59892b07fc..e6ee5e11f1 100644
--- a/hw/input/stellaris_input.c
+++ b/hw/input/stellaris_input.c
@@ -88,5 +88,6 @@ void stellaris_gamepad_init(int n, qemu_irq *irq, const int *keycode)
}
s->num_buttons = n;
qemu_add_kbd_event_handler(stellaris_gamepad_put_key, s);
- vmstate_register(NULL, -1, &vmstate_stellaris_gamepad, s);
+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY,
+ &vmstate_stellaris_gamepad, s);
}
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 375cb6abe9..b5dbeb6206 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -268,7 +268,10 @@ static void apic_common_realize(DeviceState *dev, Error **errp)
APICCommonState *s = APIC_COMMON(dev);
APICCommonClass *info;
static DeviceState *vapic;
- int instance_id = s->id;
+ uint32_t instance_id = s->initial_apic_id;
+
+ /* Normally the initial APIC ID should be no more than a few hundred */
+ assert(instance_id != VMSTATE_INSTANCE_ID_ANY);
info = APIC_COMMON_GET_CLASS(s);
info->realize(dev, errp);
@@ -284,7 +287,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp)
}
if (s->legacy_instance_id) {
- instance_id = -1;
+ instance_id = VMSTATE_INSTANCE_ID_ANY;
}
vmstate_register_with_alias_id(NULL, instance_id, &vmstate_apic_common,
s, -1, 0, NULL);
diff --git a/hw/misc/max111x.c b/hw/misc/max111x.c
index 211008ce02..2b87bdee5b 100644
--- a/hw/misc/max111x.c
+++ b/hw/misc/max111x.c
@@ -146,7 +146,8 @@ static int max111x_init(SSISlave *d, int inputs)
s->input[7] = 0x80;
s->com = 0;
- vmstate_register(VMSTATE_IF(dev), -1, &vmstate_max111x, s);
+ vmstate_register(VMSTATE_IF(dev), VMSTATE_INSTANCE_ID_ANY,
+ &vmstate_max111x, s);
return 0;
}
diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
index cc71a7a036..6cc97769d9 100644
--- a/hw/net/eepro100.c
+++ b/hw/net/eepro100.c
@@ -1874,7 +1874,8 @@ static void e100_nic_realize(PCIDevice *pci_dev, Error **errp)
s->vmstate = g_memdup(&vmstate_eepro100, sizeof(vmstate_eepro100));
s->vmstate->name = qemu_get_queue(s->nic)->model;
- vmstate_register(VMSTATE_IF(&pci_dev->qdev), -1, s->vmstate, s);
+ vmstate_register(VMSTATE_IF(&pci_dev->qdev), VMSTATE_INSTANCE_ID_ANY,
+ s->vmstate, s);
}
static void eepro100_instance_init(Object *obj)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index e3d310365d..3ac7961451 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -122,7 +122,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp)
bus->machine_done.notify = pcibus_machine_done;
qemu_add_machine_init_done_notifier(&bus->machine_done);
- vmstate_register(NULL, -1, &vmstate_pcibus, bus);
+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_pcibus, bus);
}
static void pcie_bus_realize(BusState *qbus, Error **errp)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 30a5fbd3be..02cf53fc5b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2948,7 +2948,7 @@ static void spapr_machine_init(MachineState *machine)
* interface, this is a legacy from the sPAPREnvironment structure
* which predated MachineState but had a similar function */
vmstate_register(NULL, 0, &vmstate_spapr, spapr);
- register_savevm_live("spapr/htab", -1, 1,
+ register_savevm_live("spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1,
&savevm_htab_handlers, spapr);
qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine),
diff --git a/hw/timer/arm_timer.c b/hw/timer/arm_timer.c
index af524fabf7..beaa285685 100644
--- a/hw/timer/arm_timer.c
+++ b/hw/timer/arm_timer.c
@@ -180,7 +180,7 @@ static arm_timer_state *arm_timer_init(uint32_t freq)
s->control = TIMER_CTRL_IE;
s->timer = ptimer_init(arm_timer_tick, s, PTIMER_POLICY_DEFAULT);
- vmstate_register(NULL, -1, &vmstate_arm_timer, s);
+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_arm_timer, s);
return s;
}
diff --git a/hw/tpm/tpm_emulator.c b/hw/tpm/tpm_emulator.c
index 10d587ed40..3a0fc442f3 100644
--- a/hw/tpm/tpm_emulator.c
+++ b/hw/tpm/tpm_emulator.c
@@ -914,7 +914,8 @@ static void tpm_emulator_inst_init(Object *obj)
tpm_emu->cur_locty_number = ~0;
qemu_mutex_init(&tpm_emu->mutex);
- vmstate_register(NULL, -1, &vmstate_tpm_emulator, obj);
+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY,
+ &vmstate_tpm_emulator, obj);
}
/*
diff --git a/include/migration/register.h b/include/migration/register.h
index 00c38ebe9f..c1dcff0f90 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -71,7 +71,7 @@ typedef struct SaveVMHandlers {
} SaveVMHandlers;
int register_savevm_live(const char *idstr,
- int instance_id,
+ uint32_t instance_id,
int version_id,
const SaveVMHandlers *ops,
void *opaque);
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 4aef72c426..30667631bc 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -229,6 +229,7 @@ extern const VMStateInfo vmstate_info_tmp;
extern const VMStateInfo vmstate_info_bitmap;
extern const VMStateInfo vmstate_info_qtailq;
extern const VMStateInfo vmstate_info_gtree;
+extern const VMStateInfo vmstate_info_qlist;
#define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0)
/*
@@ -798,6 +799,26 @@ extern const VMStateInfo vmstate_info_gtree;
.offset = offsetof(_state, _field), \
}
+/*
+ * For migrating a QLIST
+ * Target QLIST needs to be properly initialized.
+ * _type: type of QLIST element
+ * _next: name of QLIST_ENTRY entry field in QLIST element
+ * _vmsd: VMSD for QLIST element
+ * size: size of QLIST element
+ * start: offset of QLIST_ENTRY in QLIST element
+ */
+#define VMSTATE_QLIST_V(_field, _state, _version, _vmsd, _type, _next) \
+{ \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .info = &vmstate_info_qlist, \
+ .offset = offsetof(_state, _field), \
+ .start = offsetof(_type, _next), \
+}
+
/* _f : field name
_f_n : num of elements field_name
_n : num of elements
@@ -1157,8 +1178,10 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd,
bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque);
+#define VMSTATE_INSTANCE_ID_ANY -1
+
/* Returns: 0 on success, -1 on failure */
-int vmstate_register_with_alias_id(VMStateIf *obj, int instance_id,
+int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
const VMStateDescription *vmsd,
void *base, int alias_id,
int required_for_version,
diff --git a/include/qemu/queue.h b/include/qemu/queue.h
index 4764d93ea3..4d4554a7ce 100644
--- a/include/qemu/queue.h
+++ b/include/qemu/queue.h
@@ -501,4 +501,43 @@ union { \
QTAILQ_RAW_TQH_CIRC(head)->tql_prev = QTAILQ_RAW_TQE_CIRC(elm, entry); \
} while (/*CONSTCOND*/0)
+#define QLIST_RAW_FIRST(head) \
+ field_at_offset(head, 0, void *)
+
+#define QLIST_RAW_NEXT(elm, entry) \
+ field_at_offset(elm, entry, void *)
+
+#define QLIST_RAW_PREVIOUS(elm, entry) \
+ field_at_offset(elm, entry + sizeof(void *), void *)
+
+#define QLIST_RAW_FOREACH(elm, head, entry) \
+ for ((elm) = *QLIST_RAW_FIRST(head); \
+ (elm); \
+ (elm) = *QLIST_RAW_NEXT(elm, entry))
+
+#define QLIST_RAW_INSERT_HEAD(head, elm, entry) do { \
+ void *first = *QLIST_RAW_FIRST(head); \
+ *QLIST_RAW_FIRST(head) = elm; \
+ *QLIST_RAW_PREVIOUS(elm, entry) = QLIST_RAW_FIRST(head); \
+ if (first) { \
+ *QLIST_RAW_NEXT(elm, entry) = first; \
+ *QLIST_RAW_PREVIOUS(first, entry) = QLIST_RAW_NEXT(elm, entry); \
+ } else { \
+ *QLIST_RAW_NEXT(elm, entry) = NULL; \
+ } \
+} while (0)
+
+#define QLIST_RAW_REVERSE(head, elm, entry) do { \
+ void *iter = *QLIST_RAW_FIRST(head), *prev = NULL, *next; \
+ while (iter) { \
+ next = *QLIST_RAW_NEXT(iter, entry); \
+ *QLIST_RAW_PREVIOUS(iter, entry) = QLIST_RAW_NEXT(next, entry); \
+ *QLIST_RAW_NEXT(iter, entry) = prev; \
+ prev = iter; \
+ iter = next; \
+ } \
+ *QLIST_RAW_FIRST(head) = prev; \
+ *QLIST_RAW_PREVIOUS(prev, entry) = QLIST_RAW_FIRST(head); \
+} while (0)
+
#endif /* QEMU_SYS_QUEUE_H */
diff --git a/migration/migration.c b/migration/migration.c
index 354ad072fa..990bff00c0 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1005,17 +1005,6 @@ static bool migrate_caps_check(bool *cap_list,
#endif
if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
- if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
- /* The decompression threads asynchronously write into RAM
- * rather than use the atomic copies needed to avoid
- * userfaulting. It should be possible to fix the decompression
- * threads for compatibility in future.
- */
- error_setg(errp, "Postcopy is not currently compatible "
- "with compression");
- return false;
- }
-
/* This check is reasonably expensive, so only when it's being
* set the first time, also it's only the destination that needs
* special support.
@@ -1784,6 +1773,7 @@ void qmp_migrate_incoming(const char *uri, Error **errp)
}
if (!once) {
error_setg(errp, "The incoming migration has already been started");
+ return;
}
qemu_start_incoming_migration(uri, &local_err);
@@ -2035,11 +2025,10 @@ void qmp_migrate_set_downtime(double value, Error **errp)
}
value *= 1000; /* Convert to milliseconds */
- value = MAX(0, MIN(INT64_MAX, value));
MigrateSetParameters p = {
.has_downtime_limit = true,
- .downtime_limit = value,
+ .downtime_limit = (int64_t)value,
};
qmp_migrate_set_parameters(&p, errp);
@@ -3224,6 +3213,37 @@ void migration_consume_urgent_request(void)
qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
}
+/* Returns true if the rate limiting was broken by an urgent request */
+bool migration_rate_limit(void)
+{
+ int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ MigrationState *s = migrate_get_current();
+
+ bool urgent = false;
+ migration_update_counters(s, now);
+ if (qemu_file_rate_limit(s->to_dst_file)) {
+ /*
+ * Wait for a delay to do rate limiting OR
+ * something urgent to post the semaphore.
+ */
+ int ms = s->iteration_start_time + BUFFER_DELAY - now;
+ trace_migration_rate_limit_pre(ms);
+ if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
+ /*
+ * We were woken by one or more urgent things but
+ * the timedwait will have consumed one of them.
+ * The service routine for the urgent wake will decrement
+ * the semaphore itself for each item it consumes,
+ * so post back the one we just consumed.
+ */
+ qemu_sem_post(&s->rate_limit_sem);
+ urgent = true;
+ }
+ trace_migration_rate_limit_post(urgent);
+ }
+ return urgent;
+}
+
/*
* Master migration thread on the source VM.
* It drives the migration and pumps the data down the outgoing channel.
@@ -3290,8 +3310,6 @@ static void *migration_thread(void *opaque)
trace_migration_thread_setup_complete();
while (migration_is_active(s)) {
- int64_t current_time;
-
if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
MigIterateState iter_state = migration_iteration_run(s);
if (iter_state == MIG_ITERATE_SKIP) {
@@ -3318,29 +3336,7 @@ static void *migration_thread(void *opaque)
update_iteration_initial_status(s);
}
- current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
-
- migration_update_counters(s, current_time);
-
- urgent = false;
- if (qemu_file_rate_limit(s->to_dst_file)) {
- /* Wait for a delay to do rate limiting OR
- * something urgent to post the semaphore.
- */
- int ms = s->iteration_start_time + BUFFER_DELAY - current_time;
- trace_migration_thread_ratelimit_pre(ms);
- if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
- /* We were worken by one or more urgent things but
- * the timedwait will have consumed one of them.
- * The service routine for the urgent wake will dec
- * the semaphore itself for each item it consumes,
- * so add this one we just eat back.
- */
- qemu_sem_post(&s->rate_limit_sem);
- urgent = true;
- }
- trace_migration_thread_ratelimit_post(urgent);
- }
+ urgent = migration_rate_limit();
}
trace_migration_thread_after_loop();
diff --git a/migration/migration.h b/migration/migration.h
index 79b3dda146..aa9ff6f27b 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -341,5 +341,6 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque);
void migration_make_urgent_request(void);
void migration_consume_urgent_request(void);
+bool migration_rate_limit(void);
#endif
diff --git a/migration/ram.c b/migration/ram.c
index 96feb4062c..d2208b5534 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -703,7 +703,7 @@ typedef struct {
static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
{
- MultiFDInit_t msg;
+ MultiFDInit_t msg = {};
int ret;
msg.magic = cpu_to_be32(MULTIFD_MAGIC);
@@ -803,7 +803,10 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
}
for (i = 0; i < p->pages->used; i++) {
- packet->offset[i] = cpu_to_be64(p->pages->offset[i]);
+ /* there are architectures where ram_addr_t is 32 bit */
+ uint64_t temp = p->pages->offset[i];
+
+ packet->offset[i] = cpu_to_be64(temp);
}
}
@@ -877,10 +880,10 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
}
for (i = 0; i < p->pages->used; i++) {
- ram_addr_t offset = be64_to_cpu(packet->offset[i]);
+ uint64_t offset = be64_to_cpu(packet->offset[i]);
if (offset > (block->used_length - TARGET_PAGE_SIZE)) {
- error_setg(errp, "multifd: offset too long " RAM_ADDR_FMT
+ error_setg(errp, "multifd: offset too long %" PRIu64
" (max " RAM_ADDR_FMT ")",
offset, block->max_length);
return -1;
@@ -900,6 +903,12 @@ struct {
uint64_t packet_num;
/* send channels ready */
QemuSemaphore channels_ready;
+ /*
+ * Have we already run the terminate-threads path? There is a race
+ * when we get an error while we are already exiting.
+ * We will use atomic operations. The only valid values are 0 and 1.
+ */
+ int exiting;
} *multifd_send_state;
/*
@@ -928,6 +937,10 @@ static int multifd_send_pages(RAMState *rs)
MultiFDPages_t *pages = multifd_send_state->pages;
uint64_t transferred;
+ if (atomic_read(&multifd_send_state->exiting)) {
+ return -1;
+ }
+
qemu_sem_wait(&multifd_send_state->channels_ready);
for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) {
p = &multifd_send_state->params[i];
@@ -945,10 +958,10 @@ static int multifd_send_pages(RAMState *rs)
}
qemu_mutex_unlock(&p->mutex);
}
- p->pages->used = 0;
+ assert(!p->pages->used);
+ assert(!p->pages->block);
p->packet_num = multifd_send_state->packet_num++;
- p->pages->block = NULL;
multifd_send_state->pages = p->pages;
p->pages = pages;
transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len;
@@ -1009,6 +1022,16 @@ static void multifd_send_terminate_threads(Error *err)
}
}
+ /*
+ * We don't want to exit each thread twice. Depending on where
+ * we get the error, or if there are two independent errors in two
+ * threads at the same time, we can end up calling this function
+ * twice.
+ */
+ if (atomic_xchg(&multifd_send_state->exiting, 1)) {
+ return;
+ }
+
for (i = 0; i < migrate_multifd_channels(); i++) {
MultiFDSendParams *p = &multifd_send_state->params[i];
@@ -1033,6 +1056,10 @@ void multifd_save_cleanup(void)
if (p->running) {
qemu_thread_join(&p->thread);
}
+ }
+ for (i = 0; i < migrate_multifd_channels(); i++) {
+ MultiFDSendParams *p = &multifd_send_state->params[i];
+
socket_send_channel_destroy(p->c);
p->c = NULL;
qemu_mutex_destroy(&p->mutex);
@@ -1118,6 +1145,10 @@ static void *multifd_send_thread(void *opaque)
while (true) {
qemu_sem_wait(&p->sem);
+
+ if (atomic_read(&multifd_send_state->exiting)) {
+ break;
+ }
qemu_mutex_lock(&p->mutex);
if (p->pending_job) {
@@ -1130,6 +1161,8 @@ static void *multifd_send_thread(void *opaque)
p->flags = 0;
p->num_packets++;
p->num_pages += used;
+ p->pages->used = 0;
+ p->pages->block = NULL;
qemu_mutex_unlock(&p->mutex);
trace_multifd_send(p->id, packet_num, used, flags,
@@ -1224,6 +1257,7 @@ int multifd_save_setup(void)
multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
multifd_send_state->pages = multifd_pages_init(page_count);
qemu_sem_init(&multifd_send_state->channels_ready, 0);
+ atomic_set(&multifd_send_state->exiting, 0);
for (i = 0; i < thread_count; i++) {
MultiFDSendParams *p = &multifd_send_state->params[i];
@@ -1236,7 +1270,7 @@ int multifd_save_setup(void)
p->id = i;
p->pages = multifd_pages_init(page_count);
p->packet_len = sizeof(MultiFDPacket_t)
- + sizeof(ram_addr_t) * page_count;
+ + sizeof(uint64_t) * page_count;
p->packet = g_malloc0(p->packet_len);
p->packet->magic = cpu_to_be32(MULTIFD_MAGIC);
p->packet->version = cpu_to_be32(MULTIFD_VERSION);
@@ -1281,7 +1315,9 @@ static void multifd_recv_terminate_threads(Error *err)
- normal quit, i.e. everything went fine, just finished
- error quit: We close the channels so the channel threads
finish the qio_channel_read_all_eof() */
- qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+ if (p->c) {
+ qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+ }
qemu_mutex_unlock(&p->mutex);
}
}
@@ -1307,6 +1343,10 @@ int multifd_load_cleanup(Error **errp)
qemu_sem_post(&p->sem_sync);
qemu_thread_join(&p->thread);
}
+ }
+ for (i = 0; i < migrate_multifd_channels(); i++) {
+ MultiFDRecvParams *p = &multifd_recv_state->params[i];
+
object_unref(OBJECT(p->c));
p->c = NULL;
qemu_mutex_destroy(&p->mutex);
@@ -1447,7 +1487,7 @@ int multifd_load_setup(void)
p->id = i;
p->pages = multifd_pages_init(page_count);
p->packet_len = sizeof(MultiFDPacket_t)
- + sizeof(ram_addr_t) * page_count;
+ + sizeof(uint64_t) * page_count;
p->packet = g_malloc0(p->packet_len);
p->name = g_strdup_printf("multifdrecv_%d", i);
}
@@ -1731,7 +1771,7 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs,
if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) {
uint8_t shift = rb->clear_bmap_shift;
hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift);
- hwaddr start = (page << TARGET_PAGE_BITS) & (-size);
+ hwaddr start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size);
/*
* CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this
@@ -1968,7 +2008,7 @@ static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
return;
}
- ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
+ ram_discard_range(rbname, offset, ((ram_addr_t)pages) << TARGET_PAGE_BITS);
}
/*
@@ -2056,7 +2096,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
uint8_t *p;
bool send_async = true;
RAMBlock *block = pss->block;
- ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
+ ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
ram_addr_t current_addr = block->offset + offset;
p = block->host + offset;
@@ -2243,7 +2283,8 @@ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
*again = false;
return false;
}
- if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
+ if ((((ram_addr_t)pss->page) << TARGET_PAGE_BITS)
+ >= pss->block->used_length) {
/* Didn't find anything in this RAM Block */
pss->page = 0;
pss->block = QLIST_NEXT_RCU(pss->block, next);
@@ -2434,7 +2475,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
* it's the 1st request.
*/
error_report("ram_save_queue_pages no previous block");
- goto err;
+ return -1;
}
} else {
ramblock = qemu_ram_block_by_name(rbname);
@@ -2442,7 +2483,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
if (!ramblock) {
/* We shouldn't be asked for a non-existent RAMBlock */
error_report("ram_save_queue_pages no block '%s'", rbname);
- goto err;
+ return -1;
}
rs->last_req_rb = ramblock;
}
@@ -2451,7 +2492,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
error_report("%s request overrun start=" RAM_ADDR_FMT " len="
RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
__func__, start, len, ramblock->used_length);
- goto err;
+ return -1;
}
struct RAMSrcPageRequest *new_entry =
@@ -2467,9 +2508,6 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
qemu_mutex_unlock(&rs->src_page_req_mutex);
return 0;
-
-err:
- return -1;
}
static bool save_page_use_compression(RAMState *rs)
@@ -2537,7 +2575,7 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
bool last_stage)
{
RAMBlock *block = pss->block;
- ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
+ ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
int res;
if (control_save_page(rs, block, offset, &res)) {
@@ -2563,10 +2601,13 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
}
/*
- * do not use multifd for compression as the first page in the new
- * block should be posted out before sending the compressed page
+ * Do not use multifd for:
+ * 1. Compression as the first page in the new block should be posted out
+ * before sending the compressed page
+ * 2. Postcopy, as one whole host page should be placed
*/
- if (!save_page_use_compression(rs) && migrate_use_multifd()) {
+ if (!save_page_use_compression(rs) && migrate_use_multifd()
+ && !migration_in_postcopy()) {
return ram_save_multifd_page(rs, block, offset);
}
@@ -2617,8 +2658,11 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
pages += tmppages;
pss->page++;
+ /* Allow rate limiting to happen in the middle of huge pages */
+ migration_rate_limit();
} while ((pss->page & (pagesize_bits - 1)) &&
- offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
+ offset_in_ramblock(pss->block,
+ ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
/* The offset we leave with is the last one we looked at */
pss->page--;
@@ -2835,8 +2879,10 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms)
while (run_start < range) {
unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
- ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
- (run_end - run_start) << TARGET_PAGE_BITS);
+ ram_discard_range(block->idstr,
+ ((ram_addr_t)run_start) << TARGET_PAGE_BITS,
+ ((ram_addr_t)(run_end - run_start))
+ << TARGET_PAGE_BITS);
run_start = find_next_zero_bit(bitmap, range, run_end + 1);
}
}
@@ -3072,8 +3118,6 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
*/
int ram_discard_range(const char *rbname, uint64_t start, size_t length)
{
- int ret = -1;
-
trace_ram_discard_range(rbname, start, length);
RCU_READ_LOCK_GUARD();
@@ -3081,7 +3125,7 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length)
if (!rb) {
error_report("ram_discard_range: Failed to find block '%s'", rbname);
- goto err;
+ return -1;
}
/*
@@ -3093,10 +3137,7 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length)
length >> qemu_target_page_bits());
}
- ret = ram_block_discard_range(rb, start, length);
-
-err:
- return ret;
+ return ram_block_discard_range(rb, start, length);
}
/*
@@ -3451,6 +3492,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
rs->target_page_count += pages;
/*
+ * During postcopy, it is necessary to make sure one whole host
+ * page is sent in one chunk.
+ */
+ if (migrate_postcopy_ram()) {
+ flush_compressed_data(rs);
+ }
+
+ /*
* we want to check in the 1st loop, just in case it was the 1st
* time and we had to sync the dirty bitmap.
* qemu_clock_get_ns() is a bit expensive, so we only check each
@@ -4031,8 +4080,9 @@ static int ram_load_postcopy(QEMUFile *f)
MigrationIncomingState *mis = migration_incoming_get_current();
/* Temporary page that is later 'placed' */
void *postcopy_host_page = mis->postcopy_tmp_page;
- void *last_host = NULL;
+ void *this_host = NULL;
bool all_zero = false;
+ int target_pages = 0;
while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
ram_addr_t addr;
@@ -4041,6 +4091,7 @@ static int ram_load_postcopy(QEMUFile *f)
void *place_source = NULL;
RAMBlock *block = NULL;
uint8_t ch;
+ int len;
addr = qemu_get_be64(f);
@@ -4058,7 +4109,8 @@ static int ram_load_postcopy(QEMUFile *f)
trace_ram_load_postcopy_loop((uint64_t)addr, flags);
place_needed = false;
- if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
+ if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
+ RAM_SAVE_FLAG_COMPRESS_PAGE)) {
block = ram_block_from_stream(f, flags);
host = host_from_ram_block_offset(block, addr);
@@ -4067,6 +4119,7 @@ static int ram_load_postcopy(QEMUFile *f)
ret = -EINVAL;
break;
}
+ target_pages++;
matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
/*
* Postcopy requires that we place whole host pages atomically;
@@ -4076,38 +4129,47 @@ static int ram_load_postcopy(QEMUFile *f)
* that's moved into place later.
* The migration protocol uses, possibly smaller, target-pages
* however the source ensures it always sends all the components
- * of a host page in order.
+ * of a host page in one chunk.
*/
page_buffer = postcopy_host_page +
((uintptr_t)host & (block->page_size - 1));
/* If all TP are zero then we can optimise the place */
- if (!((uintptr_t)host & (block->page_size - 1))) {
+ if (target_pages == 1) {
all_zero = true;
+ this_host = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
+ block->page_size);
} else {
/* not the 1st TP within the HP */
- if (host != (last_host + TARGET_PAGE_SIZE)) {
- error_report("Non-sequential target page %p/%p",
- host, last_host);
+ if (QEMU_ALIGN_DOWN((uintptr_t)host, block->page_size) !=
+ (uintptr_t)this_host) {
+ error_report("Non-same host page %p/%p",
+ host, this_host);
ret = -EINVAL;
break;
}
}
-
/*
* If it's the last part of a host page then we place the host
* page
*/
- place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
- (block->page_size - 1)) == 0;
+ if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) {
+ place_needed = true;
+ target_pages = 0;
+ }
place_source = postcopy_host_page;
}
- last_host = host;
switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
case RAM_SAVE_FLAG_ZERO:
ch = qemu_get_byte(f);
- memset(page_buffer, ch, TARGET_PAGE_SIZE);
+ /*
+ * We can skip setting page_buffer when
+ * this is a zero page and (block->page_size == TARGET_PAGE_SIZE).
+ */
+ if (ch || !matches_target_page_size) {
+ memset(page_buffer, ch, TARGET_PAGE_SIZE);
+ }
if (ch) {
all_zero = false;
}
@@ -4131,6 +4193,17 @@ static int ram_load_postcopy(QEMUFile *f)
TARGET_PAGE_SIZE);
}
break;
+ case RAM_SAVE_FLAG_COMPRESS_PAGE:
+ all_zero = false;
+ len = qemu_get_be32(f);
+ if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
+ error_report("Invalid compressed data length: %d", len);
+ ret = -EINVAL;
+ break;
+ }
+ decompress_data_with_multi_threads(f, page_buffer, len);
+ break;
+
case RAM_SAVE_FLAG_EOS:
/* normal exit */
multifd_recv_sync_main();
@@ -4142,6 +4215,11 @@ static int ram_load_postcopy(QEMUFile *f)
break;
}
+ /* Got the whole host page, wait for decompress before placing. */
+ if (place_needed) {
+ ret |= wait_for_decompress_done();
+ }
+
/* Detect for any possible file errors */
if (!ret && qemu_file_get_error(f)) {
ret = qemu_file_get_error(f);
@@ -4149,7 +4227,8 @@ static int ram_load_postcopy(QEMUFile *f)
if (!ret && place_needed) {
/* This gets called at the last target page in the host page */
- void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
+ void *place_dest = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
+ block->page_size);
if (all_zero) {
ret = postcopy_place_page_zero(mis, place_dest,
@@ -4201,13 +4280,16 @@ static void colo_flush_ram_cache(void)
while (block) {
offset = migration_bitmap_find_dirty(ram_state, block, offset);
- if (offset << TARGET_PAGE_BITS >= block->used_length) {
+ if (((ram_addr_t)offset) << TARGET_PAGE_BITS
+ >= block->used_length) {
offset = 0;
block = QLIST_NEXT_RCU(block, next);
} else {
migration_bitmap_clear_dirty(ram_state, block, offset);
- dst_host = block->host + (offset << TARGET_PAGE_BITS);
- src_host = block->colo_cache + (offset << TARGET_PAGE_BITS);
+ dst_host = block->host
+ + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
+ src_host = block->colo_cache
+ + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
}
}
@@ -4227,7 +4309,7 @@ static void colo_flush_ram_cache(void)
*/
static int ram_load_precopy(QEMUFile *f)
{
- int flags = 0, ret = 0, invalid_flags = 0, len = 0;
+ int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;
/* ADVISE is earlier, it shows the source has the postcopy capability on */
bool postcopy_advised = postcopy_is_advised();
if (!migrate_use_compression()) {
@@ -4239,6 +4321,17 @@ static int ram_load_precopy(QEMUFile *f)
void *host = NULL;
uint8_t ch;
+ /*
+ * Yield periodically to let the main loop run, but an iteration of
+ * the main loop is expensive, so only do it every 32768 iterations.
+ */
+ if ((i & 32767) == 0 && qemu_in_coroutine()) {
+ aio_co_schedule(qemu_get_current_aio_context(),
+ qemu_coroutine_self());
+ qemu_coroutine_yield();
+ }
+ i++;
+
addr = qemu_get_be64(f);
flags = addr & ~TARGET_PAGE_MASK;
addr &= TARGET_PAGE_MASK;
@@ -4385,6 +4478,7 @@ static int ram_load_precopy(QEMUFile *f)
}
}
+ ret |= wait_for_decompress_done();
return ret;
}
@@ -4416,8 +4510,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
} else {
ret = ram_load_precopy(f);
}
-
- ret |= wait_for_decompress_done();
}
trace_ram_load_complete(ret, seq_iter);
diff --git a/migration/savevm.c b/migration/savevm.c
index 59efc1981d..adfdca26ac 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -233,7 +233,7 @@ typedef struct CompatEntry {
typedef struct SaveStateEntry {
QTAILQ_ENTRY(SaveStateEntry) entry;
char idstr[256];
- int instance_id;
+ uint32_t instance_id;
int alias_id;
int version_id;
/* version id read from the stream */
@@ -250,6 +250,7 @@ typedef struct SaveStateEntry {
typedef struct SaveState {
QTAILQ_HEAD(, SaveStateEntry) handlers;
+ SaveStateEntry *handler_pri_head[MIG_PRI_MAX + 1];
int global_section_id;
uint32_t len;
const char *name;
@@ -261,6 +262,7 @@ typedef struct SaveState {
static SaveState savevm_state = {
.handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
+ .handler_pri_head = { [MIG_PRI_DEFAULT ... MIG_PRI_MAX] = NULL },
.global_section_id = 0,
};
@@ -665,10 +667,10 @@ void dump_vmstate_json_to_file(FILE *out_file)
fclose(out_file);
}
-static int calculate_new_instance_id(const char *idstr)
+static uint32_t calculate_new_instance_id(const char *idstr)
{
SaveStateEntry *se;
- int instance_id = 0;
+ uint32_t instance_id = 0;
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (strcmp(idstr, se->idstr) == 0
@@ -676,6 +678,8 @@ static int calculate_new_instance_id(const char *idstr)
instance_id = se->instance_id + 1;
}
}
+ /* Make sure we never loop over without being noticed */
+ assert(instance_id != VMSTATE_INSTANCE_ID_ANY);
return instance_id;
}
@@ -709,20 +713,43 @@ static void savevm_state_handler_insert(SaveStateEntry *nse)
{
MigrationPriority priority = save_state_priority(nse);
SaveStateEntry *se;
+ int i;
assert(priority <= MIG_PRI_MAX);
- QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
- if (save_state_priority(se) < priority) {
+ for (i = priority - 1; i >= 0; i--) {
+ se = savevm_state.handler_pri_head[i];
+ if (se != NULL) {
+ assert(save_state_priority(se) < priority);
break;
}
}
- if (se) {
+ if (i >= 0) {
QTAILQ_INSERT_BEFORE(se, nse, entry);
} else {
QTAILQ_INSERT_TAIL(&savevm_state.handlers, nse, entry);
}
+
+ if (savevm_state.handler_pri_head[priority] == NULL) {
+ savevm_state.handler_pri_head[priority] = nse;
+ }
+}
+
+static void savevm_state_handler_remove(SaveStateEntry *se)
+{
+ SaveStateEntry *next;
+ MigrationPriority priority = save_state_priority(se);
+
+ if (se == savevm_state.handler_pri_head[priority]) {
+ next = QTAILQ_NEXT(se, entry);
+ if (next != NULL && save_state_priority(next) == priority) {
+ savevm_state.handler_pri_head[priority] = next;
+ } else {
+ savevm_state.handler_pri_head[priority] = NULL;
+ }
+ }
+ QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
}
/* TODO: Individual devices generally have very little idea about the rest
@@ -730,7 +757,7 @@ static void savevm_state_handler_insert(SaveStateEntry *nse)
Meanwhile pass -1 as instance_id if you do not already have a clearly
distinguishing id for all instances of your device class. */
int register_savevm_live(const char *idstr,
- int instance_id,
+ uint32_t instance_id,
int version_id,
const SaveVMHandlers *ops,
void *opaque)
@@ -750,7 +777,7 @@ int register_savevm_live(const char *idstr,
pstrcat(se->idstr, sizeof(se->idstr), idstr);
- if (instance_id == -1) {
+ if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
se->instance_id = calculate_new_instance_id(se->idstr);
} else {
se->instance_id = instance_id;
@@ -777,14 +804,14 @@ void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque)
QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
- QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
+ savevm_state_handler_remove(se);
g_free(se->compat);
g_free(se);
}
}
}
-int vmstate_register_with_alias_id(VMStateIf *obj, int instance_id,
+int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
const VMStateDescription *vmsd,
void *opaque, int alias_id,
int required_for_version,
@@ -817,14 +844,14 @@ int vmstate_register_with_alias_id(VMStateIf *obj, int instance_id,
se->compat = g_new0(CompatEntry, 1);
pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
- se->compat->instance_id = instance_id == -1 ?
+ se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ?
calculate_compat_instance_id(vmsd->name) : instance_id;
- instance_id = -1;
+ instance_id = VMSTATE_INSTANCE_ID_ANY;
}
}
pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);
- if (instance_id == -1) {
+ if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
se->instance_id = calculate_new_instance_id(se->idstr);
} else {
se->instance_id = instance_id;
@@ -841,7 +868,7 @@ void vmstate_unregister(VMStateIf *obj, const VMStateDescription *vmsd,
QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
if (se->vmsd == vmsd && se->opaque == opaque) {
- QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
+ savevm_state_handler_remove(se);
g_free(se->compat);
g_free(se);
}
@@ -1600,7 +1627,7 @@ int qemu_save_device_state(QEMUFile *f)
return qemu_file_get_error(f);
}
-static SaveStateEntry *find_se(const char *idstr, int instance_id)
+static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id)
{
SaveStateEntry *se;
@@ -2267,7 +2294,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
/* Find savevm section */
se = find_se(idstr, instance_id);
if (se == NULL) {
- error_report("Unknown savevm section or instance '%s' %d. "
+ error_report("Unknown savevm section or instance '%s' %"PRIu32". "
"Make sure that your current VM setup matches your "
"saved VM setup, including any hotplugged devices",
idstr, instance_id);
@@ -2291,7 +2318,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
ret = vmstate_load(f, se);
if (ret < 0) {
- error_report("error while loading state for instance 0x%x of"
+ error_report("error while loading state for instance 0x%"PRIx32" of"
" device '%s'", instance_id, idstr);
return ret;
}
diff --git a/migration/trace-events b/migration/trace-events
index 6dee7b5389..4ab0a503d2 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -76,6 +76,11 @@ get_gtree_end(const char *field_name, const char *key_vmsd_name, const char *val
put_gtree(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, uint32_t nnodes) "%s(%s/%s) nnodes=%d"
put_gtree_end(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, int ret) "%s(%s/%s) %d"
+get_qlist(const char *field_name, const char *vmsd_name, int version_id) "%s(%s v%d)"
+get_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)"
+put_qlist(const char *field_name, const char *vmsd_name, int version_id) "%s(%s v%d)"
+put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)"
+
# qemu-file.c
qemu_file_fclose(void) ""
@@ -138,12 +143,12 @@ migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi6
migration_completion_file_err(void) ""
migration_completion_postcopy_end(void) ""
migration_completion_postcopy_end_after_complete(void) ""
+migration_rate_limit_pre(int ms) "%d ms"
+migration_rate_limit_post(int urgent) "urgent: %d"
migration_return_path_end_before(void) ""
migration_return_path_end_after(int rp_error) "%d"
migration_thread_after_loop(void) ""
migration_thread_file_err(void) ""
-migration_thread_ratelimit_pre(int ms) "%d ms"
-migration_thread_ratelimit_post(int urgent) "urgent: %d"
migration_thread_setup_complete(void) ""
open_return_path_on_source(void) ""
open_return_path_on_source_continue(void) ""
diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
index 7236cf92bc..1eee36773a 100644
--- a/migration/vmstate-types.c
+++ b/migration/vmstate-types.c
@@ -843,3 +843,73 @@ const VMStateInfo vmstate_info_gtree = {
.get = get_gtree,
.put = put_gtree,
};
+
+static int put_qlist(QEMUFile *f, void *pv, size_t unused_size,
+ const VMStateField *field, QJSON *vmdesc)
+{
+ const VMStateDescription *vmsd = field->vmsd;
+ /* offset of the QLIST entry in a QLIST element */
+ size_t entry_offset = field->start;
+ void *elm;
+ int ret;
+
+ trace_put_qlist(field->name, vmsd->name, vmsd->version_id);
+ QLIST_RAW_FOREACH(elm, pv, entry_offset) {
+ qemu_put_byte(f, true);
+ ret = vmstate_save_state(f, vmsd, elm, vmdesc);
+ if (ret) {
+ error_report("%s: failed to save %s (%d)", field->name,
+ vmsd->name, ret);
+ return ret;
+ }
+ }
+ qemu_put_byte(f, false);
+ trace_put_qlist_end(field->name, vmsd->name);
+
+ return 0;
+}
+
+static int get_qlist(QEMUFile *f, void *pv, size_t unused_size,
+ const VMStateField *field)
+{
+ int ret = 0;
+ const VMStateDescription *vmsd = field->vmsd;
+ /* size of a QLIST element */
+ size_t size = field->size;
+ /* offset of the QLIST entry in a QLIST element */
+ size_t entry_offset = field->start;
+ int version_id = field->version_id;
+ void *elm;
+
+ trace_get_qlist(field->name, vmsd->name, vmsd->version_id);
+ if (version_id > vmsd->version_id) {
+ error_report("%s %s", vmsd->name, "too new");
+ return -EINVAL;
+ }
+ if (version_id < vmsd->minimum_version_id) {
+ error_report("%s %s", vmsd->name, "too old");
+ return -EINVAL;
+ }
+
+ while (qemu_get_byte(f)) {
+ elm = g_malloc(size);
+ ret = vmstate_load_state(f, vmsd, elm, version_id);
+ if (ret) {
+ error_report("%s: failed to load %s (%d)", field->name,
+ vmsd->name, ret);
+ g_free(elm);
+ return ret;
+ }
+ QLIST_RAW_INSERT_HEAD(pv, elm, entry_offset);
+ }
+ QLIST_RAW_REVERSE(pv, elm, entry_offset);
+ trace_get_qlist_end(field->name, vmsd->name);
+
+ return ret;
+}
+
+const VMStateInfo vmstate_info_qlist = {
+ .name = "qlist",
+ .get = get_qlist,
+ .put = put_qlist,
+};
diff --git a/stubs/vmstate.c b/stubs/vmstate.c
index 6951d9fdc5..cc4fe41dfc 100644
--- a/stubs/vmstate.c
+++ b/stubs/vmstate.c
@@ -4,7 +4,7 @@
const VMStateDescription vmstate_dummy = {};
int vmstate_register_with_alias_id(VMStateIf *obj,
- int instance_id,
+ uint32_t instance_id,
const VMStateDescription *vmsd,
void *base, int alias_id,
int required_for_version,
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 341d190922..26e2e77289 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -356,6 +356,43 @@ static void migrate_set_parameter_int(QTestState *who, const char *parameter,
migrate_check_parameter_int(who, parameter, value);
}
+static char *migrate_get_parameter_str(QTestState *who,
+ const char *parameter)
+{
+ QDict *rsp;
+ char *result;
+
+ rsp = wait_command(who, "{ 'execute': 'query-migrate-parameters' }");
+ result = g_strdup(qdict_get_str(rsp, parameter));
+ qobject_unref(rsp);
+ return result;
+}
+
+static void migrate_check_parameter_str(QTestState *who, const char *parameter,
+ const char *value)
+{
+ char *result;
+
+ result = migrate_get_parameter_str(who, parameter);
+ g_assert_cmpstr(result, ==, value);
+ g_free(result);
+}
+
+__attribute__((unused))
+static void migrate_set_parameter_str(QTestState *who, const char *parameter,
+ const char *value)
+{
+ QDict *rsp;
+
+ rsp = qtest_qmp(who,
+ "{ 'execute': 'migrate-set-parameters',"
+ "'arguments': { %s: %s } }",
+ parameter, value);
+ g_assert(qdict_haskey(rsp, "return"));
+ qobject_unref(rsp);
+ migrate_check_parameter_str(who, parameter, value);
+}
+
static void migrate_pause(QTestState *who)
{
QDict *rsp;
@@ -1202,6 +1239,61 @@ static void test_migrate_auto_converge(void)
test_migrate_end(from, to, true);
}
+/* Migrate over TCP with the multifd capability enabled and 16 channels. */
+static void test_multifd_tcp(void)
+{
+ MigrateStart *args = migrate_start_new();
+ QTestState *from, *to;
+ QDict *rsp;
+ char *uri;
+
+ if (test_migrate_start(&from, &to, "defer", args)) {
+ return;
+ }
+
+ /*
+ * We want the test to complete quickly, but precopy must not
+ * complete on its own even on a slow machine, so also set the
+ * downtime: a 1 ms limit should make it not converge.
+ */
+ migrate_set_parameter_int(from, "downtime-limit", 1);
+ /* 1GB/s */
+ migrate_set_parameter_int(from, "max-bandwidth", 1000000000);
+
+ migrate_set_parameter_int(from, "multifd-channels", 16);
+ migrate_set_parameter_int(to, "multifd-channels", 16);
+
+ migrate_set_capability(from, "multifd", "true");
+ migrate_set_capability(to, "multifd", "true");
+
+ /* Start incoming migration from the 1st socket */
+ rsp = wait_command(to, "{ 'execute': 'migrate-incoming',"
+ " 'arguments': { 'uri': 'tcp:127.0.0.1:0' }}");
+ qobject_unref(rsp);
+
+ /* Wait for the first serial output from the source */
+ wait_for_serial("src_serial");
+
+ uri = migrate_get_socket_address(to, "socket-address");
+
+ migrate_qmp(from, uri, "{}");
+
+ wait_for_migration_pass(from);
+
+ /* With a 300 ms downtime limit it should converge */
+ migrate_set_parameter_int(from, "downtime-limit", 300);
+
+ if (!got_stop) {
+ qtest_qmp_eventwait(from, "STOP");
+ }
+ qtest_qmp_eventwait(to, "RESUME");
+
+ wait_for_serial("dest_serial");
+ wait_for_migration_complete(from);
+ test_migrate_end(from, to, true);
+ g_free(uri); /* string is presumably GLib-allocated - TODO confirm */
+}
+
int main(int argc, char **argv)
{
char template[] = "/tmp/migration-test-XXXXXX";
@@ -1266,6 +1358,7 @@ int main(int argc, char **argv)
test_validate_uuid_dst_not_set);
qtest_add_func("/migration/auto_converge", test_migrate_auto_converge);
+ qtest_add_func("/migration/multifd/tcp", test_multifd_tcp);
ret = g_test_run();
diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c
index 8f184f3556..cea363dd69 100644
--- a/tests/test-vmstate.c
+++ b/tests/test-vmstate.c
@@ -926,6 +926,28 @@ static const VMStateDescription vmstate_domain = {
}
};
+/* test QLIST Migration */
+
+typedef struct TestQListElement {
+ uint32_t id;
+ QLIST_ENTRY(TestQListElement) next;
+} TestQListElement;
+
+typedef struct TestQListContainer {
+ uint32_t id;
+ QLIST_HEAD(, TestQListElement) list;
+} TestQListContainer;
+
+static const VMStateDescription vmstate_qlist_element = {
+ .name = "test/queue list",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(id, TestQListElement),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static const VMStateDescription vmstate_iommu = {
.name = "iommu",
.version_id = 1,
@@ -939,6 +961,18 @@ static const VMStateDescription vmstate_iommu = {
}
};
+static const VMStateDescription vmstate_container = {
+ .name = "test/container/qlist",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(id, TestQListContainer),
+ VMSTATE_QLIST_V(list, TestQListContainer, 1, vmstate_qlist_element,
+ TestQListElement, next),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
uint8_t first_domain_dump[] = {
/* id */
0x00, 0x0, 0x0, 0x6,
@@ -1229,6 +1263,140 @@ static void test_gtree_load_iommu(void)
qemu_fclose(fload);
}
+static uint8_t qlist_dump[] = {
+ 0x00, 0x00, 0x00, 0x01, /* container id */
+ 0x1, /* start of a */
+ 0x00, 0x00, 0x00, 0x0a,
+ 0x1, /* start of b */
+ 0x00, 0x00, 0x0b, 0x00,
+ 0x1, /* start of c */
+ 0x00, 0x0c, 0x00, 0x00,
+ 0x1, /* start of d */
+ 0x0d, 0x00, 0x00, 0x00,
+ 0x0, /* end of list */
+ QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
+};
+
+static TestQListContainer *alloc_container(void)
+{
+ TestQListElement *a = g_malloc(sizeof(TestQListElement));
+ TestQListElement *b = g_malloc(sizeof(TestQListElement));
+ TestQListElement *c = g_malloc(sizeof(TestQListElement));
+ TestQListElement *d = g_malloc(sizeof(TestQListElement));
+ TestQListContainer *container = g_malloc(sizeof(TestQListContainer));
+
+ a->id = 0x0a;
+ b->id = 0x0b00;
+ c->id = 0xc0000;
+ d->id = 0xd000000;
+ container->id = 1;
+
+ QLIST_INIT(&container->list);
+ QLIST_INSERT_HEAD(&container->list, d, next);
+ QLIST_INSERT_HEAD(&container->list, c, next);
+ QLIST_INSERT_HEAD(&container->list, b, next);
+ QLIST_INSERT_HEAD(&container->list, a, next);
+ return container;
+}
+
+static void free_container(TestQListContainer *container)
+{
+ TestQListElement *iter, *tmp;
+
+ QLIST_FOREACH_SAFE(iter, &container->list, next, tmp) {
+ QLIST_REMOVE(iter, next);
+ g_free(iter);
+ }
+ g_free(container);
+}
+
+static void compare_containers(TestQListContainer *c1, TestQListContainer *c2)
+{
+ TestQListElement *first_item_c1, *first_item_c2;
+
+ while (!QLIST_EMPTY(&c1->list)) {
+ first_item_c1 = QLIST_FIRST(&c1->list);
+ first_item_c2 = QLIST_FIRST(&c2->list);
+ assert(first_item_c2);
+ assert(first_item_c1->id == first_item_c2->id);
+ QLIST_REMOVE(first_item_c1, next);
+ QLIST_REMOVE(first_item_c2, next);
+ g_free(first_item_c1);
+ g_free(first_item_c2);
+ }
+ assert(QLIST_EMPTY(&c2->list));
+}
+
+/*
+ * Check the prev & next fields are correct by doing list
+ * manipulations on the container. We will do that for both
+ * the source and the destination containers
+ */
+static void manipulate_container(TestQListContainer *c)
+{
+ TestQListElement *prev = NULL, *iter = QLIST_FIRST(&c->list);
+ TestQListElement *elem;
+
+ elem = g_malloc(sizeof(TestQListElement));
+ elem->id = 0x12;
+ QLIST_INSERT_AFTER(iter, elem, next);
+
+ elem = g_malloc(sizeof(TestQListElement));
+ elem->id = 0x13;
+ QLIST_INSERT_HEAD(&c->list, elem, next);
+
+ while (iter) {
+ prev = iter;
+ iter = QLIST_NEXT(iter, next);
+ }
+
+ elem = g_malloc(sizeof(TestQListElement));
+ elem->id = 0x14;
+ QLIST_INSERT_BEFORE(prev, elem, next);
+
+ elem = g_malloc(sizeof(TestQListElement));
+ elem->id = 0x15;
+ QLIST_INSERT_AFTER(prev, elem, next);
+
+ QLIST_REMOVE(prev, next);
+ g_free(prev);
+}
+
+static void test_save_qlist(void)
+{
+ TestQListContainer *container = alloc_container();
+
+ save_vmstate(&vmstate_container, container);
+ compare_vmstate(qlist_dump, sizeof(qlist_dump));
+ free_container(container);
+}
+
+static void test_load_qlist(void)
+{
+ QEMUFile *fsave, *fload;
+ TestQListContainer *orig_container = alloc_container();
+ TestQListContainer *dest_container = g_malloc0(sizeof(TestQListContainer));
+ char eof;
+
+ QLIST_INIT(&dest_container->list);
+
+ fsave = open_test_file(true);
+ qemu_put_buffer(fsave, qlist_dump, sizeof(qlist_dump));
+ g_assert(!qemu_file_get_error(fsave));
+ qemu_fclose(fsave);
+
+ fload = open_test_file(false);
+ vmstate_load_state(fload, &vmstate_container, dest_container, 1);
+ eof = qemu_get_byte(fload);
+ g_assert(!qemu_file_get_error(fload));
+ g_assert_cmpint(eof, ==, QEMU_VM_EOF);
+ manipulate_container(orig_container);
+ manipulate_container(dest_container);
+ compare_containers(orig_container, dest_container);
+ free_container(orig_container);
+ free_container(dest_container);
+}
+
typedef struct TmpTestStruct {
TestStruct *parent;
int64_t diff;
@@ -1353,6 +1521,8 @@ int main(int argc, char **argv)
g_test_add_func("/vmstate/gtree/load/loaddomain", test_gtree_load_domain);
g_test_add_func("/vmstate/gtree/save/saveiommu", test_gtree_save_iommu);
g_test_add_func("/vmstate/gtree/load/loadiommu", test_gtree_load_iommu);
+ g_test_add_func("/vmstate/qlist/save/saveqlist", test_save_qlist);
+ g_test_add_func("/vmstate/qlist/load/loadqlist", test_load_qlist);
g_test_add_func("/vmstate/tmp_struct", test_tmp_struct);
g_test_run();
diff --git a/vl.c b/vl.c
index 751401214c..71d3e7eefb 100644
--- a/vl.c
+++ b/vl.c
@@ -1604,9 +1604,6 @@ static bool main_loop_should_exit(void)
RunState r;
ShutdownCause request;
- if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
- return false;
- }
if (preconfig_exit_requested) {
if (runstate_check(RUN_STATE_PRECONFIG)) {
runstate_set(RUN_STATE_PRELAUNCH);
@@ -1635,8 +1632,13 @@ static bool main_loop_should_exit(void)
pause_all_vcpus();
qemu_system_reset(request);
resume_all_vcpus();
+ /*
+ * runstate can change in pause_all_vcpus()
+ * as iothread mutex is unlocked
+ */
if (!runstate_check(RUN_STATE_RUNNING) &&
- !runstate_check(RUN_STATE_INMIGRATE)) {
+ !runstate_check(RUN_STATE_INMIGRATE) &&
+ !runstate_check(RUN_STATE_FINISH_MIGRATE)) {
runstate_set(RUN_STATE_PRELAUNCH);
}
}