aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Henderson <richard.henderson@linaro.org>2024-06-21 11:19:25 -0700
committerRichard Henderson <richard.henderson@linaro.org>2024-06-21 11:19:25 -0700
commitffeddb979400b1580ad28acbee09b6f971c3912d (patch)
treeb6e6752ff6c864edd312b9f6c15b05886861a1d0
parent02d9c38236cf8c9826e5c5be61780c4444cb4ae0 (diff)
parent04b09de16d78cf2d163ca65d7c6d161bf2baceb6 (diff)
Merge tag 'migration-20240621-pull-request' of https://gitlab.com/farosas/qemu into staging
Migration pull request - Fabiano's fix for fdset + file migration truncating the migration file - Fabiano's fdset + direct-io support for mapped-ram - Peter's various cleanups (multifd sync, thread names, migration states, tests) - Peter's new migration state postcopy-recover-setup - Philippe's unused vmstate macro cleanup # -----BEGIN PGP SIGNATURE----- # # iQJEBAABCAAuFiEEqhtIsKIjJqWkw2TPx5jcdBvsMZ0FAmZ1vIsQHGZhcm9zYXNA # c3VzZS5kZQAKCRDHmNx0G+wxnVZTEACdFIsQ/PJw2C9eeLNor5B5MNSEqUjxX0KN # 6s/uTkJ/dcv+2PI92SzRCZ1dpR5e9AyjTFYbLc9tPRBIROEhlUaoc84iyEy0jCFU # eJ65/RQbH5QHRpOZwbN5RmGwnapfOWHGTn3bpdrmSQTOAy8R2TPGY4SVYR+gamTn # bAv1cAsrOOBUfCi8aqvSlmvuliOW0lzJdF4XHa3mAaigLoF14JdwUZdyIMP1mLDp # /fllbHCKCvJ1vprE9hQmptBR9PzveJZOZamIVt96djJr5+C869+9PMCn3a5vxqNW # b+/LhOZjac37Ecg5kgbq+cO1E4EXKC3zWOmDTw8kHUwp9oYNi1upwLdpHbAAZaQD # /JmHKsExx9QuV8mrVyGBXMI92E6RrT54b1Bjcuo63gAP8p9JRRxGT22U3LghNbTm # 1XcGPR3rswjT1yTgE6qAqAIMR+7X5MrJVWop9ub/lF5DQ1VYIwmlKSNdwDHFDhRq # 0F1k2+EksNpcZ0BH2+3iFml7qKHLVupLQKTWcLdrlnQnTfSG3+yW7eyA5Mte79Qp # nJPcHt8qBqUVQ9Uf/4490TM4Lrp+T+m16exIi0tISLaDXSVkFJnlowipSm+tQ7U3 # Sm68JWdWWEsXZVaMqJeBE8nA/hCoQDpo4hVdwftStI+NayXbRX/EgvPqrNAvwh+c # i4AdHdn6hQ== # =ZX0p # -----END PGP SIGNATURE----- # gpg: Signature made Fri 21 Jun 2024 10:46:51 AM PDT # gpg: using RSA key AA1B48B0A22326A5A4C364CFC798DC741BEC319D # gpg: issuer "farosas@suse.de" # gpg: Good signature from "Fabiano Rosas <farosas@suse.de>" [unknown] # gpg: aka "Fabiano Almeida Rosas <fabiano.rosas@suse.com>" [unknown] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. 
# Primary key fingerprint: AA1B 48B0 A223 26A5 A4C3 64CF C798 DC74 1BEC 319D * tag 'migration-20240621-pull-request' of https://gitlab.com/farosas/qemu: (28 commits) migration: Remove unused VMSTATE_ARRAY_TEST() macro tests/migration-tests: Cover postcopy failure on reconnect tests/migration-tests: Verify postcopy-recover-setup status tests/migration-tests: migration_event_wait() tests/migration-tests: Always enable migration events tests/migration-tests: Drop most WIN32 ifdefs for postcopy failure tests migration/docs: Update postcopy recover session for SETUP phase migration/postcopy: Add postcopy-recover-setup phase migration: Cleanup incoming migration setup state change migration: Use MigrationStatus instead of int migration: Rename thread debug names migration/multifd: Avoid the final FLUSH in complete() tests/qtest/migration: Add a test for mapped-ram with passing of fds migration: Add documentation for fdset with multifd + file monitor: fdset: Match against O_DIRECT tests/qtest/migration: Add tests for file migration with direct-io migration/multifd: Add direct-io support migration: Add direct-io parameter io: Stop using qemu_open_old in channel-file monitor: Report errors from monitor_fdset_dup_fd_add ... Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r--docs/devel/migration/main.rst24
-rw-r--r--docs/devel/migration/mapped-ram.rst6
-rw-r--r--docs/devel/migration/postcopy.rst31
-rw-r--r--include/migration/vmstate.h10
-rw-r--r--include/monitor/monitor.h3
-rw-r--r--include/qemu/osdep.h2
-rw-r--r--io/channel-file.c8
-rw-r--r--migration/colo.c2
-rw-r--r--migration/file.c45
-rw-r--r--migration/file.h1
-rw-r--r--migration/migration-hmp-cmds.c11
-rw-r--r--migration/migration.c121
-rw-r--r--migration/migration.h9
-rw-r--r--migration/multifd.c6
-rw-r--r--migration/options.c35
-rw-r--r--migration/options.h1
-rw-r--r--migration/postcopy-ram.c10
-rw-r--r--migration/postcopy-ram.h3
-rw-r--r--migration/ram.c4
-rw-r--r--migration/savevm.c6
-rw-r--r--monitor/fds.c96
-rw-r--r--monitor/hmp.c2
-rw-r--r--monitor/monitor-internal.h1
-rw-r--r--monitor/monitor.c1
-rw-r--r--monitor/qmp.c2
-rw-r--r--qapi/migration.json25
-rw-r--r--stubs/fdset.c7
-rw-r--r--tests/qtest/libqtest.c15
-rw-r--r--tests/qtest/libqtest.h2
-rw-r--r--tests/qtest/migration-helpers.c76
-rw-r--r--tests/qtest/migration-helpers.h10
-rw-r--r--tests/qtest/migration-test.c468
-rw-r--r--util/osdep.c34
33 files changed, 838 insertions, 239 deletions
diff --git a/docs/devel/migration/main.rst b/docs/devel/migration/main.rst
index 495cdcb112..784c899dca 100644
--- a/docs/devel/migration/main.rst
+++ b/docs/devel/migration/main.rst
@@ -47,11 +47,25 @@ over any transport.
QEMU interference. Note that QEMU does not flush cached file
data/metadata at the end of migration.
-In addition, support is included for migration using RDMA, which
-transports the page data using ``RDMA``, where the hardware takes care of
-transporting the pages, and the load on the CPU is much lower. While the
-internals of RDMA migration are a bit different, this isn't really visible
-outside the RAM migration code.
+ The file migration also supports using a file that has already been
+ opened. A set of file descriptors is passed to QEMU via an "fdset"
+ (see add-fd QMP command documentation). This method allows a
+ management application to have control over the migration file
+ opening operation. There are, however, strict requirements for this
+ interface if the multifd capability is enabled:
+
+ - the fdset must contain two file descriptors that are not
+ duplicates between themselves;
+ - if the direct-io capability is to be used, exactly one of the
+ file descriptors must have the O_DIRECT flag set;
+ - the file must be opened with WRONLY on the migration source side
+ and RDONLY on the migration destination side.
+
+- rdma migration: support is included for migration using RDMA, which
+ transports the page data using ``RDMA``, where the hardware takes
+ care of transporting the pages, and the load on the CPU is much
+ lower. While the internals of RDMA migration are a bit different,
+ this isn't really visible outside the RAM migration code.
All these migration protocols use the same infrastructure to
save/restore state devices. This infrastructure is shared with the
diff --git a/docs/devel/migration/mapped-ram.rst b/docs/devel/migration/mapped-ram.rst
index fa4cefd9fc..d352b546e9 100644
--- a/docs/devel/migration/mapped-ram.rst
+++ b/docs/devel/migration/mapped-ram.rst
@@ -16,7 +16,7 @@ location in the file, rather than constantly being added to a
sequential stream. Having the pages at fixed offsets also allows the
usage of O_DIRECT for save/restore of the migration stream as the
pages are ensured to be written respecting O_DIRECT alignment
-restrictions (direct-io support not yet implemented).
+restrictions.
Usage
-----
@@ -35,6 +35,10 @@ Use a ``file:`` URL for migration:
Mapped-ram migration is best done non-live, i.e. by stopping the VM on
the source side before migrating.
+For best performance enable the ``direct-io`` parameter as well:
+
+ ``migrate_set_parameter direct-io on``
+
Use-cases
---------
diff --git a/docs/devel/migration/postcopy.rst b/docs/devel/migration/postcopy.rst
index 6c51e96d79..82e7a848c6 100644
--- a/docs/devel/migration/postcopy.rst
+++ b/docs/devel/migration/postcopy.rst
@@ -99,17 +99,6 @@ ADVISE->DISCARD->LISTEN->RUNNING->END
(although it can't do the cleanup it would do as it
finishes a normal migration).
- - Paused
-
- Postcopy can run into a paused state (normally on both sides when
- happens), where all threads will be temporarily halted mostly due to
- network errors. When reaching paused state, migration will make sure
- the qemu binary on both sides maintain the data without corrupting
- the VM. To continue the migration, the admin needs to fix the
- migration channel using the QMP command 'migrate-recover' on the
- destination node, then resume the migration using QMP command 'migrate'
- again on source node, with resume=true flag set.
-
- End
The listen thread can now quit, and perform the cleanup of migration
@@ -221,7 +210,8 @@ paused postcopy migration.
The recovery phase normally contains a few steps:
- - When network issue occurs, both QEMU will go into PAUSED state
+ - When a network issue occurs, both QEMUs will go into the **POSTCOPY_PAUSED**
+ migration state.
- When the network is recovered (or a new network is provided), the admin
can setup the new channel for migration using QMP command
@@ -229,9 +219,20 @@ The recovery phase normally contains a few steps:
- On source host, the admin can continue the interrupted postcopy
migration using QMP command 'migrate' with resume=true flag set.
-
- - After the connection is re-established, QEMU will continue the postcopy
- migration on both sides.
+ The source QEMU will go into the **POSTCOPY_RECOVER_SETUP** state while
+ trying to re-establish the channels.
+
+ - When both QEMUs successfully reconnect using a new or fixed-up
+ channel, they will go into the **POSTCOPY_RECOVER** state. A handshake
+ procedure is then needed to properly synchronize the VM states between
+ the two QEMUs before the postcopy migration can continue. For example,
+ pages may have been sent during the window when the network was
+ interrupted; the handshake guarantees that pages lost in flight
+ will be resent.
+
+ - After a proper handshake synchronization, QEMU will continue the
+ postcopy migration on both sides and go back to **POSTCOPY_ACTIVE**
+ state. Postcopy migration will continue.
During a paused postcopy migration, the VM can logically still continue
running, and it will not be impacted from any page access to pages that
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 294d2d8486..f313f2f408 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -388,16 +388,6 @@ extern const VMStateInfo vmstate_info_qlist;
.offset = vmstate_offset_varray(_state, _field, _type), \
}
-#define VMSTATE_ARRAY_TEST(_field, _state, _num, _test, _info, _type) {\
- .name = (stringify(_field)), \
- .field_exists = (_test), \
- .num = (_num), \
- .info = &(_info), \
- .size = sizeof(_type), \
- .flags = VMS_ARRAY, \
- .offset = vmstate_offset_array(_state, _field, _type, _num),\
-}
-
#define VMSTATE_SUB_ARRAY(_field, _state, _start, _num, _version, _info, _type) { \
.name = (stringify(_field)), \
.version_id = (_version), \
diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index 965f5d5450..c3740ec616 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -51,9 +51,8 @@ int monitor_read_password(MonitorHMP *mon, ReadLineFunc *readline_func,
AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id,
const char *opaque, Error **errp);
-int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags);
+int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags, Error **errp);
void monitor_fdset_dup_fd_remove(int dup_fd);
-int64_t monitor_fdset_dup_fd_find(int dup_fd);
void monitor_register_hmp(const char *name, bool info,
void (*cmd)(Monitor *mon, const QDict *qdict));
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index f61edcfdc2..191916f38e 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -612,6 +612,8 @@ int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive);
bool qemu_has_ofd_lock(void);
#endif
+bool qemu_has_direct_io(void);
+
#if defined(__HAIKU__) && defined(__i386__)
#define FMT_pid "%ld"
#elif defined(WIN64)
diff --git a/io/channel-file.c b/io/channel-file.c
index 6436cfb6ae..2ea8d08360 100644
--- a/io/channel-file.c
+++ b/io/channel-file.c
@@ -68,11 +68,13 @@ qio_channel_file_new_path(const char *path,
ioc = QIO_CHANNEL_FILE(object_new(TYPE_QIO_CHANNEL_FILE));
- ioc->fd = qemu_open_old(path, flags, mode);
+ if (flags & O_CREAT) {
+ ioc->fd = qemu_create(path, flags & ~O_CREAT, mode, errp);
+ } else {
+ ioc->fd = qemu_open(path, flags, errp);
+ }
if (ioc->fd < 0) {
object_unref(OBJECT(ioc));
- error_setg_errno(errp, errno,
- "Unable to open %s", path);
return NULL;
}
diff --git a/migration/colo.c b/migration/colo.c
index f96c2ee069..6449490221 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -935,7 +935,7 @@ void coroutine_fn colo_incoming_co(void)
assert(bql_locked());
assert(migration_incoming_colo_enabled());
- qemu_thread_create(&th, "COLO incoming", colo_process_incoming_thread,
+ qemu_thread_create(&th, "mig/dst/colo", colo_process_incoming_thread,
mis, QEMU_THREAD_JOINABLE);
mis->colo_incoming_co = qemu_coroutine_self();
diff --git a/migration/file.c b/migration/file.c
index ab18ba505a..db870f2cf0 100644
--- a/migration/file.c
+++ b/migration/file.c
@@ -50,12 +50,31 @@ void file_cleanup_outgoing_migration(void)
outgoing_args.fname = NULL;
}
+static void file_enable_direct_io(int *flags)
+{
+#ifdef O_DIRECT
+ *flags |= O_DIRECT;
+#else
+ /* it should have been rejected when setting the parameter */
+ g_assert_not_reached();
+#endif
+}
+
bool file_send_channel_create(gpointer opaque, Error **errp)
{
QIOChannelFile *ioc;
int flags = O_WRONLY;
bool ret = true;
+ if (migrate_direct_io()) {
+ /*
+ * Enable O_DIRECT for the secondary channels. These are used
+ * for sending ram pages and writes should be guaranteed to be
+ * aligned to at least page size.
+ */
+ file_enable_direct_io(&flags);
+ }
+
ioc = qio_channel_file_new_path(outgoing_args.fname, flags, 0, errp);
if (!ioc) {
ret = false;
@@ -84,16 +103,24 @@ void file_start_outgoing_migration(MigrationState *s,
trace_migration_file_outgoing(filename);
- fioc = qio_channel_file_new_path(filename, O_CREAT | O_WRONLY | O_TRUNC,
- 0600, errp);
+ fioc = qio_channel_file_new_path(filename, O_CREAT | O_WRONLY, 0600, errp);
if (!fioc) {
return;
}
+ if (ftruncate(fioc->fd, offset)) {
+ error_setg_errno(errp, errno,
+ "failed to truncate migration file to offset %" PRIx64,
+ offset);
+ object_unref(OBJECT(fioc));
+ return;
+ }
+
outgoing_args.fname = g_strdup(filename);
ioc = QIO_CHANNEL(fioc);
if (offset && qio_channel_io_seek(ioc, offset, SEEK_SET, errp) < 0) {
+ object_unref(OBJECT(fioc));
return;
}
qio_channel_set_name(ioc, "migration-file-outgoing");
@@ -109,21 +136,25 @@ static gboolean file_accept_incoming_migration(QIOChannel *ioc,
return G_SOURCE_REMOVE;
}
-void file_create_incoming_channels(QIOChannel *ioc, Error **errp)
+static void file_create_incoming_channels(QIOChannel *ioc, char *filename,
+ Error **errp)
{
- int i, fd, channels = 1;
+ int i, channels = 1;
g_autofree QIOChannel **iocs = NULL;
+ int flags = O_RDONLY;
if (migrate_multifd()) {
channels += migrate_multifd_channels();
+ if (migrate_direct_io()) {
+ file_enable_direct_io(&flags);
+ }
}
iocs = g_new0(QIOChannel *, channels);
- fd = QIO_CHANNEL_FILE(ioc)->fd;
iocs[0] = ioc;
for (i = 1; i < channels; i++) {
- QIOChannelFile *fioc = qio_channel_file_new_dupfd(fd, errp);
+ QIOChannelFile *fioc = qio_channel_file_new_path(filename, flags, 0, errp);
if (!fioc) {
while (i) {
@@ -163,7 +194,7 @@ void file_start_incoming_migration(FileMigrationArgs *file_args, Error **errp)
return;
}
- file_create_incoming_channels(QIO_CHANNEL(fioc), errp);
+ file_create_incoming_channels(QIO_CHANNEL(fioc), filename, errp);
}
int file_write_ramblock_iov(QIOChannel *ioc, const struct iovec *iov,
diff --git a/migration/file.h b/migration/file.h
index 7699c04677..9f71e87f74 100644
--- a/migration/file.h
+++ b/migration/file.h
@@ -20,7 +20,6 @@ void file_start_outgoing_migration(MigrationState *s,
int file_parse_offset(char *filespec, uint64_t *offsetp, Error **errp);
void file_cleanup_outgoing_migration(void);
bool file_send_channel_create(gpointer opaque, Error **errp);
-void file_create_incoming_channels(QIOChannel *ioc, Error **errp);
int file_write_ramblock_iov(QIOChannel *ioc, const struct iovec *iov,
int niov, RAMBlock *block, Error **errp);
int multifd_file_recv_data(MultiFDRecvParams *p, Error **errp);
diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 9f0e8029e0..7d608d26e1 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -351,6 +351,13 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
monitor_printf(mon, "%s: %s\n",
MigrationParameter_str(MIGRATION_PARAMETER_MODE),
qapi_enum_lookup(&MigMode_lookup, params->mode));
+
+ if (params->has_direct_io) {
+ monitor_printf(mon, "%s: %s\n",
+ MigrationParameter_str(
+ MIGRATION_PARAMETER_DIRECT_IO),
+ params->direct_io ? "on" : "off");
+ }
}
qapi_free_MigrationParameters(params);
@@ -624,6 +631,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
p->has_mode = true;
visit_type_MigMode(v, param, &p->mode, &err);
break;
+ case MIGRATION_PARAMETER_DIRECT_IO:
+ p->has_direct_io = true;
+ visit_type_bool(v, param, &p->direct_io, &err);
+ break;
default:
assert(0);
}
diff --git a/migration/migration.c b/migration/migration.c
index e1b269624c..3dea06d577 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -155,6 +155,16 @@ static bool migration_needs_seekable_channel(void)
return migrate_mapped_ram();
}
+static bool migration_needs_extra_fds(void)
+{
+ /*
+ * When doing direct-io, multifd requires two different,
+ * non-duplicated file descriptors so we can use one of them for
+ * unaligned IO.
+ */
+ return migrate_multifd() && migrate_direct_io();
+}
+
static bool transport_supports_seeking(MigrationAddress *addr)
{
if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
@@ -164,6 +174,12 @@ static bool transport_supports_seeking(MigrationAddress *addr)
return false;
}
+static bool transport_supports_extra_fds(MigrationAddress *addr)
+{
+ /* file: works because QEMU can open it multiple times */
+ return addr->transport == MIGRATION_ADDRESS_TYPE_FILE;
+}
+
static bool
migration_channels_and_transport_compatible(MigrationAddress *addr,
Error **errp)
@@ -180,6 +196,13 @@ migration_channels_and_transport_compatible(MigrationAddress *addr,
return false;
}
+ if (migration_needs_extra_fds() &&
+ !transport_supports_extra_fds(addr)) {
+ error_setg(errp,
+ "Migration requires a transport that allows for extra fds (e.g. file)");
+ return false;
+ }
+
return true;
}
@@ -390,7 +413,7 @@ void migration_incoming_state_destroy(void)
yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}
-static void migrate_generate_event(int new_state)
+static void migrate_generate_event(MigrationStatus new_state)
{
if (migrate_events()) {
qapi_event_send_migration(new_state);
@@ -595,6 +618,29 @@ bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
return true;
}
+static bool
+migration_incoming_state_setup(MigrationIncomingState *mis, Error **errp)
+{
+ MigrationStatus current = mis->state;
+
+ if (current == MIGRATION_STATUS_POSTCOPY_PAUSED) {
+ /*
+ * Incoming postcopy migration will stay in PAUSED state even if
+ * reconnection happened.
+ */
+ return true;
+ }
+
+ if (current != MIGRATION_STATUS_NONE) {
+ error_setg(errp, "Illegal migration incoming state: %s",
+ MigrationStatus_str(current));
+ return false;
+ }
+
+ migrate_set_state(&mis->state, current, MIGRATION_STATUS_SETUP);
+ return true;
+}
+
static void qemu_start_incoming_migration(const char *uri, bool has_channels,
MigrationChannelList *channels,
Error **errp)
@@ -633,8 +679,9 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,
return;
}
- migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
- MIGRATION_STATUS_SETUP);
+ if (!migration_incoming_state_setup(mis, errp)) {
+ return;
+ }
if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
SocketAddress *saddr = &addr->u.socket;
@@ -1070,6 +1117,7 @@ bool migration_is_setup_or_active(void)
case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_PAUSED:
+ case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
case MIGRATION_STATUS_POSTCOPY_RECOVER:
case MIGRATION_STATUS_SETUP:
case MIGRATION_STATUS_PRE_SWITCHOVER:
@@ -1092,6 +1140,7 @@ bool migration_is_running(void)
case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_PAUSED:
+ case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
case MIGRATION_STATUS_POSTCOPY_RECOVER:
case MIGRATION_STATUS_SETUP:
case MIGRATION_STATUS_PRE_SWITCHOVER:
@@ -1229,6 +1278,7 @@ static void fill_source_migration_info(MigrationInfo *info)
case MIGRATION_STATUS_PRE_SWITCHOVER:
case MIGRATION_STATUS_DEVICE:
case MIGRATION_STATUS_POSTCOPY_PAUSED:
+ case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
case MIGRATION_STATUS_POSTCOPY_RECOVER:
/* TODO add some postcopy stats */
populate_time_info(info, s);
@@ -1273,8 +1323,6 @@ static void fill_destination_migration_info(MigrationInfo *info)
}
switch (mis->state) {
- case MIGRATION_STATUS_NONE:
- return;
case MIGRATION_STATUS_SETUP:
case MIGRATION_STATUS_CANCELLING:
case MIGRATION_STATUS_CANCELLED:
@@ -1290,6 +1338,8 @@ static void fill_destination_migration_info(MigrationInfo *info)
info->has_status = true;
fill_destination_postcopy_migration_info(info);
break;
+ default:
+ return;
}
info->status = mis->state;
@@ -1337,7 +1387,8 @@ void qmp_migrate_start_postcopy(Error **errp)
/* shared migration helpers */
-void migrate_set_state(int *state, int old_state, int new_state)
+void migrate_set_state(MigrationStatus *state, MigrationStatus old_state,
+ MigrationStatus new_state)
{
assert(new_state < MIGRATION_STATUS__MAX);
if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
@@ -1434,9 +1485,30 @@ static void migrate_error_free(MigrationState *s)
static void migrate_fd_error(MigrationState *s, const Error *error)
{
+ MigrationStatus current = s->state;
+ MigrationStatus next;
+
assert(s->to_dst_file == NULL);
- migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
- MIGRATION_STATUS_FAILED);
+
+ switch (current) {
+ case MIGRATION_STATUS_SETUP:
+ next = MIGRATION_STATUS_FAILED;
+ break;
+ case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
+ /* Never fail a postcopy migration; switch back to PAUSED instead */
+ next = MIGRATION_STATUS_POSTCOPY_PAUSED;
+ break;
+ default:
+ /*
+ * This really shouldn't happen. Just be careful to not crash a VM
+ * just for this. Instead, dump something.
+ */
+ error_report("%s: Illegal migration status (%s) detected",
+ __func__, MigrationStatus_str(current));
+ return;
+ }
+
+ migrate_set_state(&s->state, current, next);
migrate_set_error(s, error);
}
@@ -1537,6 +1609,7 @@ bool migration_in_postcopy(void)
switch (s->state) {
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_PAUSED:
+ case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
case MIGRATION_STATUS_POSTCOPY_RECOVER:
return true;
default:
@@ -1544,7 +1617,7 @@ bool migration_in_postcopy(void)
}
}
-bool migration_postcopy_is_alive(int state)
+bool migration_postcopy_is_alive(MigrationStatus state)
{
switch (state) {
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
@@ -1589,20 +1662,9 @@ bool migration_is_idle(void)
case MIGRATION_STATUS_COMPLETED:
case MIGRATION_STATUS_FAILED:
return true;
- case MIGRATION_STATUS_SETUP:
- case MIGRATION_STATUS_CANCELLING:
- case MIGRATION_STATUS_ACTIVE:
- case MIGRATION_STATUS_POSTCOPY_ACTIVE:
- case MIGRATION_STATUS_COLO:
- case MIGRATION_STATUS_PRE_SWITCHOVER:
- case MIGRATION_STATUS_DEVICE:
- case MIGRATION_STATUS_WAIT_UNPLUG:
+ default:
return false;
- case MIGRATION_STATUS__MAX:
- g_assert_not_reached();
}
-
- return false;
}
bool migration_is_active(void)
@@ -1935,6 +1997,9 @@ static bool migrate_prepare(MigrationState *s, bool resume, Error **errp)
return false;
}
+ migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
+ MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP);
+
/* This is a resume, skip init status */
return true;
}
@@ -2408,7 +2473,7 @@ static int open_return_path_on_source(MigrationState *ms)
trace_open_return_path_on_source();
- qemu_thread_create(&ms->rp_state.rp_thread, "return path",
+ qemu_thread_create(&ms->rp_state.rp_thread, "mig/src/rp-thr",
source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
ms->rp_state.rp_thread_created = true;
@@ -2967,9 +3032,9 @@ static MigThrError postcopy_pause(MigrationState *s)
* We wait until things fixed up. Then someone will setup the
* status back for us.
*/
- while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
+ do {
qemu_sem_wait(&s->postcopy_pause_sem);
- }
+ } while (postcopy_is_paused(s->state));
if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
/* Woken up by a recover procedure. Give it a shot */
@@ -3665,7 +3730,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
{
Error *local_err = NULL;
uint64_t rate_limit;
- bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;
+ bool resume = (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP);
int ret;
/*
@@ -3732,7 +3797,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
if (resume) {
/* Wakeup the main migration thread to do the recovery */
- migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
+ migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP,
MIGRATION_STATUS_POSTCOPY_RECOVER);
qemu_sem_post(&s->postcopy_pause_sem);
return;
@@ -3747,10 +3812,10 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
}
if (migrate_background_snapshot()) {
- qemu_thread_create(&s->thread, "bg_snapshot",
+ qemu_thread_create(&s->thread, "mig/snapshot",
bg_migration_thread, s, QEMU_THREAD_JOINABLE);
} else {
- qemu_thread_create(&s->thread, "live_migration",
+ qemu_thread_create(&s->thread, "mig/src/main",
migration_thread, s, QEMU_THREAD_JOINABLE);
}
s->migration_thread_running = true;
diff --git a/migration/migration.h b/migration/migration.h
index 6af01362d4..38aa1402d5 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -160,7 +160,7 @@ struct MigrationIncomingState {
/* PostCopyFD's for external userfaultfds & handlers of shared memory */
GArray *postcopy_remote_fds;
- int state;
+ MigrationStatus state;
/*
* The incoming migration coroutine, non-NULL during qemu_loadvm_state().
@@ -301,7 +301,7 @@ struct MigrationState {
/* params from 'migrate-set-parameters' */
MigrationParameters parameters;
- int state;
+ MigrationStatus state;
/* State related to return path */
struct {
@@ -459,7 +459,8 @@ struct MigrationState {
bool rdma_migration;
};
-void migrate_set_state(int *state, int old_state, int new_state);
+void migrate_set_state(MigrationStatus *state, MigrationStatus old_state,
+ MigrationStatus new_state);
void migration_fd_process_incoming(QEMUFile *f);
void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp);
@@ -479,7 +480,7 @@ int migrate_init(MigrationState *s, Error **errp);
bool migration_is_blocked(Error **errp);
/* True if outgoing migration has entered postcopy phase */
bool migration_in_postcopy(void);
-bool migration_postcopy_is_alive(int state);
+bool migration_postcopy_is_alive(MigrationStatus state);
MigrationState *migrate_get_current(void);
bool migration_has_failed(MigrationState *);
bool migrate_mode_is_cpr(MigrationState *);
diff --git a/migration/multifd.c b/migration/multifd.c
index d82885fdbb..0b4cbaddfe 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -1069,7 +1069,7 @@ static bool multifd_tls_channel_connect(MultiFDSendParams *p,
args->p = p;
p->tls_thread_created = true;
- qemu_thread_create(&p->tls_thread, "multifd-tls-handshake-worker",
+ qemu_thread_create(&p->tls_thread, "mig/src/tls",
multifd_tls_handshake_thread, args,
QEMU_THREAD_JOINABLE);
return true;
@@ -1190,7 +1190,7 @@ bool multifd_send_setup(void)
p->packet->magic = cpu_to_be32(MULTIFD_MAGIC);
p->packet->version = cpu_to_be32(MULTIFD_VERSION);
}
- p->name = g_strdup_printf("multifdsend_%d", i);
+ p->name = g_strdup_printf("mig/src/send_%d", i);
p->page_size = qemu_target_page_size();
p->page_count = page_count;
p->write_flags = 0;
@@ -1604,7 +1604,7 @@ int multifd_recv_setup(Error **errp)
+ sizeof(uint64_t) * page_count;
p->packet = g_malloc0(p->packet_len);
}
- p->name = g_strdup_printf("multifdrecv_%d", i);
+ p->name = g_strdup_printf("mig/dst/recv_%d", i);
p->normal = g_new0(ram_addr_t, page_count);
p->zero = g_new0(ram_addr_t, page_count);
p->page_count = page_count;
diff --git a/migration/options.c b/migration/options.c
index 5ab5b6d85d..645f55003d 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -702,6 +702,25 @@ bool migrate_cpu_throttle_tailslow(void)
return s->parameters.cpu_throttle_tailslow;
}
+bool migrate_direct_io(void)
+{
+ MigrationState *s = migrate_get_current();
+
+ /*
+ * O_DIRECT is only supported with mapped-ram and multifd.
+ *
+ * mapped-ram is needed because filesystems impose restrictions on
+ * O_DIRECT IO alignment (see MAPPED_RAM_FILE_OFFSET_ALIGNMENT).
+ *
+ * multifd is needed to keep the unaligned portion of the stream
+ * isolated to the main migration thread while multifd channels
+ * process the aligned data with O_DIRECT enabled.
+ */
+ return s->parameters.direct_io &&
+ s->capabilities[MIGRATION_CAPABILITY_MAPPED_RAM] &&
+ s->capabilities[MIGRATION_CAPABILITY_MULTIFD];
+}
+
uint64_t migrate_downtime_limit(void)
{
MigrationState *s = migrate_get_current();
@@ -905,6 +924,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
params->mode = s->parameters.mode;
params->has_zero_page_detection = true;
params->zero_page_detection = s->parameters.zero_page_detection;
+ params->has_direct_io = true;
+ params->direct_io = s->parameters.direct_io;
return params;
}
@@ -937,6 +958,7 @@ void migrate_params_init(MigrationParameters *params)
params->has_vcpu_dirty_limit = true;
params->has_mode = true;
params->has_zero_page_detection = true;
+ params->has_direct_io = true;
}
/*
@@ -1110,6 +1132,11 @@ bool migrate_params_check(MigrationParameters *params, Error **errp)
return false;
}
+ if (params->has_direct_io && params->direct_io && !qemu_has_direct_io()) {
+ error_setg(errp, "No build-time support for direct-io");
+ return false;
+ }
+
return true;
}
@@ -1216,6 +1243,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
if (params->has_zero_page_detection) {
dest->zero_page_detection = params->zero_page_detection;
}
+
+ if (params->has_direct_io) {
+ dest->direct_io = params->direct_io;
+ }
}
static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1341,6 +1372,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
if (params->has_zero_page_detection) {
s->parameters.zero_page_detection = params->zero_page_detection;
}
+
+ if (params->has_direct_io) {
+ s->parameters.direct_io = params->direct_io;
+ }
}
void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
diff --git a/migration/options.h b/migration/options.h
index 4b21cc2669..a2397026db 100644
--- a/migration/options.h
+++ b/migration/options.h
@@ -69,6 +69,7 @@ uint32_t migrate_checkpoint_delay(void);
uint8_t migrate_cpu_throttle_increment(void);
uint8_t migrate_cpu_throttle_initial(void);
bool migrate_cpu_throttle_tailslow(void);
+bool migrate_direct_io(void);
uint64_t migrate_downtime_limit(void);
uint8_t migrate_max_cpu_throttle(void);
uint64_t migrate_max_bandwidth(void);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 3419779548..1c374b7ea1 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -1238,7 +1238,7 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
return -1;
}
- postcopy_thread_create(mis, &mis->fault_thread, "fault-default",
+ postcopy_thread_create(mis, &mis->fault_thread, "mig/dst/fault",
postcopy_ram_fault_thread, QEMU_THREAD_JOINABLE);
mis->have_fault_thread = true;
@@ -1258,7 +1258,7 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
* This thread needs to be created after the temp pages because
* it'll fetch RAM_CHANNEL_POSTCOPY PostcopyTmpPage immediately.
*/
- postcopy_thread_create(mis, &mis->postcopy_prio_thread, "fault-fast",
+ postcopy_thread_create(mis, &mis->postcopy_prio_thread, "mig/dst/preempt",
postcopy_preempt_thread, QEMU_THREAD_JOINABLE);
mis->preempt_thread_status = PREEMPT_THREAD_CREATED;
}
@@ -1770,3 +1770,9 @@ void *postcopy_preempt_thread(void *opaque)
return NULL;
}
+
+bool postcopy_is_paused(MigrationStatus status)
+{
+ return status == MIGRATION_STATUS_POSTCOPY_PAUSED ||
+ status == MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP;
+}
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index ecae941211..a6df1b2811 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -13,6 +13,8 @@
#ifndef QEMU_POSTCOPY_RAM_H
#define QEMU_POSTCOPY_RAM_H
+#include "qapi/qapi-types-migration.h"
+
/* Return true if the host supports everything we need to do postcopy-ram */
bool postcopy_ram_supported_by_host(MigrationIncomingState *mis,
Error **errp);
@@ -193,5 +195,6 @@ enum PostcopyChannels {
void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
void postcopy_preempt_setup(MigrationState *s);
int postcopy_preempt_establish_channel(MigrationState *s);
+bool postcopy_is_paused(MigrationStatus status);
#endif
diff --git a/migration/ram.c b/migration/ram.c
index ceea586b06..edec1a2d07 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3300,10 +3300,6 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
}
}
- if (migrate_multifd() && !migrate_multifd_flush_after_each_section() &&
- !migrate_mapped_ram()) {
- qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
- }
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
return qemu_fflush(f);
}
diff --git a/migration/savevm.c b/migration/savevm.c
index c621f2359b..deb57833f8 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2129,7 +2129,7 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
}
mis->have_listen_thread = true;
- postcopy_thread_create(mis, &mis->listen_thread, "postcopy/listen",
+ postcopy_thread_create(mis, &mis->listen_thread, "mig/dst/listen",
postcopy_ram_listen_thread, QEMU_THREAD_DETACHED);
trace_loadvm_postcopy_handle_listen("return");
@@ -2864,9 +2864,9 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
error_report("Detected IO failure for postcopy. "
"Migration paused.");
- while (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
+ do {
qemu_sem_wait(&mis->postcopy_pause_sem_dst);
- }
+ } while (postcopy_is_paused(mis->state));
trace_postcopy_pause_incoming_continued();
diff --git a/monitor/fds.c b/monitor/fds.c
index d86c2c674c..b5416b5b5d 100644
--- a/monitor/fds.c
+++ b/monitor/fds.c
@@ -43,7 +43,6 @@ struct mon_fd_t {
typedef struct MonFdsetFd MonFdsetFd;
struct MonFdsetFd {
int fd;
- bool removed;
char *opaque;
QLIST_ENTRY(MonFdsetFd) next;
};
@@ -167,28 +166,32 @@ int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp)
return -1;
}
-static void monitor_fdset_cleanup(MonFdset *mon_fdset)
+static void monitor_fdset_free(MonFdset *mon_fdset)
{
- MonFdsetFd *mon_fdset_fd;
- MonFdsetFd *mon_fdset_fd_next;
-
- QLIST_FOREACH_SAFE(mon_fdset_fd, &mon_fdset->fds, next, mon_fdset_fd_next) {
- if ((mon_fdset_fd->removed ||
- (QLIST_EMPTY(&mon_fdset->dup_fds) && mon_refcount == 0)) &&
- runstate_is_running()) {
- close(mon_fdset_fd->fd);
- g_free(mon_fdset_fd->opaque);
- QLIST_REMOVE(mon_fdset_fd, next);
- g_free(mon_fdset_fd);
- }
- }
+ QLIST_REMOVE(mon_fdset, next);
+ g_free(mon_fdset);
+}
+static void monitor_fdset_free_if_empty(MonFdset *mon_fdset)
+{
+ /*
+ * Only remove an empty fdset. The fds are owned by the user and
+ * should have been removed with qmp_remove_fd(). The dup_fds are
+ * owned by QEMU and should have been removed with qemu_close().
+ */
if (QLIST_EMPTY(&mon_fdset->fds) && QLIST_EMPTY(&mon_fdset->dup_fds)) {
- QLIST_REMOVE(mon_fdset, next);
- g_free(mon_fdset);
+ monitor_fdset_free(mon_fdset);
}
}
+static void monitor_fdset_fd_free(MonFdsetFd *mon_fdset_fd)
+{
+ close(mon_fdset_fd->fd);
+ g_free(mon_fdset_fd->opaque);
+ QLIST_REMOVE(mon_fdset_fd, next);
+ g_free(mon_fdset_fd);
+}
+
void monitor_fdsets_cleanup(void)
{
MonFdset *mon_fdset;
@@ -196,7 +199,7 @@ void monitor_fdsets_cleanup(void)
QEMU_LOCK_GUARD(&mon_fdsets_lock);
QLIST_FOREACH_SAFE(mon_fdset, &mon_fdsets, next, mon_fdset_next) {
- monitor_fdset_cleanup(mon_fdset);
+ monitor_fdset_free_if_empty(mon_fdset);
}
}
@@ -263,7 +266,7 @@ void qmp_get_win32_socket(const char *infos, const char *fdname, Error **errp)
void qmp_remove_fd(int64_t fdset_id, bool has_fd, int64_t fd, Error **errp)
{
MonFdset *mon_fdset;
- MonFdsetFd *mon_fdset_fd;
+ MonFdsetFd *mon_fdset_fd, *mon_fdset_fd_next;
char fd_str[60];
QEMU_LOCK_GUARD(&mon_fdsets_lock);
@@ -271,21 +274,22 @@ void qmp_remove_fd(int64_t fdset_id, bool has_fd, int64_t fd, Error **errp)
if (mon_fdset->id != fdset_id) {
continue;
}
- QLIST_FOREACH(mon_fdset_fd, &mon_fdset->fds, next) {
+ QLIST_FOREACH_SAFE(mon_fdset_fd, &mon_fdset->fds, next,
+ mon_fdset_fd_next) {
if (has_fd) {
if (mon_fdset_fd->fd != fd) {
continue;
}
- mon_fdset_fd->removed = true;
+ monitor_fdset_fd_free(mon_fdset_fd);
break;
} else {
- mon_fdset_fd->removed = true;
+ monitor_fdset_fd_free(mon_fdset_fd);
}
}
if (has_fd && !mon_fdset_fd) {
goto error;
}
- monitor_fdset_cleanup(mon_fdset);
+ monitor_fdset_free_if_empty(mon_fdset);
return;
}
@@ -395,7 +399,6 @@ AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id,
mon_fdset_fd = g_malloc0(sizeof(*mon_fdset_fd));
mon_fdset_fd->fd = fd;
- mon_fdset_fd->removed = false;
mon_fdset_fd->opaque = g_strdup(opaque);
QLIST_INSERT_HEAD(&mon_fdset->fds, mon_fdset_fd, next);
@@ -406,9 +409,10 @@ AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id,
return fdinfo;
}
-int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags)
+int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags, Error **errp)
{
#ifdef _WIN32
+ error_setg(errp, "Platform does not support fd passing (fdset)");
return -ENOENT;
#else
MonFdset *mon_fdset;
@@ -420,6 +424,11 @@ int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags)
int fd = -1;
int dup_fd;
int mon_fd_flags;
+ int mask = O_ACCMODE;
+
+#ifdef O_DIRECT
+ mask |= O_DIRECT;
+#endif
if (mon_fdset->id != fdset_id) {
continue;
@@ -428,10 +437,12 @@ int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags)
QLIST_FOREACH(mon_fdset_fd, &mon_fdset->fds, next) {
mon_fd_flags = fcntl(mon_fdset_fd->fd, F_GETFL);
if (mon_fd_flags == -1) {
+ error_setg(errp, "Failed to read file status flags for fd=%d",
+ mon_fdset_fd->fd);
return -1;
}
- if ((flags & O_ACCMODE) == (mon_fd_flags & O_ACCMODE)) {
+ if ((flags & mask) == (mon_fd_flags & mask)) {
fd = mon_fdset_fd->fd;
break;
}
@@ -439,11 +450,15 @@ int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags)
if (fd == -1) {
errno = EACCES;
+ error_setg(errp,
+ "Failed to find file descriptor with matching flags=0x%x",
+ flags);
return -1;
}
dup_fd = qemu_dup_flags(fd, flags);
if (dup_fd == -1) {
+ error_setg(errp, "Failed to dup() given file descriptor fd=%d", fd);
return -1;
}
@@ -453,12 +468,13 @@ int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags)
return dup_fd;
}
+ error_setg(errp, "Failed to find fdset /dev/fdset/%" PRId64, fdset_id);
errno = ENOENT;
return -1;
#endif
}
-static int64_t monitor_fdset_dup_fd_find_remove(int dup_fd, bool remove)
+void monitor_fdset_dup_fd_remove(int dup_fd)
{
MonFdset *mon_fdset;
MonFdsetFd *mon_fdset_fd_dup;
@@ -467,31 +483,13 @@ static int64_t monitor_fdset_dup_fd_find_remove(int dup_fd, bool remove)
QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
QLIST_FOREACH(mon_fdset_fd_dup, &mon_fdset->dup_fds, next) {
if (mon_fdset_fd_dup->fd == dup_fd) {
- if (remove) {
- QLIST_REMOVE(mon_fdset_fd_dup, next);
- g_free(mon_fdset_fd_dup);
- if (QLIST_EMPTY(&mon_fdset->dup_fds)) {
- monitor_fdset_cleanup(mon_fdset);
- }
- return -1;
- } else {
- return mon_fdset->id;
- }
+ QLIST_REMOVE(mon_fdset_fd_dup, next);
+ g_free(mon_fdset_fd_dup);
+ monitor_fdset_free_if_empty(mon_fdset);
+ return;
}
}
}
-
- return -1;
-}
-
-int64_t monitor_fdset_dup_fd_find(int dup_fd)
-{
- return monitor_fdset_dup_fd_find_remove(dup_fd, false);
-}
-
-void monitor_fdset_dup_fd_remove(int dup_fd)
-{
- monitor_fdset_dup_fd_find_remove(dup_fd, true);
}
int monitor_fd_param(Monitor *mon, const char *fdname, Error **errp)
diff --git a/monitor/hmp.c b/monitor/hmp.c
index 69c1b7e98a..460e8832f6 100644
--- a/monitor/hmp.c
+++ b/monitor/hmp.c
@@ -1437,11 +1437,9 @@ static void monitor_event(void *opaque, QEMUChrEvent event)
monitor_resume(mon);
}
qemu_mutex_unlock(&mon->mon_lock);
- mon_refcount++;
break;
case CHR_EVENT_CLOSED:
- mon_refcount--;
monitor_fdsets_cleanup();
break;
diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h
index 252de85681..cb628f681d 100644
--- a/monitor/monitor-internal.h
+++ b/monitor/monitor-internal.h
@@ -168,7 +168,6 @@ extern bool qmp_dispatcher_co_shutdown;
extern QmpCommandList qmp_commands, qmp_cap_negotiation_commands;
extern QemuMutex monitor_lock;
extern MonitorList mon_list;
-extern int mon_refcount;
extern HMPCommand hmp_cmds[];
diff --git a/monitor/monitor.c b/monitor/monitor.c
index 01ede1babd..db52a9c7ef 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -71,7 +71,6 @@ static GHashTable *monitor_qapi_event_state;
static GHashTable *coroutine_mon; /* Maps Coroutine* to Monitor* */
MonitorList mon_list;
-int mon_refcount;
static bool monitor_destroyed;
Monitor *monitor_cur(void)
diff --git a/monitor/qmp.c b/monitor/qmp.c
index a239945e8d..5e538f34c0 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -466,7 +466,6 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event)
data = qmp_greeting(mon);
qmp_send_response(mon, data);
qobject_unref(data);
- mon_refcount++;
break;
case CHR_EVENT_CLOSED:
/*
@@ -479,7 +478,6 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event)
json_message_parser_destroy(&mon->parser);
json_message_parser_init(&mon->parser, handle_qmp_command,
mon, NULL);
- mon_refcount--;
monitor_fdsets_cleanup();
break;
case CHR_EVENT_BREAK:
diff --git a/qapi/migration.json b/qapi/migration.json
index 470f746cc5..0f24206bce 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -142,6 +142,9 @@
#
# @postcopy-paused: during postcopy but paused. (since 3.0)
#
+# @postcopy-recover-setup: setup phase for a postcopy recovery process,
+# preparing for a recovery phase to start. (since 9.1)
+#
# @postcopy-recover: trying to recover from a paused postcopy. (since
# 3.0)
#
@@ -166,6 +169,7 @@
{ 'enum': 'MigrationStatus',
'data': [ 'none', 'setup', 'cancelling', 'cancelled',
'active', 'postcopy-active', 'postcopy-paused',
+ 'postcopy-recover-setup',
'postcopy-recover', 'completed', 'failed', 'colo',
'pre-switchover', 'device', 'wait-unplug' ] }
##
@@ -821,6 +825,10 @@
# See description in @ZeroPageDetection. Default is 'multifd'.
# (since 9.0)
#
+# @direct-io: Open migration files with O_DIRECT when possible. This
+# only has effect if the @mapped-ram capability is enabled.
+# (Since 9.1)
+#
# Features:
#
# @unstable: Members @x-checkpoint-delay and
@@ -845,7 +853,8 @@
{ 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] },
'vcpu-dirty-limit',
'mode',
- 'zero-page-detection'] }
+ 'zero-page-detection',
+ 'direct-io'] }
##
# @MigrateSetParameters:
@@ -991,6 +1000,10 @@
# See description in @ZeroPageDetection. Default is 'multifd'.
# (since 9.0)
#
+# @direct-io: Open migration files with O_DIRECT when possible. This
+# only has effect if the @mapped-ram capability is enabled.
+# (Since 9.1)
+#
# Features:
#
# @unstable: Members @x-checkpoint-delay and
@@ -1030,7 +1043,8 @@
'features': [ 'unstable' ] },
'*vcpu-dirty-limit': 'uint64',
'*mode': 'MigMode',
- '*zero-page-detection': 'ZeroPageDetection'} }
+ '*zero-page-detection': 'ZeroPageDetection',
+ '*direct-io': 'bool' } }
##
# @migrate-set-parameters:
@@ -1190,6 +1204,10 @@
# See description in @ZeroPageDetection. Default is 'multifd'.
# (since 9.0)
#
+# @direct-io: Open migration files with O_DIRECT when possible. This
+# only has effect if the @mapped-ram capability is enabled.
+# (Since 9.1)
+#
# Features:
#
# @unstable: Members @x-checkpoint-delay and
@@ -1226,7 +1244,8 @@
'features': [ 'unstable' ] },
'*vcpu-dirty-limit': 'uint64',
'*mode': 'MigMode',
- '*zero-page-detection': 'ZeroPageDetection'} }
+ '*zero-page-detection': 'ZeroPageDetection',
+ '*direct-io': 'bool' } }
##
# @query-migrate-parameters:
diff --git a/stubs/fdset.c b/stubs/fdset.c
index d7c39a28ac..2950fd91fd 100644
--- a/stubs/fdset.c
+++ b/stubs/fdset.c
@@ -3,17 +3,12 @@
#include "monitor/monitor.h"
#include "../monitor/monitor-internal.h"
-int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags)
+int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags, Error **errp)
{
errno = ENOSYS;
return -1;
}
-int64_t monitor_fdset_dup_fd_find(int dup_fd)
-{
- return -1;
-}
-
void monitor_fdset_dup_fd_remove(int dupfd)
{
}
diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c
index 18e2f7f282..c7f6897d78 100644
--- a/tests/qtest/libqtest.c
+++ b/tests/qtest/libqtest.c
@@ -514,11 +514,6 @@ static QTestState *qtest_init_internal(const char *qemu_bin,
kill(s->qemu_pid, SIGSTOP);
}
#endif
-
- /* ask endianness of the target */
-
- s->big_endian = qtest_query_target_endianness(s);
-
return s;
}
@@ -527,11 +522,21 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args)
return qtest_init_internal(qtest_qemu_binary(NULL), extra_args);
}
+QTestState *qtest_init_with_env_no_handshake(const char *var,
+ const char *extra_args)
+{
+ return qtest_init_internal(qtest_qemu_binary(var), extra_args);
+}
+
QTestState *qtest_init_with_env(const char *var, const char *extra_args)
{
QTestState *s = qtest_init_internal(qtest_qemu_binary(var), extra_args);
QDict *greeting;
+ /* ask endianness of the target */
+
+ s->big_endian = qtest_query_target_endianness(s);
+
/* Read the QMP greeting and then do the handshake */
greeting = qtest_qmp_receive(s);
qobject_unref(greeting);
diff --git a/tests/qtest/libqtest.h b/tests/qtest/libqtest.h
index beb96b18eb..c261b7e0b3 100644
--- a/tests/qtest/libqtest.h
+++ b/tests/qtest/libqtest.h
@@ -68,6 +68,8 @@ QTestState *qtest_init(const char *extra_args);
*/
QTestState *qtest_init_with_env(const char *var, const char *extra_args);
+QTestState *qtest_init_with_env_no_handshake(const char *var,
+ const char *extra_args);
/**
* qtest_init_without_qmp_handshake:
* @extra_args: other arguments to pass to QEMU. CAUTION: these
diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
index ce6d6615b5..84f49db85e 100644
--- a/tests/qtest/migration-helpers.c
+++ b/tests/qtest/migration-helpers.c
@@ -18,6 +18,7 @@
#include "qapi/error.h"
#include "qapi/qmp/qlist.h"
#include "qemu/cutils.h"
+#include "qemu/memalign.h"
#include "migration-helpers.h"
@@ -248,7 +249,7 @@ void migrate_set_capability(QTestState *who, const char *capability,
void migrate_incoming_qmp(QTestState *to, const char *uri, const char *fmt, ...)
{
va_list ap;
- QDict *args, *rsp, *data;
+ QDict *args, *rsp;
va_start(ap, fmt);
args = qdict_from_vjsonf_nofail(fmt, ap);
@@ -257,6 +258,7 @@ void migrate_incoming_qmp(QTestState *to, const char *uri, const char *fmt, ...)
g_assert(!qdict_haskey(args, "uri"));
qdict_put_str(args, "uri", uri);
+ /* This function relies on the event to work, make sure it's enabled */
migrate_set_capability(to, "events", true);
rsp = qtest_qmp(to, "{ 'execute': 'migrate-incoming', 'arguments': %p}",
@@ -270,14 +272,7 @@ void migrate_incoming_qmp(QTestState *to, const char *uri, const char *fmt, ...)
g_assert(qdict_haskey(rsp, "return"));
qobject_unref(rsp);
- rsp = qtest_qmp_eventwait_ref(to, "MIGRATION");
- g_assert(qdict_haskey(rsp, "data"));
-
- data = qdict_get_qdict(rsp, "data");
- g_assert(qdict_haskey(data, "status"));
- g_assert_cmpstr(qdict_get_str(data, "status"), ==, "setup");
-
- qobject_unref(rsp);
+ migration_event_wait(to, "setup");
}
/*
@@ -473,3 +468,66 @@ void migration_test_add(const char *path, void (*fn)(void))
qtest_add_data_func_full(path, test, migration_test_wrapper,
migration_test_destroy);
}
+
+#ifdef O_DIRECT
+/*
+ * Probe for O_DIRECT support on the filesystem. Since this is used
+ * for tests, be conservative, if anything fails, assume it's
+ * unsupported.
+ */
+bool probe_o_direct_support(const char *tmpfs)
+{
+    g_autofree char *filename = g_strdup_printf("%s/probe-o-direct", tmpfs);
+    int fd, flags = O_CREAT | O_RDWR | O_TRUNC | O_DIRECT;
+    void *buf;
+    ssize_t ret, len;
+    uint64_t offset;
+
+    fd = open(filename, flags, 0660);
+    if (fd < 0) {
+        /* Best-effort removal in case the file was created anyway */
+        unlink(filename);
+        return false;
+    }
+
+    /*
+     * Using 1MB alignment as conservative choice to satisfy any
+     * plausible architecture default page size, and/or filesystem
+     * alignment restrictions.
+     */
+    len = 0x100000;
+    offset = 0x100000;
+
+    buf = qemu_try_memalign(len, len);
+    g_assert(buf);
+
+    /* Only the success/failure of the aligned write matters here */
+    ret = pwrite(fd, buf, len, offset);
+
+    /*
+     * The probe is done: release the descriptor (it was previously
+     * leaked on every call) and remove the scratch file.
+     */
+    close(fd);
+    unlink(filename);
+
+    /* Buffers from qemu_try_memalign() are released with qemu_vfree() */
+    qemu_vfree(buf);
+
+    return ret >= 0;
+}
+#endif
+
+/*
+ * Wait for a "MIGRATION" event. This is what Libvirt uses to track
+ * migration status changes.
+ */
+void migration_event_wait(QTestState *s, const char *target)
+{
+ QDict *response, *data;
+ const char *status;
+ bool found;
+
+ do {
+ response = qtest_qmp_eventwait_ref(s, "MIGRATION");
+ data = qdict_get_qdict(response, "data");
+ g_assert(data);
+ status = qdict_get_str(data, "status");
+ found = (strcmp(status, target) == 0);
+ qobject_unref(response);
+ } while (!found);
+}
diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
index 1339835698..72dba369fb 100644
--- a/tests/qtest/migration-helpers.h
+++ b/tests/qtest/migration-helpers.h
@@ -54,5 +54,15 @@ char *find_common_machine_version(const char *mtype, const char *var1,
const char *var2);
char *resolve_machine_version(const char *alias, const char *var1,
const char *var2);
+#ifdef O_DIRECT
+bool probe_o_direct_support(const char *tmpfs);
+#else
+static inline bool probe_o_direct_support(const char *tmpfs)
+{
+ return false;
+}
+#endif
void migration_test_add(const char *path, void (*fn)(void));
+void migration_event_wait(QTestState *s, const char *target);
+
#endif /* MIGRATION_HELPERS_H */
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 0dccb4beff..571fc1334c 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -64,13 +64,26 @@ static QTestMigrationState dst_state;
#define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */
#define ANALYZE_SCRIPT "scripts/analyze-migration.py"
+#define VMSTATE_CHECKER_SCRIPT "scripts/vmstate-static-checker.py"
#define QEMU_VM_FILE_MAGIC 0x5145564d
#define FILE_TEST_FILENAME "migfile"
#define FILE_TEST_OFFSET 0x1000
+#define FILE_TEST_MARKER 'X'
#define QEMU_ENV_SRC "QTEST_QEMU_BINARY_SRC"
#define QEMU_ENV_DST "QTEST_QEMU_BINARY_DST"
+typedef enum PostcopyRecoveryFailStage {
+ /*
+ * "no failure" must be 0 as it's the default. OTOH, real failure
+ * cases must be >0 so that they are caught by an "if" test.
+ */
+ POSTCOPY_FAIL_NONE = 0,
+ POSTCOPY_FAIL_CHANNEL_ESTABLISH,
+ POSTCOPY_FAIL_RECOVERY,
+ POSTCOPY_FAIL_MAX
+} PostcopyRecoveryFailStage;
+
#if defined(__linux__)
#include <sys/syscall.h>
#include <sys/vfs.h>
@@ -406,6 +419,38 @@ static void migrate_set_parameter_str(QTestState *who, const char *parameter,
migrate_check_parameter_str(who, parameter, value);
}
+static long long migrate_get_parameter_bool(QTestState *who,
+ const char *parameter)
+{
+ QDict *rsp;
+ int result;
+
+ rsp = qtest_qmp_assert_success_ref(
+ who, "{ 'execute': 'query-migrate-parameters' }");
+ result = qdict_get_bool(rsp, parameter);
+ qobject_unref(rsp);
+ return !!result;
+}
+
+static void migrate_check_parameter_bool(QTestState *who, const char *parameter,
+ int value)
+{
+ int result;
+
+ result = migrate_get_parameter_bool(who, parameter);
+ g_assert_cmpint(result, ==, value);
+}
+
+static void migrate_set_parameter_bool(QTestState *who, const char *parameter,
+ int value)
+{
+ qtest_qmp_assert_success(who,
+ "{ 'execute': 'migrate-set-parameters',"
+ "'arguments': { %s: %i } }",
+ parameter, value);
+ migrate_check_parameter_bool(who, parameter, value);
+}
+
static void migrate_ensure_non_converge(QTestState *who)
{
/* Can't converge with 1ms downtime + 3 mbs bandwidth limit */
@@ -659,7 +704,7 @@ typedef struct {
/* Postcopy specific fields */
void *postcopy_data;
bool postcopy_preempt;
- bool postcopy_recovery_test_fail;
+ PostcopyRecoveryFailStage postcopy_recovery_fail_stage;
} MigrateCommon;
static int test_migrate_start(QTestState **from, QTestState **to,
@@ -818,6 +863,13 @@ static int test_migrate_start(QTestState **from, QTestState **to,
unlink(shmem_path);
}
+ /*
+ * Always enable migration events. Libvirt always uses them, so let's
+ * mimic that as closely as possible.
+ */
+ migrate_set_capability(*from, "events", true);
+ migrate_set_capability(*to, "events", true);
+
return 0;
}
@@ -1330,12 +1382,16 @@ static void wait_for_postcopy_status(QTestState *one, const char *status)
"completed", NULL });
}
-#ifndef _WIN32
-static void postcopy_recover_fail(QTestState *from, QTestState *to)
+static void postcopy_recover_fail(QTestState *from, QTestState *to,
+ PostcopyRecoveryFailStage stage)
{
+#ifndef _WIN32
+ bool fail_early = (stage == POSTCOPY_FAIL_CHANNEL_ESTABLISH);
int ret, pair1[2], pair2[2];
char c;
+ g_assert(stage > POSTCOPY_FAIL_NONE && stage < POSTCOPY_FAIL_MAX);
+
/* Create two unrelated socketpairs */
ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair1);
g_assert_cmpint(ret, ==, 0);
@@ -1369,33 +1425,72 @@ static void postcopy_recover_fail(QTestState *from, QTestState *to)
ret = send(pair2[1], &c, 1, 0);
g_assert_cmpint(ret, ==, 1);
+ if (stage == POSTCOPY_FAIL_CHANNEL_ESTABLISH) {
+ /*
+ * This will make src QEMU fail at an early stage when trying to
+ * resume later, so it shouldn't reach the RECOVER stage at all.
+ */
+ close(pair1[1]);
+ }
+
migrate_recover(to, "fd:fd-mig");
migrate_qmp(from, to, "fd:fd-mig", NULL, "{'resume': true}");
/*
- * Make sure both QEMU instances will go into RECOVER stage, then test
- * kicking them out using migrate-pause.
+ * Source QEMU has an extra RECOVER_SETUP phase, dest doesn't have it.
+ * Make sure it appears along the way.
*/
- wait_for_postcopy_status(from, "postcopy-recover");
- wait_for_postcopy_status(to, "postcopy-recover");
+ migration_event_wait(from, "postcopy-recover-setup");
+
+ if (fail_early) {
+ /*
+ * When it fails at reconnection, src QEMU will automatically go
+ * back to PAUSED state. Make sure there is an event in this
+ * case: Libvirt relies on this to detect early reconnection
+ * errors.
+ */
+ migration_event_wait(from, "postcopy-paused");
+ } else {
+ /*
+ * We want to test "fail later" at RECOVER stage here. Make sure
+ * both QEMU instances will go into RECOVER stage first, then test
+ * kicking them out using migrate-pause.
+ *
+ * Explicitly check the RECOVER event on src, that's what Libvirt
+ * relies on, rather than polling.
+ */
+ migration_event_wait(from, "postcopy-recover");
+ wait_for_postcopy_status(from, "postcopy-recover");
+
+ /* Need an explicit kick on src QEMU in this case */
+ migrate_pause(from);
+ }
/*
- * This would be issued by the admin upon noticing the hang, we should
- * make sure we're able to kick this out.
+ * For all failure cases, we'll reach such states on both sides now.
+ * Check them.
*/
- migrate_pause(from);
wait_for_postcopy_status(from, "postcopy-paused");
+ wait_for_postcopy_status(to, "postcopy-recover");
- /* Do the same test on dest */
+ /*
+ * Kick dest QEMU out too. This is normally not needed in reality
+ * because when the channel is shutdown it should also happen on src.
+ * However here we used separate socket pairs so we need to do that
+ * explicitly.
+ */
migrate_pause(to);
wait_for_postcopy_status(to, "postcopy-paused");
close(pair1[0]);
- close(pair1[1]);
close(pair2[0]);
close(pair2[1]);
+
+ if (stage != POSTCOPY_FAIL_CHANNEL_ESTABLISH) {
+ close(pair1[1]);
+ }
+#endif
}
-#endif /* _WIN32 */
static void test_postcopy_recovery_common(MigrateCommon *args)
{
@@ -1435,16 +1530,14 @@ static void test_postcopy_recovery_common(MigrateCommon *args)
wait_for_postcopy_status(to, "postcopy-paused");
wait_for_postcopy_status(from, "postcopy-paused");
-#ifndef _WIN32
- if (args->postcopy_recovery_test_fail) {
+ if (args->postcopy_recovery_fail_stage) {
/*
* Test when a wrong socket specified for recover, and then the
* ability to kick it out, and continue with a correct socket.
*/
- postcopy_recover_fail(from, to);
+ postcopy_recover_fail(from, to, args->postcopy_recovery_fail_stage);
/* continue with a good recovery */
}
-#endif /* _WIN32 */
/*
* Create a new socket to emulate a new channel that is different
@@ -1473,16 +1566,23 @@ static void test_postcopy_recovery(void)
test_postcopy_recovery_common(&args);
}
-#ifndef _WIN32
-static void test_postcopy_recovery_double_fail(void)
+static void test_postcopy_recovery_fail_handshake(void)
{
MigrateCommon args = {
- .postcopy_recovery_test_fail = true,
+ .postcopy_recovery_fail_stage = POSTCOPY_FAIL_RECOVERY,
+ };
+
+ test_postcopy_recovery_common(&args);
+}
+
+static void test_postcopy_recovery_fail_reconnect(void)
+{
+ MigrateCommon args = {
+ .postcopy_recovery_fail_stage = POSTCOPY_FAIL_CHANNEL_ESTABLISH,
};
test_postcopy_recovery_common(&args);
}
-#endif /* _WIN32 */
#ifdef CONFIG_GNUTLS
static void test_postcopy_recovery_tls_psk(void)
@@ -1588,6 +1688,85 @@ static void test_analyze_script(void)
test_migrate_end(from, to, false);
cleanup("migfile");
}
+
+static void test_vmstate_checker_script(void)
+{
+ g_autofree gchar *cmd_src = NULL;
+ g_autofree gchar *cmd_dst = NULL;
+ g_autofree gchar *vmstate_src = NULL;
+ g_autofree gchar *vmstate_dst = NULL;
+ const char *machine_alias, *machine_opts = "";
+ g_autofree char *machine = NULL;
+ const char *arch = qtest_get_arch();
+ int pid, wstatus;
+ const char *python = g_getenv("PYTHON");
+
+ if (!getenv(QEMU_ENV_SRC) && !getenv(QEMU_ENV_DST)) {
+ g_test_skip("Test needs two different QEMU versions");
+ return;
+ }
+
+ if (!python) {
+ g_test_skip("PYTHON variable not set");
+ return;
+ }
+
+ if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
+ if (g_str_equal(arch, "i386")) {
+ machine_alias = "pc";
+ } else {
+ machine_alias = "q35";
+ }
+ } else if (g_str_equal(arch, "s390x")) {
+ machine_alias = "s390-ccw-virtio";
+ } else if (strcmp(arch, "ppc64") == 0) {
+ machine_alias = "pseries";
+ } else if (strcmp(arch, "aarch64") == 0) {
+ machine_alias = "virt";
+ } else {
+ g_assert_not_reached();
+ }
+
+ if (!qtest_has_machine(machine_alias)) {
+ g_autofree char *msg = g_strdup_printf("machine %s not supported", machine_alias);
+ g_test_skip(msg);
+ return;
+ }
+
+ machine = resolve_machine_version(machine_alias, QEMU_ENV_SRC,
+ QEMU_ENV_DST);
+
+ vmstate_src = g_strdup_printf("%s/vmstate-src", tmpfs);
+ vmstate_dst = g_strdup_printf("%s/vmstate-dst", tmpfs);
+
+ cmd_dst = g_strdup_printf("-machine %s,%s -dump-vmstate %s",
+ machine, machine_opts, vmstate_dst);
+ cmd_src = g_strdup_printf("-machine %s,%s -dump-vmstate %s",
+ machine, machine_opts, vmstate_src);
+
+ qtest_init_with_env_no_handshake(QEMU_ENV_SRC, cmd_src);
+ qtest_init_with_env_no_handshake(QEMU_ENV_DST, cmd_dst);
+
+ pid = fork();
+ if (!pid) {
+ close(1);
+ open("/dev/null", O_WRONLY);
+ execl(python, python, VMSTATE_CHECKER_SCRIPT,
+ "-s", vmstate_src,
+ "-d", vmstate_dst,
+ NULL);
+ g_assert_not_reached();
+ }
+
+ g_assert(waitpid(pid, &wstatus, 0) == pid);
+ if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0) {
+ g_test_message("Failed to run vmstate-static-checker.py");
+ g_test_fail();
+ }
+
+ cleanup("vmstate-src");
+ cleanup("vmstate-dst");
+}
#endif
static void test_precopy_common(MigrateCommon *args)
@@ -1693,10 +1872,43 @@ finish:
test_migrate_end(from, to, args->result == MIG_TEST_SUCCEED);
}
+static void file_dirty_offset_region(void)
+{
+ g_autofree char *path = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME);
+ size_t size = FILE_TEST_OFFSET;
+ g_autofree char *data = g_new0(char, size);
+
+ memset(data, FILE_TEST_MARKER, size);
+ g_assert(g_file_set_contents(path, data, size, NULL));
+}
+
+static void file_check_offset_region(void)
+{
+    g_autofree char *path = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME);
+    size_t size = FILE_TEST_OFFSET;
+    g_autofree char *expected = g_new0(char, size);
+    g_autofree char *actual = NULL;
+    gsize actual_len = 0;
+    uint64_t stream_start;
+
+    /*
+     * Ensure the skipped offset region's data has not been touched
+     * and the migration stream starts at the right place.
+     */
+
+    memset(expected, FILE_TEST_MARKER, size);
+
+    g_assert(g_file_get_contents(path, &actual, &actual_len, NULL));
+
+    /*
+     * The file must hold the whole offset region plus the first 8
+     * bytes of the stream, otherwise the reads below would run past
+     * the end of the buffer.
+     */
+    g_assert_cmpuint(actual_len, >=, size + sizeof(stream_start));
+    g_assert(!memcmp(actual, expected, size));
+
+    /*
+     * Copy the bytes out instead of dereferencing a cast pointer, then
+     * convert from the big-endian on-disk value; the magic is in the
+     * upper 32 bits.
+     */
+    memcpy(&stream_start, actual + size, sizeof(stream_start));
+    g_assert_cmpint(be64_to_cpu(stream_start) >> 32, ==, QEMU_VM_FILE_MAGIC);
+}
+
static void test_file_common(MigrateCommon *args, bool stop_src)
{
QTestState *from, *to;
void *data_hook = NULL;
+ bool check_offset = false;
if (test_migrate_start(&from, &to, args->listen_uri, &args->start)) {
return;
@@ -1709,6 +1921,16 @@ static void test_file_common(MigrateCommon *args, bool stop_src)
*/
g_assert_false(args->live);
+ if (g_strrstr(args->connect_uri, "offset=")) {
+ check_offset = true;
+ /*
+ * This comes before the start_hook because it's equivalent to
+ * a management application creating the file and writing to
+ * it so hooks should expect the file to be already present.
+ */
+ file_dirty_offset_region();
+ }
+
if (args->start_hook) {
data_hook = args->start_hook(from, to);
}
@@ -1743,6 +1965,10 @@ static void test_file_common(MigrateCommon *args, bool stop_src)
wait_for_serial("dest_serial");
+ if (check_offset) {
+ file_check_offset_region();
+ }
+
finish:
if (args->finish_hook) {
args->finish_hook(from, to, data_hook);
@@ -1942,36 +2168,53 @@ static void test_precopy_file(void)
test_file_common(&args, true);
}
-static void file_offset_finish_hook(QTestState *from, QTestState *to,
- void *opaque)
+#ifndef _WIN32
+static void fdset_add_fds(QTestState *qts, const char *file, int flags,
+ int num_fds, bool direct_io)
{
-#if defined(__linux__)
- g_autofree char *path = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME);
- size_t size = FILE_TEST_OFFSET + sizeof(QEMU_VM_FILE_MAGIC);
- uintptr_t *addr, *p;
- int fd;
+ for (int i = 0; i < num_fds; i++) {
+ int fd;
- fd = open(path, O_RDONLY);
- g_assert(fd != -1);
- addr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
- g_assert(addr != MAP_FAILED);
+#ifdef O_DIRECT
+ /* only secondary channels can use direct-io */
+ if (direct_io && i != 0) {
+ flags |= O_DIRECT;
+ }
+#endif
- /*
- * Ensure the skipped offset contains zeros and the migration
- * stream starts at the right place.
- */
- p = addr;
- while (p < addr + FILE_TEST_OFFSET / sizeof(uintptr_t)) {
- g_assert(*p == 0);
- p++;
+ fd = open(file, flags, 0660);
+ assert(fd != -1);
+
+ qtest_qmp_fds_assert_success(qts, &fd, 1, "{'execute': 'add-fd', "
+ "'arguments': {'fdset-id': 1}}");
+ close(fd);
}
- g_assert_cmpint(cpu_to_be64(*p) >> 32, ==, QEMU_VM_FILE_MAGIC);
+}
- munmap(addr, size);
- close(fd);
-#endif
+static void *file_offset_fdset_start_hook(QTestState *from, QTestState *to)
+{
+ g_autofree char *file = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME);
+
+ fdset_add_fds(from, file, O_WRONLY, 1, false);
+ fdset_add_fds(to, file, O_RDONLY, 1, false);
+
+ return NULL;
}
+static void test_precopy_file_offset_fdset(void)
+{
+ g_autofree char *uri = g_strdup_printf("file:/dev/fdset/1,offset=%d",
+ FILE_TEST_OFFSET);
+ MigrateCommon args = {
+ .connect_uri = uri,
+ .listen_uri = "defer",
+ .start_hook = file_offset_fdset_start_hook,
+ };
+
+ test_file_common(&args, false);
+}
+#endif
+
static void test_precopy_file_offset(void)
{
g_autofree char *uri = g_strdup_printf("file:%s/%s,offset=%d", tmpfs,
@@ -1980,7 +2223,6 @@ static void test_precopy_file_offset(void)
MigrateCommon args = {
.connect_uri = uri,
.listen_uri = "defer",
- .finish_hook = file_offset_finish_hook,
};
test_file_common(&args, false);
@@ -2098,6 +2340,118 @@ static void test_multifd_file_mapped_ram(void)
test_file_common(&args, true);
}
+static void *multifd_mapped_ram_dio_start(QTestState *from, QTestState *to)
+{
+ migrate_multifd_mapped_ram_start(from, to);
+
+ migrate_set_parameter_bool(from, "direct-io", true);
+ migrate_set_parameter_bool(to, "direct-io", true);
+
+ return NULL;
+}
+
+static void test_multifd_file_mapped_ram_dio(void)
+{
+ g_autofree char *uri = g_strdup_printf("file:%s/%s", tmpfs,
+ FILE_TEST_FILENAME);
+ MigrateCommon args = {
+ .connect_uri = uri,
+ .listen_uri = "defer",
+ .start_hook = multifd_mapped_ram_dio_start,
+ };
+
+ if (!probe_o_direct_support(tmpfs)) {
+ g_test_skip("Filesystem does not support O_DIRECT");
+ return;
+ }
+
+ test_file_common(&args, true);
+}
+
+#ifndef _WIN32
+static void multifd_mapped_ram_fdset_end(QTestState *from, QTestState *to,
+ void *opaque)
+{
+ QDict *resp;
+ QList *fdsets;
+
+ /*
+ * Remove the fdsets after migration, otherwise a second migration
+ * would fail due to fdset reuse.
+ */
+ qtest_qmp_assert_success(from, "{'execute': 'remove-fd', "
+ "'arguments': { 'fdset-id': 1}}");
+
+ /*
+ * Make sure no fdsets are left after migration, otherwise a
+ * second migration would fail due to fdset reuse.
+ */
+ resp = qtest_qmp(from, "{'execute': 'query-fdsets', "
+ "'arguments': {}}");
+ g_assert(qdict_haskey(resp, "return"));
+ fdsets = qdict_get_qlist(resp, "return");
+ g_assert(fdsets && qlist_empty(fdsets));
+}
+
+static void *multifd_mapped_ram_fdset_dio(QTestState *from, QTestState *to)
+{
+ g_autofree char *file = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME);
+
+ fdset_add_fds(from, file, O_WRONLY, 2, true);
+ fdset_add_fds(to, file, O_RDONLY, 2, true);
+
+ migrate_multifd_mapped_ram_start(from, to);
+ migrate_set_parameter_bool(from, "direct-io", true);
+ migrate_set_parameter_bool(to, "direct-io", true);
+
+ return NULL;
+}
+
+static void *multifd_mapped_ram_fdset(QTestState *from, QTestState *to)
+{
+ g_autofree char *file = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME);
+
+ fdset_add_fds(from, file, O_WRONLY, 2, false);
+ fdset_add_fds(to, file, O_RDONLY, 2, false);
+
+ migrate_multifd_mapped_ram_start(from, to);
+
+ return NULL;
+}
+
+static void test_multifd_file_mapped_ram_fdset(void)
+{
+ g_autofree char *uri = g_strdup_printf("file:/dev/fdset/1,offset=%d",
+ FILE_TEST_OFFSET);
+ MigrateCommon args = {
+ .connect_uri = uri,
+ .listen_uri = "defer",
+ .start_hook = multifd_mapped_ram_fdset,
+ .finish_hook = multifd_mapped_ram_fdset_end,
+ };
+
+ test_file_common(&args, true);
+}
+
+static void test_multifd_file_mapped_ram_fdset_dio(void)
+{
+ g_autofree char *uri = g_strdup_printf("file:/dev/fdset/1,offset=%d",
+ FILE_TEST_OFFSET);
+ MigrateCommon args = {
+ .connect_uri = uri,
+ .listen_uri = "defer",
+ .start_hook = multifd_mapped_ram_fdset_dio,
+ .finish_hook = multifd_mapped_ram_fdset_end,
+ };
+
+ if (!probe_o_direct_support(tmpfs)) {
+ g_test_skip("Filesystem does not support O_DIRECT");
+ return;
+ }
+
+ test_file_common(&args, true);
+}
+#endif /* !_WIN32 */
static void test_precopy_tcp_plain(void)
{
@@ -3465,6 +3819,8 @@ int main(int argc, char **argv)
migration_test_add("/migration/bad_dest", test_baddest);
#ifndef _WIN32
migration_test_add("/migration/analyze-script", test_analyze_script);
+ migration_test_add("/migration/vmstate-checker-script",
+ test_vmstate_checker_script);
#endif
/*
@@ -3492,10 +3848,10 @@ int main(int argc, char **argv)
test_postcopy_preempt);
migration_test_add("/migration/postcopy/preempt/recovery/plain",
test_postcopy_preempt_recovery);
-#ifndef _WIN32
- migration_test_add("/migration/postcopy/recovery/double-failures",
- test_postcopy_recovery_double_fail);
-#endif /* _WIN32 */
+ migration_test_add("/migration/postcopy/recovery/double-failures/handshake",
+ test_postcopy_recovery_fail_handshake);
+ migration_test_add("/migration/postcopy/recovery/double-failures/reconnect",
+ test_postcopy_recovery_fail_reconnect);
if (is_x86) {
migration_test_add("/migration/postcopy/suspend",
test_postcopy_suspend);
@@ -3510,6 +3866,10 @@ int main(int argc, char **argv)
test_precopy_file);
migration_test_add("/migration/precopy/file/offset",
test_precopy_file_offset);
+#ifndef _WIN32
+ migration_test_add("/migration/precopy/file/offset/fdset",
+ test_precopy_file_offset_fdset);
+#endif
migration_test_add("/migration/precopy/file/offset/bad",
test_precopy_file_offset_bad);
@@ -3531,6 +3891,16 @@ int main(int argc, char **argv)
migration_test_add("/migration/multifd/file/mapped-ram/live",
test_multifd_file_mapped_ram_live);
+ migration_test_add("/migration/multifd/file/mapped-ram/dio",
+ test_multifd_file_mapped_ram_dio);
+
+#ifndef _WIN32
+ migration_test_add("/migration/multifd/file/mapped-ram/fdset",
+ test_multifd_file_mapped_ram_fdset);
+ migration_test_add("/migration/multifd/file/mapped-ram/fdset/dio",
+ test_multifd_file_mapped_ram_fdset_dio);
+#endif
+
#ifdef CONFIG_GNUTLS
migration_test_add("/migration/precopy/unix/tls/psk",
test_precopy_unix_tls_psk);
diff --git a/util/osdep.c b/util/osdep.c
index 5d23bbfbec..770369831b 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -282,6 +282,15 @@ int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive)
}
#endif
+bool qemu_has_direct_io(void)
+{
+#ifdef O_DIRECT
+ return true;
+#else
+ return false;
+#endif
+}
+
static int qemu_open_cloexec(const char *name, int flags, mode_t mode)
{
int ret;
@@ -310,7 +319,6 @@ qemu_open_internal(const char *name, int flags, mode_t mode, Error **errp)
/* Attempt dup of fd from fd set */
if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
int64_t fdset_id;
- int dupfd;
fdset_id = qemu_parse_fdset(fdset_id_str);
if (fdset_id == -1) {
@@ -319,14 +327,7 @@ qemu_open_internal(const char *name, int flags, mode_t mode, Error **errp)
return -1;
}
- dupfd = monitor_fdset_dup_fd_add(fdset_id, flags);
- if (dupfd == -1) {
- error_setg_errno(errp, errno, "Could not dup FD for %s flags %x",
- name, flags);
- return -1;
- }
-
- return dupfd;
+ return monitor_fdset_dup_fd_add(fdset_id, flags, errp);
}
#endif
@@ -398,21 +399,8 @@ int qemu_open_old(const char *name, int flags, ...)
int qemu_close(int fd)
{
- int64_t fdset_id;
-
/* Close fd that was dup'd from an fdset */
- fdset_id = monitor_fdset_dup_fd_find(fd);
- if (fdset_id != -1) {
- int ret;
-
- ret = close(fd);
- if (ret == 0) {
- monitor_fdset_dup_fd_remove(fd);
- }
-
- return ret;
- }
-
+ monitor_fdset_dup_fd_remove(fd);
return close(fd);
}