From 3b59ee722726d023f4a56d711aef362ab9ba777e Mon Sep 17 00:00:00 2001 From: Pan Nengyuan Date: Fri, 8 May 2020 06:07:54 -0400 Subject: migration/rdma: fix potential nullptr access in rdma_start_incoming_migration 'rdma' is NULL when taking the first error branch in rdma_start_incoming_migration. And it will cause a null pointer access in label 'err'. Fix that. Fixes: 59c59c67ee6b0327ae932deb303caa47919aeb1e Signed-off-by: Pan Nengyuan Message-Id: <20200508100755.7875-2-pannengyuan@huawei.com> Reviewed-by: Juan Quintela Signed-off-by: Dr. David Alan Gilbert Note this is CID 1428762 --- migration/rdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/migration/rdma.c b/migration/rdma.c index 967fda5b0c..72e8b1c95b 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -4056,7 +4056,9 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp) return; err: error_propagate(errp, local_err); - g_free(rdma->host); + if (rdma) { + g_free(rdma->host); + } g_free(rdma); g_free(rdma_return_path); } -- cgit v1.2.3 From 2f0c285aaac5fdc4c5c2f8e03fa8c11e679c50c4 Mon Sep 17 00:00:00 2001 From: Pan Nengyuan Date: Fri, 8 May 2020 06:07:55 -0400 Subject: migration/rdma: cleanup rdma context before g_free to avoid memleaks When error happen in initializing 'rdma_return_path', we should cleanup rdma context before g_free(rdma) to avoid some memleaks. This patch fix that. Reported-by: Euler Robot Signed-off-by: Pan Nengyuan Message-Id: <20200508100755.7875-3-pannengyuan@huawei.com> Reviewed-by: Juan Quintela Signed-off-by: Dr. David Alan Gilbert --- migration/rdma.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/migration/rdma.c b/migration/rdma.c index 72e8b1c95b..ec45d33ba3 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -4094,20 +4094,20 @@ void rdma_start_outgoing_migration(void *opaque, rdma_return_path = qemu_rdma_data_init(host_port, errp); if (rdma_return_path == NULL) { - goto err; + goto return_path_err; } ret = qemu_rdma_source_init(rdma_return_path, s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); if (ret) { - goto err; + goto return_path_err; } ret = qemu_rdma_connect(rdma_return_path, errp); if (ret) { - goto err; + goto return_path_err; } rdma->return_path = rdma_return_path; @@ -4120,6 +4120,8 @@ void rdma_start_outgoing_migration(void *opaque, s->to_dst_file = qemu_fopen_rdma(rdma, "wb"); migrate_fd_connect(s, NULL); return; +return_path_err: + qemu_rdma_cleanup(rdma); err: g_free(rdma); g_free(rdma_return_path); -- cgit v1.2.3 From 89cf4fe34f4afa671a2ab5d9430021ea12106274 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 20 May 2020 16:11:07 +0100 Subject: hmp: Implement qom-get HMP command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This started off as Andreas Färber's implementation from March 2015, but after feedback from Paolo and Markus it morphed into using the json output which handles structs reasonably. Use with qom-list to find the members of an object. (qemu) qom-get /backend/console[0]/device/vga.rom[0] size 65536 (qemu) qom-get /machine smm "auto" (qemu) qom-get /machine rtc-time { "tm_year": 120, "tm_sec": 51, "tm_hour": 9, "tm_min": 50, "tm_mon": 4, "tm_mday": 20 } (qemu) qom-get /machine frob Error: Property '.frob' not found Signed-off-by: Dr. David Alan Gilbert Message-Id: <20200520151108.160598-2-dgilbert@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Markus Armbruster Signed-off-by: Dr. David Alan Gilbert --- hmp-commands.hx | 14 ++++++++++++++ include/monitor/hmp.h | 1 + qom/qom-hmp-cmds.c | 18 ++++++++++++++++++ tests/qtest/test-hmp.c | 1 + 4 files changed, 34 insertions(+) diff --git a/hmp-commands.hx b/hmp-commands.hx index 7f0f3974ad..250ddae54d 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1790,6 +1790,20 @@ SRST Print QOM properties of object at location *path* ERST + { + .name = "qom-get", + .args_type = "path:s,property:s", + .params = "path property", + .help = "print QOM property", + .cmd = hmp_qom_get, + .flags = "p", + }, + +SRST +``qom-get`` *path* *property* + Print QOM property *property* of object at location *path* +ERST + { .name = "qom-set", .args_type = "path:s,property:s,value:s", diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h index e33ca5a911..c986cfd28b 100644 --- a/include/monitor/hmp.h +++ b/include/monitor/hmp.h @@ -96,6 +96,7 @@ void hmp_info_memdev(Monitor *mon, const QDict *qdict); void hmp_info_numa(Monitor *mon, const QDict *qdict); void hmp_info_memory_devices(Monitor *mon, const QDict *qdict); void hmp_qom_list(Monitor *mon, const QDict *qdict); +void hmp_qom_get(Monitor *mon, const QDict *qdict); void hmp_qom_set(Monitor *mon, const QDict *qdict); void hmp_info_qom_tree(Monitor *mon, const QDict *dict); void object_add_completion(ReadLineState *rs, int nb_args, const char *str); diff --git a/qom/qom-hmp-cmds.c b/qom/qom-hmp-cmds.c index cd08233a4c..a8b0a080c7 100644 --- a/qom/qom-hmp-cmds.c +++ b/qom/qom-hmp-cmds.c @@ -12,6 +12,8 @@ #include "qapi/error.h" #include "qapi/qapi-commands-qom.h" #include "qapi/qmp/qdict.h" +#include "qapi/qmp/qjson.h" +#include "qapi/qmp/qstring.h" #include "qom/object.h" void hmp_qom_list(Monitor *mon, const QDict *qdict) @@ -62,6 +64,22 @@ void hmp_qom_set(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, err); } +void hmp_qom_get(Monitor *mon, const QDict *qdict) +{ + const char *path = qdict_get_str(qdict, "path"); + const char *property = qdict_get_str(qdict, "property"); + Error *err = NULL; + QObject *obj = qmp_qom_get(path, property, &err); + + if (err == NULL) { + QString *str = qobject_to_json_pretty(obj); + monitor_printf(mon, "%s\n", qstring_get_str(str)); + qobject_unref(str); + } + + hmp_handle_error(mon, err); +} + typedef struct QOMCompositionState { Monitor *mon; int indent; diff --git a/tests/qtest/test-hmp.c b/tests/qtest/test-hmp.c index f8aa5f92c5..b8b1271b9e 100644 --- a/tests/qtest/test-hmp.c +++ b/tests/qtest/test-hmp.c @@ -61,6 +61,7 @@ static const char *hmp_cmds[] = { "p $pc + 8", "qom-list /", "qom-set /machine initrd test", + "qom-get /machine initrd", "screendump /dev/null", "sendkey x", "singlestep on", -- cgit v1.2.3 From 7d2ef6dcc1cf87e7506487051eda010b479f5d0e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 20 May 2020 16:11:08 +0100 Subject: hmp: Simplify qom-set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify qom_set by making it use qmp_qom_set and the JSON parser. (qemu) qom-get /machine smm "auto" (qemu) qom-set /machine smm "auto" Signed-off-by: Dr. David Alan Gilbert Message-Id: <20200520151108.160598-3-dgilbert@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Markus Armbruster Signed-off-by: Dr. David Alan Gilbert With 's'->'S' type change suggested by Paolo and Markus --- hmp-commands.hx | 2 +- qom/qom-hmp-cmds.c | 16 +++++----------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/hmp-commands.hx b/hmp-commands.hx index 250ddae54d..28256209b5 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1806,7 +1806,7 @@ ERST { .name = "qom-set", - .args_type = "path:s,property:s,value:s", + .args_type = "path:s,property:s,value:S", .params = "path property value", .help = "set QOM property", .cmd = hmp_qom_set, diff --git a/qom/qom-hmp-cmds.c b/qom/qom-hmp-cmds.c index a8b0a080c7..f704b6949a 100644 --- a/qom/qom-hmp-cmds.c +++ b/qom/qom-hmp-cmds.c @@ -48,19 +48,13 @@ void hmp_qom_set(Monitor *mon, const QDict *qdict) const char *property = qdict_get_str(qdict, "property"); const char *value = qdict_get_str(qdict, "value"); Error *err = NULL; - bool ambiguous = false; - Object *obj; + QObject *obj; - obj = object_resolve_path(path, &ambiguous); - if (obj == NULL) { - error_set(&err, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", path); - } else { - if (ambiguous) { - monitor_printf(mon, "Warning: Path '%s' is ambiguous\n", path); - } - object_property_parse(obj, value, property, &err); + obj = qobject_from_json(value, &err); + if (err == NULL) { + qmp_qom_set(path, property, obj, &err); } + hmp_handle_error(mon, err); } -- cgit v1.2.3 From 93bb3d8d4cdadf90f714b8ab71b4caa16092f38c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 14 May 2020 16:07:36 +0200 Subject: virtiofsd: remove symlink fallbacks Path lookup in the kernel has special rules for looking up magic symlinks under /proc. If a filesystem operation is instructed to follow symlinks (e.g. via AT_SYMLINK_FOLLOW or lack of AT_SYMLINK_NOFOLLOW), and the final component is such a proc symlink, then the target of the magic symlink is used for the operation, even if the target itself is a symlink. I.e. path lookup is always terminated after following a final magic symlink. I was erronously assuming that in the above case the target symlink would also be followed, and so workarounds were added for a couple of operations to handle the symlink case. Since the symlink can be handled simply by following the proc symlink, these workardouds are not needed. Also remove the "norace" option, which disabled the workarounds. Commit bdfd66788349 ("virtiofsd: Fix xattr operations") already dealt with the same issue for xattr operations. Signed-off-by: Miklos Szeredi Message-Id: <20200514140736.20561-1-mszeredi@redhat.com> Acked-by: Vivek Goyal Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/passthrough_ll.c | 175 ++------------------------------------- 1 file changed, 6 insertions(+), 169 deletions(-) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index 3ba1d90984..2ce7c96085 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -140,7 +140,6 @@ enum { struct lo_data { pthread_mutex_t mutex; int debug; - int norace; int writeback; int flock; int posix_lock; @@ -176,7 +175,6 @@ static const struct fuse_opt lo_opts[] = { { "cache=none", offsetof(struct lo_data, cache), CACHE_NONE }, { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO }, { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, - { "norace", offsetof(struct lo_data, norace), 1 }, { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, FUSE_OPT_END @@ -592,136 +590,6 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino, fuse_reply_attr(req, &buf, lo->timeout); } -/* - * Increments parent->nlookup and caller must release refcount using - * lo_inode_put(&parent). - */ -static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, - char path[PATH_MAX], struct lo_inode **parent) -{ - char procname[64]; - char *last; - struct stat stat; - struct lo_inode *p; - int retries = 2; - int res; - -retry: - sprintf(procname, "%i", inode->fd); - - res = readlinkat(lo->proc_self_fd, procname, path, PATH_MAX); - if (res < 0) { - fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__); - goto fail_noretry; - } - - if (res >= PATH_MAX) { - fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__); - goto fail_noretry; - } - path[res] = '\0'; - - last = strrchr(path, '/'); - if (last == NULL) { - /* Shouldn't happen */ - fuse_log( - FUSE_LOG_WARNING, - "%s: INTERNAL ERROR: bad path read from proc\n", __func__); - goto fail_noretry; - } - if (last == path) { - p = &lo->root; - pthread_mutex_lock(&lo->mutex); - p->nlookup++; - g_atomic_int_inc(&p->refcount); - pthread_mutex_unlock(&lo->mutex); - } else { - *last = '\0'; - res = fstatat(AT_FDCWD, last == path ? "/" : path, &stat, 0); - if (res == -1) { - if (!retries) { - fuse_log(FUSE_LOG_WARNING, - "%s: failed to stat parent: %m\n", __func__); - } - goto fail; - } - p = lo_find(lo, &stat); - if (p == NULL) { - if (!retries) { - fuse_log(FUSE_LOG_WARNING, - "%s: failed to find parent\n", __func__); - } - goto fail; - } - } - last++; - res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW); - if (res == -1) { - if (!retries) { - fuse_log(FUSE_LOG_WARNING, - "%s: failed to stat last\n", __func__); - } - goto fail_unref; - } - if (stat.st_dev != inode->key.dev || stat.st_ino != inode->key.ino) { - if (!retries) { - fuse_log(FUSE_LOG_WARNING, - "%s: failed to match last\n", __func__); - } - goto fail_unref; - } - *parent = p; - memmove(path, last, strlen(last) + 1); - - return 0; - -fail_unref: - unref_inode_lolocked(lo, p, 1); - lo_inode_put(lo, &p); -fail: - if (retries) { - retries--; - goto retry; - } -fail_noretry: - errno = EIO; - return -1; -} - -static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, - const struct timespec *tv) -{ - int res; - struct lo_inode *parent; - char path[PATH_MAX]; - - if (S_ISLNK(inode->filetype)) { - res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH); - if (res == -1 && errno == EINVAL) { - /* Sorry, no race free way to set times on symlink. */ - if (lo->norace) { - errno = EPERM; - } else { - goto fallback; - } - } - return res; - } - sprintf(path, "%i", inode->fd); - - return utimensat(lo->proc_self_fd, path, tv, 0); - -fallback: - res = lo_parent_and_name(lo, inode, path, &parent); - if (res != -1) { - res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); - unref_inode_lolocked(lo, parent, 1); - lo_inode_put(lo, &parent); - } - - return res; -} - static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) { struct lo_data *lo = lo_data(req); @@ -828,7 +696,8 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, if (fi) { res = futimens(fd, tv); } else { - res = utimensat_empty(lo, inode, tv); + sprintf(procname, "%i", inode->fd); + res = utimensat(lo->proc_self_fd, procname, tv, 0); } if (res == -1) { goto out_err; @@ -1129,41 +998,6 @@ static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); } -static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, - int dfd, const char *name) -{ - int res; - struct lo_inode *parent; - char path[PATH_MAX]; - - if (S_ISLNK(inode->filetype)) { - res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); - if (res == -1 && (errno == ENOENT || errno == EINVAL)) { - /* Sorry, no race free way to hard-link a symlink. */ - if (lo->norace) { - errno = EPERM; - } else { - goto fallback; - } - } - return res; - } - - sprintf(path, "%i", inode->fd); - - return linkat(lo->proc_self_fd, path, dfd, name, AT_SYMLINK_FOLLOW); - -fallback: - res = lo_parent_and_name(lo, inode, path, &parent); - if (res != -1) { - res = linkat(parent->fd, path, dfd, name, 0); - unref_inode_lolocked(lo, parent, 1); - lo_inode_put(lo, &parent); - } - - return res; -} - static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, const char *name) { @@ -1172,6 +1006,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, struct lo_inode *parent_inode; struct lo_inode *inode; struct fuse_entry_param e; + char procname[64]; int saverr; if (!is_safe_path_component(name)) { @@ -1190,7 +1025,9 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, e.attr_timeout = lo->timeout; e.entry_timeout = lo->timeout; - res = linkat_empty_nofollow(lo, inode, parent_inode->fd, name); + sprintf(procname, "%i", inode->fd); + res = linkat(lo->proc_self_fd, procname, parent_inode->fd, name, + AT_SYMLINK_FOLLOW); if (res == -1) { goto out_err; } -- cgit v1.2.3 From e0d138aa9b3a4c72fe8bca735686132fdea646b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Sat, 30 May 2020 18:55:12 +0200 Subject: migration/vmstate: Remove unnecessary MemoryRegion forward declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "migration/vmstate.h" only uses pointer to MemoryRegion, which is already forward declared in "qemu/typedefs.h". Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20200530165512.15225-1-f4bug@amsat.org> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- include/migration/vmstate.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 30667631bc..eafa39f560 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -1199,7 +1199,6 @@ static inline int vmstate_register(VMStateIf *obj, int instance_id, void vmstate_unregister(VMStateIf *obj, const VMStateDescription *vmsd, void *opaque); -struct MemoryRegion; void vmstate_register_ram(struct MemoryRegion *memory, DeviceState *dev); void vmstate_unregister_ram(struct MemoryRegion *memory, DeviceState *dev); void vmstate_register_ram_global(struct MemoryRegion *memory); -- cgit v1.2.3 From bb70b66ed7a5600a63437fb6d64dedb44f670e58 Mon Sep 17 00:00:00 2001 From: Lukas Straub Date: Mon, 11 May 2020 13:10:44 +0200 Subject: migration/colo.c: Use event instead of semaphore If multiple packets miscompare in a short timeframe, the semaphore value will be increased multiple times. This causes multiple checkpoints even if one would be sufficient. Fix this by using a event instead of a semaphore for triggering checkpoints. Now, checkpoint requests will be ignored until the checkpoint event is sent to colo-compare (which releases the miscompared packets). Benchmark results (iperf3): Client-to-server tcp: without patch: ~66 Mbit/s with patch: ~61 Mbit/s Server-to-client tcp: without patch: ~702 Kbit/s with patch: ~16 Mbit/s Signed-off-by: Lukas Straub Message-Id: Reviewed-by: zhanghailiang Signed-off-by: Dr. David Alan Gilbert --- migration/colo.c | 9 +++++---- migration/migration.h | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/migration/colo.c b/migration/colo.c index d015d4f84e..fe0d6e93e5 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -436,6 +436,7 @@ static int colo_do_checkpoint_transaction(MigrationState *s, goto out; } + qemu_event_reset(&s->colo_checkpoint_event); colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err); if (local_err) { goto out; @@ -589,7 +590,7 @@ static void colo_process_checkpoint(MigrationState *s) goto out; } - qemu_sem_wait(&s->colo_checkpoint_sem); + qemu_event_wait(&s->colo_checkpoint_event); if (s->state != MIGRATION_STATUS_COLO) { goto out; @@ -637,7 +638,7 @@ out: colo_compare_unregister_notifier(&packets_compare_notifier); timer_del(s->colo_delay_timer); timer_free(s->colo_delay_timer); - qemu_sem_destroy(&s->colo_checkpoint_sem); + qemu_event_destroy(&s->colo_checkpoint_event); /* * Must be called after failover BH is completed, @@ -654,7 +655,7 @@ void colo_checkpoint_notify(void *opaque) MigrationState *s = opaque; int64_t next_notify_time; - qemu_sem_post(&s->colo_checkpoint_sem); + qemu_event_set(&s->colo_checkpoint_event); s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); next_notify_time = s->colo_checkpoint_time + s->parameters.x_checkpoint_delay; @@ -664,7 +665,7 @@ void colo_checkpoint_notify(void *opaque) void migrate_start_colo_process(MigrationState *s) { qemu_mutex_unlock_iothread(); - qemu_sem_init(&s->colo_checkpoint_sem, 0); + qemu_event_init(&s->colo_checkpoint_event, false); s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST, colo_checkpoint_notify, s); diff --git a/migration/migration.h b/migration/migration.h index 507284e563..f617960522 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -215,8 +215,8 @@ struct MigrationState /* The semaphore is used to notify COLO thread that failover is finished */ QemuSemaphore colo_exit_sem; - /* The semaphore is used to notify COLO thread to do checkpoint */ - QemuSemaphore colo_checkpoint_sem; + /* The event is used to notify COLO thread to do checkpoint */ + QemuEvent colo_checkpoint_event; int64_t colo_checkpoint_time; QEMUTimer *colo_delay_timer; -- cgit v1.2.3 From 786d8b8e38b134f99556e08047e016563b7063f9 Mon Sep 17 00:00:00 2001 From: Lukas Straub Date: Mon, 11 May 2020 13:10:48 +0200 Subject: migration/colo.c: Use cpu_synchronize_all_states() cpu_synchronize_all_pre_loadvm() marks all vcpus as dirty, so the registers are loaded from CPUState before we continue running the vm. However if we failover during checkpoint, CPUState is not initialized and the registers are loaded with garbage. This causes guest hangs and crashes. Fix this by using cpu_synchronize_all_states(), which initializes CPUState from the current cpu registers additionally to marking the vcpus as dirty. Signed-off-by: Lukas Straub Message-Id: <9675031ce557b73ebd10e7bd20ebbf57f30b177c.1589193382.git.lukasstraub2@web.de> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/colo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migration/colo.c b/migration/colo.c index fe0d6e93e5..d00b3b9d6b 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -705,7 +705,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, } qemu_mutex_lock_iothread(); - cpu_synchronize_all_pre_loadvm(); + cpu_synchronize_all_states(); ret = qemu_loadvm_state_main(mis->from_src_file, mis); qemu_mutex_unlock_iothread(); -- cgit v1.2.3 From 24fa16f8cc374935458672b027829180ea4b4460 Mon Sep 17 00:00:00 2001 From: Lukas Straub Date: Mon, 11 May 2020 13:10:51 +0200 Subject: migration/colo.c: Flush ram cache only after receiving device state If we suceed in receiving ram state, but fail receiving the device state, there will be a mismatch between the two. Fix this by flushing the ram cache only after the vmstate has been received. Signed-off-by: Lukas Straub Message-Id: <3289d007d494cb0e2f05b1cf4ae6a78d300fede3.1589193382.git.lukasstraub2@web.de> Reviewed-by: zhanghailiang Signed-off-by: Dr. David Alan Gilbert --- migration/colo.c | 1 + migration/ram.c | 5 +---- migration/ram.h | 1 + 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/migration/colo.c b/migration/colo.c index d00b3b9d6b..4105999634 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -748,6 +748,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, qemu_mutex_lock_iothread(); vmstate_loading = true; + colo_flush_ram_cache(); ret = qemu_load_device_state(fb); if (ret < 0) { error_setg(errp, "COLO: load device state failed"); diff --git a/migration/ram.c b/migration/ram.c index 859f835f1a..41cc530d9d 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3360,7 +3360,7 @@ static bool postcopy_is_running(void) * Flush content of RAM cache into SVM's memory. * Only flush the pages that be dirtied by PVM or SVM or both. */ -static void colo_flush_ram_cache(void) +void colo_flush_ram_cache(void) { RAMBlock *block = NULL; void *dst_host; @@ -3632,9 +3632,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) } trace_ram_load_complete(ret, seq_iter); - if (!ret && migration_incoming_in_colo_state()) { - colo_flush_ram_cache(); - } return ret; } diff --git a/migration/ram.h b/migration/ram.h index 5ceaff7cb4..2eeaacfa13 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -65,6 +65,7 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb); /* ram cache */ int colo_init_ram_cache(void); +void colo_flush_ram_cache(void); void colo_release_ram_cache(void); void colo_incoming_start_dirty_log(void); -- cgit v1.2.3 From 92c932de6c4a2b25826201e9d0740207136953c6 Mon Sep 17 00:00:00 2001 From: Lukas Straub Date: Mon, 11 May 2020 13:10:55 +0200 Subject: migration/colo.c: Relaunch failover even if there was an error If vmstate_loading is true, secondary_vm_do_failover will set failover status to FAILOVER_STATUS_RELAUNCH and return success without initiating failover. However, if there is an error during the vmstate_loading section, failover isn't relaunched. Instead we then wait for failover on colo_incoming_sem. Fix this by relaunching failover even if there was an error. Also, to make this work properly, set vmstate_loading to false when returning during the vmstate_loading section. Signed-off-by: Lukas Straub Message-Id: Reviewed-by: zhanghailiang Signed-off-by: Dr. David Alan Gilbert --- migration/colo.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/migration/colo.c b/migration/colo.c index 4105999634..59639f519f 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -752,6 +752,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, ret = qemu_load_device_state(fb); if (ret < 0) { error_setg(errp, "COLO: load device state failed"); + vmstate_loading = false; qemu_mutex_unlock_iothread(); return; } @@ -760,6 +761,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, replication_get_error_all(&local_err); if (local_err) { error_propagate(errp, local_err); + vmstate_loading = false; qemu_mutex_unlock_iothread(); return; } @@ -768,6 +770,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, replication_do_checkpoint_all(&local_err); if (local_err) { error_propagate(errp, local_err); + vmstate_loading = false; qemu_mutex_unlock_iothread(); return; } @@ -779,6 +782,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, if (local_err) { error_propagate(errp, local_err); + vmstate_loading = false; qemu_mutex_unlock_iothread(); return; } @@ -789,9 +793,6 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, qemu_mutex_unlock_iothread(); if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { - failover_set_state(FAILOVER_STATUS_RELAUNCH, - FAILOVER_STATUS_NONE); - failover_request_active(NULL); return; } @@ -890,6 +891,14 @@ void *colo_process_incoming_thread(void *opaque) error_report_err(local_err); break; } + + if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { + failover_set_state(FAILOVER_STATUS_RELAUNCH, + FAILOVER_STATUS_NONE); + failover_request_active(NULL); + break; + } + if (failover_get_state() != FAILOVER_STATUS_NONE) { error_report("failover request"); break; @@ -897,8 +906,6 @@ void *colo_process_incoming_thread(void *opaque) } out: - vmstate_loading = false; - /* * There are only two reasons we can get here, some error happened * or the user triggered failover. -- cgit v1.2.3 From 4fa8ed25b8dc996d92c60fb6b63752593be5f3a4 Mon Sep 17 00:00:00 2001 From: Lukas Straub Date: Mon, 11 May 2020 13:11:01 +0200 Subject: migration/colo.c: Move colo_notify_compares_event to the right place If the secondary has to failover during checkpointing, it still is in the old state (i.e. different state than primary). Thus we can't expose the primary state until after the checkpoint is sent. This fixes sporadic connection reset of client connections during failover. Signed-off-by: Lukas Straub Message-Id: Reviewed-by: zhanghailiang Signed-off-by: Dr. David Alan Gilbert --- migration/colo.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/migration/colo.c b/migration/colo.c index 59639f519f..ea7d1e9d4e 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -436,12 +436,6 @@ static int colo_do_checkpoint_transaction(MigrationState *s, goto out; } - qemu_event_reset(&s->colo_checkpoint_event); - colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err); - if (local_err) { - goto out; - } - /* Disable block migration */ migrate_set_block_enabled(false, &local_err); if (local_err) { @@ -503,6 +497,12 @@ static int colo_do_checkpoint_transaction(MigrationState *s, goto out; } + qemu_event_reset(&s->colo_checkpoint_event); + colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err); + if (local_err) { + goto out; + } + colo_receive_check_message(s->rp_state.from_dst_file, COLO_MESSAGE_VMSTATE_LOADED, &local_err); if (local_err) { -- cgit v1.2.3 From 773861274ad75a62c7ecf70ecc8e4ba31ed62190 Mon Sep 17 00:00:00 2001 From: Lukas Straub Date: Wed, 20 May 2020 22:42:32 +0200 Subject: migration/migration.c: Fix hang in ram_save_host_page migration_rate_limit will erroneously ratelimit a shutdown socket, which causes the migration thread to hang in ram_save_host_page if the socket is shutdown. Fix this by explicitly testing if the socket has errors or was shutdown in migration_rate_limit. Signed-off-by: Lukas Straub Message-Id: Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index 0bb042a0f7..b63ad91d34 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -3361,6 +3361,10 @@ bool migration_rate_limit(void) bool urgent = false; migration_update_counters(s, now); if (qemu_file_rate_limit(s->to_dst_file)) { + + if (qemu_file_get_error(s->to_dst_file)) { + return false; + } /* * Wait for a delay to do rate limiting OR * something urgent to post the semaphore. -- cgit v1.2.3