diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2020-12-31 15:55:11 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2020-12-31 15:55:11 +0000 |
commit | 3fb340ccf5f8385088a3d3b0e07763a8f5b85f4a (patch) | |
tree | a67fe3f2d895796d03ba7b76aa58d74fc6b3f98e | |
parent | 091774bfdee2b4f7dfd570061a200dfdc54374a6 (diff) | |
parent | 36d0fe65160d83cb065de9b6fe60114ee127d9f0 (diff) |
Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20201218a' into staging
Monitor, virtiofsd and migration pull
HMP cleanups
Migration fixes
Note the change in behaviour of not allowing a postmigrate migration
rather than crashing
Virtiofsd cleanups and fixes
--thread-pool-size=0 for no thread pool (faster for some workloads)
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
# gpg: Signature made Fri 18 Dec 2020 10:39:37 GMT
# gpg: using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full]
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7
* remotes/dgilbert/tags/pull-migration-20201218a:
migration: Don't allow migration if vm is in POSTMIGRATE
savevm: Delete snapshots just created in case of error
savevm: Remove dead code in save_snapshot()
docs/devel/migration: Improve debugging section a bit
virtiofsd: Remove useless code about send_notify_iov
virtiofsd: update FUSE_FORGET comment on "lo_inode.nlookup"
virtiofsd: Check file type in lo_flush()
virtiofsd: Disable posix_lock hash table if remote locks are not enabled
virtiofsd: Set up posix_lock hash table for root inode
virtiofsd: make the debug log timestamp on stderr more human-readable
virtiofsd: Use --thread-pool-size=0 to mean no thread pool
hmp-commands.hx: List abbreviation after command for cont, quit, print
monitor:Don't use '#' flag of printf format ('%#') in format strings
monitor:braces {} are necessary for all arms of this statement
monitor:open brace '{' following struct go on the same line
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | docs/devel/migration.rst | 11 | ||||
-rw-r--r-- | hmp-commands.hx | 12 | ||||
-rw-r--r-- | migration/migration.c | 6 | ||||
-rw-r--r-- | migration/savevm.c | 11 | ||||
-rw-r--r-- | monitor/hmp-cmds.c | 3 | ||||
-rw-r--r-- | monitor/misc.c | 16 | ||||
-rw-r--r-- | tools/virtiofsd/fuse_lowlevel.c | 98 | ||||
-rw-r--r-- | tools/virtiofsd/fuse_virtio.c | 36 | ||||
-rw-r--r-- | tools/virtiofsd/passthrough_ll.c | 91 |
9 files changed, 131 insertions, 153 deletions
diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst index 49112bb27a..ad381b89b2 100644 --- a/docs/devel/migration.rst +++ b/docs/devel/migration.rst @@ -53,22 +53,23 @@ savevm/loadvm functionality. Debugging ========= -The migration stream can be analyzed thanks to `scripts/analyze_migration.py`. +The migration stream can be analyzed thanks to `scripts/analyze-migration.py`. Example usage: .. code-block:: shell - $ qemu-system-x86_64 - (qemu) migrate "exec:cat > mig" - $ ./scripts/analyze_migration.py -f mig + $ qemu-system-x86_64 -display none -monitor stdio + (qemu) migrate "exec:cat > mig" + (qemu) q + $ ./scripts/analyze-migration.py -f mig { "ram (3)": { "section sizes": { "pc.ram": "0x0000000008000000", ... -See also ``analyze_migration.py -h`` help for more options. +See also ``analyze-migration.py -h`` help for more options. Common infrastructure ===================== diff --git a/hmp-commands.hx b/hmp-commands.hx index 470a420c2d..73e0832ea1 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -40,7 +40,7 @@ SRST ERST { - .name = "q|quit", + .name = "quit|q", .args_type = "", .params = "", .help = "quit the emulator", @@ -49,7 +49,7 @@ ERST }, SRST -``q`` or ``quit`` +``quit`` or ``q`` Quit the emulator. ERST @@ -401,7 +401,7 @@ SRST ERST { - .name = "c|cont", + .name = "cont|c", .args_type = "", .params = "", .help = "resume emulation", @@ -409,7 +409,7 @@ ERST }, SRST -``c`` or ``cont`` +``cont`` or ``c`` Resume emulation. ERST @@ -554,7 +554,7 @@ SRST ERST { - .name = "p|print", + .name = "print|p", .args_type = "fmt:/,val:l", .params = "/fmt expr", .help = "print expression value (use $reg for CPU register access)", @@ -562,7 +562,7 @@ ERST }, SRST -``p`` or ``print/``\ *fmt* *expr* +``print`` or ``p/``\ *fmt* *expr* Print expression value. Only the *format* part of *fmt* is used. 
ERST diff --git a/migration/migration.c b/migration/migration.c index e0dbde4091..f5d4a52c95 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2102,6 +2102,12 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, return false; } + if (runstate_check(RUN_STATE_POSTMIGRATE)) { + error_setg(errp, "Can't migrate the vm that was paused due to " + "previous migration"); + return false; + } + if (migration_is_blocked(errp)) { return false; } diff --git a/migration/savevm.c b/migration/savevm.c index 5f937a2762..4a18c9d897 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2728,7 +2728,7 @@ int qemu_load_device_state(QEMUFile *f) int save_snapshot(const char *name, Error **errp) { BlockDriverState *bs, *bs1; - QEMUSnapshotInfo sn1, *sn = &sn1, old_sn1, *old_sn = &old_sn1; + QEMUSnapshotInfo sn1, *sn = &sn1; int ret = -1, ret2; QEMUFile *f; int saved_vm_running; @@ -2797,13 +2797,7 @@ int save_snapshot(const char *name, Error **errp) } if (name) { - ret = bdrv_snapshot_find(bs, old_sn, name); - if (ret >= 0) { - pstrcpy(sn->name, sizeof(sn->name), old_sn->name); - pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str); - } else { - pstrcpy(sn->name, sizeof(sn->name), name); - } + pstrcpy(sn->name, sizeof(sn->name), name); } else { /* cast below needed for OpenBSD where tv_sec is still 'long' */ localtime_r((const time_t *)&tv.tv_sec, &tm); @@ -2839,6 +2833,7 @@ int save_snapshot(const char *name, Error **errp) if (ret < 0) { error_setg(errp, "Error while creating snapshot on '%s'", bdrv_get_device_or_node_name(bs)); + bdrv_all_delete_snapshot(sn->name, &bs, NULL); goto the_end; } diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index 65d8ff4849..79c84322b3 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -1549,8 +1549,7 @@ end: hmp_handle_error(mon, err); } -typedef struct HMPMigrationStatus -{ +typedef struct HMPMigrationStatus { QEMUTimer *timer; Monitor *mon; bool is_block_migration; diff --git 
a/monitor/misc.c b/monitor/misc.c index fde6e36a0b..6f5ae096dc 100644 --- a/monitor/misc.c +++ b/monitor/misc.c @@ -492,8 +492,10 @@ static void hmp_singlestep(Monitor *mon, const QDict *qdict) static void hmp_gdbserver(Monitor *mon, const QDict *qdict) { const char *device = qdict_get_try_str(qdict, "device"); - if (!device) + if (!device) { device = "tcp::" DEFAULT_GDBSTUB_PORT; + } + if (gdbserver_start(device) < 0) { monitor_printf(mon, "Could not open gdbserver on device '%s'\n", device); @@ -559,10 +561,11 @@ static void memory_dump(Monitor *mon, int count, int format, int wsize, } len = wsize * count; - if (wsize == 1) + if (wsize == 1) { line_size = 8; - else + } else { line_size = 16; + } max_digits = 0; switch(format) { @@ -583,10 +586,11 @@ static void memory_dump(Monitor *mon, int count, int format, int wsize, } while (len > 0) { - if (is_physical) + if (is_physical) { monitor_printf(mon, TARGET_FMT_plx ":", addr); - else + } else { monitor_printf(mon, TARGET_FMT_lx ":", (target_ulong)addr); + } l = len; if (l > line_size) l = line_size; @@ -915,7 +919,7 @@ static void hmp_ioport_read(Monitor *mon, const QDict *qdict) suffix = 'l'; break; } - monitor_printf(mon, "port%c[0x%04x] = %#0*x\n", + monitor_printf(mon, "port%c[0x%04x] = 0x%0*x\n", suffix, addr, size * 2, val); } diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c index d4119e92ab..e94b71110b 100644 --- a/tools/virtiofsd/fuse_lowlevel.c +++ b/tools/virtiofsd/fuse_lowlevel.c @@ -2143,104 +2143,6 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, send_reply_ok(req, NULL, 0); } -static int send_notify_iov(struct fuse_session *se, int notify_code, - struct iovec *iov, int count) -{ - struct fuse_out_header out = { - .error = notify_code, - }; - - if (!se->got_init) { - return -ENOTCONN; - } - - iov[0].iov_base = &out; - iov[0].iov_len = sizeof(struct fuse_out_header); - - return fuse_send_msg(se, NULL, iov, count); -} - -int fuse_lowlevel_notify_poll(struct 
fuse_pollhandle *ph) -{ - if (ph != NULL) { - struct fuse_notify_poll_wakeup_out outarg = { - .kh = ph->kh, - }; - struct iovec iov[2]; - - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - - return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); - } else { - return 0; - } -} - -int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, - off_t off, off_t len) -{ - struct fuse_notify_inval_inode_out outarg = { - .ino = ino, - .off = off, - .len = len, - }; - struct iovec iov[2]; - - if (!se) { - return -EINVAL; - } - - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - - return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); -} - -int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, - const char *name, size_t namelen) -{ - struct fuse_notify_inval_entry_out outarg = { - .parent = parent, - .namelen = namelen, - }; - struct iovec iov[3]; - - if (!se) { - return -EINVAL; - } - - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - iov[2].iov_base = (void *)name; - iov[2].iov_len = namelen + 1; - - return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); -} - -int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - fuse_ino_t child, const char *name, - size_t namelen) -{ - struct fuse_notify_delete_out outarg = { - .parent = parent, - .child = child, - .namelen = namelen, - }; - struct iovec iov[3]; - - if (!se) { - return -EINVAL; - } - - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - iov[2].iov_base = (void *)name; - iov[2].iov_len = namelen + 1; - - return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); -} - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, off_t offset, struct fuse_bufvec *bufv) { diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c index b264dcbd18..ddcefee427 100644 --- a/tools/virtiofsd/fuse_virtio.c +++ b/tools/virtiofsd/fuse_virtio.c @@ -578,13 +578,18 @@ static 
void *fv_queue_thread(void *opaque) struct VuDev *dev = &qi->virtio_dev->dev; struct VuVirtq *q = vu_get_queue(dev, qi->qidx); struct fuse_session *se = qi->virtio_dev->se; - GThreadPool *pool; - - pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, FALSE, - NULL); - if (!pool) { - fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); - return NULL; + GThreadPool *pool = NULL; + GList *req_list = NULL; + + if (se->thread_pool_size) { + fuse_log(FUSE_LOG_DEBUG, "%s: Creating thread pool for Queue %d\n", + __func__, qi->qidx); + pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, + FALSE, NULL); + if (!pool) { + fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); + return NULL; + } } fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, @@ -659,14 +664,27 @@ static void *fv_queue_thread(void *opaque) req->reply_sent = false; - g_thread_pool_push(pool, req, NULL); + if (!se->thread_pool_size) { + req_list = g_list_prepend(req_list, req); + } else { + g_thread_pool_push(pool, req, NULL); + } } pthread_mutex_unlock(&qi->vq_lock); pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); + + /* Process all the requests. */ + if (!se->thread_pool_size && req_list != NULL) { + g_list_foreach(req_list, fv_queue_worker, qi); + g_list_free(req_list); + req_list = NULL; + } } - g_thread_pool_free(pool, FALSE, TRUE); + if (pool) { + g_thread_pool_free(pool, FALSE, TRUE); + } return NULL; } diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index 12de321745..5fb36d9407 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -101,7 +101,7 @@ struct lo_inode { * This counter keeps the inode alive during the FUSE session. * Incremented when the FUSE inode number is sent in a reply * (FUSE_LOOKUP, FUSE_READDIRPLUS, etc). Decremented when an inode is - * released by requests like FUSE_FORGET, FUSE_RMDIR, FUSE_RENAME, etc. 
+ * released by a FUSE_FORGET request. * * Note that this value is untrusted because the client can manipulate * it arbitrarily using FUSE_FORGET requests. @@ -902,10 +902,11 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, inode->key.ino = e->attr.st_ino; inode->key.dev = e->attr.st_dev; inode->key.mnt_id = mnt_id; - pthread_mutex_init(&inode->plock_mutex, NULL); - inode->posix_locks = g_hash_table_new_full( - g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); - + if (lo->posix_lock) { + pthread_mutex_init(&inode->plock_mutex, NULL); + inode->posix_locks = g_hash_table_new_full( + g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); + } pthread_mutex_lock(&lo->mutex); inode->fuse_ino = lo_add_inode_mapping(req, inode); g_hash_table_insert(lo->inodes, &inode->key, inode); @@ -1291,12 +1292,13 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) if (!inode->nlookup) { lo_map_remove(&lo->ino_map, inode->fuse_ino); g_hash_table_remove(lo->inodes, &inode->key); - if (g_hash_table_size(inode->posix_locks)) { - fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n"); + if (lo->posix_lock) { + if (g_hash_table_size(inode->posix_locks)) { + fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n"); + } + g_hash_table_destroy(inode->posix_locks); + pthread_mutex_destroy(&inode->plock_mutex); } - g_hash_table_destroy(inode->posix_locks); - pthread_mutex_destroy(&inode->plock_mutex); - /* Drop our refcount from lo_do_lookup() */ lo_inode_put(lo, &inode); } @@ -1772,6 +1774,11 @@ static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ino, fi->flags, fi->lock_owner, lock->l_type, lock->l_start, lock->l_len); + if (!lo->posix_lock) { + fuse_reply_err(req, ENOSYS); + return; + } + inode = lo_inode(req, ino); if (!inode) { fuse_reply_err(req, EBADF); @@ -1817,6 +1824,11 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ino, fi->flags, 
lock->l_type, lock->l_pid, fi->lock_owner, sleep, lock->l_whence, lock->l_start, lock->l_len); + if (!lo->posix_lock) { + fuse_reply_err(req, ENOSYS); + return; + } + if (sleep) { fuse_reply_err(req, EOPNOTSUPP); return; @@ -1941,6 +1953,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) int res; (void)ino; struct lo_inode *inode; + struct lo_data *lo = lo_data(req); inode = lo_inode(req, ino); if (!inode) { @@ -1948,13 +1961,21 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) return; } - /* An fd is going away. Cleanup associated posix locks */ - pthread_mutex_lock(&inode->plock_mutex); - g_hash_table_remove(inode->posix_locks, GUINT_TO_POINTER(fi->lock_owner)); - pthread_mutex_unlock(&inode->plock_mutex); + if (!S_ISREG(inode->filetype)) { + lo_inode_put(lo, &inode); + fuse_reply_err(req, EBADF); + return; + } + /* An fd is going away. Cleanup associated posix locks */ + if (lo->posix_lock) { + pthread_mutex_lock(&inode->plock_mutex); + g_hash_table_remove(inode->posix_locks, + GUINT_TO_POINTER(fi->lock_owner)); + pthread_mutex_unlock(&inode->plock_mutex); + } res = close(dup(lo_fi_fd(req, fi))); - lo_inode_put(lo_data(req), &inode); + lo_inode_put(lo, &inode); fuse_reply_err(req, res == -1 ? 
errno : 0); } @@ -3284,18 +3305,38 @@ static void setup_nofile_rlimit(unsigned long rlimit_nofile) static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) { g_autofree char *localfmt = NULL; + struct timespec ts; + struct tm tm; + char sec_fmt[sizeof "2020-12-07 18:17:54"]; + char zone_fmt[sizeof "+0100"]; if (current_log_level < level) { return; } if (current_log_level == FUSE_LOG_DEBUG) { - if (!use_syslog) { - localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s", - get_clock(), syscall(__NR_gettid), fmt); - } else { + if (use_syslog) { + /* no timestamp needed */ localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), fmt); + } else { + /* try formatting a broken-down timestamp */ + if (clock_gettime(CLOCK_REALTIME, &ts) != -1 && + localtime_r(&ts.tv_sec, &tm) != NULL && + strftime(sec_fmt, sizeof sec_fmt, "%Y-%m-%d %H:%M:%S", + &tm) != 0 && + strftime(zone_fmt, sizeof zone_fmt, "%z", &tm) != 0) { + localfmt = g_strdup_printf("[%s.%02ld%s] [ID: %08ld] %s", + sec_fmt, + ts.tv_nsec / (10L * 1000 * 1000), + zone_fmt, syscall(__NR_gettid), + fmt); + } else { + /* fall back to a flat timestamp */ + localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s", + get_clock(), syscall(__NR_gettid), + fmt); + } } fmt = localfmt; } @@ -3360,6 +3401,11 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) root->key.mnt_id = mnt_id; root->nlookup = 2; g_atomic_int_set(&root->refcount, 2); + if (lo->posix_lock) { + pthread_mutex_init(&root->plock_mutex, NULL); + root->posix_locks = g_hash_table_new_full( + g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); + } } static guint lo_key_hash(gconstpointer key) @@ -3382,6 +3428,10 @@ static void fuse_lo_data_cleanup(struct lo_data *lo) if (lo->inodes) { g_hash_table_destroy(lo->inodes); } + + if (lo->root.posix_locks) { + g_hash_table_destroy(lo->root.posix_locks); + } lo_map_destroy(&lo->fd_map); lo_map_destroy(&lo->dirp_map); lo_map_destroy(&lo->ino_map); @@ -3416,6 
+3466,9 @@ int main(int argc, char *argv[]) struct lo_map_elem *reserve_elem; int ret = -1; + /* Initialize time conversion information for localtime_r(). */ + tzset(); + /* Don't mask creation mode, kernel already did that */ umask(0); |