diff options
-rw-r--r-- | block.c | 4 | ||||
-rw-r--r-- | block/export/fuse.c | 45 | ||||
-rw-r--r-- | block/export/vhost-user-blk-server.c | 5 | ||||
-rw-r--r-- | block/rbd.c | 52 | ||||
-rw-r--r-- | docs/tools/qemu-img.rst | 2 | ||||
-rw-r--r-- | docs/tools/qemu-storage-daemon.rst | 9 | ||||
-rw-r--r-- | include/qemu/vhost-user-server.h | 5 | ||||
-rw-r--r-- | qemu-img-cmds.hx | 4 | ||||
-rw-r--r-- | storage-daemon/qemu-storage-daemon.c | 4 | ||||
-rw-r--r-- | util/vhost-user-server.c | 22 |
10 files changed, 118 insertions, 34 deletions
@@ -3341,6 +3341,8 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, int ret; Transaction *tran = tran_new(); + bdrv_drained_begin(bs); + ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); if (ret < 0) { goto out; @@ -3350,6 +3352,8 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, out: tran_finalize(tran, ret); + bdrv_drained_end(bs); + return ret; } diff --git a/block/export/fuse.c b/block/export/fuse.c index 6710d8aed8..fdda8e3c81 100644 --- a/block/export/fuse.c +++ b/block/export/fuse.c @@ -625,11 +625,33 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode, return; } +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE if (mode & FALLOC_FL_KEEP_SIZE) { length = MIN(length, blk_len - offset); } +#endif /* CONFIG_FALLOCATE_PUNCH_HOLE */ - if (mode & FALLOC_FL_PUNCH_HOLE) { + if (!mode) { + /* We can only fallocate at the EOF with a truncate */ + if (offset < blk_len) { + fuse_reply_err(req, EOPNOTSUPP); + return; + } + + if (offset > blk_len) { + /* No preallocation needed here */ + ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF); + if (ret < 0) { + fuse_reply_err(req, -ret); + return; + } + } + + ret = fuse_do_truncate(exp, offset + length, true, + PREALLOC_MODE_FALLOC); + } +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE + else if (mode & FALLOC_FL_PUNCH_HOLE) { if (!(mode & FALLOC_FL_KEEP_SIZE)) { fuse_reply_err(req, EINVAL); return; @@ -643,6 +665,7 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode, length -= size; } while (ret == 0 && length > 0); } +#endif /* CONFIG_FALLOCATE_PUNCH_HOLE */ #ifdef CONFIG_FALLOCATE_ZERO_RANGE else if (mode & FALLOC_FL_ZERO_RANGE) { if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) { @@ -665,25 +688,7 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode, } while (ret == 0 && length > 0); } #endif /* CONFIG_FALLOCATE_ZERO_RANGE */ - else if (!mode) { - /* We can only fallocate at the EOF with a truncate */ - if (offset < blk_len) { - fuse_reply_err(req, EOPNOTSUPP); - return; - } - - if (offset > blk_len) { - /* No preallocation needed here */ - ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF); - if (ret < 0) { - fuse_reply_err(req, -ret); - return; - } - } - - ret = fuse_do_truncate(exp, offset + length, true, - PREALLOC_MODE_FALLOC); - } else { + else { ret = -EOPNOTSUPP; } diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index 1862563336..a129204c44 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -172,6 +172,7 @@ vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov, return VIRTIO_BLK_S_IOERR; } +/* Called with server refcount increased, must decrease before returning */ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) { VuBlkReq *req = opaque; @@ -286,10 +287,12 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) } vu_blk_req_complete(req); + vhost_user_server_unref(server); return; err: free(req); + vhost_user_server_unref(server); } static void vu_blk_process_vq(VuDev *vu_dev, int idx) @@ -310,6 +313,8 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx) Coroutine *co = qemu_coroutine_create(vu_blk_virtio_process_req, req); + + vhost_user_server_ref(server); qemu_coroutine_enter(co); } } diff --git a/block/rbd.c b/block/rbd.c index def96292e0..8f183eba2a 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -1279,11 +1279,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len, RBDDiffIterateReq *req = opaque; assert(req->offs + req->bytes <= offs); - /* - * we do not diff against a snapshot so we should never receive a callback - * for a hole. - */ - assert(exists); + + /* treat a hole like an unallocated area and bail out */ + if (!exists) { + return 0; + } if (!req->exists && offs > req->offs) { /* @@ -1320,6 +1320,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, int status, r; RBDDiffIterateReq req = { .offs = offset }; uint64_t features, flags; + uint64_t head = 0; assert(offset + bytes <= s->image_size); @@ -1347,7 +1348,43 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, return status; } - r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, +#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0) + /* + * librbd had a bug until early 2022 that affected all versions of ceph that + * supported fast-diff. This bug results in reporting of incorrect offsets + * if the offset parameter to rbd_diff_iterate2 is not object aligned. + * Work around this bug by rounding down the offset to object boundaries. + * This is OK because we call rbd_diff_iterate2 with whole_object = true. + * However, this workaround only works for non cloned images with default + * striping. + * + * See: https://tracker.ceph.com/issues/53784 + */ + + /* check if RBD image has non-default striping enabled */ + if (features & RBD_FEATURE_STRIPINGV2) { + return status; + } + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + /* + * check if RBD image is a clone (= has a parent). + * + * rbd_get_parent_info is deprecated from Nautilus onwards, but the + * replacement rbd_get_parent is not present in Luminous and Mimic. + */ + if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) { + return status; + } +#pragma GCC diagnostic pop + + head = req.offs & (s->object_size - 1); + req.offs -= head; + bytes += head; +#endif + + r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true, qemu_rbd_diff_iterate_cb, &req); if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) { return status; @@ -1366,7 +1403,8 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID; } - *pnum = req.bytes; + assert(req.bytes > head); + *pnum = req.bytes - head; return status; } diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst index d663dd92bd..8885ea11cf 100644 --- a/docs/tools/qemu-img.rst +++ b/docs/tools/qemu-img.rst @@ -463,7 +463,7 @@ Command description: ``--skip-broken-bitmaps`` is also specified to copy only the consistent bitmaps. -.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE] [-F BACKING_FMT] [-u] [-o OPTIONS] FILENAME [SIZE] +.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE [-F BACKING_FMT]] [-u] [-o OPTIONS] FILENAME [SIZE] Create the new disk image *FILENAME* of size *SIZE* and format *FMT*. Depending on the file format, you can add one or more *OPTIONS* diff --git a/docs/tools/qemu-storage-daemon.rst b/docs/tools/qemu-storage-daemon.rst index 9b0eaba6e5..878e6a5c5c 100644 --- a/docs/tools/qemu-storage-daemon.rst +++ b/docs/tools/qemu-storage-daemon.rst @@ -76,7 +76,7 @@ Standard options: .. option:: --export [type=]nbd,id=<id>,node-name=<node-name>[,name=<export-name>][,writable=on|off][,bitmap=<name>] --export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,addr.type=unix,addr.path=<socket-path>[,writable=on|off][,logical-block-size=<block-size>][,num-queues=<num-queues>] --export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,addr.type=fd,addr.str=<fd>[,writable=on|off][,logical-block-size=<block-size>][,num-queues=<num-queues>] - --export [type=]fuse,id=<id>,node-name=<node-name>,mountpoint=<file>[,growable=on|off][,writable=on|off] + --export [type=]fuse,id=<id>,node-name=<node-name>,mountpoint=<file>[,growable=on|off][,writable=on|off][,allow-other=on|off|auto] is a block export definition. ``node-name`` is the block node that should be exported. ``writable`` determines whether or not the export allows write @@ -103,7 +103,12 @@ Standard options: mounted). Consequently, applications that have opened the given file before the export became active will continue to see its original content. If ``growable`` is set, writes after the end of the exported file will grow the - block node to fit. + block node to fit. The ``allow-other`` option controls whether users other + than the user running the process will be allowed to access the export. Note + that enabling this option as a non-root user requires enabling the + user_allow_other option in the global fuse.conf configuration file. Setting + ``allow-other`` to auto (the default) will try enabling this option, and on + error fall back to disabling it. .. option:: --monitor MONITORDEF diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h index 121ea1dedf..cd43193b80 100644 --- a/include/qemu/vhost-user-server.h +++ b/include/qemu/vhost-user-server.h @@ -42,6 +42,8 @@ typedef struct { const VuDevIface *vu_iface; /* Protected by ctx lock */ + unsigned int refcount; + bool wait_idle; VuDev vu_dev; QIOChannel *ioc; /* The I/O channel with the client */ QIOChannelSocket *sioc; /* The underlying data channel with the client */ @@ -59,6 +61,9 @@ bool vhost_user_server_start(VuServer *server, void vhost_user_server_stop(VuServer *server); +void vhost_user_server_ref(VuServer *server); +void vhost_user_server_unref(VuServer *server); + void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx); void vhost_user_server_detach_aio_context(VuServer *server); diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index 72bcdcfbfa..1b1dab5b17 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -52,9 +52,9 @@ SRST ERST DEF("create", img_create, - "create [--object objectdef] [-q] [-f fmt] [-b backing_file] [-F backing_fmt] [-u] [-o options] filename [size]") + "create [--object objectdef] [-q] [-f fmt] [-b backing_file [-F backing_fmt]] [-u] [-o options] filename [size]") SRST -.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE] [-F BACKING_FMT] [-u] [-o OPTIONS] FILENAME [SIZE] +.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE [-F BACKING_FMT]] [-u] [-o OPTIONS] FILENAME [SIZE] ERST DEF("dd", img_dd, diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c index 9d76d1114d..504d33aa91 100644 --- a/storage-daemon/qemu-storage-daemon.c +++ b/storage-daemon/qemu-storage-daemon.c @@ -100,7 +100,7 @@ static void help(void) "\n" #ifdef CONFIG_FUSE " --export [type=]fuse,id=<id>,node-name=<node-name>,mountpoint=<file>\n" -" [,growable=on|off][,writable=on|off]\n" +" [,growable=on|off][,writable=on|off][,allow-other=on|off|auto]\n" " export the specified block node over FUSE\n" "\n" #endif /* CONFIG_FUSE */ @@ -111,7 +111,7 @@ static void help(void) " export the specified block node as a\n" " vhost-user-blk device over UNIX domain socket\n" " --export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,\n" -" fd,addr.str=<fd>[,writable=on|off]\n" +" addr.type=fd,addr.str=<fd>[,writable=on|off]\n" " [,logical-block-size=<block-size>][,num-queues=<num-queues>]\n" " export the specified block node as a\n" " vhost-user-blk device over file descriptor\n" diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c index f68287e811..f66fbba710 100644 --- a/util/vhost-user-server.c +++ b/util/vhost-user-server.c @@ -74,6 +74,20 @@ static void panic_cb(VuDev *vu_dev, const char *buf) error_report("vu_panic: %s", buf); } +void vhost_user_server_ref(VuServer *server) +{ + assert(!server->wait_idle); + server->refcount++; +} + +void vhost_user_server_unref(VuServer *server) +{ + server->refcount--; + if (server->wait_idle && !server->refcount) { + aio_co_wake(server->co_trip); + } +} + static bool coroutine_fn vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg) { @@ -177,6 +191,14 @@ static coroutine_fn void vu_client_trip(void *opaque) /* Keep running */ } + if (server->refcount) { + /* Wait for requests to complete before we can unmap the memory */ + server->wait_idle = true; + qemu_coroutine_yield(); + server->wait_idle = false; + } + assert(server->refcount == 0); + vu_deinit(vu_dev); /* vu_deinit() should have called remove_watch() */ |