aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block.c4
-rw-r--r--block/export/fuse.c45
-rw-r--r--block/export/vhost-user-blk-server.c5
-rw-r--r--block/rbd.c52
-rw-r--r--docs/tools/qemu-img.rst2
-rw-r--r--docs/tools/qemu-storage-daemon.rst9
-rw-r--r--include/qemu/vhost-user-server.h5
-rw-r--r--qemu-img-cmds.hx4
-rw-r--r--storage-daemon/qemu-storage-daemon.c4
-rw-r--r--util/vhost-user-server.c22
10 files changed, 118 insertions, 34 deletions
diff --git a/block.c b/block.c
index 7b3ce415d8..b54d59d1fa 100644
--- a/block.c
+++ b/block.c
@@ -3341,6 +3341,8 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
int ret;
Transaction *tran = tran_new();
+ bdrv_drained_begin(bs);
+
ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
if (ret < 0) {
goto out;
@@ -3350,6 +3352,8 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
out:
tran_finalize(tran, ret);
+ bdrv_drained_end(bs);
+
return ret;
}
diff --git a/block/export/fuse.c b/block/export/fuse.c
index 6710d8aed8..fdda8e3c81 100644
--- a/block/export/fuse.c
+++ b/block/export/fuse.c
@@ -625,11 +625,33 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
return;
}
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
if (mode & FALLOC_FL_KEEP_SIZE) {
length = MIN(length, blk_len - offset);
}
+#endif /* CONFIG_FALLOCATE_PUNCH_HOLE */
- if (mode & FALLOC_FL_PUNCH_HOLE) {
+ if (!mode) {
+ /* We can only fallocate at the EOF with a truncate */
+ if (offset < blk_len) {
+ fuse_reply_err(req, EOPNOTSUPP);
+ return;
+ }
+
+ if (offset > blk_len) {
+ /* No preallocation needed here */
+ ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
+ if (ret < 0) {
+ fuse_reply_err(req, -ret);
+ return;
+ }
+ }
+
+ ret = fuse_do_truncate(exp, offset + length, true,
+ PREALLOC_MODE_FALLOC);
+ }
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+ else if (mode & FALLOC_FL_PUNCH_HOLE) {
if (!(mode & FALLOC_FL_KEEP_SIZE)) {
fuse_reply_err(req, EINVAL);
return;
@@ -643,6 +665,7 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
length -= size;
} while (ret == 0 && length > 0);
}
+#endif /* CONFIG_FALLOCATE_PUNCH_HOLE */
#ifdef CONFIG_FALLOCATE_ZERO_RANGE
else if (mode & FALLOC_FL_ZERO_RANGE) {
if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) {
@@ -665,25 +688,7 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
} while (ret == 0 && length > 0);
}
#endif /* CONFIG_FALLOCATE_ZERO_RANGE */
- else if (!mode) {
- /* We can only fallocate at the EOF with a truncate */
- if (offset < blk_len) {
- fuse_reply_err(req, EOPNOTSUPP);
- return;
- }
-
- if (offset > blk_len) {
- /* No preallocation needed here */
- ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
- if (ret < 0) {
- fuse_reply_err(req, -ret);
- return;
- }
- }
-
- ret = fuse_do_truncate(exp, offset + length, true,
- PREALLOC_MODE_FALLOC);
- } else {
+ else {
ret = -EOPNOTSUPP;
}
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
index 1862563336..a129204c44 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -172,6 +172,7 @@ vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov,
return VIRTIO_BLK_S_IOERR;
}
+/* Called with server refcount increased, must decrease before returning */
static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
{
VuBlkReq *req = opaque;
@@ -286,10 +287,12 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
}
vu_blk_req_complete(req);
+ vhost_user_server_unref(server);
return;
err:
free(req);
+ vhost_user_server_unref(server);
}
static void vu_blk_process_vq(VuDev *vu_dev, int idx)
@@ -310,6 +313,8 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx)
Coroutine *co =
qemu_coroutine_create(vu_blk_virtio_process_req, req);
+
+ vhost_user_server_ref(server);
qemu_coroutine_enter(co);
}
}
diff --git a/block/rbd.c b/block/rbd.c
index def96292e0..8f183eba2a 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1279,11 +1279,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
RBDDiffIterateReq *req = opaque;
assert(req->offs + req->bytes <= offs);
- /*
- * we do not diff against a snapshot so we should never receive a callback
- * for a hole.
- */
- assert(exists);
+
+ /* treat a hole like an unallocated area and bail out */
+ if (!exists) {
+ return 0;
+ }
if (!req->exists && offs > req->offs) {
/*
@@ -1320,6 +1320,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
int status, r;
RBDDiffIterateReq req = { .offs = offset };
uint64_t features, flags;
+ uint64_t head = 0;
assert(offset + bytes <= s->image_size);
@@ -1347,7 +1348,43 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
return status;
}
- r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
+#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0)
+ /*
+ * librbd had a bug until early 2022 that affected all versions of ceph that
+ * supported fast-diff. This bug results in reporting of incorrect offsets
+ * if the offset parameter to rbd_diff_iterate2 is not object aligned.
+ * Work around this bug by rounding down the offset to object boundaries.
+ * This is OK because we call rbd_diff_iterate2 with whole_object = true.
+ * However, this workaround only works for non cloned images with default
+ * striping.
+ *
+ * See: https://tracker.ceph.com/issues/53784
+ */
+
+ /* check if RBD image has non-default striping enabled */
+ if (features & RBD_FEATURE_STRIPINGV2) {
+ return status;
+ }
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ /*
+ * check if RBD image is a clone (= has a parent).
+ *
+ * rbd_get_parent_info is deprecated from Nautilus onwards, but the
+ * replacement rbd_get_parent is not present in Luminous and Mimic.
+ */
+ if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) {
+ return status;
+ }
+#pragma GCC diagnostic pop
+
+ head = req.offs & (s->object_size - 1);
+ req.offs -= head;
+ bytes += head;
+#endif
+
+ r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true,
qemu_rbd_diff_iterate_cb, &req);
if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
return status;
@@ -1366,7 +1403,8 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
}
- *pnum = req.bytes;
+ assert(req.bytes > head);
+ *pnum = req.bytes - head;
return status;
}
diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index d663dd92bd..8885ea11cf 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -463,7 +463,7 @@ Command description:
``--skip-broken-bitmaps`` is also specified to copy only the
consistent bitmaps.
-.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE] [-F BACKING_FMT] [-u] [-o OPTIONS] FILENAME [SIZE]
+.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE [-F BACKING_FMT]] [-u] [-o OPTIONS] FILENAME [SIZE]
Create the new disk image *FILENAME* of size *SIZE* and format
*FMT*. Depending on the file format, you can add one or more *OPTIONS*
diff --git a/docs/tools/qemu-storage-daemon.rst b/docs/tools/qemu-storage-daemon.rst
index 9b0eaba6e5..878e6a5c5c 100644
--- a/docs/tools/qemu-storage-daemon.rst
+++ b/docs/tools/qemu-storage-daemon.rst
@@ -76,7 +76,7 @@ Standard options:
.. option:: --export [type=]nbd,id=<id>,node-name=<node-name>[,name=<export-name>][,writable=on|off][,bitmap=<name>]
--export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,addr.type=unix,addr.path=<socket-path>[,writable=on|off][,logical-block-size=<block-size>][,num-queues=<num-queues>]
--export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,addr.type=fd,addr.str=<fd>[,writable=on|off][,logical-block-size=<block-size>][,num-queues=<num-queues>]
- --export [type=]fuse,id=<id>,node-name=<node-name>,mountpoint=<file>[,growable=on|off][,writable=on|off]
+ --export [type=]fuse,id=<id>,node-name=<node-name>,mountpoint=<file>[,growable=on|off][,writable=on|off][,allow-other=on|off|auto]
is a block export definition. ``node-name`` is the block node that should be
exported. ``writable`` determines whether or not the export allows write
@@ -103,7 +103,12 @@ Standard options:
mounted). Consequently, applications that have opened the given file before
the export became active will continue to see its original content. If
``growable`` is set, writes after the end of the exported file will grow the
- block node to fit.
+ block node to fit. The ``allow-other`` option controls whether users other
+ than the user running the process will be allowed to access the export. Note
+ that enabling this option as a non-root user requires enabling the
+ user_allow_other option in the global fuse.conf configuration file. Setting
+ ``allow-other`` to auto (the default) will try enabling this option, and on
+ error fall back to disabling it.
.. option:: --monitor MONITORDEF
diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h
index 121ea1dedf..cd43193b80 100644
--- a/include/qemu/vhost-user-server.h
+++ b/include/qemu/vhost-user-server.h
@@ -42,6 +42,8 @@ typedef struct {
const VuDevIface *vu_iface;
/* Protected by ctx lock */
+ unsigned int refcount;
+ bool wait_idle;
VuDev vu_dev;
QIOChannel *ioc; /* The I/O channel with the client */
QIOChannelSocket *sioc; /* The underlying data channel with the client */
@@ -59,6 +61,9 @@ bool vhost_user_server_start(VuServer *server,
void vhost_user_server_stop(VuServer *server);
+void vhost_user_server_ref(VuServer *server);
+void vhost_user_server_unref(VuServer *server);
+
void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx);
void vhost_user_server_detach_aio_context(VuServer *server);
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 72bcdcfbfa..1b1dab5b17 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -52,9 +52,9 @@ SRST
ERST
DEF("create", img_create,
- "create [--object objectdef] [-q] [-f fmt] [-b backing_file] [-F backing_fmt] [-u] [-o options] filename [size]")
+ "create [--object objectdef] [-q] [-f fmt] [-b backing_file [-F backing_fmt]] [-u] [-o options] filename [size]")
SRST
-.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE] [-F BACKING_FMT] [-u] [-o OPTIONS] FILENAME [SIZE]
+.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE [-F BACKING_FMT]] [-u] [-o OPTIONS] FILENAME [SIZE]
ERST
DEF("dd", img_dd,
diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c
index 9d76d1114d..504d33aa91 100644
--- a/storage-daemon/qemu-storage-daemon.c
+++ b/storage-daemon/qemu-storage-daemon.c
@@ -100,7 +100,7 @@ static void help(void)
"\n"
#ifdef CONFIG_FUSE
" --export [type=]fuse,id=<id>,node-name=<node-name>,mountpoint=<file>\n"
-" [,growable=on|off][,writable=on|off]\n"
+" [,growable=on|off][,writable=on|off][,allow-other=on|off|auto]\n"
" export the specified block node over FUSE\n"
"\n"
#endif /* CONFIG_FUSE */
@@ -111,7 +111,7 @@ static void help(void)
" export the specified block node as a\n"
" vhost-user-blk device over UNIX domain socket\n"
" --export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,\n"
-" fd,addr.str=<fd>[,writable=on|off]\n"
+" addr.type=fd,addr.str=<fd>[,writable=on|off]\n"
" [,logical-block-size=<block-size>][,num-queues=<num-queues>]\n"
" export the specified block node as a\n"
" vhost-user-blk device over file descriptor\n"
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index f68287e811..f66fbba710 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -74,6 +74,20 @@ static void panic_cb(VuDev *vu_dev, const char *buf)
error_report("vu_panic: %s", buf);
}
+void vhost_user_server_ref(VuServer *server)
+{
+ assert(!server->wait_idle);
+ server->refcount++;
+}
+
+void vhost_user_server_unref(VuServer *server)
+{
+ server->refcount--;
+ if (server->wait_idle && !server->refcount) {
+ aio_co_wake(server->co_trip);
+ }
+}
+
static bool coroutine_fn
vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
{
@@ -177,6 +191,14 @@ static coroutine_fn void vu_client_trip(void *opaque)
/* Keep running */
}
+ if (server->refcount) {
+ /* Wait for requests to complete before we can unmap the memory */
+ server->wait_idle = true;
+ qemu_coroutine_yield();
+ server->wait_idle = false;
+ }
+ assert(server->refcount == 0);
+
vu_deinit(vu_dev);
/* vu_deinit() should have called remove_watch() */