aboutsummaryrefslogtreecommitdiff
path: root/block/file-posix.c
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2017-08-21 18:58:56 +0200
committerPaolo Bonzini <pbonzini@redhat.com>2017-09-22 01:06:51 +0200
commit7c9e527659c67d4d7b41d9504f93d2d7ee482488 (patch)
tree2f604a8ad523e8cf9217b464344900f86d0bbfba /block/file-posix.c
parent092aa2fc65b7a35121616aad8f39d47b8f921618 (diff)
scsi, file-posix: add support for persistent reservation management
It is a common requirement for virtual machine to send persistent reservations, but this currently requires either running QEMU with CAP_SYS_RAWIO, or using out-of-tree patches that let an unprivileged QEMU bypass Linux's filter on SG_IO commands. As an alternative mechanism, the next patches will introduce a privileged helper to run persistent reservation commands without expanding QEMU's attack surface unnecessarily. The helper is invoked through a "pr-manager" QOM object, to which file-posix.c passes SG_IO requests for PERSISTENT RESERVE OUT and PERSISTENT RESERVE IN commands. For example: $ qemu-system-x86_64 -device virtio-scsi \ -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock -drive if=none,id=hd,driver=raw,file.filename=/dev/sdb,file.pr-manager=helper0 -device scsi-block,drive=hd or: $ qemu-system-x86_64 -device virtio-scsi \ -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock -blockdev node-name=hd,driver=raw,file.driver=host_device,file.filename=/dev/sdb,file.pr-manager=helper0 -device scsi-block,drive=hd Multiple pr-manager implementations are conceivable and possible, though only one is implemented right now. For example, a pr-manager could: - talk directly to the multipath daemon from a privileged QEMU (i.e. QEMU links to libmpathpersist); this makes reservation work properly with multipath, but still requires CAP_SYS_RAWIO - use the Linux IOC_PR_* ioctls (they require CAP_SYS_ADMIN though) - more interestingly, implement reservations directly in QEMU through file system locks or a shared database (e.g. sqlite) Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'block/file-posix.c')
-rw-r--r--block/file-posix.c30
1 files changed, 30 insertions, 0 deletions
diff --git a/block/file-posix.c b/block/file-posix.c
index 6acbd56238..ab12a2b591 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -33,6 +33,9 @@
#include "block/raw-aio.h"
#include "qapi/qmp/qstring.h"
+#include "scsi/pr-manager.h"
+#include "scsi/constants.h"
+
#if defined(__APPLE__) && (__MACH__)
#include <paths.h>
#include <sys/param.h>
@@ -155,6 +158,8 @@ typedef struct BDRVRawState {
bool page_cache_inconsistent:1;
bool has_fallocate;
bool needs_alignment;
+
+ PRManager *pr_mgr;
} BDRVRawState;
typedef struct BDRVRawReopenState {
@@ -402,6 +407,11 @@ static QemuOptsList raw_runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "file locking mode (on/off/auto, default: auto)",
},
+ {
+ .name = "pr-manager",
+ .type = QEMU_OPT_STRING,
+ .help = "id of persistent reservation manager object (default: none)",
+ },
{ /* end of list */ }
},
};
@@ -413,6 +423,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
QemuOpts *opts;
Error *local_err = NULL;
const char *filename = NULL;
+ const char *str;
BlockdevAioOptions aio, aio_default;
int fd, ret;
struct stat st;
@@ -476,6 +487,16 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
abort();
}
+ str = qemu_opt_get(opts, "pr-manager");
+ if (str) {
+ s->pr_mgr = pr_manager_lookup(str, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+ }
+
s->open_flags = open_flags;
raw_parse_flags(bdrv_flags, &s->open_flags);
@@ -2597,6 +2618,15 @@ static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
if (fd_open(bs) < 0)
return NULL;
+ if (req == SG_IO && s->pr_mgr) {
+ struct sg_io_hdr *io_hdr = buf;
+ if (io_hdr->cmdp[0] == PERSISTENT_RESERVE_OUT ||
+ io_hdr->cmdp[0] == PERSISTENT_RESERVE_IN) {
+ return pr_manager_execute(s->pr_mgr, bdrv_get_aio_context(bs),
+ s->fd, io_hdr, cb, opaque);
+ }
+ }
+
acb = g_new(RawPosixAIOData, 1);
acb->bs = bs;
acb->aio_type = QEMU_AIO_IOCTL;