diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2017-08-21 18:58:56 +0200 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2017-09-22 01:06:51 +0200 |
commit | 7c9e527659c67d4d7b41d9504f93d2d7ee482488 (patch) | |
tree | 2f604a8ad523e8cf9217b464344900f86d0bbfba /block | |
parent | 092aa2fc65b7a35121616aad8f39d47b8f921618 (diff) |
scsi, file-posix: add support for persistent reservation management
It is a common requirement for virtual machine to send persistent
reservations, but this currently requires either running QEMU with
CAP_SYS_RAWIO, or using out-of-tree patches that let an unprivileged
QEMU bypass Linux's filter on SG_IO commands.
As an alternative mechanism, the next patches will introduce a
privileged helper to run persistent reservation commands without
expanding QEMU's attack surface unnecessarily.
The helper is invoked through a "pr-manager" QOM object, to which
file-posix.c passes SG_IO requests for PERSISTENT RESERVE OUT and
PERSISTENT RESERVE IN commands. For example:
$ qemu-system-x86_64
-device virtio-scsi \
-object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock
-drive if=none,id=hd,driver=raw,file.filename=/dev/sdb,file.pr-manager=helper0
-device scsi-block,drive=hd
or:
$ qemu-system-x86_64
-device virtio-scsi \
-object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock
-blockdev node-name=hd,driver=raw,file.driver=host_device,file.filename=/dev/sdb,file.pr-manager=helper0
-device scsi-block,drive=hd
Multiple pr-manager implementations are conceivable and possible, though
only one is implemented right now. For example, a pr-manager could:
- talk directly to the multipath daemon from a privileged QEMU
(i.e. QEMU links to libmpathpersist); this makes reservation work
properly with multipath, but still requires CAP_SYS_RAWIO
- use the Linux IOC_PR_* ioctls (they require CAP_SYS_ADMIN though)
- more interestingly, implement reservations directly in QEMU
through file system locks or a shared database (e.g. sqlite)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'block')
-rw-r--r-- | block/file-posix.c | 30 |
1 files changed, 30 insertions, 0 deletions
diff --git a/block/file-posix.c b/block/file-posix.c index 6acbd56238..ab12a2b591 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -33,6 +33,9 @@ #include "block/raw-aio.h" #include "qapi/qmp/qstring.h" +#include "scsi/pr-manager.h" +#include "scsi/constants.h" + #if defined(__APPLE__) && (__MACH__) #include <paths.h> #include <sys/param.h> @@ -155,6 +158,8 @@ typedef struct BDRVRawState { bool page_cache_inconsistent:1; bool has_fallocate; bool needs_alignment; + + PRManager *pr_mgr; } BDRVRawState; typedef struct BDRVRawReopenState { @@ -402,6 +407,11 @@ static QemuOptsList raw_runtime_opts = { .type = QEMU_OPT_STRING, .help = "file locking mode (on/off/auto, default: auto)", }, + { + .name = "pr-manager", + .type = QEMU_OPT_STRING, + .help = "id of persistent reservation manager object (default: none)", + }, { /* end of list */ } }, }; @@ -413,6 +423,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, QemuOpts *opts; Error *local_err = NULL; const char *filename = NULL; + const char *str; BlockdevAioOptions aio, aio_default; int fd, ret; struct stat st; @@ -476,6 +487,16 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, abort(); } + str = qemu_opt_get(opts, "pr-manager"); + if (str) { + s->pr_mgr = pr_manager_lookup(str, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + } + s->open_flags = open_flags; raw_parse_flags(bdrv_flags, &s->open_flags); @@ -2597,6 +2618,15 @@ static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs, if (fd_open(bs) < 0) return NULL; + if (req == SG_IO && s->pr_mgr) { + struct sg_io_hdr *io_hdr = buf; + if (io_hdr->cmdp[0] == PERSISTENT_RESERVE_OUT || + io_hdr->cmdp[0] == PERSISTENT_RESERVE_IN) { + return pr_manager_execute(s->pr_mgr, bdrv_get_aio_context(bs), + s->fd, io_hdr, cb, opaque); + } + } + acb = g_new(RawPosixAIOData, 1); acb->bs = bs; acb->aio_type = QEMU_AIO_IOCTL; |