aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile7
-rw-r--r--Makefile.objs1
-rw-r--r--accel/kvm/kvm-all.c1
-rw-r--r--block/file-posix.c30
-rw-r--r--chardev/char-fd.c5
-rw-r--r--chardev/char-fe.c7
-rw-r--r--chardev/char-pty.c5
-rw-r--r--chardev/char-socket.c7
-rw-r--r--chardev/char-udp.c5
-rw-r--r--chardev/char.c11
-rwxr-xr-xconfigure60
-rw-r--r--cpus.c5
-rw-r--r--docs/devel/atomics.txt14
-rw-r--r--docs/interop/pr-helper.rst83
-rw-r--r--docs/pr-manager.rst111
-rw-r--r--exec.c330
-rw-r--r--hmp-commands-info.hx7
-rw-r--r--hw/arm/armv7m.c9
-rw-r--r--hw/char/virtio-console.c21
-rw-r--r--hw/char/virtio-serial-bus.c7
-rw-r--r--hw/intc/openpic_kvm.c2
-rw-r--r--include/chardev/char.h13
-rw-r--r--include/exec/memory-internal.h16
-rw-r--r--include/exec/memory.h75
-rw-r--r--include/hw/arm/armv7m.h2
-rw-r--r--include/hw/virtio/virtio-serial.h3
-rw-r--r--include/qemu/atomic.h8
-rw-r--r--include/qemu/typedefs.h1
-rw-r--r--include/scsi/pr-manager.h56
-rw-r--r--include/scsi/utils.h4
-rw-r--r--memory.c378
-rw-r--r--monitor.c3
-rw-r--r--qapi/block-core.json4
-rw-r--r--scsi/Makefile.objs2
-rw-r--r--scsi/pr-helper.h41
-rw-r--r--scsi/pr-manager-helper.c302
-rw-r--r--scsi/pr-manager.c109
-rw-r--r--scsi/qemu-pr-helper.c1075
-rw-r--r--scsi/trace-events3
-rw-r--r--scsi/utils.c10
-rw-r--r--target/arm/cpu.c16
-rw-r--r--target/i386/cpu.c5
-rw-r--r--trace-events3
-rw-r--r--vl.c3
44 files changed, 2550 insertions, 310 deletions
diff --git a/Makefile b/Makefile
index d1501a07ee..2be61fcf1c 100644
--- a/Makefile
+++ b/Makefile
@@ -372,6 +372,11 @@ qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS)
fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o $(COMMON_LDADDS)
fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap
+scsi/qemu-pr-helper$(EXESUF): scsi/qemu-pr-helper.o scsi/utils.o $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
+ifdef CONFIG_MPATH
+scsi/qemu-pr-helper$(EXESUF): LIBS += -ludev -lmultipath -lmpathpersist
+endif
+
qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"GEN","$@")
@@ -488,7 +493,7 @@ clean:
rm -f *.msi
find . \( -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} +
rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
- rm -f fsdev/*.pod
+ rm -f fsdev/*.pod scsi/*.pod
rm -f qemu-img-cmds.h
rm -f ui/shader/*-vert.h ui/shader/*-frag.h
@# May not be present in GENERATED_FILES
diff --git a/Makefile.objs b/Makefile.objs
index 9e89100302..bdfa3b6177 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -171,6 +171,7 @@ trace-events-subdirs += qapi
trace-events-subdirs += accel/tcg
trace-events-subdirs += accel/kvm
trace-events-subdirs += nbd
+trace-events-subdirs += scsi
trace-events-files = $(SRC_PATH)/trace-events $(trace-events-subdirs:%=$(SRC_PATH)/%/trace-events)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index b0181d7220..4f1997deec 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -722,7 +722,6 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
mem = kvm_lookup_matching_slot(kml, start_addr, size);
if (!add) {
if (!mem) {
- g_assert(!memory_region_is_ram(mr) && !writeable && !mr->romd_mode);
return;
}
if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
diff --git a/block/file-posix.c b/block/file-posix.c
index 6acbd56238..ab12a2b591 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -33,6 +33,9 @@
#include "block/raw-aio.h"
#include "qapi/qmp/qstring.h"
+#include "scsi/pr-manager.h"
+#include "scsi/constants.h"
+
#if defined(__APPLE__) && (__MACH__)
#include <paths.h>
#include <sys/param.h>
@@ -155,6 +158,8 @@ typedef struct BDRVRawState {
bool page_cache_inconsistent:1;
bool has_fallocate;
bool needs_alignment;
+
+ PRManager *pr_mgr;
} BDRVRawState;
typedef struct BDRVRawReopenState {
@@ -402,6 +407,11 @@ static QemuOptsList raw_runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "file locking mode (on/off/auto, default: auto)",
},
+ {
+ .name = "pr-manager",
+ .type = QEMU_OPT_STRING,
+ .help = "id of persistent reservation manager object (default: none)",
+ },
{ /* end of list */ }
},
};
@@ -413,6 +423,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
QemuOpts *opts;
Error *local_err = NULL;
const char *filename = NULL;
+ const char *str;
BlockdevAioOptions aio, aio_default;
int fd, ret;
struct stat st;
@@ -476,6 +487,16 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
abort();
}
+ str = qemu_opt_get(opts, "pr-manager");
+ if (str) {
+ s->pr_mgr = pr_manager_lookup(str, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+ }
+
s->open_flags = open_flags;
raw_parse_flags(bdrv_flags, &s->open_flags);
@@ -2597,6 +2618,15 @@ static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
if (fd_open(bs) < 0)
return NULL;
+ if (req == SG_IO && s->pr_mgr) {
+ struct sg_io_hdr *io_hdr = buf;
+ if (io_hdr->cmdp[0] == PERSISTENT_RESERVE_OUT ||
+ io_hdr->cmdp[0] == PERSISTENT_RESERVE_IN) {
+ return pr_manager_execute(s->pr_mgr, bdrv_get_aio_context(bs),
+ s->fd, io_hdr, cb, opaque);
+ }
+ }
+
acb = g_new(RawPosixAIOData, 1);
acb->bs = bs;
acb->aio_type = QEMU_AIO_IOCTL;
diff --git a/chardev/char-fd.c b/chardev/char-fd.c
index 6a62a545f2..2c9b2ce567 100644
--- a/chardev/char-fd.c
+++ b/chardev/char-fd.c
@@ -84,8 +84,7 @@ static GSource *fd_chr_add_watch(Chardev *chr, GIOCondition cond)
return qio_channel_create_watch(s->ioc_out, cond);
}
-static void fd_chr_update_read_handler(Chardev *chr,
- GMainContext *context)
+static void fd_chr_update_read_handler(Chardev *chr)
{
FDChardev *s = FD_CHARDEV(chr);
@@ -94,7 +93,7 @@ static void fd_chr_update_read_handler(Chardev *chr,
chr->gsource = io_add_watch_poll(chr, s->ioc_in,
fd_chr_read_poll,
fd_chr_read, chr,
- context);
+ chr->gcontext);
}
}
diff --git a/chardev/char-fe.c b/chardev/char-fe.c
index f3af6ae584..ee6d596100 100644
--- a/chardev/char-fe.c
+++ b/chardev/char-fe.c
@@ -253,7 +253,6 @@ void qemu_chr_fe_set_handlers(CharBackend *b,
bool set_open)
{
Chardev *s;
- ChardevClass *cc;
int fe_open;
s = b->chr;
@@ -261,7 +260,6 @@ void qemu_chr_fe_set_handlers(CharBackend *b,
return;
}
- cc = CHARDEV_GET_CLASS(s);
if (!opaque && !fd_can_read && !fd_read && !fd_event) {
fe_open = 0;
remove_fd_in_watch(s);
@@ -273,9 +271,8 @@ void qemu_chr_fe_set_handlers(CharBackend *b,
b->chr_event = fd_event;
b->chr_be_change = be_change;
b->opaque = opaque;
- if (cc->chr_update_read_handler) {
- cc->chr_update_read_handler(s, context);
- }
+
+ qemu_chr_be_update_read_handlers(s, context);
if (set_open) {
qemu_chr_fe_set_open(b, fe_open);
diff --git a/chardev/char-pty.c b/chardev/char-pty.c
index e5d20a0e6a..761ae6dec1 100644
--- a/chardev/char-pty.c
+++ b/chardev/char-pty.c
@@ -112,8 +112,7 @@ static void pty_chr_update_read_handler_locked(Chardev *chr)
}
}
-static void pty_chr_update_read_handler(Chardev *chr,
- GMainContext *context)
+static void pty_chr_update_read_handler(Chardev *chr)
{
qemu_mutex_lock(&chr->chr_write_lock);
pty_chr_update_read_handler_locked(chr);
@@ -219,7 +218,7 @@ static void pty_chr_state(Chardev *chr, int connected)
chr->gsource = io_add_watch_poll(chr, s->ioc,
pty_chr_read_poll,
pty_chr_read,
- chr, NULL);
+ chr, chr->gcontext);
}
}
}
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 1ae730a4cb..e65148fe97 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -516,13 +516,12 @@ static void tcp_chr_connect(void *opaque)
chr->gsource = io_add_watch_poll(chr, s->ioc,
tcp_chr_read_poll,
tcp_chr_read,
- chr, NULL);
+ chr, chr->gcontext);
}
qemu_chr_be_event(chr, CHR_EVENT_OPENED);
}
-static void tcp_chr_update_read_handler(Chardev *chr,
- GMainContext *context)
+static void tcp_chr_update_read_handler(Chardev *chr)
{
SocketChardev *s = SOCKET_CHARDEV(chr);
@@ -535,7 +534,7 @@ static void tcp_chr_update_read_handler(Chardev *chr,
chr->gsource = io_add_watch_poll(chr, s->ioc,
tcp_chr_read_poll,
tcp_chr_read, chr,
- context);
+ chr->gcontext);
}
}
diff --git a/chardev/char-udp.c b/chardev/char-udp.c
index 4ee11d3ebf..d46ff7ab53 100644
--- a/chardev/char-udp.c
+++ b/chardev/char-udp.c
@@ -100,8 +100,7 @@ static gboolean udp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
return TRUE;
}
-static void udp_chr_update_read_handler(Chardev *chr,
- GMainContext *context)
+static void udp_chr_update_read_handler(Chardev *chr)
{
UdpChardev *s = UDP_CHARDEV(chr);
@@ -110,7 +109,7 @@ static void udp_chr_update_read_handler(Chardev *chr,
chr->gsource = io_add_watch_poll(chr, s->ioc,
udp_chr_read_poll,
udp_chr_read, chr,
- context);
+ chr->gcontext);
}
}
diff --git a/chardev/char.c b/chardev/char.c
index b6fd5eb6a6..2ae4f465ec 100644
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -180,6 +180,17 @@ void qemu_chr_be_write(Chardev *s, uint8_t *buf, int len)
}
}
+void qemu_chr_be_update_read_handlers(Chardev *s,
+ GMainContext *context)
+{
+ ChardevClass *cc = CHARDEV_GET_CLASS(s);
+
+ s->gcontext = context;
+ if (cc->chr_update_read_handler) {
+ cc->chr_update_read_handler(s);
+ }
+}
+
int qemu_chr_add_client(Chardev *s, int fd)
{
return CHARDEV_GET_CLASS(s)->chr_add_client ?
diff --git a/configure b/configure
index f4bbc2ca21..133a5757ef 100755
--- a/configure
+++ b/configure
@@ -290,6 +290,7 @@ netmap="no"
sdl=""
sdlabi=""
virtfs=""
+mpath=""
vnc="yes"
sparse="no"
vde=""
@@ -936,6 +937,10 @@ for opt do
;;
--enable-virtfs) virtfs="yes"
;;
+ --disable-mpath) mpath="no"
+ ;;
+ --enable-mpath) mpath="yes"
+ ;;
--disable-vnc) vnc="no"
;;
--enable-vnc) vnc="yes"
@@ -1479,6 +1484,7 @@ disabled with --disable-FEATURE, default is enabled if available:
vnc-png PNG compression for VNC server
cocoa Cocoa UI (Mac OS X only)
virtfs VirtFS
+ mpath Multipath persistent reservation passthrough
xen xen backend driver support
xen-pci-passthrough
brlapi BrlAPI (Braile)
@@ -3295,6 +3301,30 @@ else
fi
##########################################
+# libmpathpersist probe
+
+if test "$mpath" != "no" ; then
+ cat > $TMPC <<EOF
+#include <libudev.h>
+#include <mpath_persist.h>
+unsigned mpath_mx_alloc_len = 1024;
+int logsink;
+int main(void) {
+ struct udev *udev = udev_new();
+ mpath_lib_init(udev);
+ return 0;
+}
+EOF
+ if compile_prog "" "-ludev -lmultipath -lmpathpersist" ; then
+ mpathpersist=yes
+ else
+ mpathpersist=no
+ fi
+else
+ mpathpersist=no
+fi
+
+##########################################
# libcap probe
if test "$cap" != "no" ; then
@@ -5023,16 +5053,34 @@ if test "$want_tools" = "yes" ; then
fi
fi
if test "$softmmu" = yes ; then
- if test "$virtfs" != no ; then
- if test "$cap" = yes && test "$linux" = yes && test "$attr" = yes ; then
+ if test "$linux" = yes; then
+ if test "$virtfs" != no && test "$cap" = yes && test "$attr" = yes ; then
virtfs=yes
tools="$tools fsdev/virtfs-proxy-helper\$(EXESUF)"
else
if test "$virtfs" = yes; then
- error_exit "VirtFS is supported only on Linux and requires libcap devel and libattr devel"
+ error_exit "VirtFS requires libcap devel and libattr devel"
fi
virtfs=no
fi
+ if test "$mpath" != no && test "$mpathpersist" = yes ; then
+ mpath=yes
+ else
+ if test "$mpath" = yes; then
+ error_exit "Multipath requires libmpathpersist devel"
+ fi
+ mpath=no
+ fi
+ tools="$tools scsi/qemu-pr-helper\$(EXESUF)"
+ else
+ if test "$virtfs" = yes; then
+ error_exit "VirtFS is supported only on Linux"
+ fi
+ virtfs=no
+ if test "$mpath" = yes; then
+ error_exit "Multipath is supported only on Linux"
+ fi
+ mpath=no
fi
fi
@@ -5278,6 +5326,7 @@ echo "Audio drivers $audio_drv_list"
echo "Block whitelist (rw) $block_drv_rw_whitelist"
echo "Block whitelist (ro) $block_drv_ro_whitelist"
echo "VirtFS support $virtfs"
+echo "Multipath support $mpath"
echo "VNC support $vnc"
if test "$vnc" = "yes" ; then
echo "VNC SASL support $vnc_sasl"
@@ -5729,6 +5778,9 @@ fi
if test "$virtfs" = "yes" ; then
echo "CONFIG_VIRTFS=y" >> $config_host_mak
fi
+if test "$mpath" = "yes" ; then
+ echo "CONFIG_MPATH=y" >> $config_host_mak
+fi
if test "$vhost_scsi" = "yes" ; then
echo "CONFIG_VHOST_SCSI=y" >> $config_host_mak
fi
@@ -6510,7 +6562,7 @@ fi
# build tree in object directory in case the source is not in the current directory
DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests tests/vm"
-DIRS="$DIRS docs docs/interop fsdev"
+DIRS="$DIRS docs docs/interop fsdev scsi"
DIRS="$DIRS pc-bios/optionrom pc-bios/spapr-rtas pc-bios/s390-ccw"
DIRS="$DIRS roms/seabios roms/vgabios"
DIRS="$DIRS qapi-generated"
diff --git a/cpus.c b/cpus.c
index 9bed61eefc..c9a624003a 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1764,8 +1764,9 @@ void qemu_init_vcpu(CPUState *cpu)
/* If the target cpu hasn't set up any address spaces itself,
* give it the default one.
*/
- AddressSpace *as = address_space_init_shareable(cpu->memory,
- "cpu-memory");
+ AddressSpace *as = g_new0(AddressSpace, 1);
+
+ address_space_init(as, cpu->memory, "cpu-memory");
cpu->num_ases = 1;
cpu_address_space_init(cpu, as, 0);
}
diff --git a/docs/devel/atomics.txt b/docs/devel/atomics.txt
index 3ef5d85b1b..10c5fa37e8 100644
--- a/docs/devel/atomics.txt
+++ b/docs/devel/atomics.txt
@@ -63,11 +63,23 @@ operations:
typeof(*ptr) atomic_fetch_sub(ptr, val)
typeof(*ptr) atomic_fetch_and(ptr, val)
typeof(*ptr) atomic_fetch_or(ptr, val)
+ typeof(*ptr) atomic_fetch_xor(ptr, val)
+ typeof(*ptr) atomic_fetch_inc_nonzero(ptr)
typeof(*ptr) atomic_xchg(ptr, val)
typeof(*ptr) atomic_cmpxchg(ptr, old, new)
all of which return the old value of *ptr. These operations are
-polymorphic; they operate on any type that is as wide as an int.
+polymorphic; they operate on any type that is as wide as a pointer.
+
+Similar operations return the new value of *ptr:
+
+ typeof(*ptr) atomic_inc_fetch(ptr)
+ typeof(*ptr) atomic_dec_fetch(ptr)
+ typeof(*ptr) atomic_add_fetch(ptr, val)
+ typeof(*ptr) atomic_sub_fetch(ptr, val)
+ typeof(*ptr) atomic_and_fetch(ptr, val)
+ typeof(*ptr) atomic_or_fetch(ptr, val)
+ typeof(*ptr) atomic_xor_fetch(ptr, val)
Sequentially consistent loads and stores can be done using:
diff --git a/docs/interop/pr-helper.rst b/docs/interop/pr-helper.rst
new file mode 100644
index 0000000000..9f76d5bcf9
--- /dev/null
+++ b/docs/interop/pr-helper.rst
@@ -0,0 +1,83 @@
+..
+
+======================================
+Persistent reservation helper protocol
+======================================
+
+QEMU's SCSI passthrough devices, ``scsi-block`` and ``scsi-generic``,
+can delegate implementation of persistent reservations to an external
+(and typically privileged) program. Persistent Reservations allow
+restricting access to block devices to specific initiators in a shared
+storage setup.
+
+For a more detailed reference please refer to the SCSI Primary
+Commands standard, specifically the section on Reservations and the
+"PERSISTENT RESERVE IN" and "PERSISTENT RESERVE OUT" commands.
+
+This document describes the socket protocol used between QEMU's
+``pr-manager-helper`` object and the external program.
+
+.. contents::
+
+Connection and initialization
+-----------------------------
+
+All data transmitted on the socket is big-endian.
+
+After QEMU connects to the helper program's socket, the helper starts a simple
+feature negotiation process by writing four bytes corresponding to
+the features it exposes (``supported_features``). QEMU reads it,
+then writes four bytes corresponding to the desired features of the
+helper program (``requested_features``).
+
+If a bit is 1 in ``requested_features`` and 0 in ``supported_features``,
+the corresponding feature is not supported by the helper and the connection
+is closed. On the other hand, it is acceptable for a bit to be 0 in
+``requested_features`` and 1 in ``supported_features``; in this case,
+the helper will not enable the feature.
+
+Right now no feature is defined, so the two parties always write four
+zero bytes.
+
+Command format
+--------------
+
+It is invalid to send multiple commands concurrently on the same
+socket. It is however possible to connect multiple sockets to the
+helper and send multiple commands to the helper for one or more
+file descriptors.
+
+A command consists of a request and a response. A request consists
+of a 16-byte SCSI CDB. A file descriptor must be passed to the helper
+together with the SCSI CDB using ancillary data.
+
+The CDB has the following limitations:
+
+- the command (stored in the first byte) must be one of 0x5E
+ (PERSISTENT RESERVE IN) or 0x5F (PERSISTENT RESERVE OUT).
+
+- the allocation length (stored in bytes 7-8 of the CDB for PERSISTENT
+ RESERVE IN) or parameter list length (stored in bytes 5-8 of the CDB
+ for PERSISTENT RESERVE OUT) is limited to 8 KiB.
+
+For PERSISTENT RESERVE OUT, the parameter list is sent right after the
+CDB. The length of the parameter list is taken from the CDB itself.
+
+The helper's reply has the following structure:
+
+- 4 bytes for the SCSI status
+
+- 4 bytes for the payload size (nonzero only for PERSISTENT RESERVE IN
+ and only if the SCSI status is 0x00, i.e. GOOD)
+
+- 96 bytes for the SCSI sense data
+
+- if the size is nonzero, the payload follows
+
+The sense data is always sent to keep the protocol simple, even though
+it is only valid if the SCSI status is CHECK CONDITION (0x02).
+
+The payload size is always less than or equal to the allocation length
+specified in the CDB for the PERSISTENT RESERVE IN command.
+
+If the protocol is violated, the helper closes the socket.
diff --git a/docs/pr-manager.rst b/docs/pr-manager.rst
new file mode 100644
index 0000000000..9b1de198b1
--- /dev/null
+++ b/docs/pr-manager.rst
@@ -0,0 +1,111 @@
+======================================
+Persistent reservation managers
+======================================
+
+SCSI persistent reservations allow restricting access to block devices
+to specific initiators in a shared storage setup. When implementing
+clustering of virtual machines, it is a common requirement for virtual
+machines to send persistent reservation SCSI commands. However,
+the operating system restricts sending these commands to unprivileged
+programs because incorrect usage can disrupt regular operation of the
+storage fabric.
+
+For this reason, QEMU's SCSI passthrough devices, ``scsi-block``
+and ``scsi-generic`` (both are only available on Linux) can delegate
+implementation of persistent reservations to a separate object,
+the "persistent reservation manager". Only PERSISTENT RESERVE OUT and
+PERSISTENT RESERVE IN commands are passed to the persistent reservation
+manager object; other commands are processed by QEMU as usual.
+
+-----------------------------------------
+Defining a persistent reservation manager
+-----------------------------------------
+
+A persistent reservation manager is an instance of a subclass of the
+"pr-manager" QOM class.
+
+Right now only one subclass is defined, ``pr-manager-helper``, which
+forwards the commands to an external privileged helper program
+over Unix sockets. The helper program only allows sending persistent
+reservation commands to devices for which QEMU has a file descriptor,
+so that QEMU will not be able to effect persistent reservations
+unless it has access to both the socket and the device.
+
+``pr-manager-helper`` has a single string property, ``path``, which
+accepts the path to the helper program's Unix socket. For example,
+the following command line defines a ``pr-manager-helper`` object and
+attaches it to a SCSI passthrough device::
+
+ $ qemu-system-x86_64 \
+ -device virtio-scsi \
+ -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock \
+ -drive if=none,id=hd,driver=raw,file.filename=/dev/sdb,file.pr-manager=helper0 \
+ -device scsi-block,drive=hd
+
+Alternatively, using ``-blockdev``::
+
+ $ qemu-system-x86_64 \
+ -device virtio-scsi \
+ -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock \
+ -blockdev node-name=hd,driver=raw,file.driver=host_device,file.filename=/dev/sdb,file.pr-manager=helper0 \
+ -device scsi-block,drive=hd
+
+----------------------------------
+Invoking :program:`qemu-pr-helper`
+----------------------------------
+
+QEMU provides an implementation of the persistent reservation helper,
+called :program:`qemu-pr-helper`. The helper should be started as a
+system service and supports the following options:
+
+-d, --daemon run in the background
+-q, --quiet decrease verbosity
+-v, --verbose increase verbosity
+-f, --pidfile=path PID file when running as a daemon
+-k, --socket=path path to the socket
+-T, --trace=trace-opts tracing options
+
+By default, the socket and PID file are placed in the runtime state
+directory, for example :file:`/var/run/qemu-pr-helper.sock` and
+:file:`/var/run/qemu-pr-helper.pid`. The PID file is not created
+unless :option:`-d` is passed too.
+
+:program:`qemu-pr-helper` can also use the systemd socket activation
+protocol. In this case, the systemd socket unit should specify a
+Unix stream socket, like this::
+
+ [Socket]
+ ListenStream=/var/run/qemu-pr-helper.sock
+
+After connecting to the socket, :program:`qemu-pr-helper` can optionally drop
+root privileges, except for those capabilities that are needed for
+its operation. To do this, add the following options:
+
+-u, --user=user user to drop privileges to
+-g, --group=group group to drop privileges to
+
+---------------------------------------------
+Multipath devices and persistent reservations
+---------------------------------------------
+
+Proper support of persistent reservation for multipath devices requires
+communication with the multipath daemon, so that the reservation is
+registered and applied when a path is newly discovered or becomes online
+again. :program:`qemu-pr-helper` can do this if the ``libmpathpersist``
+library was available on the system at build time.
+
+As of August 2017, a reservation key must be specified in ``multipath.conf``
+for ``multipathd`` to check for persistent reservation for newly
+discovered paths or reinstated paths. The attribute can be added
+to the ``defaults`` section or the ``multipaths`` section; for example::
+
+ multipaths {
+ multipath {
+ wwid XXXXXXXXXXXXXXXX
+ alias yellow
+ reservation_key 0x123abc
+ }
+ }
+
+Linking :program:`qemu-pr-helper` to ``libmpathpersist`` does not impede
+its usage on regular SCSI devices.
diff --git a/exec.c b/exec.c
index a25a4c6018..7a80460725 100644
--- a/exec.c
+++ b/exec.c
@@ -187,21 +187,18 @@ typedef struct PhysPageMap {
} PhysPageMap;
struct AddressSpaceDispatch {
- struct rcu_head rcu;
-
MemoryRegionSection *mru_section;
/* This is a multi-level map on the physical address space.
* The bottom level has pointers to MemoryRegionSections.
*/
PhysPageEntry phys_map;
PhysPageMap map;
- AddressSpace *as;
};
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
MemoryRegion iomem;
- AddressSpace *as;
+ FlatView *fv;
hwaddr base;
uint16_t sub_section[];
} subpage_t;
@@ -361,7 +358,7 @@ static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
}
}
-static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
+void address_space_dispatch_compact(AddressSpaceDispatch *d)
{
if (d->phys_map.skip) {
phys_page_compact(&d->phys_map, d->map.nodes);
@@ -471,12 +468,13 @@ address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *x
}
/* Called from RCU critical section */
-static MemoryRegionSection address_space_do_translate(AddressSpace *as,
- hwaddr addr,
- hwaddr *xlat,
- hwaddr *plen,
- bool is_write,
- bool is_mmio)
+static MemoryRegionSection flatview_do_translate(FlatView *fv,
+ hwaddr addr,
+ hwaddr *xlat,
+ hwaddr *plen,
+ bool is_write,
+ bool is_mmio,
+ AddressSpace **target_as)
{
IOMMUTLBEntry iotlb;
MemoryRegionSection *section;
@@ -484,8 +482,9 @@ static MemoryRegionSection address_space_do_translate(AddressSpace *as,
IOMMUMemoryRegionClass *imrc;
for (;;) {
- AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
- section = address_space_translate_internal(d, addr, &addr, plen, is_mmio);
+ section = address_space_translate_internal(
+ flatview_to_dispatch(fv), addr, &addr,
+ plen, is_mmio);
iommu_mr = memory_region_get_iommu(section->mr);
if (!iommu_mr) {
@@ -502,7 +501,8 @@ static MemoryRegionSection address_space_do_translate(AddressSpace *as,
goto translate_fail;
}
- as = iotlb.target_as;
+ fv = address_space_to_flatview(iotlb.target_as);
+ *target_as = iotlb.target_as;
}
*xlat = addr;
@@ -524,8 +524,8 @@ IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
plen = (hwaddr)-1;
/* This can never be MMIO. */
- section = address_space_do_translate(as, addr, &xlat, &plen,
- is_write, false);
+ section = flatview_do_translate(address_space_to_flatview(as), addr,
+ &xlat, &plen, is_write, false, &as);
/* Illegal translation */
if (section.mr == &io_mem_unassigned) {
@@ -548,7 +548,7 @@ IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
plen -= 1;
return (IOMMUTLBEntry) {
- .target_as = section.address_space,
+ .target_as = as,
.iova = addr & ~plen,
.translated_addr = xlat & ~plen,
.addr_mask = plen,
@@ -561,15 +561,15 @@ iotlb_fail:
}
/* Called from RCU critical section */
-MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
- hwaddr *xlat, hwaddr *plen,
- bool is_write)
+MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,
+ hwaddr *plen, bool is_write)
{
MemoryRegion *mr;
MemoryRegionSection section;
+ AddressSpace *as = NULL;
/* This can be MMIO, so setup MMIO bit. */
- section = address_space_do_translate(as, addr, xlat, plen, is_write, true);
+ section = flatview_do_translate(fv, addr, xlat, plen, is_write, true, &as);
mr = section.mr;
if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
@@ -1219,7 +1219,7 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu,
} else {
AddressSpaceDispatch *d;
- d = atomic_rcu_read(&section->address_space->dispatch);
+ d = flatview_to_dispatch(section->fv);
iotlb = section - d->map.sections;
iotlb += xlat;
}
@@ -1245,7 +1245,7 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu,
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
uint16_t section);
-static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
+static subpage_t *subpage_init(FlatView *fv, hwaddr base);
static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
qemu_anon_ram_alloc;
@@ -1302,8 +1302,9 @@ static void phys_sections_free(PhysPageMap *map)
g_free(map->nodes);
}
-static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
+static void register_subpage(FlatView *fv, MemoryRegionSection *section)
{
+ AddressSpaceDispatch *d = flatview_to_dispatch(fv);
subpage_t *subpage;
hwaddr base = section->offset_within_address_space
& TARGET_PAGE_MASK;
@@ -1317,8 +1318,8 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
if (!(existing->mr->subpage)) {
- subpage = subpage_init(d->as, base);
- subsection.address_space = d->as;
+ subpage = subpage_init(fv, base);
+ subsection.fv = fv;
subsection.mr = &subpage->iomem;
phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
phys_section_add(&d->map, &subsection));
@@ -1332,9 +1333,10 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
}
-static void register_multipage(AddressSpaceDispatch *d,
+static void register_multipage(FlatView *fv,
MemoryRegionSection *section)
{
+ AddressSpaceDispatch *d = flatview_to_dispatch(fv);
hwaddr start_addr = section->offset_within_address_space;
uint16_t section_index = phys_section_add(&d->map, section);
uint64_t num_pages = int128_get64(int128_rshift(section->size,
@@ -1344,10 +1346,8 @@ static void register_multipage(AddressSpaceDispatch *d,
phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}
-static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
+void flatview_add_to_dispatch(FlatView *fv, MemoryRegionSection *section)
{
- AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
- AddressSpaceDispatch *d = as->next_dispatch;
MemoryRegionSection now = *section, remain = *section;
Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
@@ -1356,7 +1356,7 @@ static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
- now.offset_within_address_space;
now.size = int128_min(int128_make64(left), now.size);
- register_subpage(d, &now);
+ register_subpage(fv, &now);
} else {
now.size = int128_zero();
}
@@ -1366,13 +1366,13 @@ static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
remain.offset_within_region += int128_get64(now.size);
now = remain;
if (int128_lt(remain.size, page_size)) {
- register_subpage(d, &now);
+ register_subpage(fv, &now);
} else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
now.size = page_size;
- register_subpage(d, &now);
+ register_subpage(fv, &now);
} else {
now.size = int128_and(now.size, int128_neg(page_size));
- register_multipage(d, &now);
+ register_multipage(fv, &now);
}
}
}
@@ -2500,6 +2500,11 @@ static const MemoryRegionOps watch_mem_ops = {
.endianness = DEVICE_NATIVE_ENDIAN,
};
+static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
+ const uint8_t *buf, int len);
+static bool flatview_access_valid(FlatView *fv, hwaddr addr, int len,
+ bool is_write);
+
static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
unsigned len, MemTxAttrs attrs)
{
@@ -2511,8 +2516,7 @@ static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
subpage, len, addr);
#endif
- res = address_space_read(subpage->as, addr + subpage->base,
- attrs, buf, len);
+ res = flatview_read(subpage->fv, addr + subpage->base, attrs, buf, len);
if (res) {
return res;
}
@@ -2561,8 +2565,7 @@ static MemTxResult subpage_write(void *opaque, hwaddr addr,
default:
abort();
}
- return address_space_write(subpage->as, addr + subpage->base,
- attrs, buf, len);
+ return flatview_write(subpage->fv, addr + subpage->base, attrs, buf, len);
}
static bool subpage_accepts(void *opaque, hwaddr addr,
@@ -2574,8 +2577,8 @@ static bool subpage_accepts(void *opaque, hwaddr addr,
__func__, subpage, is_write ? 'w' : 'r', len, addr);
#endif
- return address_space_access_valid(subpage->as, addr + subpage->base,
- len, is_write);
+ return flatview_access_valid(subpage->fv, addr + subpage->base,
+ len, is_write);
}
static const MemoryRegionOps subpage_ops = {
@@ -2609,12 +2612,12 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
return 0;
}
-static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
+static subpage_t *subpage_init(FlatView *fv, hwaddr base)
{
subpage_t *mmio;
mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
- mmio->as = as;
+ mmio->fv = fv;
mmio->base = base;
memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
NULL, TARGET_PAGE_SIZE);
@@ -2628,12 +2631,11 @@ static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
return mmio;
}
-static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
- MemoryRegion *mr)
+static uint16_t dummy_section(PhysPageMap *map, FlatView *fv, MemoryRegion *mr)
{
- assert(as);
+ assert(fv);
MemoryRegionSection section = {
- .address_space = as,
+ .fv = fv,
.mr = mr,
.offset_within_address_space = 0,
.offset_within_region = 0,
@@ -2670,46 +2672,31 @@ static void io_mem_init(void)
NULL, UINT64_MAX);
}
-static void mem_begin(MemoryListener *listener)
+AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv) /*
+ * Allocate a zero-initialised AddressSpaceDispatch and add the four dummy
+ * sections (unassigned, notdirty, ROM, watch) for @fv.  The asserts check
+ * that each dummy section lands on the index named by the matching
+ * PHYS_SECTION_* constant, so the registration order below must not change.
+ * The returned dispatch starts with an empty phys_map root
+ * (PHYS_MAP_NODE_NIL); presumably it is released with
+ * address_space_dispatch_free() - confirm against the caller.
+ */
{
- AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
uint16_t n;
- n = dummy_section(&d->map, as, &io_mem_unassigned);
+ n = dummy_section(&d->map, fv, &io_mem_unassigned);
assert(n == PHYS_SECTION_UNASSIGNED);
- n = dummy_section(&d->map, as, &io_mem_notdirty);
+ n = dummy_section(&d->map, fv, &io_mem_notdirty);
assert(n == PHYS_SECTION_NOTDIRTY);
- n = dummy_section(&d->map, as, &io_mem_rom);
+ n = dummy_section(&d->map, fv, &io_mem_rom);
assert(n == PHYS_SECTION_ROM);
- n = dummy_section(&d->map, as, &io_mem_watch);
+ n = dummy_section(&d->map, fv, &io_mem_watch);
assert(n == PHYS_SECTION_WATCH);
d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
- d->as = as;
- as->next_dispatch = d;
+
+ return d;
}
-static void address_space_dispatch_free(AddressSpaceDispatch *d)
+void address_space_dispatch_free(AddressSpaceDispatch *d) /*
+ * Release the section map held in @d (phys_sections_free) and then @d
+ * itself.  @d must not be NULL: it is dereferenced unconditionally.
+ */
{
phys_sections_free(&d->map);
g_free(d);
}
-static void mem_commit(MemoryListener *listener)
-{
- AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
- AddressSpaceDispatch *cur = as->dispatch;
- AddressSpaceDispatch *next = as->next_dispatch;
-
- phys_page_compact_all(next, next->map.nodes_nb);
-
- atomic_rcu_set(&as->dispatch, next);
- if (cur) {
- call_rcu(cur, address_space_dispatch_free, rcu);
- }
-}
-
static void tcg_commit(MemoryListener *listener)
{
CPUAddressSpace *cpuas;
@@ -2723,39 +2710,11 @@ static void tcg_commit(MemoryListener *listener)
* We reload the dispatch pointer now because cpu_reloading_memory_map()
* may have split the RCU critical section.
*/
- d = atomic_rcu_read(&cpuas->as->dispatch);
+ d = address_space_to_dispatch(cpuas->as);
atomic_rcu_set(&cpuas->memory_dispatch, d);
tlb_flush(cpuas->cpu);
}
-void address_space_init_dispatch(AddressSpace *as)
-{
- as->dispatch = NULL;
- as->dispatch_listener = (MemoryListener) {
- .begin = mem_begin,
- .commit = mem_commit,
- .region_add = mem_add,
- .region_nop = mem_add,
- .priority = 0,
- };
- memory_listener_register(&as->dispatch_listener, as);
-}
-
-void address_space_unregister(AddressSpace *as)
-{
- memory_listener_unregister(&as->dispatch_listener);
-}
-
-void address_space_destroy_dispatch(AddressSpace *as)
-{
- AddressSpaceDispatch *d = as->dispatch;
-
- atomic_rcu_set(&as->dispatch, NULL);
- if (d) {
- call_rcu(d, address_space_dispatch_free, rcu);
- }
-}
-
static void memory_map_init(void)
{
system_memory = g_malloc(sizeof(*system_memory));
@@ -2899,11 +2858,11 @@ static bool prepare_mmio_access(MemoryRegion *mr)
}
/* Called within RCU critical section. */
-static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
- MemTxAttrs attrs,
- const uint8_t *buf,
- int len, hwaddr addr1,
- hwaddr l, MemoryRegion *mr)
+static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr,
+ MemTxAttrs attrs,
+ const uint8_t *buf,
+ int len, hwaddr addr1,
+ hwaddr l, MemoryRegion *mr)
{
uint8_t *ptr;
uint64_t val;
@@ -2965,14 +2924,14 @@ static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
}
l = len;
- mr = address_space_translate(as, addr, &addr1, &l, true);
+ mr = flatview_translate(fv, addr, &addr1, &l, true);
}
return result;
}
-MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
- const uint8_t *buf, int len)
+static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
+ const uint8_t *buf, int len)
{
hwaddr l;
hwaddr addr1;
@@ -2982,20 +2941,27 @@ MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
if (len > 0) {
rcu_read_lock();
l = len;
- mr = address_space_translate(as, addr, &addr1, &l, true);
- result = address_space_write_continue(as, addr, attrs, buf, len,
- addr1, l, mr);
+ mr = flatview_translate(fv, addr, &addr1, &l, true);
+ result = flatview_write_continue(fv, addr, attrs, buf, len,
+ addr1, l, mr);
rcu_read_unlock();
}
return result;
}
+/*
+ * Write @len bytes from @buf to @as starting at @addr, using the address
+ * space's current FlatView.
+ *
+ * The FlatView pointer is published with RCU and freed through call_rcu(),
+ * so it has to be fetched *inside* an RCU read-side critical section and
+ * kept there for as long as it is used.  RCU read locks nest, so the lock
+ * taken again inside flatview_write() is harmless.
+ *
+ * Returns the MemTxResult produced by flatview_write().
+ */
+MemTxResult address_space_write(AddressSpace *as, hwaddr addr,
+                                MemTxAttrs attrs,
+                                const uint8_t *buf, int len)
+{
+    MemTxResult result;
+
+    rcu_read_lock();
+    result = flatview_write(address_space_to_flatview(as), addr, attrs,
+                            buf, len);
+    rcu_read_unlock();
+
+    return result;
+}
+
/* Called within RCU critical section. */
-MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
- MemTxAttrs attrs, uint8_t *buf,
- int len, hwaddr addr1, hwaddr l,
- MemoryRegion *mr)
+MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr,
+ MemTxAttrs attrs, uint8_t *buf,
+ int len, hwaddr addr1, hwaddr l,
+ MemoryRegion *mr)
{
uint8_t *ptr;
uint64_t val;
@@ -3055,14 +3021,14 @@ MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
}
l = len;
- mr = address_space_translate(as, addr, &addr1, &l, false);
+ mr = flatview_translate(fv, addr, &addr1, &l, false);
}
return result;
}
-MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
- MemTxAttrs attrs, uint8_t *buf, int len)
+MemTxResult flatview_read_full(FlatView *fv, hwaddr addr,
+ MemTxAttrs attrs, uint8_t *buf, int len)
{
hwaddr l;
hwaddr addr1;
@@ -3072,25 +3038,33 @@ MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
if (len > 0) {
rcu_read_lock();
l = len;
- mr = address_space_translate(as, addr, &addr1, &l, false);
- result = address_space_read_continue(as, addr, attrs, buf, len,
- addr1, l, mr);
+ mr = flatview_translate(fv, addr, &addr1, &l, false);
+ result = flatview_read_continue(fv, addr, attrs, buf, len,
+ addr1, l, mr);
rcu_read_unlock();
}
return result;
}
-MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
- uint8_t *buf, int len, bool is_write)
+static MemTxResult flatview_rw(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
+ uint8_t *buf, int len, bool is_write) /*
+ * Direction-selecting helper: forwards to flatview_write() when @is_write
+ * is true and to flatview_read() otherwise, passing @fv, @addr, @attrs,
+ * @buf and @len through unchanged.  Returns whatever the selected
+ * function returns.
+ */
{
if (is_write) {
- return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
+ return flatview_write(fv, addr, attrs, (uint8_t *)buf, len);
} else {
- return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
+ return flatview_read(fv, addr, attrs, (uint8_t *)buf, len);
}
}
+/*
+ * Read from or write to @as at @addr, depending on @is_write, using the
+ * address space's current FlatView.
+ *
+ * The FlatView is looked up and used under the RCU read lock: views are
+ * reclaimed via call_rcu(), so a pointer fetched outside a read-side
+ * critical section could be freed before the access starts.  RCU read
+ * locks nest, so the inner locking done by the flatview helpers is fine.
+ *
+ * Returns the MemTxResult produced by flatview_rw().
+ */
+MemTxResult address_space_rw(AddressSpace *as, hwaddr addr,
+                             MemTxAttrs attrs, uint8_t *buf,
+                             int len, bool is_write)
+{
+    MemTxResult result;
+
+    rcu_read_lock();
+    result = flatview_rw(address_space_to_flatview(as),
+                         addr, attrs, buf, len, is_write);
+    rcu_read_unlock();
+
+    return result;
+}
+
void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
int len, int is_write)
{
@@ -3248,7 +3222,8 @@ static void cpu_notify_map_clients(void)
qemu_mutex_unlock(&map_client_list_lock);
}
-bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
+static bool flatview_access_valid(FlatView *fv, hwaddr addr, int len,
+ bool is_write)
{
MemoryRegion *mr;
hwaddr l, xlat;
@@ -3256,7 +3231,7 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_
rcu_read_lock();
while (len > 0) {
l = len;
- mr = address_space_translate(as, addr, &xlat, &l, is_write);
+ mr = flatview_translate(fv, addr, &xlat, &l, is_write);
if (!memory_access_is_direct(mr, is_write)) {
l = memory_access_size(mr, l, addr);
if (!memory_region_access_valid(mr, xlat, l, is_write)) {
@@ -3272,8 +3247,16 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_
return true;
}
+/*
+ * Check whether an access of @len bytes at @addr in @as is valid for the
+ * given direction (@is_write), using the address space's current FlatView.
+ *
+ * The FlatView pointer is fetched and used inside a single RCU read-side
+ * critical section; fetching it before taking the lock would allow the
+ * view to be reclaimed (it is freed via call_rcu()) while still in use.
+ *
+ * Returns the result of flatview_access_valid().
+ */
+bool address_space_access_valid(AddressSpace *as, hwaddr addr,
+                                int len, bool is_write)
+{
+    bool valid;
+
+    rcu_read_lock();
+    valid = flatview_access_valid(address_space_to_flatview(as),
+                                  addr, len, is_write);
+    rcu_read_unlock();
+
+    return valid;
+}
+
static hwaddr
-address_space_extend_translation(AddressSpace *as, hwaddr addr, hwaddr target_len,
+flatview_extend_translation(FlatView *fv, hwaddr addr,
+ hwaddr target_len,
MemoryRegion *mr, hwaddr base, hwaddr len,
bool is_write)
{
@@ -3290,7 +3273,8 @@ address_space_extend_translation(AddressSpace *as, hwaddr addr, hwaddr target_le
}
len = target_len;
- this_mr = address_space_translate(as, addr, &xlat, &len, is_write);
+ this_mr = flatview_translate(fv, addr, &xlat,
+ &len, is_write);
if (this_mr != mr || xlat != base + done) {
return done;
}
@@ -3313,6 +3297,7 @@ void *address_space_map(AddressSpace *as,
hwaddr l, xlat;
MemoryRegion *mr;
void *ptr;
+ FlatView *fv = address_space_to_flatview(as);
if (len == 0) {
return NULL;
@@ -3320,7 +3305,7 @@ void *address_space_map(AddressSpace *as,
l = len;
rcu_read_lock();
- mr = address_space_translate(as, addr, &xlat, &l, is_write);
+ mr = flatview_translate(fv, addr, &xlat, &l, is_write);
if (!memory_access_is_direct(mr, is_write)) {
if (atomic_xchg(&bounce.in_use, true)) {
@@ -3336,7 +3321,7 @@ void *address_space_map(AddressSpace *as,
memory_region_ref(mr);
bounce.mr = mr;
if (!is_write) {
- address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
+ flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED,
bounce.buffer, l);
}
@@ -3347,7 +3332,8 @@ void *address_space_map(AddressSpace *as,
memory_region_ref(mr);
- *plen = address_space_extend_translation(as, addr, len, mr, xlat, l, is_write);
+ *plen = flatview_extend_translation(fv, addr, len, mr, xlat,
+ l, is_write);
ptr = qemu_ram_ptr_length(mr->ram_block, xlat, plen, true);
rcu_read_unlock();
@@ -3630,3 +3616,87 @@ void page_size_init(void)
}
qemu_host_page_mask = -(intptr_t)qemu_host_page_size;
}
+
+#if !defined(CONFIG_USER_ONLY)
+
+static void mtree_print_phys_entries(fprintf_function mon, void *f,
+ int start, int end, int skip, int ptr)
+{ /*
+ * Print one line, via @mon on stream @f, describing a run of identical
+ * page-table entries.  @start is the first index of the run and @end is
+ * one past the last; a single-entry run prints just the index, a longer
+ * run prints "start..end-1".  @skip and @ptr are the shared entry values:
+ * PHYS_MAP_NODE_NIL prints as NIL, a zero @skip prints @ptr as "#n" and a
+ * non-zero @skip prints it as "[n]".
+ */
+ if (start == end - 1) {
+ mon(f, "\t%3d ", start);
+ } else {
+ mon(f, "\t%3d..%-3d ", start, end - 1);
+ }
+ mon(f, " skip=%d ", skip);
+ if (ptr == PHYS_MAP_NODE_NIL) {
+ mon(f, " ptr=NIL");
+ } else if (!skip) {
+ mon(f, " ptr=#%d", ptr);
+ } else {
+ mon(f, " ptr=[%d]", ptr);
+ }
+ mon(f, "\n");
+}
+
+#define MR_SIZE(size) (int128_nz(size) ? (hwaddr)int128_get64( \
+ int128_sub((size), int128_one())) : 0)
+
+/*
+ * Dump the dispatch structure @d through the printf-like callback @mon on
+ * stream @f.
+ *
+ * First every physical section is listed with its index, address range,
+ * region name and a set of tags: the fixed name of the first four (dummy)
+ * sections, [ROOT] when the section's region is @root, [MRU] when it is
+ * the dispatch's most recently used section, and [iommu].  Then every
+ * page-table node is listed, with consecutive entries that share the same
+ * ptr/skip values collapsed into a single line.
+ */
+void mtree_print_dispatch(fprintf_function mon, void *f,
+                          AddressSpaceDispatch *d, MemoryRegion *root)
+{
+    int i;
+
+    mon(f, " Dispatch\n");
+    mon(f, " Physical sections\n");
+
+    for (i = 0; i < d->map.sections_nb; ++i) {
+        MemoryRegionSection *s = d->map.sections + i;
+        const char *names[] = { " [unassigned]", " [not dirty]",
+                                " [ROM]", " [watch]" };
+
+        /*
+         * The end address is derived from the size of this section, not
+         * from the size of the whole backing region: a section may cover
+         * only part of its MemoryRegion.
+         */
+        mon(f, " #%d @" TARGET_FMT_plx ".." TARGET_FMT_plx " %s%s%s%s%s",
+            i,
+            s->offset_within_address_space,
+            s->offset_within_address_space + MR_SIZE(s->size),
+            s->mr->name ? s->mr->name : "(noname)",
+            i < ARRAY_SIZE(names) ? names[i] : "",
+            s->mr == root ? " [ROOT]" : "",
+            s == d->mru_section ? " [MRU]" : "",
+            s->mr->is_iommu ? " [iommu]" : "");
+
+        if (s->mr->alias) {
+            mon(f, " alias=%s", s->mr->alias->name ?
+                s->mr->alias->name : "noname");
+        }
+        mon(f, "\n");
+    }
+
+    mon(f, " Nodes (%d bits per level, %d levels) ptr=[%d] skip=%d\n",
+        P_L2_BITS, P_L2_LEVELS, d->phys_map.ptr, d->phys_map.skip);
+    for (i = 0; i < d->map.nodes_nb; ++i) {
+        int j, jprev;
+        PhysPageEntry prev;
+        Node *n = d->map.nodes + i;
+
+        mon(f, " [%d]\n", i);
+
+        /* Walk the node, emitting one line per run of equal entries. */
+        for (j = 0, jprev = 0, prev = *n[0]; j < ARRAY_SIZE(*n); ++j) {
+            PhysPageEntry *pe = *n + j;
+
+            if (pe->ptr == prev.ptr && pe->skip == prev.skip) {
+                continue;
+            }
+
+            mtree_print_phys_entries(mon, f, jprev, j, prev.skip, prev.ptr);
+
+            jprev = j;
+            prev = *pe;
+        }
+
+        /* Flush the final run, which the loop above never prints. */
+        if (jprev != ARRAY_SIZE(*n)) {
+            mtree_print_phys_entries(mon, f, jprev, j, prev.skip, prev.ptr);
+        }
+    }
+}
+
+#endif
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index 1c6772597d..4f1ece93e5 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -250,9 +250,10 @@ ETEXI
{
.name = "mtree",
- .args_type = "flatview:-f",
- .params = "[-f]",
- .help = "show memory tree (-f: dump flat view for address spaces)",
+ .args_type = "flatview:-f,dispatch_tree:-d",
+ .params = "[-f][-d]",
+ /* Adjacent literals are concatenated: keep the separating space. */
+ .help = "show memory tree (-f: dump flat view for address spaces;"
+ " -d: dump dispatch tree, valid with -f only)",
.cmd = hmp_info_mtree,
},
diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
index 57a680687a..bb2dfc942b 100644
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@@ -41,7 +41,7 @@ static MemTxResult bitband_read(void *opaque, hwaddr offset,
/* Find address in underlying memory and round down to multiple of size */
addr = bitband_addr(s, offset) & (-size);
- res = address_space_read(s->source_as, addr, attrs, buf, size);
+ res = address_space_read(&s->source_as, addr, attrs, buf, size);
if (res) {
return res;
}
@@ -66,7 +66,7 @@ static MemTxResult bitband_write(void *opaque, hwaddr offset, uint64_t value,
/* Find address in underlying memory and round down to multiple of size */
addr = bitband_addr(s, offset) & (-size);
- res = address_space_read(s->source_as, addr, attrs, buf, size);
+ res = address_space_read(&s->source_as, addr, attrs, buf, size);
if (res) {
return res;
}
@@ -79,7 +79,7 @@ static MemTxResult bitband_write(void *opaque, hwaddr offset, uint64_t value,
} else {
buf[bitpos >> 3] &= ~bit;
}
- return address_space_write(s->source_as, addr, attrs, buf, size);
+ return address_space_write(&s->source_as, addr, attrs, buf, size);
}
static const MemoryRegionOps bitband_ops = {
@@ -111,8 +111,7 @@ static void bitband_realize(DeviceState *dev, Error **errp)
return;
}
- s->source_as = address_space_init_shareable(s->source_memory,
- "bitband-source");
+ address_space_init(&s->source_as, s->source_memory, "bitband-source");
}
/* Board init. */
diff --git a/hw/char/virtio-console.c b/hw/char/virtio-console.c
index 198b2a89c0..172c72d06c 100644
--- a/hw/char/virtio-console.c
+++ b/hw/char/virtio-console.c
@@ -187,6 +187,26 @@ static int chr_be_change(void *opaque)
return 0;
}
+static void virtconsole_enable_backend(VirtIOSerialPort *port, bool enable)
+{ /*
+ * Attach or detach the chardev front-end handlers of @port.
+ *
+ * Does nothing when no chardev backend is connected.  With @enable true
+ * the read/can-read/backend-change handlers are installed with the
+ * VirtConsole as opaque; the event handler is installed only when the
+ * port class is not a console (is_console false).  With @enable false all
+ * handlers are cleared.
+ */
+ VirtConsole *vcon = VIRTIO_CONSOLE(port);
+
+ if (!qemu_chr_fe_backend_connected(&vcon->chr)) {
+ return;
+ }
+
+ if (enable) {
+ VirtIOSerialPortClass *k = VIRTIO_SERIAL_PORT_GET_CLASS(port);
+
+ qemu_chr_fe_set_handlers(&vcon->chr, chr_can_read, chr_read,
+ k->is_console ? NULL : chr_event,
+ chr_be_change, vcon, NULL, false);
+ } else {
+ qemu_chr_fe_set_handlers(&vcon->chr, NULL, NULL, NULL,
+ NULL, NULL, NULL, false);
+ }
+}
+
static void virtconsole_realize(DeviceState *dev, Error **errp)
{
VirtIOSerialPort *port = VIRTIO_SERIAL_PORT(dev);
@@ -258,6 +278,7 @@ static void virtserialport_class_init(ObjectClass *klass, void *data)
k->unrealize = virtconsole_unrealize;
k->have_data = flush_buf;
k->set_guest_connected = set_guest_connected;
+ k->enable_backend = virtconsole_enable_backend;
k->guest_writable = guest_writable;
dc->props = virtserialport_properties;
}
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index 17a1bb008a..9470bd7be7 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -637,6 +637,13 @@ static void set_status(VirtIODevice *vdev, uint8_t status)
if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
guest_reset(vser);
}
+
+ QTAILQ_FOREACH(port, &vser->ports, next) {
+ VirtIOSerialPortClass *vsc = VIRTIO_SERIAL_PORT_GET_CLASS(port);
+ if (vsc->enable_backend) {
+ vsc->enable_backend(port, vdev->vm_running);
+ }
+ }
}
static void vser_reset(VirtIODevice *vdev)
diff --git a/hw/intc/openpic_kvm.c b/hw/intc/openpic_kvm.c
index 0518e017c4..fa83420254 100644
--- a/hw/intc/openpic_kvm.c
+++ b/hw/intc/openpic_kvm.c
@@ -124,7 +124,7 @@ static void kvm_openpic_region_add(MemoryListener *listener,
uint64_t reg_base;
int ret;
- if (section->address_space != &address_space_memory) {
+ if (section->fv != address_space_to_flatview(&address_space_memory)) {
abort();
}
diff --git a/include/chardev/char.h b/include/chardev/char.h
index 66dde4637e..43aabccef5 100644
--- a/include/chardev/char.h
+++ b/include/chardev/char.h
@@ -55,6 +55,7 @@ struct Chardev {
int logfd;
int be_open;
GSource *gsource;
+ GMainContext *gcontext;
DECLARE_BITMAP(features, QEMU_CHAR_FEATURE_LAST);
};
@@ -169,6 +170,16 @@ void qemu_chr_be_write(Chardev *s, uint8_t *buf, int len);
void qemu_chr_be_write_impl(Chardev *s, uint8_t *buf, int len);
/**
+ * @qemu_chr_be_update_read_handlers:
+ *
+ * Invoked when frontend read handlers are set up
+ *
+ * @context the gcontext that will be used to attach the watch sources
+ */
+void qemu_chr_be_update_read_handlers(Chardev *s,
+ GMainContext *context);
+
+/**
* @qemu_chr_be_event:
*
* Send an event from the back end to the front end.
@@ -227,7 +238,7 @@ typedef struct ChardevClass {
int (*chr_write)(Chardev *s, const uint8_t *buf, int len);
int (*chr_sync_read)(Chardev *s, const uint8_t *buf, int len);
GSource *(*chr_add_watch)(Chardev *s, GIOCondition cond);
- void (*chr_update_read_handler)(Chardev *s, GMainContext *context);
+ void (*chr_update_read_handler)(Chardev *s);
int (*chr_ioctl)(Chardev *s, int cmd, void *arg);
int (*get_msgfds)(Chardev *s, int* fds, int num);
int (*set_msgfds)(Chardev *s, int *fds, int num);
diff --git a/include/exec/memory-internal.h b/include/exec/memory-internal.h
index fb467acdba..647e9bd5c4 100644
--- a/include/exec/memory-internal.h
+++ b/include/exec/memory-internal.h
@@ -22,14 +22,22 @@
#ifndef CONFIG_USER_ONLY
typedef struct AddressSpaceDispatch AddressSpaceDispatch;
-void address_space_init_dispatch(AddressSpace *as);
-void address_space_unregister(AddressSpace *as);
-void address_space_destroy_dispatch(AddressSpace *as);
-
extern const MemoryRegionOps unassigned_mem_ops;
bool memory_region_access_valid(MemoryRegion *mr, hwaddr addr,
unsigned size, bool is_write);
+void flatview_add_to_dispatch(FlatView *fv, MemoryRegionSection *section);
+AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv);
+void address_space_dispatch_compact(AddressSpaceDispatch *d);
+
+AddressSpaceDispatch *address_space_to_dispatch(AddressSpace *as);
+AddressSpaceDispatch *flatview_to_dispatch(FlatView *fv);
+void address_space_dispatch_free(AddressSpaceDispatch *d);
+
+void mtree_print_dispatch(fprintf_function mon, void *f,
+ struct AddressSpaceDispatch *d,
+ MemoryRegion *root);
+
#endif
#endif
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 1dcd3122d7..5ed4042f87 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -308,21 +308,18 @@ struct AddressSpace {
struct rcu_head rcu;
char *name;
MemoryRegion *root;
- int ref_count;
- bool malloced;
/* Accessed via RCU. */
struct FlatView *current_map;
int ioeventfd_nb;
struct MemoryRegionIoeventfd *ioeventfds;
- struct AddressSpaceDispatch *dispatch;
- struct AddressSpaceDispatch *next_dispatch;
- MemoryListener dispatch_listener;
QTAILQ_HEAD(memory_listeners_as, MemoryListener) listeners;
QTAILQ_ENTRY(AddressSpace) address_spaces_link;
};
+FlatView *address_space_to_flatview(AddressSpace *as);
+
/**
* MemoryRegionSection: describes a fragment of a #MemoryRegion
*
@@ -336,7 +333,7 @@ struct AddressSpace {
*/
struct MemoryRegionSection {
MemoryRegion *mr;
- AddressSpace *address_space;
+ FlatView *fv;
hwaddr offset_within_region;
Int128 size;
hwaddr offset_within_address_space;
@@ -1515,7 +1512,8 @@ void memory_global_dirty_log_start(void);
*/
void memory_global_dirty_log_stop(void);
-void mtree_info(fprintf_function mon_printf, void *f, bool flatview);
+void mtree_info(fprintf_function mon_printf, void *f, bool flatview,
+ bool dispatch_tree);
/**
* memory_region_request_mmio_ptr: request a pointer to an mmio
@@ -1585,23 +1583,6 @@ MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name);
/**
- * address_space_init_shareable: return an address space for a memory region,
- * creating it if it does not already exist
- *
- * @root: a #MemoryRegion that routes addresses for the address space
- * @name: an address space name. The name is only used for debugging
- * output.
- *
- * This function will return a pointer to an existing AddressSpace
- * which was initialized with the specified MemoryRegion, or it will
- * create and initialize one if it does not already exist. The ASes
- * are reference-counted, so the memory will be freed automatically
- * when the AddressSpace is destroyed via address_space_destroy.
- */
-AddressSpace *address_space_init_shareable(MemoryRegion *root,
- const char *name);
-
-/**
* address_space_destroy: destroy an address space
*
* Releases all resources associated with an address space. After an address space
@@ -1845,9 +1826,17 @@ IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
* @len: pointer to length
* @is_write: indicates the transfer direction
*/
-MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
- hwaddr *xlat, hwaddr *len,
- bool is_write);
+MemoryRegion *flatview_translate(FlatView *fv,
+ hwaddr addr, hwaddr *xlat,
+ hwaddr *len, bool is_write);
+
+static inline MemoryRegion *address_space_translate(AddressSpace *as,
+ hwaddr addr, hwaddr *xlat,
+ hwaddr *len, bool is_write)
+{ /*
+ * Convenience wrapper: looks up the current FlatView of @as with
+ * address_space_to_flatview() and forwards all other arguments to
+ * flatview_translate(), returning its result.
+ * NOTE(review): no lock is taken here while the FlatView pointer is
+ * fetched; presumably callers must already be inside an RCU read-side
+ * critical section - confirm.
+ */
+ return flatview_translate(address_space_to_flatview(as),
+ addr, xlat, len, is_write);
+}
/* address_space_access_valid: check for validity of accessing an address
* space range
@@ -1898,12 +1887,13 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
/* Internal functions, part of the implementation of address_space_read. */
-MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
- MemTxAttrs attrs, uint8_t *buf,
- int len, hwaddr addr1, hwaddr l,
- MemoryRegion *mr);
-MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
- MemTxAttrs attrs, uint8_t *buf, int len);
+MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr,
+ MemTxAttrs attrs, uint8_t *buf,
+ int len, hwaddr addr1, hwaddr l,
+ MemoryRegion *mr);
+
+MemTxResult flatview_read_full(FlatView *fv, hwaddr addr,
+ MemTxAttrs attrs, uint8_t *buf, int len);
void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr);
static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
@@ -1930,8 +1920,8 @@ static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
* @buf: buffer with the data transferred
*/
static inline __attribute__((__always_inline__))
-MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
- uint8_t *buf, int len)
+MemTxResult flatview_read(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
+ uint8_t *buf, int len)
{
MemTxResult result = MEMTX_OK;
hwaddr l, addr1;
@@ -1942,22 +1932,29 @@ MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
if (len) {
rcu_read_lock();
l = len;
- mr = address_space_translate(as, addr, &addr1, &l, false);
+ mr = flatview_translate(fv, addr, &addr1, &l, false);
if (len == l && memory_access_is_direct(mr, false)) {
ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
memcpy(buf, ptr, len);
} else {
- result = address_space_read_continue(as, addr, attrs, buf, len,
- addr1, l, mr);
+ result = flatview_read_continue(fv, addr, attrs, buf, len,
+ addr1, l, mr);
}
rcu_read_unlock();
}
} else {
- result = address_space_read_full(as, addr, attrs, buf, len);
+ result = flatview_read_full(fv, addr, attrs, buf, len);
}
return result;
}
+/*
+ * Read @len bytes from @as at @addr into @buf, using the address space's
+ * current FlatView.
+ *
+ * The FlatView pointer is fetched and used inside one RCU read-side
+ * critical section.  Views are published with RCU, so a pointer obtained
+ * outside the lock may be reclaimed before flatview_read() gets to use it.
+ * RCU read locks nest, so the locking done inside flatview_read() is fine.
+ *
+ * Returns the MemTxResult produced by flatview_read().
+ */
+static inline MemTxResult address_space_read(AddressSpace *as, hwaddr addr,
+                                             MemTxAttrs attrs, uint8_t *buf,
+                                             int len)
+{
+    MemTxResult result;
+
+    rcu_read_lock();
+    result = flatview_read(address_space_to_flatview(as), addr, attrs,
+                           buf, len);
+    rcu_read_unlock();
+
+    return result;
+}
+
/**
* address_space_read_cached: read from a cached RAM region
*
diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h
index 9ad316c76e..35ab757264 100644
--- a/include/hw/arm/armv7m.h
+++ b/include/hw/arm/armv7m.h
@@ -21,7 +21,7 @@ typedef struct {
SysBusDevice parent_obj;
/*< public >*/
- AddressSpace *source_as;
+ AddressSpace source_as;
MemoryRegion iomem;
uint32_t base;
MemoryRegion *source_memory;
diff --git a/include/hw/virtio/virtio-serial.h b/include/hw/virtio/virtio-serial.h
index b19c44727f..12657a9f39 100644
--- a/include/hw/virtio/virtio-serial.h
+++ b/include/hw/virtio/virtio-serial.h
@@ -58,6 +58,9 @@ typedef struct VirtIOSerialPortClass {
/* Guest opened/closed device. */
void (*set_guest_connected)(VirtIOSerialPort *port, int guest_connected);
+ /* Enable/disable backend for virtio serial port */
+ void (*enable_backend)(VirtIOSerialPort *port, bool enable);
+
/* Guest is now ready to accept data (virtqueues set up). */
void (*guest_ready)(VirtIOSerialPort *port);
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index b6b62fb771..d73c9e14d7 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -442,4 +442,12 @@
} while(0)
#endif
+/*
+ * atomic_fetch_inc_nonzero(ptr): increment *ptr only while it is non-zero.
+ *
+ * Evaluates to the value observed before the increment; a result of 0
+ * means the counter was already zero and nothing was changed.  The
+ * compare-and-swap is retried until it succeeds or zero is observed.
+ * Note that @ptr is evaluated more than once, so it must be free of side
+ * effects.
+ */
+#define atomic_fetch_inc_nonzero(ptr) ({                                \
+    typeof_strip_qual(*(ptr)) _oldn = atomic_read(ptr);                 \
+    while (_oldn && atomic_cmpxchg(ptr, _oldn, _oldn + 1) != _oldn) {   \
+        _oldn = atomic_read(ptr);                                       \
+    }                                                                   \
+    _oldn;                                                              \
+})
+
#endif /* QEMU_ATOMIC_H */
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 163550214c..980d2b330e 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -30,6 +30,7 @@ typedef struct DisplaySurface DisplaySurface;
typedef struct DriveInfo DriveInfo;
typedef struct Error Error;
typedef struct EventNotifier EventNotifier;
+typedef struct FlatView FlatView;
typedef struct FWCfgEntry FWCfgEntry;
typedef struct FWCfgIoState FWCfgIoState;
typedef struct FWCfgMemState FWCfgMemState;
diff --git a/include/scsi/pr-manager.h b/include/scsi/pr-manager.h
new file mode 100644
index 0000000000..b2b37d63bc
--- /dev/null
+++ b/include/scsi/pr-manager.h
@@ -0,0 +1,56 @@
+#ifndef PR_MANAGER_H
+#define PR_MANAGER_H
+
+#include "qom/object.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/visitor.h"
+#include "qom/object_interfaces.h"
+#include "block/aio.h"
+
+#define TYPE_PR_MANAGER "pr-manager"
+
+#define PR_MANAGER_CLASS(klass) \
+ OBJECT_CLASS_CHECK(PRManagerClass, (klass), TYPE_PR_MANAGER)
+#define PR_MANAGER_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(PRManagerClass, (obj), TYPE_PR_MANAGER)
+#define PR_MANAGER(obj) \
+ OBJECT_CHECK(PRManager, (obj), TYPE_PR_MANAGER)
+
+struct sg_io_hdr;
+
+typedef struct PRManager { /*
+ * Instance structure of the TYPE_PR_MANAGER QOM type.  It carries no
+ * state of its own beyond the embedded parent Object.
+ */
+ /* <private> */
+ Object parent;
+} PRManager;
+
+/**
+ * PRManagerClass:
+ * @parent_class: the base class
+ * @run: callback invoked in thread pool context; it receives the manager,
+ *       a file descriptor and the sg_io_hdr describing the request, and
+ *       returns an int status
+ */
+typedef struct PRManagerClass {
+ /* <private> */
+ ObjectClass parent_class;
+
+ /* <public> */
+ int (*run)(PRManager *pr_mgr, int fd, struct sg_io_hdr *hdr);
+} PRManagerClass;
+
+BlockAIOCB *pr_manager_execute(PRManager *pr_mgr,
+ AioContext *ctx, int fd,
+ struct sg_io_hdr *hdr,
+ BlockCompletionFunc *complete,
+ void *opaque);
+
+#ifdef CONFIG_LINUX
+PRManager *pr_manager_lookup(const char *id, Error **errp);
+#else
+static inline PRManager *pr_manager_lookup(const char *id, Error **errp)
+{ /*
+ * Fallback used when CONFIG_LINUX is not defined: the lookup always
+ * fails, setting @errp to a message naming @id and returning NULL.
+ */
+ /* The classes do not exist at all! */
+ error_setg(errp, "No persistent reservation manager with id '%s'", id);
+ return NULL;
+}
+#endif
+
+#endif
diff --git a/include/scsi/utils.h b/include/scsi/utils.h
index d301b31768..00a4bdb080 100644
--- a/include/scsi/utils.h
+++ b/include/scsi/utils.h
@@ -72,10 +72,14 @@ extern const struct SCSISense sense_code_IO_ERROR;
extern const struct SCSISense sense_code_I_T_NEXUS_LOSS;
/* Command aborted, Logical Unit failure */
extern const struct SCSISense sense_code_LUN_FAILURE;
+/* Command aborted, LUN Communication failure */
+extern const struct SCSISense sense_code_LUN_COMM_FAILURE;
/* Command aborted, Overlapped Commands Attempted */
extern const struct SCSISense sense_code_OVERLAPPED_COMMANDS;
/* LUN not ready, Capacity data has changed */
extern const struct SCSISense sense_code_CAPACITY_CHANGED;
+/* Unit attention, SCSI bus reset */
+extern const struct SCSISense sense_code_SCSI_BUS_RESET;
/* LUN not ready, Medium not present */
extern const struct SCSISense sense_code_UNIT_ATTENTION_NO_MEDIUM;
/* Unit attention, Power on, reset or bus device reset occurred */
diff --git a/memory.c b/memory.c
index b9920a6540..5e6351a6c1 100644
--- a/memory.c
+++ b/memory.c
@@ -47,6 +47,8 @@ static QTAILQ_HEAD(memory_listeners, MemoryListener) memory_listeners
static QTAILQ_HEAD(, AddressSpace) address_spaces
= QTAILQ_HEAD_INITIALIZER(address_spaces);
+static GHashTable *flat_views;
+
typedef struct AddrRange AddrRange;
/*
@@ -154,7 +156,8 @@ enum ListenerDirection { Forward, Reverse };
/* No need to ref/unref .mr, the FlatRange keeps it alive. */
#define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback, _args...) \
do { \
- MemoryRegionSection mrs = section_from_flat_range(fr, as); \
+ MemoryRegionSection mrs = section_from_flat_range(fr, \
+ address_space_to_flatview(as)); \
MEMORY_LISTENER_CALL(as, callback, dir, &mrs, ##_args); \
} while(0)
@@ -208,7 +211,6 @@ static bool memory_region_ioeventfd_equal(MemoryRegionIoeventfd a,
}
typedef struct FlatRange FlatRange;
-typedef struct FlatView FlatView;
/* Range of memory in the global map. Addresses are absolute. */
struct FlatRange {
@@ -229,6 +231,8 @@ struct FlatView {
FlatRange *ranges;
unsigned nr;
unsigned nr_allocated;
+ struct AddressSpaceDispatch *dispatch;
+ MemoryRegion *root;
};
typedef struct AddressSpaceOps AddressSpaceOps;
@@ -237,11 +241,11 @@ typedef struct AddressSpaceOps AddressSpaceOps;
for (var = (view)->ranges; var < (view)->ranges + (view)->nr; ++var)
static inline MemoryRegionSection
-section_from_flat_range(FlatRange *fr, AddressSpace *as)
+section_from_flat_range(FlatRange *fr, FlatView *fv)
{
return (MemoryRegionSection) {
.mr = fr->mr,
- .address_space = as,
+ .fv = fv,
.offset_within_region = fr->offset_in_region,
.size = fr->addr.size,
.offset_within_address_space = int128_get64(fr->addr.start),
@@ -258,12 +262,17 @@ static bool flatrange_equal(FlatRange *a, FlatRange *b)
&& a->readonly == b->readonly;
}
-static void flatview_init(FlatView *view)
+static FlatView *flatview_new(MemoryRegion *mr_root) /*
+ * Allocate a zero-initialised FlatView with a reference count of 1,
+ * remember @mr_root as its root and take a reference on that region.
+ * Emits the flatview_new trace event and returns the new view.
+ */
{
+ FlatView *view;
+
+ view = g_new0(FlatView, 1);
view->ref = 1;
- view->ranges = NULL;
- view->nr = 0;
- view->nr_allocated = 0;
+ view->root = mr_root;
+ memory_region_ref(mr_root);
+ trace_flatview_new(view, mr_root);
+
+ return view;
}
/* Insert a range into a given position. Caller is responsible for maintaining
@@ -287,25 +296,47 @@ static void flatview_destroy(FlatView *view)
{
int i;
+ trace_flatview_destroy(view, view->root);
+ if (view->dispatch) {
+ address_space_dispatch_free(view->dispatch);
+ }
for (i = 0; i < view->nr; i++) {
memory_region_unref(view->ranges[i].mr);
}
g_free(view->ranges);
+ memory_region_unref(view->root);
g_free(view);
}
-static void flatview_ref(FlatView *view)
+static bool flatview_ref(FlatView *view) /*
+ * Try to take a reference on @view.  The counter is only incremented if
+ * it is currently non-zero; returns true when the reference was taken and
+ * false when the count had already dropped to zero.
+ */
{
- atomic_inc(&view->ref);
+ return atomic_fetch_inc_nonzero(&view->ref) > 0;
}
static void flatview_unref(FlatView *view)
{
if (atomic_fetch_dec(&view->ref) == 1) {
- flatview_destroy(view);
+ trace_flatview_destroy_rcu(view, view->root); /*
+ * Last reference dropped: rather than freeing immediately, hand the
+ * view to call_rcu() so flatview_destroy() runs as an RCU callback.
+ * The view is asserted to have a root at this point.
+ */
+ assert(view->root);
+ call_rcu(view, flatview_destroy, rcu);
}
}
+FlatView *address_space_to_flatview(AddressSpace *as)
+{ /*
+ * Return the current FlatView of @as, read with atomic_rcu_read().
+ * No reference is taken on the view.
+ * NOTE(review): presumably the caller must be inside an RCU read-side
+ * critical section for the returned pointer to stay valid - confirm.
+ */
+ return atomic_rcu_read(&as->current_map);
+}
+
+AddressSpaceDispatch *flatview_to_dispatch(FlatView *fv)
+{ /* Return the dispatch structure stored in @fv (plain field read). */
+ return fv->dispatch;
+}
+
+AddressSpaceDispatch *address_space_to_dispatch(AddressSpace *as)
+{ /*
+ * Return the dispatch structure of the current FlatView of @as, by
+ * chaining address_space_to_flatview() and flatview_to_dispatch().
+ */
+ return flatview_to_dispatch(address_space_to_flatview(as));
+}
+
static bool can_merge(FlatRange *r1, FlatRange *r2)
{
return int128_eq(addrrange_end(r1->addr), r2->addr.start)
@@ -560,13 +591,14 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
unsigned size,
unsigned access_size_min,
unsigned access_size_max,
- MemTxResult (*access)(MemoryRegion *mr,
- hwaddr addr,
- uint64_t *value,
- unsigned size,
- unsigned shift,
- uint64_t mask,
- MemTxAttrs attrs),
+ MemTxResult (*access_fn)
+ (MemoryRegion *mr,
+ hwaddr addr,
+ uint64_t *value,
+ unsigned size,
+ unsigned shift,
+ uint64_t mask,
+ MemTxAttrs attrs),
MemoryRegion *mr,
MemTxAttrs attrs)
{
@@ -587,12 +619,12 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
access_mask = -1ULL >> (64 - access_size * 8);
if (memory_region_big_endian(mr)) {
for (i = 0; i < size; i += access_size) {
- r |= access(mr, addr + i, value, access_size,
+ r |= access_fn(mr, addr + i, value, access_size,
(size - access_size - i) * 8, access_mask, attrs);
}
} else {
for (i = 0; i < size; i += access_size) {
- r |= access(mr, addr + i, value, access_size, i * 8,
+ r |= access_fn(mr, addr + i, value, access_size, i * 8,
access_mask, attrs);
}
}
@@ -701,13 +733,57 @@ static void render_memory_region(FlatView *view,
}
}
+/* Find the region that should be used as the key ("root") of the FlatView
+ * for the tree starting at mr.
+ *
+ * Starting from mr, and for as long as the current region is enabled:
+ *  - an alias with a zero alias_offset whose size is at least the size of
+ *    its target is replaced by the target;
+ *  - a non-terminating container with exactly one enabled child, located
+ *    at address 0 and no larger than the container, is replaced by that
+ *    child;
+ *  - otherwise the current region is returned.
+ *
+ * Returns NULL if the current region is disabled, or if a container has no
+ * enabled child at all.
+ */
+static MemoryRegion *memory_region_get_flatview_root(MemoryRegion *mr)
+{
+ while (mr->enabled) {
+ if (mr->alias) {
+ if (!mr->alias_offset && int128_ge(mr->size, mr->alias->size)) {
+ /* The alias is included in its entirety. Use it as
+ * the "real" root, so that we can share more FlatViews.
+ */
+ mr = mr->alias;
+ continue;
+ }
+ } else if (!mr->terminates) {
+ unsigned int found = 0;
+ MemoryRegion *child, *next = NULL;
+ QTAILQ_FOREACH(child, &mr->subregions, subregions_link) {
+ if (child->enabled) {
+ /* A second enabled child means we cannot descend any further. */
+ if (++found > 1) {
+ next = NULL;
+ break;
+ }
+ if (!child->addr && int128_ge(mr->size, child->size)) {
+ /* A child is included in its entirety. If it's the only
+ * enabled one, use it in the hope of finding an alias down the
+ * way. This will also let us share FlatViews.
+ */
+ next = child;
+ }
+ }
+ }
+ if (found == 0) {
+ return NULL;
+ }
+ if (next) {
+ mr = next;
+ continue;
+ }
+ }
+
+ return mr;
+ }
+
+ return NULL;
+}
+
/* Render a memory topology into a list of disjoint absolute ranges. */
static FlatView *generate_memory_topology(MemoryRegion *mr)
{
+ int i;
FlatView *view;
- view = g_new(FlatView, 1);
- flatview_init(view);
+ view = flatview_new(mr);
if (mr) {
render_memory_region(view, mr, int128_zero(),
@@ -715,6 +791,15 @@ static FlatView *generate_memory_topology(MemoryRegion *mr)
}
flatview_simplify(view);
+ view->dispatch = address_space_dispatch_new(view);
+ for (i = 0; i < view->nr; i++) {
+ MemoryRegionSection mrs =
+ section_from_flat_range(&view->ranges[i], view);
+ flatview_add_to_dispatch(view, &mrs);
+ }
+ address_space_dispatch_compact(view->dispatch);
+ g_hash_table_replace(flat_views, mr, view);
+
return view;
}
@@ -740,7 +825,7 @@ static void address_space_add_del_ioeventfds(AddressSpace *as,
fds_new[inew]))) {
fd = &fds_old[iold];
section = (MemoryRegionSection) {
- .address_space = as,
+ .fv = address_space_to_flatview(as),
.offset_within_address_space = int128_get64(fd->addr.start),
.size = fd->addr.size,
};
@@ -753,7 +838,7 @@ static void address_space_add_del_ioeventfds(AddressSpace *as,
fds_old[iold]))) {
fd = &fds_new[inew];
section = (MemoryRegionSection) {
- .address_space = as,
+ .fv = address_space_to_flatview(as),
.offset_within_address_space = int128_get64(fd->addr.start),
.size = fd->addr.size,
};
@@ -772,8 +857,12 @@ static FlatView *address_space_get_flatview(AddressSpace *as)
FlatView *view;
rcu_read_lock();
- view = atomic_rcu_read(&as->current_map);
- flatview_ref(view);
+ do {
+ view = address_space_to_flatview(as);
+ /* If somebody has replaced as->current_map concurrently,
+ * flatview_ref returns false.
+ */
+ } while (!flatview_ref(view));
rcu_read_unlock();
return view;
}
@@ -879,18 +968,81 @@ static void address_space_update_topology_pass(AddressSpace *as,
}
}
+static void flatviews_init(void)
+{
+ static FlatView *empty_view;
+
+ if (flat_views) {
+ return;
+ }
-static void address_space_update_topology(AddressSpace *as)
+ flat_views = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,
+ (GDestroyNotify) flatview_unref);
+ if (!empty_view) {
+ empty_view = generate_memory_topology(NULL);
+ /* We keep it alive forever in the global variable. */
+ flatview_ref(empty_view);
+ } else {
+ g_hash_table_replace(flat_views, NULL, empty_view);
+ flatview_ref(empty_view);
+ }
+}
+
+/* Throw away the global flat_views table (if one exists), recreate it with
+ * flatviews_init(), and then generate a FlatView for every distinct
+ * flatview root found among the registered address spaces.  Address spaces
+ * whose root already has an entry in flat_views are skipped.
+ */
+static void flatviews_reset(void)
+{
+ AddressSpace *as;
+
+ if (flat_views) {
+ g_hash_table_unref(flat_views);
+ flat_views = NULL;
+ }
+ flatviews_init();
+
+ /* Render unique FVs */
+ QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
+ MemoryRegion *physmr = memory_region_get_flatview_root(as->root);
+
+ if (g_hash_table_lookup(flat_views, physmr)) {
+ continue;
+ }
+
+ generate_memory_topology(physmr);
+ }
+}
+
+static void address_space_set_flatview(AddressSpace *as)
{
- FlatView *old_view = address_space_get_flatview(as);
- FlatView *new_view = generate_memory_topology(as->root);
+ FlatView *old_view = address_space_to_flatview(as);
+ MemoryRegion *physmr = memory_region_get_flatview_root(as->root);
+ FlatView *new_view = g_hash_table_lookup(flat_views, physmr);
- address_space_update_topology_pass(as, old_view, new_view, false);
- address_space_update_topology_pass(as, old_view, new_view, true);
+ assert(new_view);
+
+ if (old_view == new_view) {
+ return;
+ }
+
+ if (old_view) {
+ flatview_ref(old_view);
+ }
+
+ flatview_ref(new_view);
+
+ if (!QTAILQ_EMPTY(&as->listeners)) {
+ FlatView tmpview = { .nr = 0 }, *old_view2 = old_view;
+
+ if (!old_view2) {
+ old_view2 = &tmpview;
+ }
+ address_space_update_topology_pass(as, old_view2, new_view, false);
+ address_space_update_topology_pass(as, old_view2, new_view, true);
+ }
/* Writes are protected by the BQL. */
atomic_rcu_set(&as->current_map, new_view);
- call_rcu(old_view, flatview_unref, rcu);
+ if (old_view) {
+ flatview_unref(old_view);
+ }
/* Note that all the old MemoryRegions are still alive up to this
* point. This relieves most MemoryListeners from the need to
@@ -898,9 +1050,20 @@ static void address_space_update_topology(AddressSpace *as)
* outside the iothread mutex, in which case precise reference
* counting is necessary.
*/
- flatview_unref(old_view);
+ if (old_view) {
+ flatview_unref(old_view);
+ }
+}
- address_space_update_ioeventfds(as);
+/* Make sure a FlatView exists for the flatview root of as (creating the
+ * flat_views table and rendering the view if needed), then switch as over
+ * to it with address_space_set_flatview().
+ */
+static void address_space_update_topology(AddressSpace *as)
+{
+ MemoryRegion *physmr = memory_region_get_flatview_root(as->root);
+
+ flatviews_init();
+ if (!g_hash_table_lookup(flat_views, physmr)) {
+ generate_memory_topology(physmr);
+ }
+ address_space_set_flatview(as);
+}
void memory_region_transaction_begin(void)
@@ -919,10 +1082,13 @@ void memory_region_transaction_commit(void)
--memory_region_transaction_depth;
if (!memory_region_transaction_depth) {
if (memory_region_update_pending) {
+ flatviews_reset();
+
MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
- address_space_update_topology(as);
+ address_space_set_flatview(as);
+ address_space_update_ioeventfds(as);
}
memory_region_update_pending = false;
MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
@@ -1835,7 +2001,7 @@ void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
view = address_space_get_flatview(as);
FOR_EACH_FLAT_RANGE(fr, view) {
if (fr->mr == mr) {
- MemoryRegionSection mrs = section_from_flat_range(fr, as);
+ MemoryRegionSection mrs = section_from_flat_range(fr, view);
listener->log_sync(listener, &mrs);
}
}
@@ -1938,7 +2104,7 @@ static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpa
FOR_EACH_FLAT_RANGE(fr, view) {
if (fr->mr == mr) {
section = (MemoryRegionSection) {
- .address_space = as,
+ .fv = view,
.offset_within_address_space = int128_get64(fr->addr.start),
.size = fr->addr.size,
};
@@ -2289,7 +2455,7 @@ static MemoryRegionSection memory_region_find_rcu(MemoryRegion *mr,
}
range = addrrange_make(int128_make64(addr), int128_make64(size));
- view = atomic_rcu_read(&as->current_map);
+ view = address_space_to_flatview(as);
fr = flatview_lookup(view, range);
if (!fr) {
return ret;
@@ -2300,7 +2466,7 @@ static MemoryRegionSection memory_region_find_rcu(MemoryRegion *mr,
}
ret.mr = fr->mr;
- ret.address_space = as;
+ ret.fv = view;
range = addrrange_intersection(range, fr->addr);
ret.offset_within_region = fr->offset_in_region;
ret.offset_within_region += int128_get64(int128_sub(range.start,
@@ -2349,7 +2515,8 @@ void memory_global_dirty_log_sync(void)
view = address_space_get_flatview(as);
FOR_EACH_FLAT_RANGE(fr, view) {
if (fr->dirty_log_mask) {
- MemoryRegionSection mrs = section_from_flat_range(fr, as);
+ MemoryRegionSection mrs = section_from_flat_range(fr, view);
+
listener->log_sync(listener, &mrs);
}
}
@@ -2434,7 +2601,7 @@ static void listener_add_address_space(MemoryListener *listener,
FOR_EACH_FLAT_RANGE(fr, view) {
MemoryRegionSection section = {
.mr = fr->mr,
- .address_space = as,
+ .fv = view,
.offset_within_region = fr->offset_in_region,
.size = fr->addr.size,
.offset_within_address_space = int128_get64(fr->addr.start),
@@ -2610,69 +2777,36 @@ void memory_region_invalidate_mmio_ptr(MemoryRegion *mr, hwaddr offset,
void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
{
memory_region_ref(root);
- memory_region_transaction_begin();
- as->ref_count = 1;
as->root = root;
- as->malloced = false;
- as->current_map = g_new(FlatView, 1);
- flatview_init(as->current_map);
+ as->current_map = NULL;
as->ioeventfd_nb = 0;
as->ioeventfds = NULL;
QTAILQ_INIT(&as->listeners);
QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link);
as->name = g_strdup(name ? name : "anonymous");
- address_space_init_dispatch(as);
- memory_region_update_pending |= root->enabled;
- memory_region_transaction_commit();
+ address_space_update_topology(as);
+ address_space_update_ioeventfds(as);
}
static void do_address_space_destroy(AddressSpace *as)
{
- bool do_free = as->malloced;
-
- address_space_destroy_dispatch(as);
assert(QTAILQ_EMPTY(&as->listeners));
flatview_unref(as->current_map);
g_free(as->name);
g_free(as->ioeventfds);
memory_region_unref(as->root);
- if (do_free) {
- g_free(as);
- }
-}
-
-AddressSpace *address_space_init_shareable(MemoryRegion *root, const char *name)
-{
- AddressSpace *as;
-
- QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
- if (root == as->root && as->malloced) {
- as->ref_count++;
- return as;
- }
- }
-
- as = g_malloc0(sizeof *as);
- address_space_init(as, root, name);
- as->malloced = true;
- return as;
}
void address_space_destroy(AddressSpace *as)
{
MemoryRegion *root = as->root;
- as->ref_count--;
- if (as->ref_count) {
- return;
- }
/* Flush out anything from MemoryListeners listening in on this */
memory_region_transaction_begin();
as->root = NULL;
memory_region_transaction_commit();
QTAILQ_REMOVE(&address_spaces, as, address_spaces_link);
- address_space_unregister(as);
/* At this point, as->dispatch and as->current_map are dummy
* entries that the guest should never use. Wait for the old
@@ -2807,18 +2941,44 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
}
}
-static void mtree_print_flatview(fprintf_function p, void *f,
- AddressSpace *as)
+/* State handed to mtree_print_flatview() through the user_data pointer of
+ * g_hash_table_foreach().
+ */
+struct FlatViewInfo {
+ fprintf_function mon_printf; /* output callback */
+ void *f; /* first argument passed to mon_printf */
+ int counter; /* index printed as "FlatView #%d", bumped per view */
+ bool dispatch_tree; /* also print the dispatch tree of each view */
+};
+
+static void mtree_print_flatview(gpointer key, gpointer value,
+ gpointer user_data)
{
- FlatView *view = address_space_get_flatview(as);
+ FlatView *view = key;
+ GArray *fv_address_spaces = value;
+ struct FlatViewInfo *fvi = user_data;
+ fprintf_function p = fvi->mon_printf;
+ void *f = fvi->f;
FlatRange *range = &view->ranges[0];
MemoryRegion *mr;
int n = view->nr;
+ int i;
+ AddressSpace *as;
+
+ p(f, "FlatView #%d\n", fvi->counter);
+ ++fvi->counter;
+
+ for (i = 0; i < fv_address_spaces->len; ++i) {
+ as = g_array_index(fv_address_spaces, AddressSpace*, i);
+ p(f, " AS \"%s\", root: %s", as->name, memory_region_name(as->root));
+ if (as->root->alias) {
+ p(f, ", alias %s", memory_region_name(as->root->alias));
+ }
+ p(f, "\n");
+ }
+
+ p(f, " Root memory region: %s\n",
+ view->root ? memory_region_name(view->root) : "(none)");
if (n <= 0) {
- p(f, MTREE_INDENT "No rendered FlatView for "
- "address space '%s'\n", as->name);
- flatview_unref(view);
+ p(f, MTREE_INDENT "No rendered FlatView\n\n");
return;
}
@@ -2845,21 +3005,65 @@ static void mtree_print_flatview(fprintf_function p, void *f,
range++;
}
+#if !defined(CONFIG_USER_ONLY)
+ if (fvi->dispatch_tree && view->root) {
+ mtree_print_dispatch(p, f, view->dispatch, view->root);
+ }
+#endif
+
+ p(f, "\n");
+}
+
+/* Callback for g_hash_table_foreach_remove(): key is a FlatView, value is
+ * the GArray of address spaces collected for it.  Releases the array and
+ * drops one reference on the view.  Always returns true, which tells
+ * g_hash_table_foreach_remove() to remove the entry.
+ */
+static gboolean mtree_info_flatview_free(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ FlatView *view = key;
+ GArray *fv_address_spaces = value;
+
+ g_array_unref(fv_address_spaces);
+ flatview_unref(view);
+
+ return true;
+}
-void mtree_info(fprintf_function mon_printf, void *f, bool flatview)
+void mtree_info(fprintf_function mon_printf, void *f, bool flatview,
+ bool dispatch_tree)
{
MemoryRegionListHead ml_head;
MemoryRegionList *ml, *ml2;
AddressSpace *as;
if (flatview) {
+ FlatView *view;
+ struct FlatViewInfo fvi = {
+ .mon_printf = mon_printf,
+ .f = f,
+ .counter = 0,
+ .dispatch_tree = dispatch_tree
+ };
+ GArray *fv_address_spaces;
+ GHashTable *views = g_hash_table_new(g_direct_hash, g_direct_equal);
+
+ /* Gather all FVs in one table */
QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
- mon_printf(f, "address-space (flat view): %s\n", as->name);
- mtree_print_flatview(mon_printf, f, as);
- mon_printf(f, "\n");
+ view = address_space_get_flatview(as);
+
+ fv_address_spaces = g_hash_table_lookup(views, view);
+ if (!fv_address_spaces) {
+ fv_address_spaces = g_array_new(false, false, sizeof(as));
+ g_hash_table_insert(views, view, fv_address_spaces);
+ }
+
+ g_array_append_val(fv_address_spaces, as);
}
+
+ /* Print */
+ g_hash_table_foreach(views, mtree_print_flatview, &fvi);
+
+ /* Free */
+ g_hash_table_foreach_remove(views, mtree_info_flatview_free, 0);
+ g_hash_table_unref(views);
+
return;
}
diff --git a/monitor.c b/monitor.c
index 058045b3cb..f4856b9268 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1703,8 +1703,9 @@ static void hmp_boot_set(Monitor *mon, const QDict *qdict)
static void hmp_info_mtree(Monitor *mon, const QDict *qdict)
{
bool flatview = qdict_get_try_bool(qdict, "flatview", false);
+ bool dispatch_tree = qdict_get_try_bool(qdict, "dispatch_tree", false);
- mtree_info((fprintf_function)monitor_printf, mon, flatview);
+ mtree_info((fprintf_function)monitor_printf, mon, flatview, dispatch_tree);
}
static void hmp_info_numa(Monitor *mon, const QDict *qdict)
diff --git a/qapi/block-core.json b/qapi/block-core.json
index bb11815608..c69a395804 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2241,6 +2241,9 @@
# Driver specific block device options for the file backend.
#
# @filename: path to the image file
+# @pr-manager: the id for the object that will handle persistent reservations
+# for this device (default: none, forward the commands via SG_IO;
+# since 2.11)
# @aio: AIO backend (default: threads) (since: 2.8)
# @locking: whether to enable file locking. If set to 'auto', only enable
# when Open File Descriptor (OFD) locking API is available
@@ -2250,6 +2253,7 @@
##
{ 'struct': 'BlockdevOptionsFile',
'data': { 'filename': 'str',
+ '*pr-manager': 'str',
'*locking': 'OnOffAuto',
'*aio': 'BlockdevAioOptions' } }
diff --git a/scsi/Makefile.objs b/scsi/Makefile.objs
index 31b82a5a36..4d25e476cf 100644
--- a/scsi/Makefile.objs
+++ b/scsi/Makefile.objs
@@ -1 +1,3 @@
block-obj-y += utils.o
+
+block-obj-$(CONFIG_LINUX) += pr-manager.o pr-manager-helper.o
diff --git a/scsi/pr-helper.h b/scsi/pr-helper.h
new file mode 100644
index 0000000000..96c50a9e5f
--- /dev/null
+++ b/scsi/pr-helper.h
@@ -0,0 +1,41 @@
+/* Definitions for QEMU's persistent reservation helper daemon
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author:
+ * Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#ifndef QEMU_PR_HELPER_H
+#define QEMU_PR_HELPER_H 1
+
+#include <stdint.h>
+
+/* Size in bytes of the CDB buffer exchanged with the helper. */
+#define PR_HELPER_CDB_SIZE 16
+/* Size in bytes of the sense buffer carried in PRHelperResponse. */
+#define PR_HELPER_SENSE_SIZE 96
+/* Upper bound on the data transfer length (pr-manager-helper.c rejects
+ * commands whose transfer length exceeds it).
+ */
+#define PR_HELPER_DATA_SIZE 8192
+
+/* Response header sent by the helper.  pr-manager-helper.c converts
+ * result and sz with be32_to_cpu() after reading the structure.
+ */
+typedef struct PRHelperResponse {
+ int32_t result; /* copied into sg_io_hdr.status by the client */
+ int32_t sz; /* number of data bytes that follow the response */
+ uint8_t sense[PR_HELPER_SENSE_SIZE]; /* used when result is CHECK_CONDITION */
+} PRHelperResponse;
+
+#endif
diff --git a/scsi/pr-manager-helper.c b/scsi/pr-manager-helper.c
new file mode 100644
index 0000000000..82ff6b6123
--- /dev/null
+++ b/scsi/pr-manager-helper.c
@@ -0,0 +1,302 @@
+/*
+ * Persistent reservation manager that talks to qemu-pr-helper
+ *
+ * Copyright (c) 2017 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This code is licensed under the LGPL v2.1 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "scsi/constants.h"
+#include "scsi/pr-manager.h"
+#include "scsi/utils.h"
+#include "io/channel.h"
+#include "io/channel-socket.h"
+#include "pr-helper.h"
+
+#include <scsi/sg.h>
+
+#define PR_MAX_RECONNECT_ATTEMPTS 5
+
+#define TYPE_PR_MANAGER_HELPER "pr-manager-helper"
+
+#define PR_MANAGER_HELPER(obj) \
+ OBJECT_CHECK(PRManagerHelper, (obj), \
+ TYPE_PR_MANAGER_HELPER)
+
+/* PRManager implementation that forwards commands to an external helper
+ * over a Unix socket.
+ */
+typedef struct PRManagerHelper {
+ /* <private> */
+ PRManager parent;
+
+ /* Socket path, set through the "path" property. */
+ char *path;
+
+ /* Held around connection setup and around each command exchange. */
+ QemuMutex lock;
+ /* Connection to the helper; NULL when there is no usable connection. */
+ QIOChannel *ioc;
+} PRManagerHelper;
+
+/* Read sz bytes from the helper connection into buf using
+ * qio_channel_read_all().  On failure the channel is unreferenced,
+ * pr_mgr->ioc is reset to NULL and -EINVAL is returned; otherwise the
+ * function returns 0.
+ *
+ * Called with lock held.
+ */
+static int pr_manager_helper_read(PRManagerHelper *pr_mgr,
+ void *buf, int sz, Error **errp)
+{
+ ssize_t r = qio_channel_read_all(pr_mgr->ioc, buf, sz, errp);
+
+ if (r < 0) {
+ object_unref(OBJECT(pr_mgr->ioc));
+ pr_mgr->ioc = NULL;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Write all sz bytes of buf to the helper connection, looping over short
+ * writes.  If fd is not -1 it is passed along with the first chunk only.
+ *
+ * If the channel reports an error, or accepts no data at all, the channel
+ * is unreferenced, pr_mgr->ioc is reset to NULL and -EINVAL is returned.
+ * Returns 0 once everything has been written.
+ *
+ * Called with lock held.
+ */
+static int pr_manager_helper_write(PRManagerHelper *pr_mgr,
+                                   int fd,
+                                   const void *buf, int sz, Error **errp)
+{
+    size_t nfds = (fd != -1);
+    while (sz > 0) {
+        struct iovec iov;
+        ssize_t n_written;
+
+        iov.iov_base = (void *)buf;
+        iov.iov_len = sz;
+        n_written = qio_channel_writev_full(QIO_CHANNEL(pr_mgr->ioc), &iov, 1,
+                                            nfds ? &fd : NULL, nfds, errp);
+
+        if (n_written <= 0) {
+            assert(n_written != QIO_CHANNEL_ERR_BLOCK);
+            object_unref(OBJECT(pr_mgr->ioc));
+            /* Do not leave a stale pointer behind: callers test
+             * pr_mgr->ioc to decide whether to reconnect, and their
+             * error paths unref it again.
+             */
+            pr_mgr->ioc = NULL;
+            /* The connection is gone and the buffer was not fully sent,
+             * so this is a failure even if no error code was reported.
+             */
+            return -EINVAL;
+        }
+
+        /* The file descriptor only travels with the first chunk. */
+        nfds = 0;
+        buf += n_written;
+        sz -= n_written;
+    }
+
+    return 0;
+}
+
+/* Connect to the helper's Unix socket at pr_mgr->path and run the initial
+ * flags exchange: read a 32-bit flags word from the helper, then send back
+ * a flags word of zero.
+ *
+ * Must be entered with pr_mgr->ioc == NULL (asserted).  Returns 0 with
+ * pr_mgr->ioc set on success.  Returns -ENOTCONN if the connection cannot
+ * be established, or the negative value from the failed read/write, in
+ * which case pr_mgr->ioc is NULL again on return.
+ *
+ * Called with lock held.
+ */
+static int pr_manager_helper_initialize(PRManagerHelper *pr_mgr,
+ Error **errp)
+{
+ char *path = g_strdup(pr_mgr->path);
+ SocketAddress saddr = {
+ .type = SOCKET_ADDRESS_TYPE_UNIX,
+ .u.q_unix.path = path
+ };
+ QIOChannelSocket *sioc = qio_channel_socket_new();
+ Error *local_err = NULL;
+
+ uint32_t flags;
+ int r;
+
+ assert(!pr_mgr->ioc);
+ qio_channel_set_name(QIO_CHANNEL(sioc), "pr-manager-helper");
+ qio_channel_socket_connect_sync(sioc,
+ &saddr,
+ &local_err);
+ g_free(path);
+ if (local_err) {
+ object_unref(OBJECT(sioc));
+ error_propagate(errp, local_err);
+ return -ENOTCONN;
+ }
+
+ qio_channel_set_delay(QIO_CHANNEL(sioc), false);
+ pr_mgr->ioc = QIO_CHANNEL(sioc);
+
+ /* A simple feature negotiation protocol, even though there is
+ * no optional feature right now.
+ */
+ r = pr_manager_helper_read(pr_mgr, &flags, sizeof(flags), errp);
+ if (r < 0) {
+ goto out_close;
+ }
+
+ flags = 0;
+ r = pr_manager_helper_write(pr_mgr, -1, &flags, sizeof(flags), errp);
+ if (r < 0) {
+ goto out_close;
+ }
+
+ return 0;
+
+out_close:
+ object_unref(OBJECT(pr_mgr->ioc));
+ pr_mgr->ioc = NULL;
+ return r;
+}
+
+/* PRManagerClass.run implementation: forward one persistent reservation
+ * command to the helper and fill in io_hdr with the outcome.
+ *
+ * The command is rejected with -EINVAL, before anything is sent, if the
+ * CDB length is 0 or larger than PR_HELPER_CDB_SIZE, if the transfer
+ * direction does not match the opcode, or if the transfer length taken
+ * from the CDB exceeds io_hdr->dxfer_len or PR_HELPER_DATA_SIZE.  The
+ * opcode must be PERSISTENT_RESERVE_OUT or PERSISTENT_RESERVE_IN
+ * (asserted).
+ *
+ * The CDB is sent together with fd; connecting and sending the CDB is
+ * tried up to PR_MAX_RECONNECT_ATTEMPTS times, sleeping one second with
+ * the lock released after each failed connection attempt.  Any later
+ * communication failure fails the command.
+ *
+ * Returns 0 when a response was received, in which case io_hdr->status
+ * and, for CHECK_CONDITION, the sense data come from the helper.  On a
+ * negative return the sense buffer is filled with LUN_COMM_FAILURE and
+ * io_hdr->status is set to CHECK_CONDITION.
+ */
+static int pr_manager_helper_run(PRManager *p,
+ int fd, struct sg_io_hdr *io_hdr)
+{
+ PRManagerHelper *pr_mgr = PR_MANAGER_HELPER(p);
+
+ uint32_t len;
+ PRHelperResponse resp;
+ int ret;
+ int expected_dir;
+ int attempts;
+ uint8_t cdb[PR_HELPER_CDB_SIZE] = { 0 };
+
+ if (!io_hdr->cmd_len || io_hdr->cmd_len > PR_HELPER_CDB_SIZE) {
+ return -EINVAL;
+ }
+
+ memcpy(cdb, io_hdr->cmdp, io_hdr->cmd_len);
+ assert(cdb[0] == PERSISTENT_RESERVE_OUT || cdb[0] == PERSISTENT_RESERVE_IN);
+ expected_dir =
+ (cdb[0] == PERSISTENT_RESERVE_OUT ? SG_DXFER_TO_DEV : SG_DXFER_FROM_DEV);
+ if (io_hdr->dxfer_direction != expected_dir) {
+ return -EINVAL;
+ }
+
+ len = scsi_cdb_xfer(cdb);
+ if (io_hdr->dxfer_len < len || len > PR_HELPER_DATA_SIZE) {
+ return -EINVAL;
+ }
+
+ qemu_mutex_lock(&pr_mgr->lock);
+
+ /* Try to reconnect while sending the CDB. */
+ for (attempts = 0; attempts < PR_MAX_RECONNECT_ATTEMPTS; attempts++) {
+ if (!pr_mgr->ioc) {
+ ret = pr_manager_helper_initialize(pr_mgr, NULL);
+ if (ret < 0) {
+ qemu_mutex_unlock(&pr_mgr->lock);
+ g_usleep(G_USEC_PER_SEC);
+ qemu_mutex_lock(&pr_mgr->lock);
+ continue;
+ }
+ }
+
+ ret = pr_manager_helper_write(pr_mgr, fd, cdb, ARRAY_SIZE(cdb), NULL);
+ if (ret >= 0) {
+ break;
+ }
+ }
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* After sending the CDB, any communications failure causes the
+ * command to fail. The failure is transient, retrying the command
+ * will invoke pr_manager_helper_initialize again.
+ */
+ if (expected_dir == SG_DXFER_TO_DEV) {
+ io_hdr->resid = io_hdr->dxfer_len - len;
+ ret = pr_manager_helper_write(pr_mgr, -1, io_hdr->dxferp, len, NULL);
+ if (ret < 0) {
+ goto out;
+ }
+ }
+ ret = pr_manager_helper_read(pr_mgr, &resp, sizeof(resp), NULL);
+ if (ret < 0) {
+ goto out;
+ }
+
+ resp.result = be32_to_cpu(resp.result);
+ resp.sz = be32_to_cpu(resp.sz);
+ if (io_hdr->dxfer_direction == SG_DXFER_FROM_DEV) {
+ assert(resp.sz <= io_hdr->dxfer_len);
+ ret = pr_manager_helper_read(pr_mgr, io_hdr->dxferp, resp.sz, NULL);
+ if (ret < 0) {
+ goto out;
+ }
+ io_hdr->resid = io_hdr->dxfer_len - resp.sz;
+ } else {
+ assert(resp.sz == 0);
+ }
+
+ io_hdr->status = resp.result;
+ if (resp.result == CHECK_CONDITION) {
+ io_hdr->driver_status = SG_ERR_DRIVER_SENSE;
+ io_hdr->sb_len_wr = MIN(io_hdr->mx_sb_len, PR_HELPER_SENSE_SIZE);
+ memcpy(io_hdr->sbp, resp.sense, io_hdr->sb_len_wr);
+ }
+
+out:
+ if (ret < 0) {
+ int sense_len = scsi_build_sense(io_hdr->sbp,
+ SENSE_CODE(LUN_COMM_FAILURE));
+ io_hdr->driver_status = SG_ERR_DRIVER_SENSE;
+ io_hdr->sb_len_wr = MIN(io_hdr->mx_sb_len, sense_len);
+ io_hdr->status = CHECK_CONDITION;
+ }
+ qemu_mutex_unlock(&pr_mgr->lock);
+ return ret;
+}
+
+/* UserCreatable "complete" hook: attempt the initial connection to the
+ * helper under the lock.  The return value of
+ * pr_manager_helper_initialize() is not examined; a failure is reported
+ * to the caller through errp.
+ */
+static void pr_manager_helper_complete(UserCreatable *uc, Error **errp)
+{
+ PRManagerHelper *pr_mgr = PR_MANAGER_HELPER(uc);
+
+ qemu_mutex_lock(&pr_mgr->lock);
+ pr_manager_helper_initialize(pr_mgr, errp);
+ qemu_mutex_unlock(&pr_mgr->lock);
+}
+
+/* Getter for the "path" property: returns a newly allocated copy of
+ * pr_mgr->path.  errp is not used.
+ */
+static char *get_path(Object *obj, Error **errp)
+{
+ PRManagerHelper *pr_mgr = PR_MANAGER_HELPER(obj);
+
+ return g_strdup(pr_mgr->path);
+}
+
+/* Setter for the "path" property: frees the previous value and stores a
+ * private copy of str.  errp is not used.
+ */
+static void set_path(Object *obj, const char *str, Error **errp)
+{
+ PRManagerHelper *pr_mgr = PR_MANAGER_HELPER(obj);
+
+ g_free(pr_mgr->path);
+ pr_mgr->path = g_strdup(str);
+}
+
+/* Instance finalizer: drop the helper connection, free the socket path
+ * allocated by set_path() and destroy the lock.
+ */
+static void pr_manager_helper_instance_finalize(Object *obj)
+{
+    PRManagerHelper *pr_mgr = PR_MANAGER_HELPER(obj);
+
+    object_unref(OBJECT(pr_mgr->ioc));
+    /* path is a g_strdup() copy owned by this object; g_free(NULL) is a
+     * no-op, so this is safe when the property was never set.
+     */
+    g_free(pr_mgr->path);
+    qemu_mutex_destroy(&pr_mgr->lock);
+}
+
+/* Instance initializer: set up the mutex used by this object. */
+static void pr_manager_helper_instance_init(Object *obj)
+{
+ PRManagerHelper *pr_mgr = PR_MANAGER_HELPER(obj);
+
+ qemu_mutex_init(&pr_mgr->lock);
+}
+
+/* Class initializer: register the string property "path" and install the
+ * UserCreatable complete hook and the PRManager run callback.
+ */
+static void pr_manager_helper_class_init(ObjectClass *klass,
+ void *class_data G_GNUC_UNUSED)
+{
+ PRManagerClass *prmgr_klass = PR_MANAGER_CLASS(klass);
+ UserCreatableClass *uc_klass = USER_CREATABLE_CLASS(klass);
+
+ object_class_property_add_str(klass, "path", get_path, set_path,
+ &error_abort);
+ uc_klass->complete = pr_manager_helper_complete;
+ prmgr_klass->run = pr_manager_helper_run;
+}
+
+/* Type description for "pr-manager-helper", a subtype of TYPE_PR_MANAGER. */
+static const TypeInfo pr_manager_helper_info = {
+ .parent = TYPE_PR_MANAGER,
+ .name = TYPE_PR_MANAGER_HELPER,
+ .instance_size = sizeof(PRManagerHelper),
+ .instance_init = pr_manager_helper_instance_init,
+ .instance_finalize = pr_manager_helper_instance_finalize,
+ .class_init = pr_manager_helper_class_init,
+};
+
+/* Register the pr-manager-helper type; invoked through type_init(). */
+static void pr_manager_helper_register_types(void)
+{
+ type_register_static(&pr_manager_helper_info);
+}
+
+type_init(pr_manager_helper_register_types);
diff --git a/scsi/pr-manager.c b/scsi/pr-manager.c
new file mode 100644
index 0000000000..87c45db5d4
--- /dev/null
+++ b/scsi/pr-manager.c
@@ -0,0 +1,109 @@
+/*
+ * Persistent reservation manager abstract class
+ *
+ * Copyright (c) 2017 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This code is licensed under the LGPL.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <scsi/sg.h>
+
+#include "qapi/error.h"
+#include "block/aio.h"
+#include "block/thread-pool.h"
+#include "scsi/pr-manager.h"
+#include "trace.h"
+
+/* Arguments for one pr_manager_worker() invocation.  Allocated in
+ * pr_manager_execute() and freed by the worker.
+ */
+typedef struct PRManagerData {
+ PRManager *pr_mgr; /* manager to run the command on */
+ struct sg_io_hdr *hdr; /* request header passed by the caller */
+ int fd; /* file descriptor passed by the caller */
+} PRManagerData;
+
+/* Thread pool worker submitted by pr_manager_execute().  Takes ownership
+ * of the PRManagerData in opaque (it is freed here), invokes the manager
+ * class's run callback and returns its result.  Drops the manager
+ * reference taken by pr_manager_execute().
+ */
+static int pr_manager_worker(void *opaque)
+{
+ PRManagerData *data = opaque;
+ PRManager *pr_mgr = data->pr_mgr;
+ PRManagerClass *pr_mgr_class =
+ PR_MANAGER_GET_CLASS(pr_mgr);
+ struct sg_io_hdr *hdr = data->hdr;
+ int fd = data->fd;
+ int r;
+
+ g_free(data);
+ trace_pr_manager_run(fd, hdr->cmdp[0], hdr->cmdp[1]);
+
+ /* The reference was taken in pr_manager_execute. */
+ r = pr_mgr_class->run(pr_mgr, fd, hdr);
+ object_unref(OBJECT(pr_mgr));
+ return r;
+}
+
+
+/* Queue the command described by hdr for execution by pr_mgr on the
+ * thread pool of ctx.  complete is passed, together with opaque, to
+ * thread_pool_submit_aio() as the completion callback.
+ *
+ * A reference on pr_mgr is taken here and released by pr_manager_worker().
+ * hdr is not copied; only the pointer is handed to the worker.
+ *
+ * Returns the BlockAIOCB produced by thread_pool_submit_aio().
+ */
+BlockAIOCB *pr_manager_execute(PRManager *pr_mgr,
+ AioContext *ctx, int fd,
+ struct sg_io_hdr *hdr,
+ BlockCompletionFunc *complete,
+ void *opaque)
+{
+ PRManagerData *data = g_new(PRManagerData, 1);
+ ThreadPool *pool = aio_get_thread_pool(ctx);
+
+ trace_pr_manager_execute(fd, hdr->cmdp[0], hdr->cmdp[1], opaque);
+ data->pr_mgr = pr_mgr;
+ data->fd = fd;
+ data->hdr = hdr;
+
+ /* The matching object_unref is in pr_manager_worker. */
+ object_ref(OBJECT(pr_mgr));
+ return thread_pool_submit_aio(pool, pr_manager_worker,
+ data, complete, opaque);
+}
+
+/* Abstract base type for persistent reservation managers; it implements
+ * the TYPE_USER_CREATABLE interface.
+ */
+static const TypeInfo pr_manager_info = {
+ .parent = TYPE_OBJECT,
+ .name = TYPE_PR_MANAGER,
+ .class_size = sizeof(PRManagerClass),
+ .abstract = true,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_USER_CREATABLE },
+ { }
+ }
+};
+
+/* Look up the object named id under the objects root and return it as a
+ * PRManager.  Returns NULL and sets errp if no such object exists or if it
+ * is not an instance of TYPE_PR_MANAGER.  This function does not take a
+ * reference on the returned object.
+ */
+PRManager *pr_manager_lookup(const char *id, Error **errp)
+{
+ Object *obj;
+ PRManager *pr_mgr;
+
+ obj = object_resolve_path_component(object_get_objects_root(), id);
+ if (!obj) {
+ error_setg(errp, "No persistent reservation manager with id '%s'", id);
+ return NULL;
+ }
+
+ pr_mgr = (PRManager *)
+ object_dynamic_cast(obj,
+ TYPE_PR_MANAGER);
+ if (!pr_mgr) {
+ error_setg(errp,
+ "Object with id '%s' is not a persistent reservation manager",
+ id);
+ return NULL;
+ }
+
+ return pr_mgr;
+}
+
+/* Register the abstract PRManager type; invoked through type_init(). */
+static void
+pr_manager_register_types(void)
+{
+ type_register_static(&pr_manager_info);
+}
+
+
+type_init(pr_manager_register_types);
diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
new file mode 100644
index 0000000000..d58184833f
--- /dev/null
+++ b/scsi/qemu-pr-helper.c
@@ -0,0 +1,1075 @@
+/*
+ * Privileged helper to handle persistent reservation commands for QEMU
+ *
+ * Copyright (C) 2017 Red Hat, Inc. <pbonzini@redhat.com>
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include <getopt.h>
+#include <sys/ioctl.h>
+#include <linux/dm-ioctl.h>
+#include <scsi/sg.h>
+
+#ifdef CONFIG_LIBCAP
+#include <cap-ng.h>
+#endif
+#include <pwd.h>
+#include <grp.h>
+
+#ifdef CONFIG_MPATH
+#include <libudev.h>
+#include <mpath_cmd.h>
+#include <mpath_persist.h>
+#endif
+
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "qemu/cutils.h"
+#include "qemu/main-loop.h"
+#include "qemu/error-report.h"
+#include "qemu/config-file.h"
+#include "qemu/bswap.h"
+#include "qemu/log.h"
+#include "qemu/systemd.h"
+#include "qapi/util.h"
+#include "qapi/qmp/qstring.h"
+#include "io/channel-socket.h"
+#include "trace/control.h"
+#include "qemu-version.h"
+
+#include "block/aio.h"
+#include "block/thread-pool.h"
+
+#include "scsi/constants.h"
+#include "scsi/utils.h"
+#include "pr-helper.h"
+
+/* Size in bytes of the fixed part of a PERSISTENT RESERVE OUT parameter
+ * list; multipath_pr_out parses anything beyond it as transport IDs.
+ */
+#define PR_OUT_FIXED_PARAM_SIZE 24
+
+/* Path of the listening unix socket (-k); NULL until set or defaulted. */
+static char *socket_path;
+/* Path of the PID file written by write_pidfile when daemonizing. */
+static char *pidfile;
+/* Life cycle: termsig_handler moves RUNNING to TERMINATE, and the main
+ * loop moves TERMINATE to TERMINATING once it closed the server socket.
+ */
+static enum { RUNNING, TERMINATE, TERMINATING } state;
+/* Listening channel and the id of its main loop watch. */
+static QIOChannelSocket *server_ioc;
+static int server_watch;
+/* Starts at 1 for the server socket (dropped by close_server_socket) and
+ * is raised while a command is being executed; main exits at zero.
+ */
+static int num_active_sockets = 1;
+/* Verbosity knobs derived from -q/-v in main; both are passed to the
+ * libmpathpersist calls, and verbose also gates error reporting.
+ */
+static int noisy;
+static int verbose;
+
+#ifdef CONFIG_LIBCAP
+/* Ids to switch to in drop_privileges; -1 keeps the current id. */
+static int uid = -1;
+static int gid = -1;
+#endif
+
+/*
+ * Print the command line help for program @name on standard output.
+ *
+ * The text shows the pidfile and socket paths currently in effect.
+ * socket_path only receives its default later in main (through
+ * compute_default_paths), so it can still be NULL when -h is parsed;
+ * passing NULL to a "%s" conversion is undefined behaviour, hence the
+ * default is computed locally in that case.
+ */
+static void usage(const char *name)
+{
+    char *default_socket = NULL;
+    const char *shown_socket = socket_path;
+
+    if (!shown_socket) {
+        /* Must stay in sync with compute_default_paths(). */
+        default_socket =
+            qemu_get_local_state_pathname("run/qemu-pr-helper.sock");
+        shown_socket = default_socket;
+    }
+
+    (printf) (
+"Usage: %s [OPTIONS] FILE\n"
+"Persistent Reservation helper program for QEMU\n"
+"\n"
+" -h, --help display this help and exit\n"
+" -V, --version output version information and exit\n"
+"\n"
+" -d, --daemon run in the background\n"
+" -f, --pidfile=PATH PID file when running as a daemon\n"
+" (default '%s')\n"
+" -k, --socket=PATH path to the unix socket\n"
+" (default '%s')\n"
+" -T, --trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
+" specify tracing options\n"
+#ifdef CONFIG_LIBCAP
+" -u, --user=USER user to drop privileges to\n"
+" -g, --group=GROUP group to drop privileges to\n"
+#endif
+"\n"
+QEMU_HELP_BOTTOM "\n"
+    , name, pidfile, shown_socket);
+
+    g_free(default_socket);
+}
+
+/* Print the program name @name followed by the QEMU version, author and
+ * copyright/warranty notice on standard output.
+ */
+static void version(const char *name)
+{
+ printf(
+"%s " QEMU_VERSION QEMU_PKGVERSION "\n"
+"Written by Paolo Bonzini.\n"
+"\n"
+QEMU_COPYRIGHT "\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
+ , name);
+}
+
+/*
+ * Create (or reuse) the file named by the global pidfile, lock it, and
+ * store the PID of the calling process in it followed by a newline.
+ * The descriptor is intentionally left open on success so that the lock
+ * is held for the lifetime of the process.  Any failure terminates the
+ * program with EXIT_FAILURE.
+ *
+ * The file is removed on failure only once the lock is held: if locking
+ * fails, the file belongs to another running instance and must be left
+ * in place.
+ */
+static void write_pidfile(void)
+{
+    int pidfd;
+    int len;
+    char pidstr[32];
+
+    pidfd = qemu_open(pidfile, O_CREAT|O_WRONLY, S_IRUSR|S_IWUSR);
+    if (pidfd == -1) {
+        error_report("Cannot open pid file, %s", strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+
+    if (lockf(pidfd, F_TLOCK, 0)) {
+        error_report("Cannot lock pid file, %s", strerror(errno));
+        /* Not ours: do not unlink somebody else's pid file. */
+        close(pidfd);
+        exit(EXIT_FAILURE);
+    }
+    if (ftruncate(pidfd, 0)) {
+        error_report("Failed to truncate pid file");
+        goto fail;
+    }
+
+    len = snprintf(pidstr, sizeof(pidstr), "%d\n", (int)getpid());
+    if (write(pidfd, pidstr, len) != len) {
+        error_report("Failed to write pid file");
+        goto fail;
+    }
+    return;
+
+fail:
+    unlink(pidfile);
+    close(pidfd);
+    exit(EXIT_FAILURE);
+}
+
+/* SG_IO support */
+
+/* Arguments and results of one SG_IO request run by do_sgio_worker. */
+typedef struct PRHelperSGIOData {
+ /* Device the ioctl is issued on. */
+ int fd;
+ /* Command block; PR_HELPER_CDB_SIZE bytes are submitted. */
+ const uint8_t *cdb;
+ /* Sense buffer; PR_HELPER_SENSE_SIZE bytes are cleared before use. */
+ uint8_t *sense;
+ /* Data buffer for the transfer, if any. */
+ uint8_t *buf;
+ /* In: transfer length.  Out: length minus the residual count on GOOD
+  * status, zero otherwise.
+  */
+ int sz; /* input/output */
+ /* Transfer direction, stored in sg_io_hdr.dxfer_direction. */
+ int dir;
+} PRHelperSGIOData;
+
+/*
+ * Thread pool worker: issue the SG_IO ioctl described by the
+ * PRHelperSGIOData passed in @opaque and return the SCSI status
+ * computed by sg_io_sense_from_errno().  On GOOD status the sz field is
+ * reduced by the residual count, otherwise it is reset to zero.
+ */
+static int do_sgio_worker(void *opaque)
+{
+    PRHelperSGIOData *job = opaque;
+    struct sg_io_hdr hdr;
+    SCSISense fallback_sense;
+    int scsi_status;
+    int rc;
+
+    memset(job->sense, 0, PR_HELPER_SENSE_SIZE);
+    memset(&hdr, 0, sizeof(hdr));
+
+    hdr.interface_id = 'S';
+    /* NOTE(review): the SG interface counts this in milliseconds;
+     * confirm that such a short timeout is intended.
+     */
+    hdr.timeout = 1;
+
+    hdr.cmdp = (uint8_t *)job->cdb;
+    hdr.cmd_len = PR_HELPER_CDB_SIZE;
+
+    hdr.sbp = job->sense;
+    hdr.mx_sb_len = PR_HELPER_SENSE_SIZE;
+
+    hdr.dxferp = (char *)job->buf;
+    hdr.dxfer_len = job->sz;
+    hdr.dxfer_direction = job->dir;
+
+    rc = ioctl(job->fd, SG_IO, &hdr);
+    scsi_status = sg_io_sense_from_errno(rc < 0 ? errno : 0, &hdr,
+                                         &fallback_sense);
+
+    job->sz = (scsi_status == GOOD) ? job->sz - hdr.resid : 0;
+
+    /* No sense data came back from the driver: synthesize it. */
+    if (scsi_status == CHECK_CONDITION &&
+        (hdr.driver_status & SG_ERR_DRIVER_SENSE) == 0) {
+        scsi_build_sense(job->sense, fallback_sense);
+    }
+
+    return scsi_status;
+}
+
+/*
+ * Run one SG_IO request on @fd through do_sgio_worker in the thread
+ * pool of the main AioContext.  @sz holds the transfer length on entry
+ * and the value left in the job's sz field by the worker on return.
+ * Returns the value produced by thread_pool_submit_co().
+ */
+static int do_sgio(int fd, const uint8_t *cdb, uint8_t *sense,
+                   uint8_t *buf, int *sz, int dir)
+{
+    ThreadPool *workers = aio_get_thread_pool(qemu_get_aio_context());
+    PRHelperSGIOData job = {
+        .dir = dir,
+        .sz = *sz,
+        .buf = buf,
+        .sense = sense,
+        .cdb = cdb,
+        .fd = fd,
+    };
+    int status = thread_pool_submit_co(workers, do_sgio_worker, &job);
+
+    *sz = job.sz;
+    return status;
+}
+
+/* Device mapper interface */
+
+#ifdef CONFIG_MPATH
+#define CONTROL_PATH "/dev/mapper/control"
+
+/* Buffer handed to the device mapper control ioctls: the dm_ioctl header
+ * immediately followed by the payload area.  dm_ioctl() sets
+ * dm.data_start to the offset of the data member, so the member order
+ * matters.
+ */
+typedef struct DMData {
+ struct dm_ioctl dm;
+ uint8_t data[1024];
+} DMData;
+
+static int control_fd;
+
+/*
+ * Issue device mapper ioctl @ioc on control_fd using a copy of @dm as
+ * the request header.  On success the header is copied back into @dm and
+ * a pointer to the payload area is returned; on failure NULL is returned
+ * and @dm is left untouched.  The payload lives in a function-static
+ * buffer that is overwritten by the next call.
+ */
+static void *dm_ioctl(int ioc, struct dm_ioctl *dm)
+{
+    static DMData request;
+
+    QEMU_BUILD_BUG_ON(sizeof(request.data) < sizeof(struct dm_target_spec));
+
+    memcpy(&request.dm, dm, sizeof(request.dm));
+    request.dm.data_start = offsetof(DMData, data);
+    request.dm.data_size = 1024;
+    request.dm.version[0] = DM_VERSION_MAJOR;
+    request.dm.version[1] = 0;
+    request.dm.version[2] = 0;
+
+    if (ioctl(control_fd, ioc, &request) < 0) {
+        return NULL;
+    }
+
+    memcpy(dm, &request.dm, sizeof(request.dm));
+    return request.data;
+}
+
+/*
+ * Like dm_ioctl(), but first stores the device number of the file
+ * behind @fd (st_rdev) in @dm.  Exits the program if fstat() fails.
+ */
+static void *dm_dev_ioctl(int fd, int ioc, struct dm_ioctl *dm)
+{
+    struct stat info;
+
+    if (fstat(fd, &info) < 0) {
+        perror("fstat");
+        exit(1);
+    }
+
+    dm->dev = info.st_rdev;
+    return dm_ioctl(ioc, dm);
+}
+
+/*
+ * Open the device mapper control node into control_fd and check that
+ * the major version it reports matches DM_VERSION_MAJOR.  Any failure
+ * terminates the program.
+ */
+static void dm_init(void)
+{
+    struct dm_ioctl probe = { 0 };
+
+    control_fd = open(CONTROL_PATH, O_RDWR);
+    if (control_fd < 0) {
+        perror("Cannot open " CONTROL_PATH);
+        exit(1);
+    }
+
+    if (dm_ioctl(DM_VERSION, &probe) == NULL) {
+        perror("ioctl");
+        exit(1);
+    }
+
+    if (probe.version[0] != DM_VERSION_MAJOR) {
+        fprintf(stderr, "Unsupported device mapper interface");
+        exit(1);
+    }
+}
+
+/* Variables required by libmultipath and libmpathpersist.
+ * The build-time check ensures that a buffer of PR_HELPER_DATA_SIZE bytes
+ * never exceeds MPATH_MAX_PARAM_LEN.
+ * NOTE(review): logsink is left zero-initialized; its meaning is defined
+ * by libmultipath - confirm the intended value there.
+ */
+QEMU_BUILD_BUG_ON(PR_HELPER_DATA_SIZE > MPATH_MAX_PARAM_LEN);
+unsigned mpath_mx_alloc_len = PR_HELPER_DATA_SIZE;
+int logsink;
+
+/* Create a udev context and hand it to mpath_lib_init(). */
+static void multipath_pr_init(void)
+{
+    static struct udev *udev_ctx;
+
+    udev_ctx = udev_new();
+    mpath_lib_init(udev_ctx);
+}
+
+/*
+ * Return nonzero if the DM_TABLE_STATUS query for the device behind @fd
+ * reports a target of type "multipath".  Returns 0 when the query fails
+ * with ENXIO; any other failure terminates the program.
+ */
+static int is_mpath(int fd)
+{
+    struct dm_ioctl query = { .flags = DM_NOFLUSH_FLAG };
+    struct dm_target_spec *target = dm_dev_ioctl(fd, DM_TABLE_STATUS, &query);
+
+    if (target) {
+        return strncmp(target->target_type, "multipath",
+                       DM_MAX_TYPE_NAME) == 0;
+    }
+
+    if (errno == ENXIO) {
+        return 0;
+    }
+
+    perror("ioctl");
+    exit(EXIT_FAILURE);
+}
+
+/*
+ * Translate the libmpathpersist return code @r into a SCSI status,
+ * filling @sense whenever the status is CHECK CONDITION.
+ *
+ * For the return codes that only carry a sense key, the exact sense is
+ * looked up by sending TEST UNIT READY to @fd.  If that probe succeeds
+ * the original failure must still be reported, so a generic sense is
+ * built instead of returning GOOD for a command that failed.
+ */
+static int mpath_reconstruct_sense(int fd, int r, uint8_t *sense)
+{
+    switch (r) {
+    case MPATH_PR_SUCCESS:
+        return GOOD;
+    case MPATH_PR_SENSE_NOT_READY:
+    case MPATH_PR_SENSE_MEDIUM_ERROR:
+    case MPATH_PR_SENSE_HARDWARE_ERROR:
+    case MPATH_PR_SENSE_ABORTED_COMMAND:
+        {
+            /* libmpathpersist ate the exact sense. Try to find it by
+             * issuing TEST UNIT READY.  do_sgio_worker always submits
+             * PR_HELPER_CDB_SIZE bytes, so the CDB buffer must be that
+             * large even though the command itself is shorter.
+             */
+            uint8_t cdb[PR_HELPER_CDB_SIZE] = { TEST_UNIT_READY };
+            int sz = 0;
+            int status = do_sgio(fd, cdb, sense, NULL, &sz, SG_DXFER_NONE);
+
+            if (status != GOOD) {
+                return status;
+            }
+            /* The condition is gone, but the PR command did fail. */
+            scsi_build_sense(sense, SENSE_CODE(LUN_COMM_FAILURE));
+            return CHECK_CONDITION;
+        }
+
+    case MPATH_PR_SENSE_UNIT_ATTENTION:
+        /* Congratulations libmpathpersist, you ruined the Unit Attention...
+         * Return a heavyweight one.
+         */
+        scsi_build_sense(sense, SENSE_CODE(SCSI_BUS_RESET));
+        return CHECK_CONDITION;
+    case MPATH_PR_SENSE_INVALID_OP:
+        /* Only one valid sense. */
+        scsi_build_sense(sense, SENSE_CODE(INVALID_OPCODE));
+        return CHECK_CONDITION;
+    case MPATH_PR_ILLEGAL_REQ:
+        /* Guess. */
+        scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM));
+        return CHECK_CONDITION;
+    case MPATH_PR_NO_SENSE:
+        scsi_build_sense(sense, SENSE_CODE(NO_SENSE));
+        return CHECK_CONDITION;
+
+    case MPATH_PR_RESERV_CONFLICT:
+        return RESERVATION_CONFLICT;
+
+    case MPATH_PR_OTHER:
+    default:
+        scsi_build_sense(sense, SENSE_CODE(LUN_COMM_FAILURE));
+        return CHECK_CONDITION;
+    }
+}
+
+/*
+ * Execute a PERSISTENT RESERVE IN command on the multipath device @fd
+ * through libmpathpersist and serialize the reply into @data, a buffer
+ * of @sz bytes.  Only the READ KEYS, READ RESERVATION and REPORT
+ * CAPABILITIES service actions are accepted; anything else yields CHECK
+ * CONDITION with INVALID_FIELD sense.  On success the part of @data past
+ * the serialized reply is zeroed.  Returns a SCSI status.
+ */
+static int multipath_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
+ uint8_t *data, int sz)
+{
+ /* The service action is taken from CDB byte 1 as is. */
+ int rq_servact = cdb[1];
+ struct prin_resp resp;
+ size_t written;
+ int r;
+
+ switch (rq_servact) {
+ case MPATH_PRIN_RKEY_SA:
+ case MPATH_PRIN_RRES_SA:
+ case MPATH_PRIN_RCAP_SA:
+ break;
+ case MPATH_PRIN_RFSTAT_SA:
+ /* Nobody implements it anyway, so bail out. */
+ default:
+ /* Cannot parse any other output. */
+ scsi_build_sense(sense, SENSE_CODE(INVALID_FIELD));
+ return CHECK_CONDITION;
+ }
+
+ r = mpath_persistent_reserve_in(fd, rq_servact, &resp, noisy, verbose);
+ if (r == MPATH_PR_SUCCESS) {
+ switch (rq_servact) {
+ case MPATH_PRIN_RKEY_SA:
+ case MPATH_PRIN_RRES_SA: {
+ /* Both service actions are read through the prin_readkeys view:
+  * big-endian generation and additional length, then the list.
+  */
+ struct prin_readdescr *out = &resp.prin_descriptor.prin_readkeys;
+ assert(sz >= 8);
+ /* Truncate the list to what fits in the caller's buffer. */
+ written = MIN(out->additional_length + 8, sz);
+ stl_be_p(&data[0], out->prgeneration);
+ stl_be_p(&data[4], out->additional_length);
+ memcpy(&data[8], out->key_list, written - 8);
+ break;
+ }
+ case MPATH_PRIN_RCAP_SA: {
+ /* Fixed 6-byte reply: length, two flag bytes, type mask. */
+ struct prin_capdescr *out = &resp.prin_descriptor.prin_readcap;
+ assert(sz >= 6);
+ written = 6;
+ stw_be_p(&data[0], out->length);
+ data[2] = out->flags[0];
+ data[3] = out->flags[1];
+ stw_be_p(&data[4], out->pr_type_mask);
+ break;
+ }
+ default:
+ /* Not reachable: other service actions were rejected above. */
+ scsi_build_sense(sense, SENSE_CODE(INVALID_OPCODE));
+ return CHECK_CONDITION;
+ }
+ assert(written <= sz);
+ /* Clear the tail so the whole buffer has defined contents. */
+ memset(data + written, 0, sz - written);
+ }
+
+ return mpath_reconstruct_sense(fd, r, sense);
+}
+
+/*
+ * Execute a PERSISTENT RESERVE OUT command on the multipath device @fd
+ * through libmpathpersist.
+ *
+ * @cdb supplies the service action (byte 1) and scope/type (byte 2).
+ * @param is the parameter list of @sz bytes; the caller guarantees that
+ * it holds at least the PR_OUT_FIXED_PARAM_SIZE fixed bytes.  Anything
+ * after the fixed part is parsed as a list of transport IDs and
+ * converted to the representation expected by libmpathpersist.
+ *
+ * Returns a SCSI status; @sense is filled for CHECK CONDITION.
+ */
+static int multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
+                            const uint8_t *param, int sz)
+{
+    int rq_servact = cdb[1];
+    int rq_scope = cdb[2] >> 4;
+    int rq_type = cdb[2] & 0xf;
+    struct prout_param_descriptor paramp;
+    char transportids[PR_HELPER_DATA_SIZE];
+    int r;
+
+    switch (rq_servact) {
+    case MPATH_PROUT_REG_SA:
+    case MPATH_PROUT_RES_SA:
+    case MPATH_PROUT_REL_SA:
+    case MPATH_PROUT_CLEAR_SA:
+    case MPATH_PROUT_PREE_SA:
+    case MPATH_PROUT_PREE_AB_SA:
+    case MPATH_PROUT_REG_IGN_SA:
+        break;
+    case MPATH_PROUT_REG_MOV_SA:
+        /* Not supported by struct prout_param_descriptor. */
+    default:
+        /* Cannot parse any other input. */
+        scsi_build_sense(sense, SENSE_CODE(INVALID_FIELD));
+        return CHECK_CONDITION;
+    }
+
+    /* Convert input data, especially transport IDs, to the structs
+     * used by libmpathpersist (which, of course, will immediately
+     * do the opposite).
+     */
+    memset(&paramp, 0, sizeof(paramp));
+    memcpy(&paramp.key, &param[0], 8);
+    memcpy(&paramp.sa_key, &param[8], 8);
+    /* Bytes 8..15 are the service action key copied just above; the
+     * flags byte of the parameter list is byte 20.
+     */
+    paramp.sa_flags = param[20];
+    if (sz > PR_OUT_FIXED_PARAM_SIZE) {
+        size_t transportid_len;
+        int i, j;
+        if (sz < PR_OUT_FIXED_PARAM_SIZE + 4) {
+            scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM_LEN));
+            return CHECK_CONDITION;
+        }
+        transportid_len = ldl_be_p(&param[24]) + PR_OUT_FIXED_PARAM_SIZE + 4;
+        if (transportid_len > sz) {
+            scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM));
+            return CHECK_CONDITION;
+        }
+        for (i = PR_OUT_FIXED_PARAM_SIZE + 4, j = 0; i < transportid_len; ) {
+            struct transportid *id = (struct transportid *) &transportids[j];
+            int len;
+
+            id->format_code = param[i] & 0xc0;
+            id->protocol_id = param[i] & 0x0f;
+            switch (param[i] & 0xcf) {
+            case 0:
+                /* FC transport. */
+                if (i + 24 > transportid_len) {
+                    goto illegal_req;
+                }
+                memcpy(id->n_port_name, &param[i + 8], 8);
+                j += offsetof(struct transportid, n_port_name[8]);
+                i += 24;
+                break;
+            case 5:
+            case 0x45:
+                /* iSCSI transport (protocol identifier 5, format 00b
+                 * or 01b).  Make sure the length field itself is inside
+                 * the list before reading it.
+                 */
+                if (i + 4 > transportid_len) {
+                    goto illegal_req;
+                }
+                len = lduw_be_p(&param[i + 2]);
+                if (len > 252 || (len & 3) || i + len + 4 > transportid_len) {
+                    /* For format code 00, the standard says the maximum is 223
+                     * plus the NUL terminator. For format code 01 there is no
+                     * maximum length, but libmpathpersist ignores the first
+                     * byte of id->iscsi_name so our maximum is 252.
+                     */
+                    goto illegal_req;
+                }
+                if (memchr(&param[i + 4], 0, len) == NULL) {
+                    goto illegal_req;
+                }
+                memcpy(id->iscsi_name, &param[i + 2], len + 2);
+                j += offsetof(struct transportid, iscsi_name[len + 2]);
+                i += len + 4;
+                break;
+            case 6:
+                /* SAS transport. */
+                if (i + 24 > transportid_len) {
+                    goto illegal_req;
+                }
+                memcpy(id->sas_address, &param[i + 4], 8);
+                j += offsetof(struct transportid, sas_address[8]);
+                i += 24;
+                break;
+            default:
+            illegal_req:
+                scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM));
+                return CHECK_CONDITION;
+            }
+
+            /* trnptid_list[] has a fixed number of slots. */
+            if (paramp.num_transportid >= MPATH_MX_TIDS) {
+                goto illegal_req;
+            }
+            paramp.trnptid_list[paramp.num_transportid++] = id;
+        }
+    }
+
+    r = mpath_persistent_reserve_out(fd, rq_servact, rq_scope, rq_type,
+                                     &paramp, noisy, verbose);
+    return mpath_reconstruct_sense(fd, r, sense);
+}
+#endif
+
+/*
+ * Execute the PERSISTENT RESERVE IN command @cdb on @fd, through
+ * libmpathpersist for multipath devices and through SG_IO otherwise.
+ *
+ * @resp_sz holds the size of @data on entry and the number of valid
+ * bytes on return.  It is zero whenever the returned SCSI status is not
+ * GOOD: do_sgio_worker already does this for the SG_IO path, and
+ * prh_write_response asserts that a failed command carries no payload,
+ * so the multipath path has to do the same.
+ */
+static int do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
+                    uint8_t *data, int *resp_sz)
+{
+#ifdef CONFIG_MPATH
+    if (is_mpath(fd)) {
+        /* multipath_pr_in fills the whole input buffer. */
+        int r = multipath_pr_in(fd, cdb, sense, data, *resp_sz);
+
+        if (r != GOOD) {
+            *resp_sz = 0;
+        }
+        return r;
+    }
+#endif
+
+    return do_sgio(fd, cdb, sense, data, resp_sz,
+                   SG_DXFER_FROM_DEV);
+}
+
+/*
+ * Execute the PERSISTENT RESERVE OUT command @cdb on @fd with the
+ * @sz-byte parameter list @param, through libmpathpersist for multipath
+ * devices and through SG_IO otherwise.  Returns a SCSI status.
+ */
+static int do_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
+                     const uint8_t *param, int sz)
+{
+    int xfer_len = sz;
+
+#ifdef CONFIG_MPATH
+    if (is_mpath(fd)) {
+        return multipath_pr_out(fd, cdb, sense, param, sz);
+    }
+#endif
+
+    /* The transferred length reported back by do_sgio is not needed. */
+    return do_sgio(fd, cdb, sense, (uint8_t *)param, &xfer_len,
+                   SG_DXFER_TO_DEV);
+}
+
+/* Client */
+
+/* Per-connection state, allocated in accept_client and freed at the end
+ * of prh_co_entry.
+ */
+typedef struct PRHelperClient {
+ /* Channel of the accepted connection. */
+ QIOChannelSocket *ioc;
+ /* Coroutine running prh_co_entry for this connection. */
+ Coroutine *co;
+ /* File descriptor received by prh_read and not yet handed over to a
+  * request; -1 when there is none.
+  */
+ int fd;
+ /* PR OUT parameter list on input, PR IN reply on output. */
+ uint8_t data[PR_HELPER_DATA_SIZE];
+} PRHelperClient;
+
+/* One request as filled in by prh_read_request. */
+typedef struct PRHelperRequest {
+ /* Device file descriptor taken over from PRHelperClient.fd. */
+ int fd;
+ /* Transfer length computed from the CDB, or 0 if the request was
+  * rejected before execution.
+  */
+ size_t sz;
+ /* The command block as read from the socket. */
+ uint8_t cdb[PR_HELPER_CDB_SIZE];
+} PRHelperRequest;
+
+/*
+ * Read exactly @sz bytes from the client's channel into @buf, yielding
+ * the coroutine whenever the channel would block.
+ *
+ * File descriptors that arrive with the data are handled here: the first
+ * one is stored in client->fd, and receiving another one while a
+ * descriptor is already stored is treated as an error.
+ *
+ * Returns 0 on success and a negative value on failure (end of file is
+ * reported as -1).  On failure any stored descriptor is closed.
+ */
+static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz,
+ Error **errp)
+{
+ int ret = 0;
+
+ while (sz > 0) {
+ int *fds = NULL;
+ size_t nfds = 0;
+ int i;
+ struct iovec iov;
+ ssize_t n_read;
+
+ iov.iov_base = buf;
+ iov.iov_len = sz;
+ n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1,
+ &fds, &nfds, errp);
+
+ /* Nothing available yet: wait for input and retry. */
+ if (n_read == QIO_CHANNEL_ERR_BLOCK) {
+ qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN);
+ continue;
+ }
+ /* Map a zero-length read to -1, keep other error codes as is. */
+ if (n_read <= 0) {
+ ret = n_read ? n_read : -1;
+ goto err;
+ }
+
+ /* Stash one file descriptor per request. */
+ if (nfds) {
+ bool too_many = false;
+ for (i = 0; i < nfds; i++) {
+ if (client->fd == -1) {
+ client->fd = fds[i];
+ } else {
+ /* Surplus descriptors are closed, not leaked. */
+ close(fds[i]);
+ too_many = true;
+ }
+ }
+ g_free(fds);
+ if (too_many) {
+ ret = -1;
+ goto err;
+ }
+ }
+
+ /* Advance past the bytes received so far. */
+ buf += n_read;
+ sz -= n_read;
+ }
+
+ return 0;
+
+err:
+ if (client->fd != -1) {
+ close(client->fd);
+ client->fd = -1;
+ }
+ return ret;
+}
+
+/*
+ * Read one request (CDB, passed file descriptor and, for PERSISTENT
+ * RESERVE OUT, the parameter list) from @client into @req.
+ *
+ * Returns:
+ *   > 0  the transfer length; @req->fd owns the device descriptor and
+ *        the caller must execute the command and close the descriptor;
+ *   0    nothing is to be executed; @resp already holds the complete
+ *        answer (result, zero size and, on error, the sense data) and
+ *        the descriptor has been closed here;
+ *   < 0  fatal protocol error, the connection should be dropped.
+ *
+ * @resp is cleared first, so that the zero-return paths never expose
+ * uninitialized result, size or sense bytes to prh_write_response.
+ */
+static int coroutine_fn prh_read_request(PRHelperClient *client,
+                                         PRHelperRequest *req,
+                                         PRHelperResponse *resp, Error **errp)
+{
+    uint32_t sz;
+
+    memset(resp, 0, sizeof(*resp));
+
+    if (prh_read(client, req->cdb, sizeof(req->cdb), NULL) < 0) {
+        return -1;
+    }
+
+    if (client->fd == -1) {
+        error_setg(errp, "No file descriptor in request.");
+        return -1;
+    }
+
+    if (req->cdb[0] != PERSISTENT_RESERVE_OUT &&
+        req->cdb[0] != PERSISTENT_RESERVE_IN) {
+        error_setg(errp, "Invalid CDB, closing socket.");
+        goto out_close;
+    }
+
+    sz = scsi_cdb_xfer(req->cdb);
+    if (sz > sizeof(client->data)) {
+        goto out_close;
+    }
+
+    if (req->cdb[0] == PERSISTENT_RESERVE_OUT) {
+        if (qio_channel_read_all(QIO_CHANNEL(client->ioc),
+                                 (char *)client->data, sz,
+                                 errp) < 0) {
+            goto out_close;
+        }
+        if ((fcntl(client->fd, F_GETFL) & O_ACCMODE) == O_RDONLY) {
+            /* Refuse to modify reservations through a read-only fd. */
+            scsi_build_sense(resp->sense, SENSE_CODE(INVALID_OPCODE));
+            resp->result = CHECK_CONDITION;
+            sz = 0;
+        } else if (sz < PR_OUT_FIXED_PARAM_SIZE) {
+            /* Illegal request, Parameter list length error. This isn't fatal;
+             * we have read the data, send an error without closing the socket.
+             */
+            scsi_build_sense(resp->sense, SENSE_CODE(INVALID_PARAM_LEN));
+            resp->result = CHECK_CONDITION;
+            sz = 0;
+        }
+    } else if (sz == 0) {
+        /* PERSISTENT RESERVE IN with a zero allocation length: there is
+         * no data to transfer, so answer GOOD with an empty payload
+         * without issuing the command.
+         */
+        resp->result = GOOD;
+    }
+
+    if (sz == 0) {
+        /* The caller skips execution, and with it the close of the
+         * descriptor, when 0 is returned; release it here.
+         */
+        resp->sz = 0;
+        close(client->fd);
+        client->fd = -1;
+    }
+
+    req->fd = client->fd;
+    req->sz = sz;
+    client->fd = -1;
+    return sz;
+
+out_close:
+    close(client->fd);
+    client->fd = -1;
+    return -1;
+}
+
+/*
+ * Send @resp to the client, followed by resp->sz bytes of client->data.
+ * Only a successful PERSISTENT RESERVE IN may carry a payload; this is
+ * asserted.  The result and sz fields of @resp are converted to big
+ * endian in place.  Returns 0 on success, a negative value on failure.
+ */
+static int coroutine_fn prh_write_response(PRHelperClient *client,
+                                           PRHelperRequest *req,
+                                           PRHelperResponse *resp, Error **errp)
+{
+    QIOChannel *chan = QIO_CHANNEL(client->ioc);
+    size_t payload_len;
+    ssize_t rc;
+
+    if (req->cdb[0] != PERSISTENT_RESERVE_IN || resp->result != GOOD) {
+        assert(resp->sz == 0);
+    } else {
+        assert(resp->sz <= req->sz && resp->sz <= sizeof(client->data));
+    }
+
+    /* Remember the host-endian length before the header is byteswapped. */
+    payload_len = resp->sz;
+    resp->result = cpu_to_be32(resp->result);
+    resp->sz = cpu_to_be32(resp->sz);
+
+    rc = qio_channel_write_all(chan, (char *) resp, sizeof(*resp), errp);
+    if (rc < 0) {
+        return rc;
+    }
+
+    rc = qio_channel_write_all(chan, (char *) client->data, payload_len,
+                               errp);
+    if (rc < 0) {
+        return rc;
+    }
+    return 0;
+}
+
+/*
+ * Coroutine body serving one client connection (@opaque is the
+ * PRHelperClient).  After a one-word feature negotiation it loops reading
+ * requests, executing them and writing responses for as long as the
+ * global state is RUNNING and no error occurs.  On exit the channel is
+ * detached and released and the client structure is freed.
+ */
+static void coroutine_fn prh_co_entry(void *opaque)
+{
+ PRHelperClient *client = opaque;
+ Error *local_err = NULL;
+ uint32_t flags;
+ int r;
+
+ /* Non-blocking I/O driven by the main AioContext. */
+ qio_channel_set_blocking(QIO_CHANNEL(client->ioc),
+ false, NULL);
+ qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc),
+ qemu_get_aio_context());
+
+ /* A very simple negotiation for future extensibility. No features
+ * are defined so write 0.
+ */
+ flags = cpu_to_be32(0);
+ r = qio_channel_write_all(QIO_CHANNEL(client->ioc),
+ (char *) &flags, sizeof(flags), NULL);
+ if (r < 0) {
+ goto out;
+ }
+
+ /* The peer must answer with no feature bits set either. */
+ r = qio_channel_read_all(QIO_CHANNEL(client->ioc),
+ (char *) &flags, sizeof(flags), NULL);
+ if (be32_to_cpu(flags) != 0 || r < 0) {
+ goto out;
+ }
+
+ while (atomic_read(&state) == RUNNING) {
+ PRHelperRequest req;
+ PRHelperResponse resp;
+ int sz;
+
+ sz = prh_read_request(client, &req, &resp, &local_err);
+ if (sz < 0) {
+ break;
+ }
+
+ /* sz == 0 means prh_read_request decided the outcome itself;
+  * only a positive size leads to command execution.
+  */
+ if (sz > 0) {
+ /* Keep main() from exiting while a command is in flight. */
+ num_active_sockets++;
+ if (req.cdb[0] == PERSISTENT_RESERVE_OUT) {
+ r = do_pr_out(req.fd, req.cdb, resp.sense,
+ client->data, sz);
+ resp.sz = 0;
+ } else {
+ resp.sz = sizeof(client->data);
+ r = do_pr_in(req.fd, req.cdb, resp.sense,
+ client->data, &resp.sz);
+ /* Never return more than the CDB asked for. */
+ resp.sz = MIN(resp.sz, sz);
+ }
+ num_active_sockets--;
+ close(req.fd);
+ if (r == -1) {
+ break;
+ }
+ resp.result = r;
+ }
+
+ if (prh_write_response(client, &req, &resp, &local_err) < 0) {
+ break;
+ }
+ }
+
+ /* Errors are only reported when some verbosity was requested. */
+ if (local_err) {
+ if (verbose == 0) {
+ error_free(local_err);
+ } else {
+ error_report_err(local_err);
+ }
+ }
+
+out:
+ qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc));
+ object_unref(OBJECT(client->ioc));
+ g_free(client);
+}
+
+/*
+ * Watch callback for the listening socket: accept one connection and
+ * start a coroutine running prh_co_entry for it.  Always returns TRUE so
+ * that the watch stays installed, even when accepting failed.
+ */
+static gboolean accept_client(QIOChannel *ioc, GIOCondition cond, gpointer opaque)
+{
+    QIOChannelSocket *conn =
+        qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc), NULL);
+
+    if (conn != NULL) {
+        PRHelperClient *client = g_new(PRHelperClient, 1);
+
+        client->fd = -1;
+        client->ioc = conn;
+        client->co = qemu_coroutine_create(prh_co_entry, client);
+        qemu_coroutine_enter(client->co);
+    }
+
+    return TRUE;
+}
+
+
+/*
+ * Check socket parameters compatibility when socket activation is used.
+ */
+/* Returns an error message if an option that conflicts with socket
+ * activation was given, or NULL if the options are acceptable.
+ */
+static const char *socket_activation_validate_opts(void)
+{
+    return socket_path == NULL
+        ? NULL
+        : "Unix socket can't be set when using socket activation";
+}
+
+/* Give socket_path its default value unless one was already chosen. */
+static void compute_default_paths(void)
+{
+    if (socket_path) {
+        return;
+    }
+    socket_path = qemu_get_local_state_pathname("run/qemu-pr-helper.sock");
+}
+
+/*
+ * Signal handler installed by main: request termination by moving the
+ * global state from RUNNING to TERMINATE (other states are left alone)
+ * and notify the main loop.
+ */
+static void termsig_handler(int signum)
+{
+    (void)signum;
+
+    atomic_cmpxchg(&state, RUNNING, TERMINATE);
+    qemu_notify_event();
+}
+
+/*
+ * Stop listening: remove the watch on the server channel, release the
+ * channel and give up the server socket's share of num_active_sockets.
+ */
+static void close_server_socket(void)
+{
+    assert(server_ioc);
+
+    /* The listening socket no longer counts as active. */
+    num_active_sockets--;
+
+    g_source_remove(server_watch);
+    server_watch = -1;
+
+    object_unref(OBJECT(server_ioc));
+}
+
+#ifdef CONFIG_LIBCAP
+/*
+ * Reduce the capabilities of the process to CAP_SYS_RAWIO (plus
+ * CAP_SYS_ADMIN when built with multipath support) and switch to the
+ * user and group selected on the command line; an id of -1 keeps the
+ * current one.  Returns 0 on success and -1 on failure.
+ */
+static int drop_privileges(void)
+{
+    int target_uid;
+    int target_gid;
+
+    /* clear all capabilities */
+    capng_clear(CAPNG_SELECT_BOTH);
+
+    if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED,
+                     CAP_SYS_RAWIO) < 0) {
+        return -1;
+    }
+
+#ifdef CONFIG_MPATH
+    /* For /dev/mapper/control ioctls */
+    if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED,
+                     CAP_SYS_ADMIN) < 0) {
+        return -1;
+    }
+#endif
+
+    /* Change user/group id, retaining the capabilities. Because file descriptors
+     * are passed via SCM_RIGHTS, we don't need supplementary groups (and in
+     * fact the helper can run as "nobody").
+     */
+    target_uid = (uid == -1) ? getuid() : uid;
+    target_gid = (gid == -1) ? getgid() : gid;
+
+    return capng_change_id(target_uid, target_gid,
+                           CAPNG_DROP_SUPP_GRP | CAPNG_CLEAR_BOUNDING)
+        ? -1 : 0;
+}
+#endif
+
+/*
+ * Entry point of the helper.
+ *
+ * Installs the termination signal handlers, parses the command line,
+ * initializes tracing and (when built with multipath support) the device
+ * mapper and libmpathpersist interfaces, then obtains the listening
+ * socket either by binding the unix socket path or from socket
+ * activation.  After optionally dropping privileges and daemonizing it
+ * runs the main loop until a termination signal has been received and
+ * no socket is active any more.
+ *
+ * -f/--pidfile takes the path of the PID file, as documented by
+ * usage(); it is therefore declared with a mandatory argument both in
+ * the short option string and in the long option table.
+ */
+int main(int argc, char **argv)
+{
+    const char *sopt = "hVk:f:dT:u:g:vq";
+    struct option lopt[] = {
+        { "help", no_argument, NULL, 'h' },
+        { "version", no_argument, NULL, 'V' },
+        { "socket", required_argument, NULL, 'k' },
+        { "pidfile", required_argument, NULL, 'f' },
+        { "daemon", no_argument, NULL, 'd' },
+        { "trace", required_argument, NULL, 'T' },
+        { "user", required_argument, NULL, 'u' },
+        { "group", required_argument, NULL, 'g' },
+        { "verbose", no_argument, NULL, 'v' },
+        { "quiet", no_argument, NULL, 'q' },
+        { NULL, 0, NULL, 0 }
+    };
+    int opt_ind = 0;
+    int loglevel = 1;
+    int quiet = 0;
+    int ch;
+    Error *local_err = NULL;
+    char *trace_file = NULL;
+    bool daemonize = false;
+    unsigned socket_activation;
+
+    struct sigaction sa_sigterm;
+    memset(&sa_sigterm, 0, sizeof(sa_sigterm));
+    sa_sigterm.sa_handler = termsig_handler;
+    sigaction(SIGTERM, &sa_sigterm, NULL);
+    sigaction(SIGINT, &sa_sigterm, NULL);
+    sigaction(SIGHUP, &sa_sigterm, NULL);
+
+    /* A client going away must not kill the helper. */
+    signal(SIGPIPE, SIG_IGN);
+
+    module_call_init(MODULE_INIT_TRACE);
+    module_call_init(MODULE_INIT_QOM);
+    qemu_add_opts(&qemu_trace_opts);
+    qemu_init_exec_dir(argv[0]);
+
+    pidfile = qemu_get_local_state_pathname("run/qemu-pr-helper.pid");
+
+    while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
+        switch (ch) {
+        case 'k':
+            socket_path = optarg;
+            if (socket_path[0] != '/') {
+                error_report("socket path must be absolute");
+                exit(EXIT_FAILURE);
+            }
+            break;
+        case 'f':
+            pidfile = optarg;
+            break;
+#ifdef CONFIG_LIBCAP
+        case 'u': {
+            /* Accept a user name first, then a numeric id. */
+            unsigned long res;
+            struct passwd *userinfo = getpwnam(optarg);
+            if (userinfo) {
+                uid = userinfo->pw_uid;
+            } else if (qemu_strtoul(optarg, NULL, 10, &res) == 0 &&
+                       (uid_t)res == res) {
+                uid = res;
+            } else {
+                error_report("invalid user '%s'", optarg);
+                exit(EXIT_FAILURE);
+            }
+            break;
+        }
+        case 'g': {
+            /* Accept a group name first, then a numeric id. */
+            unsigned long res;
+            struct group *groupinfo = getgrnam(optarg);
+            if (groupinfo) {
+                gid = groupinfo->gr_gid;
+            } else if (qemu_strtoul(optarg, NULL, 10, &res) == 0 &&
+                       (gid_t)res == res) {
+                gid = res;
+            } else {
+                error_report("invalid group '%s'", optarg);
+                exit(EXIT_FAILURE);
+            }
+            break;
+        }
+#else
+        case 'u':
+        case 'g':
+            error_report("-%c not supported by this %s", ch, argv[0]);
+            exit(1);
+#endif
+        case 'd':
+            daemonize = true;
+            break;
+        case 'q':
+            quiet = 1;
+            break;
+        case 'v':
+            ++loglevel;
+            break;
+        case 'T':
+            g_free(trace_file);
+            trace_file = trace_opt_parse(optarg);
+            break;
+        case 'V':
+            version(argv[0]);
+            exit(EXIT_SUCCESS);
+            break;
+        case 'h':
+            usage(argv[0]);
+            exit(EXIT_SUCCESS);
+            break;
+        case '?':
+            error_report("Try `%s --help' for more information.", argv[0]);
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    /* set verbosity */
+    noisy = !quiet && (loglevel >= 3);
+    verbose = quiet ? 0 : MIN(loglevel, 3);
+
+    if (!trace_init_backends()) {
+        exit(EXIT_FAILURE);
+    }
+    trace_init_file(trace_file);
+    qemu_set_log(LOG_TRACE);
+
+#ifdef CONFIG_MPATH
+    dm_init();
+    multipath_pr_init();
+#endif
+
+    socket_activation = check_socket_activation();
+    if (socket_activation == 0) {
+        SocketAddress saddr;
+        compute_default_paths();
+        saddr = (SocketAddress){
+            .type = SOCKET_ADDRESS_TYPE_UNIX,
+            .u.q_unix.path = g_strdup(socket_path)
+        };
+        server_ioc = qio_channel_socket_new();
+        if (qio_channel_socket_listen_sync(server_ioc, &saddr, &local_err) < 0) {
+            object_unref(OBJECT(server_ioc));
+            error_report_err(local_err);
+            return 1;
+        }
+        g_free(saddr.u.q_unix.path);
+    } else {
+        /* Using socket activation - check user didn't use -k etc. */
+        const char *err_msg = socket_activation_validate_opts();
+        if (err_msg != NULL) {
+            error_report("%s", err_msg);
+            exit(EXIT_FAILURE);
+        }
+
+        /* Can only listen on a single socket. */
+        if (socket_activation > 1) {
+            error_report("%s does not support socket activation with LISTEN_FDS > 1",
+                         argv[0]);
+            exit(EXIT_FAILURE);
+        }
+        server_ioc = qio_channel_socket_new_fd(FIRST_SOCKET_ACTIVATION_FD,
+                                               &local_err);
+        if (server_ioc == NULL) {
+            error_report("Failed to use socket activation: %s",
+                         error_get_pretty(local_err));
+            exit(EXIT_FAILURE);
+        }
+        socket_path = NULL;
+    }
+
+    if (qemu_init_main_loop(&local_err)) {
+        error_report_err(local_err);
+        exit(EXIT_FAILURE);
+    }
+
+    server_watch = qio_channel_add_watch(QIO_CHANNEL(server_ioc),
+                                         G_IO_IN,
+                                         accept_client,
+                                         NULL, NULL);
+
+#ifdef CONFIG_LIBCAP
+    if (drop_privileges() < 0) {
+        error_report("Failed to drop privileges: %s", strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+#endif
+
+    if (daemonize) {
+        if (daemon(0, 0) < 0) {
+            error_report("Failed to daemonize: %s", strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+        write_pidfile();
+    }
+
+    state = RUNNING;
+    do {
+        main_loop_wait(false);
+        /* First iteration after a termination signal: stop accepting
+         * connections, then keep looping until in-flight work is done.
+         */
+        if (state == TERMINATE) {
+            state = TERMINATING;
+            close_server_socket();
+        }
+    } while (num_active_sockets > 0);
+
+    exit(EXIT_SUCCESS);
+}
diff --git a/scsi/trace-events b/scsi/trace-events
new file mode 100644
index 0000000000..45f5b6e49b
--- /dev/null
+++ b/scsi/trace-events
@@ -0,0 +1,3 @@
+# scsi/pr-manager.c
+pr_manager_execute(int fd, int cmd, int sa, void *opaque) "fd=%d cmd=0x%02x service action=0x%02x opaque=%p"
+pr_manager_run(int fd, int cmd, int sa) "fd=%d cmd=0x%02x service action=0x%02x"
diff --git a/scsi/utils.c b/scsi/utils.c
index fab60bdf20..5684951b12 100644
--- a/scsi/utils.c
+++ b/scsi/utils.c
@@ -206,6 +206,11 @@ const struct SCSISense sense_code_OVERLAPPED_COMMANDS = {
.key = ABORTED_COMMAND, .asc = 0x4e, .ascq = 0x00
};
+/* Command aborted, Logical Unit Communication Failure (ASC 0x08, ASCQ 0x00) */
+const struct SCSISense sense_code_LUN_COMM_FAILURE = {
+ .key = ABORTED_COMMAND, .asc = 0x08, .ascq = 0x00
+};
+
/* Unit attention, Capacity data has changed */
const struct SCSISense sense_code_CAPACITY_CHANGED = {
.key = UNIT_ATTENTION, .asc = 0x2a, .ascq = 0x09
@@ -216,6 +221,11 @@ const struct SCSISense sense_code_RESET = {
.key = UNIT_ATTENTION, .asc = 0x29, .ascq = 0x00
};
+/* Unit attention, SCSI bus reset occurred (ASC 0x29, ASCQ 0x02) */
+const struct SCSISense sense_code_SCSI_BUS_RESET = {
+ .key = UNIT_ATTENTION, .asc = 0x29, .ascq = 0x02
+};
+
/* Unit attention, No medium */
const struct SCSISense sense_code_UNIT_ATTENTION_NO_MEDIUM = {
.key = UNIT_ATTENTION, .asc = 0x3a, .ascq = 0x00
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 33449790e5..4300de66e2 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -691,6 +691,9 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
CPUARMState *env = &cpu->env;
int pagebits;
Error *local_err = NULL;
+#ifndef CONFIG_USER_ONLY
+ AddressSpace *as;
+#endif
cpu_exec_realizefn(cs, &local_err);
if (local_err != NULL) {
@@ -881,24 +884,21 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
#ifndef CONFIG_USER_ONLY
if (cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY)) {
- AddressSpace *as;
+ as = g_new0(AddressSpace, 1);
cs->num_ases = 2;
if (!cpu->secure_memory) {
cpu->secure_memory = cs->memory;
}
- as = address_space_init_shareable(cpu->secure_memory,
- "cpu-secure-memory");
+ address_space_init(as, cpu->secure_memory, "cpu-secure-memory");
cpu_address_space_init(cs, as, ARMASIdx_S);
} else {
cs->num_ases = 1;
}
-
- cpu_address_space_init(cs,
- address_space_init_shareable(cs->memory,
- "cpu-memory"),
- ARMASIdx_NS);
+ as = g_new0(AddressSpace, 1);
+ address_space_init(as, cs->memory, "cpu-memory");
+ cpu_address_space_init(cs, as, ARMASIdx_NS);
#endif
qemu_init_vcpu(cs);
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 0aa28fc775..98732cd65f 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3738,10 +3738,11 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
#ifndef CONFIG_USER_ONLY
if (tcg_enabled()) {
- AddressSpace *as_normal = address_space_init_shareable(cs->memory,
- "cpu-memory");
+ AddressSpace *as_normal = g_new0(AddressSpace, 1);
AddressSpace *as_smm = g_new(AddressSpace, 1);
+ address_space_init(as_normal, cs->memory, "cpu-memory");
+
cpu->cpu_as_mem = g_new(MemoryRegion, 1);
cpu->cpu_as_root = g_new(MemoryRegion, 1);
diff --git a/trace-events b/trace-events
index 1f50f56d9d..1d2eb5d3e4 100644
--- a/trace-events
+++ b/trace-events
@@ -64,6 +64,9 @@ memory_region_tb_read(int cpu_index, uint64_t addr, uint64_t value, unsigned siz
memory_region_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
memory_region_ram_device_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
memory_region_ram_device_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
+flatview_new(FlatView *view, MemoryRegion *root) "%p (root %p)"
+flatview_destroy(FlatView *view, MemoryRegion *root) "%p (root %p)"
+flatview_destroy_rcu(FlatView *view, MemoryRegion *root) "%p (root %p)"
### Guest events, keep at bottom
diff --git a/vl.c b/vl.c
index 88ae727486..4fd01fda91 100644
--- a/vl.c
+++ b/vl.c
@@ -2889,7 +2889,8 @@ static int machine_set_property(void *opaque,
*/
static bool object_create_initial(const char *type)
{
- if (g_str_equal(type, "rng-egd")) {
+ if (g_str_equal(type, "rng-egd") ||
+ g_str_has_prefix(type, "pr-manager-")) {
return false;
}