109 files changed, 3242 insertions, 1212 deletions
diff --git a/.gitignore b/.gitignore
index 55a001e3b8..09c2363acf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,6 +50,7 @@
 /qemu-version.h.tmp
 /module_block.h
 /vscclient
+/vhost-user-scsi
 /fsdev/virtfs-proxy-helper
 *.[1-9]
 *.a
@@ -99,14 +100,14 @@
 /pc-bios/optionrom/kvmvapic.img
 /pc-bios/s390-ccw/s390-ccw.elf
 /pc-bios/s390-ccw/s390-ccw.img
-/docs/qemu-ga-qapi.texi
-/docs/qemu-ga-ref.html
-/docs/qemu-ga-ref.info*
-/docs/qemu-ga-ref.txt
-/docs/qemu-qmp-qapi.texi
-/docs/qemu-qmp-ref.html
-/docs/qemu-qmp-ref.info*
-/docs/qemu-qmp-ref.txt
+/docs/interop/qemu-ga-qapi.texi
+/docs/interop/qemu-ga-ref.html
+/docs/interop/qemu-ga-ref.info*
+/docs/interop/qemu-ga-ref.txt
+/docs/interop/qemu-qmp-qapi.texi
+/docs/interop/qemu-qmp-ref.html
+/docs/interop/qemu-qmp-ref.info*
+/docs/interop/qemu-qmp-ref.txt
 /docs/version.texi
 *.tps
 .stgit-*
diff --git a/Makefile b/Makefile
index c830d7a46c..16a0430c6c 100644
--- a/Makefile
+++ b/Makefile
@@ -207,8 +207,8 @@ HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF)
 
 ifdef BUILD_DOCS
 DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
-DOCS+=docs/qemu-qmp-ref.html docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.7
-DOCS+=docs/qemu-ga-ref.html docs/qemu-ga-ref.txt docs/qemu-ga-ref.7
+DOCS+=docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7
+DOCS+=docs/interop/qemu-ga-ref.html docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7
 ifdef CONFIG_VIRTFS
 DOCS+=fsdev/virtfs-proxy-helper.1
 endif
@@ -269,6 +269,7 @@ dummy := $(call unnest-vars,, \
                 ivshmem-client-obj-y \
                 ivshmem-server-obj-y \
                 libvhost-user-obj-y \
+                vhost-user-scsi-obj-y \
                 qga-vss-dll-obj-y \
                 block-obj-y \
                 block-obj-m \
@@ -473,6 +474,8 @@ ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) $(COMMON_LDADDS)
 	$(call LINK, $^)
 ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) $(COMMON_LDADDS)
 	$(call LINK, $^)
+vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y)
+	$(call LINK, $^)
 
 module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak
 	$(call quiet-command,$(PYTHON) $< $@ \
@@ -519,11 +522,12 @@ distclean: clean
 	rm -f qemu-doc.vr qemu-doc.txt
 	rm -f config.log
 	rm -f linux-headers/asm
-	rm -f docs/qemu-ga-qapi.texi docs/qemu-qmp-qapi.texi docs/version.texi
-	rm -f docs/qemu-qmp-ref.7 docs/qemu-ga-ref.7
-	rm -f docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
-	rm -f docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
-	rm -f docs/qemu-qmp-ref.html docs/qemu-ga-ref.html
+	rm -f docs/version.texi
+	rm -f docs/interop/qemu-ga-qapi.texi docs/interop/qemu-qmp-qapi.texi
+	rm -f docs/interop/qemu-qmp-ref.7 docs/interop/qemu-ga-ref.7
+	rm -f docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt
+	rm -f docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf
+	rm -f docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html
 	for d in $(TARGET_DIRS); do \
 	rm -rf $$d || exit 1 ; \
         done
@@ -562,13 +566,13 @@ install-doc: $(DOCS)
 	$(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)"
 	$(INSTALL_DATA) qemu-doc.html "$(DESTDIR)$(qemu_docdir)"
 	$(INSTALL_DATA) qemu-doc.txt "$(DESTDIR)$(qemu_docdir)"
-	$(INSTALL_DATA) docs/qemu-qmp-ref.html "$(DESTDIR)$(qemu_docdir)"
-	$(INSTALL_DATA) docs/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)"
+	$(INSTALL_DATA) docs/interop/qemu-qmp-ref.html "$(DESTDIR)$(qemu_docdir)"
+	$(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)"
 ifdef CONFIG_POSIX
 	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1"
 	$(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1"
 	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7"
-	$(INSTALL_DATA) docs/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7"
+	$(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7"
 ifneq ($(TOOLS),)
 	$(INSTALL_DATA) qemu-img.1 "$(DESTDIR)$(mandir)/man1"
 	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8"
@@ -576,9 +580,9 @@ ifneq ($(TOOLS),)
 endif
 ifneq (,$(findstring qemu-ga,$(TOOLS)))
 	$(INSTALL_DATA) qemu-ga.8 "$(DESTDIR)$(mandir)/man8"
-	$(INSTALL_DATA) docs/qemu-ga-ref.html "$(DESTDIR)$(qemu_docdir)"
-	$(INSTALL_DATA) docs/qemu-ga-ref.txt "$(DESTDIR)$(qemu_docdir)"
-	$(INSTALL_DATA) docs/qemu-ga-ref.7 "$(DESTDIR)$(mandir)/man7"
+	$(INSTALL_DATA) docs/interop/qemu-ga-ref.html "$(DESTDIR)$(qemu_docdir)"
+	$(INSTALL_DATA) docs/interop/qemu-ga-ref.txt "$(DESTDIR)$(qemu_docdir)"
+	$(INSTALL_DATA) docs/interop/qemu-ga-ref.7 "$(DESTDIR)$(mandir)/man7"
 endif
 endif
 ifdef CONFIG_VIRTFS
@@ -666,28 +670,27 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
 
 # documentation
 MAKEINFO=makeinfo
-MAKEINFOFLAGS=--no-split --number-sections -I docs
-TEXIFLAG=$(if $(V),,--quiet)
+MAKEINFOINCLUDES= -I docs -I $(<D) -I $(@D)
+MAKEINFOFLAGS=--no-split --number-sections $(MAKEINFOINCLUDES)
+TEXI2PODFLAGS=$(MAKEINFOINCLUDES) "-DVERSION=$(VERSION)"
+TEXI2PDFFLAGS=$(if $(V),,--quiet) -I $(SRC_PATH) $(MAKEINFOINCLUDES)
 
 docs/version.texi: $(SRC_PATH)/VERSION
 	$(call quiet-command,echo "@set VERSION $(VERSION)" > $@,"GEN","$@")
 
-%.html: %.texi
+%.html: %.texi docs/version.texi
 	$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
 	--html $< -o $@,"GEN","$@")
 
-%.info: %.texi
+%.info: %.texi docs/version.texi
 	$(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@")
 
-%.txt: %.texi
+%.txt: %.texi docs/version.texi
 	$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
 	--plaintext $< -o $@,"GEN","$@")
 
-%.pdf: %.texi
-	$(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I docs $< -o $@,"GEN","$@")
-
-docs/qemu-ga-ref.html docs/qemu-ga-ref.info docs/qemu-ga-ref.txt docs/qemu-ga-ref.pdf docs/qemu-ga-ref.7.pod: docs/version.texi
-docs/qemu-qmp-ref.html docs/qemu-qmp-ref.info docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.pdf docs/qemu-qmp-ref.pod: docs/version.texi
+%.pdf: %.texi docs/version.texi
+	$(call quiet-command,texi2pdf $(TEXI2PDFFLAGS) $< -o $@,"GEN","$@")
 
 qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")
@@ -701,12 +704,12 @@ qemu-monitor-info.texi: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxt
 qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")
 
-docs/qemu-qmp-qapi.texi docs/qemu-ga-qapi.texi: $(SRC_PATH)/scripts/qapi2texi.py $(qapi-py)
+docs/interop/qemu-qmp-qapi.texi docs/interop/qemu-ga-qapi.texi: $(SRC_PATH)/scripts/qapi2texi.py $(qapi-py)
 
-docs/qemu-qmp-qapi.texi: $(qapi-modules)
+docs/interop/qemu-qmp-qapi.texi: $(qapi-modules)
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
 
-docs/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json
+docs/interop/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
 
 qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi
@@ -716,21 +719,25 @@ fsdev/virtfs-proxy-helper.1: fsdev/virtfs-proxy-helper.texi
 qemu-nbd.8: qemu-nbd.texi qemu-option-trace.texi
 qemu-ga.8: qemu-ga.texi
 
-html: qemu-doc.html docs/qemu-qmp-ref.html docs/qemu-ga-ref.html
-info: qemu-doc.info docs/qemu-qmp-ref.info docs/qemu-ga-ref.info
-pdf: qemu-doc.pdf docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
-txt: qemu-doc.txt docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
+html: qemu-doc.html docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html
+info: qemu-doc.info docs/interop/qemu-qmp-ref.info docs/interop/qemu-ga-ref.info
+pdf: qemu-doc.pdf docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf
+txt: qemu-doc.txt docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt
 
 qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \
 	qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \
 	qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \
 	qemu-monitor-info.texi
 
-docs/qemu-ga-ref.dvi docs/qemu-ga-ref.html docs/qemu-ga-ref.info docs/qemu-ga-ref.pdf docs/qemu-ga-ref.txt docs/qemu-ga-ref.7: \
-docs/qemu-ga-ref.texi docs/qemu-ga-qapi.texi
+docs/interop/qemu-ga-ref.dvi docs/interop/qemu-ga-ref.html \
+    docs/interop/qemu-ga-ref.info docs/interop/qemu-ga-ref.pdf \
+    docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7: \
+	docs/interop/qemu-ga-ref.texi docs/interop/qemu-ga-qapi.texi
 
-docs/qemu-qmp-ref.dvi docs/qemu-qmp-ref.html docs/qemu-qmp-ref.info docs/qemu-qmp-ref.pdf docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.7: \
-docs/qemu-qmp-ref.texi docs/qemu-qmp-qapi.texi
+docs/interop/qemu-qmp-ref.dvi docs/interop/qemu-qmp-ref.html \
+    docs/interop/qemu-qmp-ref.info docs/interop/qemu-qmp-ref.pdf \
+    docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7: \
+	docs/interop/qemu-qmp-ref.texi docs/interop/qemu-qmp-qapi.texi
 
 
 ifdef CONFIG_WIN32
@@ -791,9 +798,11 @@ endif # CONFIG_WIN
 
 # Add a dependency on the generated files, so that they are always
 # rebuilt before other object files
+ifneq ($(wildcard config-host.mak),)
 ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
 Makefile: $(GENERATED_FILES)
 endif
+endif
 
 .SECONDARY: $(TRACE_HEADERS) $(TRACE_HEADERS:%=%-timestamp) \
 	$(TRACE_SOURCES) $(TRACE_SOURCES:%=%-timestamp) \
diff --git a/Makefile.objs b/Makefile.objs
index 0575802440..b2e6322ef0 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -52,7 +52,6 @@ common-obj-y += migration/
 
 common-obj-y += audio/
 common-obj-y += hw/
-common-obj-y += accel.o
 
 common-obj-y += replay/
 
@@ -111,6 +110,10 @@ qga-vss-dll-obj-y = qga/
 ivshmem-client-obj-y = contrib/ivshmem-client/
 ivshmem-server-obj-y = contrib/ivshmem-server/
 libvhost-user-obj-y = contrib/libvhost-user/
+vhost-user-scsi.o-cflags := $(LIBISCSI_CFLAGS)
+vhost-user-scsi.o-libs := $(LIBISCSI_LIBS)
+vhost-user-scsi-obj-y = contrib/vhost-user-scsi/
+vhost-user-scsi-obj-y += contrib/libvhost-user/libvhost-user.o
 
 ######################################################################
 trace-events-subdirs =
@@ -163,6 +166,8 @@ trace-events-subdirs += target/ppc
 trace-events-subdirs += qom
 trace-events-subdirs += linux-user
 trace-events-subdirs += qapi
+trace-events-subdirs += accel/tcg
+trace-events-subdirs += accel/kvm
 
 trace-events-files = $(SRC_PATH)/trace-events $(trace-events-subdirs:%=$(SRC_PATH)/%/trace-events)
 
diff --git a/Makefile.target b/Makefile.target
index ce8dfe44a8..0066579090 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -88,20 +88,17 @@ all: $(PROGS) stap
 
 #########################################################
 # cpu emulator library
-obj-y = exec.o translate-all.o cpu-exec.o
-obj-y += translate-common.o
-obj-y += cpu-exec-common.o
+obj-y += exec.o
+obj-y += accel/
 obj-y += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
-obj-$(CONFIG_TCG_INTERPRETER) += tci.o
-obj-y += tcg/tcg-common.o
+obj-y += tcg/tcg-common.o tcg/tcg-runtime.o
+obj-$(CONFIG_TCG_INTERPRETER) += tcg/tci.o
 obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
 obj-y += fpu/softfloat.o
 obj-y += target/$(TARGET_BASE_ARCH)/
 obj-y += disas.o
-obj-y += tcg-runtime.o
 obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
 obj-$(call lnot,$(CONFIG_HAX)) += hax-stub.o
-obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
 
 obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decContext.o
 obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decNumber.o
@@ -142,8 +139,7 @@ ifdef CONFIG_SOFTMMU
 obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
 obj-y += qtest.o bootdevice.o
 obj-y += hw/
-obj-$(CONFIG_KVM) += kvm-all.o
-obj-y += memory.o cputlb.o
+obj-y += memory.o
 obj-y += memory_mapping.o
 obj-y += dump.o
 obj-y += migration/ram.o
diff --git a/accel/Makefile.objs b/accel/Makefile.objs
new file mode 100644
index 0000000000..cd5702f347
--- /dev/null
+++ b/accel/Makefile.objs
@@ -0,0 +1,4 @@
+obj-$(CONFIG_SOFTMMU) += accel.o
+obj-y += kvm/
+obj-y += tcg/
+obj-y += stubs/
diff --git a/accel.c b/accel/accel.c
index 664bb88422..7c079a5611 100644
--- a/accel.c
+++ b/accel/accel.c
@@ -34,15 +34,6 @@
 #include "hw/xen/xen.h"
 #include "qom/object.h"
 
-int tcg_tb_size;
-static bool tcg_allowed = true;
-
-static int tcg_init(MachineState *ms)
-{
-    tcg_exec_init(tcg_tb_size * 1024 * 1024);
-    return 0;
-}
-
 static const TypeInfo accel_type = {
     .name = TYPE_ACCEL,
     .parent = TYPE_OBJECT,
@@ -129,27 +120,9 @@ void configure_accelerator(MachineState *ms)
     }
 }
 
-
-static void tcg_accel_class_init(ObjectClass *oc, void *data)
-{
-    AccelClass *ac = ACCEL_CLASS(oc);
-    ac->name = "tcg";
-    ac->init_machine = tcg_init;
-    ac->allowed = &tcg_allowed;
-}
-
-#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg")
-
-static const TypeInfo tcg_accel_type = {
-    .name = TYPE_TCG_ACCEL,
-    .parent = TYPE_ACCEL,
-    .class_init = tcg_accel_class_init,
-};
-
 static void register_accel_types(void)
 {
     type_register_static(&accel_type);
-    type_register_static(&tcg_accel_type);
 }
 
 type_init(register_accel_types);
diff --git a/accel/kvm/Makefile.objs b/accel/kvm/Makefile.objs
new file mode 100644
index 0000000000..85351e7de7
--- /dev/null
+++ b/accel/kvm/Makefile.objs
@@ -0,0 +1 @@
+obj-$(CONFIG_KVM) += kvm-all.o
diff --git a/kvm-all.c b/accel/kvm/kvm-all.c
index ab8262f672..75feffa504 100644
--- a/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -36,7 +36,7 @@
 #include "exec/ram_addr.h"
 #include "exec/address-spaces.h"
 #include "qemu/event_notifier.h"
-#include "trace-root.h"
+#include "trace.h"
 #include "hw/irq.h"
 
 #include "hw/boards.h"
@@ -1977,6 +1977,7 @@ int kvm_cpu_exec(CPUState *cpu)
     }
 
     qemu_mutex_unlock_iothread();
+    cpu_exec_start(cpu);
 
     do {
         MemTxAttrs attrs;
@@ -2106,6 +2107,7 @@ int kvm_cpu_exec(CPUState *cpu)
         }
     } while (ret == 0);
 
+    cpu_exec_end(cpu);
     qemu_mutex_lock_iothread();
 
     if (ret < 0) {
diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
new file mode 100644
index 0000000000..f89ba5578d
--- /dev/null
+++ b/accel/kvm/trace-events
@@ -0,0 +1,15 @@
+# Trace events for debugging and performance instrumentation
+
+# kvm-all.c
+kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
+kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"
+kvm_vcpu_ioctl(int cpu_index, int type, void *arg) "cpu_index %d, type 0x%x, arg %p"
+kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d"
+kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
+kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
+kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
+kvm_irqchip_commit_routes(void) ""
+kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
+kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
+kvm_irqchip_release_virq(int virq) "virq %d"
+
diff --git a/accel/stubs/Makefile.objs b/accel/stubs/Makefile.objs
new file mode 100644
index 0000000000..bd5794f222
--- /dev/null
+++ b/accel/stubs/Makefile.objs
@@ -0,0 +1 @@
+obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
diff --git a/kvm-stub.c b/accel/stubs/kvm-stub.c
index ef0c7346af..ef0c7346af 100644
--- a/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs
new file mode 100644
index 0000000000..f173cd5397
--- /dev/null
+++ b/accel/tcg/Makefile.objs
@@ -0,0 +1,3 @@
+obj-$(CONFIG_SOFTMMU) += tcg-all.o
+obj-$(CONFIG_SOFTMMU) += cputlb.o
+obj-y += cpu-exec.o cpu-exec-common.o translate-all.o translate-common.o
diff --git a/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c
index e81da276bb..e81da276bb 100644
--- a/cpu-exec-common.c
+++ b/accel/tcg/cpu-exec-common.c
diff --git a/cpu-exec.c b/accel/tcg/cpu-exec.c
index 5b181c18ed..3581618bc0 100644
--- a/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -18,7 +18,7 @@
  */
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "trace-root.h"
+#include "trace.h"
 #include "disas/disas.h"
 #include "exec/exec-all.h"
 #include "tcg.h"
diff --git a/cputlb.c b/accel/tcg/cputlb.c
index 743776ae19..743776ae19 100644
--- a/cputlb.c
+++ b/accel/tcg/cputlb.c
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
new file mode 100644
index 0000000000..dba99315e3
--- /dev/null
+++ b/accel/tcg/tcg-all.c
@@ -0,0 +1,61 @@
+/*
+ * QEMU System Emulator, accelerator interfaces
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/accel.h"
+#include "sysemu/sysemu.h"
+#include "qom/object.h"
+
+int tcg_tb_size;
+static bool tcg_allowed = true;
+
+static int tcg_init(MachineState *ms)
+{
+    tcg_exec_init(tcg_tb_size * 1024 * 1024);
+    return 0;
+}
+
+static void tcg_accel_class_init(ObjectClass *oc, void *data)
+{
+    AccelClass *ac = ACCEL_CLASS(oc);
+    ac->name = "tcg";
+    ac->init_machine = tcg_init;
+    ac->allowed = &tcg_allowed;
+}
+
+#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg")
+
+static const TypeInfo tcg_accel_type = {
+    .name = TYPE_TCG_ACCEL,
+    .parent = TYPE_ACCEL,
+    .class_init = tcg_accel_class_init,
+};
+
+static void register_accel_types(void)
+{
+    type_register_static(&tcg_accel_type);
+}
+
+type_init(register_accel_types);
diff --git a/accel/tcg/trace-events b/accel/tcg/trace-events
new file mode 100644
index 0000000000..2de8359670
--- /dev/null
+++ b/accel/tcg/trace-events
@@ -0,0 +1,10 @@
+# Trace events for debugging and performance instrumentation
+
+# TCG related tracing (mostly disabled by default)
+# cpu-exec.c
+disable exec_tb(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
+disable exec_tb_nocache(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
+disable exec_tb_exit(void *last_tb, unsigned int flags) "tb:%p flags=%x"
+
+# translate-all.c
+translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p"
diff --git a/translate-all.c b/accel/tcg/translate-all.c
index d4f364d6ff..f6ad46b613 100644
--- a/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -25,7 +25,7 @@
 #include "qemu-common.h"
 #define NO_CPU_IO_DEFS
 #include "cpu.h"
-#include "trace-root.h"
+#include "trace.h"
 #include "disas/disas.h"
 #include "exec/exec-all.h"
 #include "tcg.h"
diff --git a/translate-all.h b/accel/tcg/translate-all.h
index ba8e4d63c4..ba8e4d63c4 100644
--- a/translate-all.h
+++ b/accel/tcg/translate-all.h
diff --git a/translate-common.c b/accel/tcg/translate-common.c
index 40fe5a19bb..40fe5a19bb 100644
--- a/translate-common.c
+++ b/accel/tcg/translate-common.c
diff --git a/block.c b/block.c
index fa1d06d846..694396281b 100644
--- a/block.c
+++ b/block.c
@@ -320,6 +320,8 @@ BlockDriverState *bdrv_new(void)
         QLIST_INIT(&bs->op_blockers[i]);
     }
     notifier_with_return_list_init(&bs->before_write_notifiers);
+    qemu_co_mutex_init(&bs->reqs_lock);
+    qemu_mutex_init(&bs->dirty_bitmap_mutex);
     bs->refcnt = 1;
     bs->aio_context = qemu_get_aio_context();
 
@@ -1300,7 +1302,9 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
         goto fail_opts;
     }
 
-    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
+    /* bdrv_new() and bdrv_close() make it so */
+    assert(atomic_read(&bs->copy_on_read) == 0);
+
     if (bs->open_flags & BDRV_O_COPY_ON_READ) {
         if (!bs->read_only) {
             bdrv_enable_copy_on_read(bs);
@@ -3063,7 +3067,7 @@ static void bdrv_close(BlockDriverState *bs)
 
         g_free(bs->opaque);
         bs->opaque = NULL;
-        bs->copy_on_read = 0;
+        atomic_set(&bs->copy_on_read, 0);
         bs->backing_file[0] = '\0';
         bs->backing_format[0] = '\0';
         bs->total_sectors = 0;
@@ -3422,7 +3426,7 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, Error **errp)
         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
         bdrv_dirty_bitmap_truncate(bs);
         bdrv_parent_cb_resize(bs);
-        ++bs->write_gen;
+        atomic_inc(&bs->write_gen);
     }
     return ret;
 }
diff --git a/block/accounting.c b/block/accounting.c
index 3f457c4e73..87ef5bbfaa 100644
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -32,23 +32,28 @@
 static QEMUClockType clock_type = QEMU_CLOCK_REALTIME;
 static const int qtest_latency_ns = NANOSECONDS_PER_SECOND / 1000;
 
-void block_acct_init(BlockAcctStats *stats, bool account_invalid,
-                     bool account_failed)
+void block_acct_init(BlockAcctStats *stats)
 {
-    stats->account_invalid = account_invalid;
-    stats->account_failed = account_failed;
-
+    qemu_mutex_init(&stats->lock);
     if (qtest_enabled()) {
         clock_type = QEMU_CLOCK_VIRTUAL;
     }
 }
 
+void block_acct_setup(BlockAcctStats *stats, bool account_invalid,
+                      bool account_failed)
+{
+    stats->account_invalid = account_invalid;
+    stats->account_failed = account_failed;
+}
+
 void block_acct_cleanup(BlockAcctStats *stats)
 {
     BlockAcctTimedStats *s, *next;
     QSLIST_FOREACH_SAFE(s, &stats->intervals, entries, next) {
         g_free(s);
     }
+    qemu_mutex_destroy(&stats->lock);
 }
 
 void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
@@ -58,12 +63,15 @@ void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
 
     s = g_new0(BlockAcctTimedStats, 1);
     s->interval_length = interval_length;
+    s->stats = stats;
+    qemu_mutex_lock(&stats->lock);
     QSLIST_INSERT_HEAD(&stats->intervals, s, entries);
 
     for (i = 0; i < BLOCK_MAX_IOTYPE; i++) {
         timed_average_init(&s->latency[i], clock_type,
                            (uint64_t) interval_length * NANOSECONDS_PER_SECOND);
     }
+    qemu_mutex_unlock(&stats->lock);
 }
 
 BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats,
@@ -86,7 +94,8 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
     cookie->type = type;
 }
 
-void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
+static void block_account_one_io(BlockAcctStats *stats, BlockAcctCookie *cookie,
+                                 bool failed)
 {
     BlockAcctTimedStats *s;
     int64_t time_ns = qemu_clock_get_ns(clock_type);
@@ -98,31 +107,16 @@ void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
 
     assert(cookie->type < BLOCK_MAX_IOTYPE);
 
-    stats->nr_bytes[cookie->type] += cookie->bytes;
-    stats->nr_ops[cookie->type]++;
-    stats->total_time_ns[cookie->type] += latency_ns;
-    stats->last_access_time_ns = time_ns;
+    qemu_mutex_lock(&stats->lock);
 
-    QSLIST_FOREACH(s, &stats->intervals, entries) {
-        timed_average_account(&s->latency[cookie->type], latency_ns);
+    if (failed) {
+        stats->failed_ops[cookie->type]++;
+    } else {
+        stats->nr_bytes[cookie->type] += cookie->bytes;
+        stats->nr_ops[cookie->type]++;
     }
-}
-
-void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
-{
-    assert(cookie->type < BLOCK_MAX_IOTYPE);
-
-    stats->failed_ops[cookie->type]++;
-
-    if (stats->account_failed) {
-        BlockAcctTimedStats *s;
-        int64_t time_ns = qemu_clock_get_ns(clock_type);
-        int64_t latency_ns = time_ns - cookie->start_time_ns;
-
-        if (qtest_enabled()) {
-            latency_ns = qtest_latency_ns;
-        }
 
+    if (!failed || stats->account_failed) {
         stats->total_time_ns[cookie->type] += latency_ns;
         stats->last_access_time_ns = time_ns;
 
@@ -130,29 +124,45 @@ void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
             timed_average_account(&s->latency[cookie->type], latency_ns);
         }
     }
+
+    qemu_mutex_unlock(&stats->lock);
+}
+
+void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
+{
+    block_account_one_io(stats, cookie, false);
+}
+
+void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
+{
+    block_account_one_io(stats, cookie, true);
 }
 
 void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type)
 {
     assert(type < BLOCK_MAX_IOTYPE);
 
-    /* block_acct_done() and block_acct_failed() update
-     * total_time_ns[], but this one does not. The reason is that
-     * invalid requests are accounted during their submission,
-     * therefore there's no actual I/O involved. */
-
+    /* block_account_one_io() updates total_time_ns[], but this one does
+     * not.  The reason is that invalid requests are accounted during their
+     * submission, therefore there's no actual I/O involved.
+     */
+    qemu_mutex_lock(&stats->lock);
     stats->invalid_ops[type]++;
 
     if (stats->account_invalid) {
         stats->last_access_time_ns = qemu_clock_get_ns(clock_type);
     }
+    qemu_mutex_unlock(&stats->lock);
 }
 
 void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
                       int num_requests)
 {
     assert(type < BLOCK_MAX_IOTYPE);
+
+    qemu_mutex_lock(&stats->lock);
     stats->merged[type] += num_requests;
+    qemu_mutex_unlock(&stats->lock);
 }
 
 int64_t block_acct_idle_time_ns(BlockAcctStats *stats)
@@ -167,7 +177,9 @@ double block_acct_queue_depth(BlockAcctTimedStats *stats,
 
     assert(type < BLOCK_MAX_IOTYPE);
 
+    qemu_mutex_lock(&stats->stats->lock);
     sum = timed_average_sum(&stats->latency[type], &elapsed);
+    qemu_mutex_unlock(&stats->stats->lock);
 
     return (double) sum / elapsed;
 }
diff --git a/block/block-backend.c b/block/block-backend.c
index 7d7f3697d1..a2bbae90b1 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -216,8 +216,10 @@ BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
     blk->shared_perm = shared_perm;
     blk_set_enable_write_cache(blk, true);
 
+    qemu_co_mutex_init(&blk->public.throttled_reqs_lock);
     qemu_co_queue_init(&blk->public.throttled_reqs[0]);
     qemu_co_queue_init(&blk->public.throttled_reqs[1]);
+    block_acct_init(&blk->stats);
 
     notifier_list_init(&blk->remove_bs_notifiers);
     notifier_list_init(&blk->insert_bs_notifiers);
@@ -1953,7 +1955,7 @@ static void blk_root_drained_begin(BdrvChild *child)
     /* Note that blk->root may not be accessible here yet if we are just
      * attaching to a BlockDriverState that is drained. Use child instead. */
 
-    if (blk->public.io_limits_disabled++ == 0) {
+    if (atomic_fetch_inc(&blk->public.io_limits_disabled) == 0) {
         throttle_group_restart_blk(blk);
     }
 }
@@ -1964,7 +1966,7 @@ static void blk_root_drained_end(BdrvChild *child)
     assert(blk->quiesce_counter);
 
     assert(blk->public.io_limits_disabled);
-    --blk->public.io_limits_disabled;
+    atomic_dec(&blk->public.io_limits_disabled);
 
     if (--blk->quiesce_counter == 0) {
         if (blk->dev_ops && blk->dev_ops->drained_end) {
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 519737c8d3..a04c6e4154 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -37,6 +37,7 @@
  *     or enabled. A frozen bitmap can only abdicate() or reclaim().
  */
 struct BdrvDirtyBitmap {
+    QemuMutex *mutex;
     HBitmap *bitmap;            /* Dirty sector bitmap implementation */
     HBitmap *meta;              /* Meta dirty bitmap */
     BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
@@ -52,6 +53,27 @@ struct BdrvDirtyBitmapIter {
     BdrvDirtyBitmap *bitmap;
 };
 
+static inline void bdrv_dirty_bitmaps_lock(BlockDriverState *bs)
+{
+    qemu_mutex_lock(&bs->dirty_bitmap_mutex);
+}
+
+static inline void bdrv_dirty_bitmaps_unlock(BlockDriverState *bs)
+{
+    qemu_mutex_unlock(&bs->dirty_bitmap_mutex);
+}
+
+void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap)
+{
+    qemu_mutex_lock(bitmap->mutex);
+}
+
+void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap)
+{
+    qemu_mutex_unlock(bitmap->mutex);
+}
+
+/* Called with BQL or dirty_bitmap lock taken.  */
 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
 {
     BdrvDirtyBitmap *bm;
@@ -65,6 +87,7 @@ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
     return NULL;
 }
 
+/* Called with BQL taken.  */
 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
 {
     assert(!bdrv_dirty_bitmap_frozen(bitmap));
@@ -72,6 +95,7 @@ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
     bitmap->name = NULL;
 }
 
+/* Called with BQL taken.  */
 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
                                           uint32_t granularity,
                                           const char *name,
@@ -96,11 +120,14 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
         return NULL;
     }
     bitmap = g_new0(BdrvDirtyBitmap, 1);
+    bitmap->mutex = &bs->dirty_bitmap_mutex;
     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
     bitmap->size = bitmap_size;
     bitmap->name = g_strdup(name);
     bitmap->disabled = false;
+    bdrv_dirty_bitmaps_lock(bs);
     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
+    bdrv_dirty_bitmaps_unlock(bs);
     return bitmap;
 }
 
@@ -119,20 +146,24 @@ void bdrv_create_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                                    int chunk_size)
 {
     assert(!bitmap->meta);
+    qemu_mutex_lock(bitmap->mutex);
     bitmap->meta = hbitmap_create_meta(bitmap->bitmap,
                                        chunk_size * BITS_PER_BYTE);
+    qemu_mutex_unlock(bitmap->mutex);
 }
 
 void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 {
     assert(bitmap->meta);
+    qemu_mutex_lock(bitmap->mutex);
     hbitmap_free_meta(bitmap->bitmap);
     bitmap->meta = NULL;
+    qemu_mutex_unlock(bitmap->mutex);
 }
 
-int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
-                               BdrvDirtyBitmap *bitmap, int64_t sector,
-                               int nb_sectors)
+int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs,
+                                      BdrvDirtyBitmap *bitmap, int64_t sector,
+                                      int nb_sectors)
 {
     uint64_t i;
     int sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta);
@@ -147,11 +178,26 @@ int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
     return false;
 }
 
+int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
+                               BdrvDirtyBitmap *bitmap, int64_t sector,
+                               int nb_sectors)
+{
+    bool dirty;
+
+    qemu_mutex_lock(bitmap->mutex);
+    dirty = bdrv_dirty_bitmap_get_meta_locked(bs, bitmap, sector, nb_sectors);
+    qemu_mutex_unlock(bitmap->mutex);
+
+    return dirty;
+}
+
 void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs,
                                   BdrvDirtyBitmap *bitmap, int64_t sector,
                                   int nb_sectors)
 {
+    qemu_mutex_lock(bitmap->mutex);
     hbitmap_reset(bitmap->meta, sector, nb_sectors);
+    qemu_mutex_unlock(bitmap->mutex);
 }
 
 int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap)
@@ -164,16 +210,19 @@ const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap)
     return bitmap->name;
 }
 
+/* Called with BQL taken.  */
 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
 {
     return bitmap->successor;
 }
 
+/* Called with BQL taken.  */
 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
 {
     return !(bitmap->disabled || bitmap->successor);
 }
 
+/* Called with BQL taken.  */
 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
 {
     if (bdrv_dirty_bitmap_frozen(bitmap)) {
@@ -188,6 +237,7 @@ DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
 /**
  * Create a successor bitmap destined to replace this bitmap after an operation.
  * Requires that the bitmap is not frozen and has no successor.
+ * Called with BQL taken.
  */
 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
                                        BdrvDirtyBitmap *bitmap, Error **errp)
@@ -220,6 +270,7 @@ int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
 /**
  * For a bitmap with a successor, yield our name to the successor,
  * delete the old bitmap, and return a handle to the new bitmap.
+ * Called with BQL taken.
  */
 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
                                             BdrvDirtyBitmap *bitmap,
@@ -247,6 +298,7 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
  * In cases of failure where we can no longer safely delete the parent,
  * we may wish to re-join the parent and child/successor.
  * The merged parent will be un-frozen, but not explicitly re-enabled.
+ * Called with BQL taken.
  */
 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
                                            BdrvDirtyBitmap *parent,
@@ -271,25 +323,30 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
 
 /**
  * Truncates _all_ bitmaps attached to a BDS.
+ * Called with BQL taken.
  */
 void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
 {
     BdrvDirtyBitmap *bitmap;
     uint64_t size = bdrv_nb_sectors(bs);
 
+    bdrv_dirty_bitmaps_lock(bs);
     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
         assert(!bdrv_dirty_bitmap_frozen(bitmap));
         assert(!bitmap->active_iterators);
         hbitmap_truncate(bitmap->bitmap, size);
         bitmap->size = size;
     }
+    bdrv_dirty_bitmaps_unlock(bs);
 }
 
+/* Called with BQL taken.  */
 static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
                                                   BdrvDirtyBitmap *bitmap,
                                                   bool only_named)
 {
     BdrvDirtyBitmap *bm, *next;
+    bdrv_dirty_bitmaps_lock(bs);
     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
         if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
             assert(!bm->active_iterators);
@@ -301,15 +358,19 @@ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
             g_free(bm);
 
             if (bitmap) {
-                return;
+                goto out;
             }
         }
     }
     if (bitmap) {
         abort();
     }
+
+out:
+    bdrv_dirty_bitmaps_unlock(bs);
 }
 
+/* Called with BQL taken.  */
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
 {
     bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
@@ -318,18 +379,21 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
 /**
  * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
  * There must not be any frozen bitmaps attached.
+ * Called with BQL taken.
  */
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
 {
     bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
 }
 
+/* Called with BQL taken.  */
 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 {
     assert(!bdrv_dirty_bitmap_frozen(bitmap));
     bitmap->disabled = true;
 }
 
+/* Called with BQL taken.  */
 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 {
     assert(!bdrv_dirty_bitmap_frozen(bitmap));
@@ -342,6 +406,7 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
     BlockDirtyInfoList *list = NULL;
     BlockDirtyInfoList **plist = &list;
 
+    bdrv_dirty_bitmaps_lock(bs);
     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
@@ -354,12 +419,14 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
         *plist = entry;
         plist = &entry->next;
     }
+    bdrv_dirty_bitmaps_unlock(bs);
 
     return list;
 }
 
-int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
-                   int64_t sector)
+/* Called within bdrv_dirty_bitmap_lock..unlock */
+int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                          int64_t sector)
 {
     if (bitmap) {
         return hbitmap_get(bitmap->bitmap, sector);
@@ -432,23 +499,42 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
     return hbitmap_iter_next(&iter->hbi);
 }
 
+/* Called within bdrv_dirty_bitmap_lock..unlock */
+void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
+                                  int64_t cur_sector, int64_t nr_sectors)
+{
+    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                            int64_t cur_sector, int64_t nr_sectors)
 {
+    bdrv_dirty_bitmap_lock(bitmap);
+    bdrv_set_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
+    bdrv_dirty_bitmap_unlock(bitmap);
+}
+
+/* Called within bdrv_dirty_bitmap_lock..unlock */
+void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
+                                    int64_t cur_sector, int64_t nr_sectors)
+{
     assert(bdrv_dirty_bitmap_enabled(bitmap));
-    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
 }
 
 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                              int64_t cur_sector, int64_t nr_sectors)
 {
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
+    bdrv_dirty_bitmap_lock(bitmap);
+    bdrv_reset_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
+    bdrv_dirty_bitmap_unlock(bitmap);
 }
 
 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
 {
     assert(bdrv_dirty_bitmap_enabled(bitmap));
+    bdrv_dirty_bitmap_lock(bitmap);
     if (!out) {
         hbitmap_reset_all(bitmap->bitmap);
     } else {
@@ -457,6 +543,7 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
                                        hbitmap_granularity(backup));
         *out = backup;
     }
+    bdrv_dirty_bitmap_unlock(bitmap);
 }
 
 void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
@@ -508,12 +595,19 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                     int64_t nr_sectors)
 {
     BdrvDirtyBitmap *bitmap;
+
+    if (QLIST_EMPTY(&bs->dirty_bitmaps)) {
+        return;
+    }
+
+    bdrv_dirty_bitmaps_lock(bs);
     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
             continue;
         }
         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
     }
+    bdrv_dirty_bitmaps_unlock(bs);
 }
 
 /**
diff --git a/block/io.c b/block/io.c
index ed31810c0a..91611ffb2a 100644
--- a/block/io.c
+++ b/block/io.c
@@ -130,13 +130,13 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
  */
 void bdrv_enable_copy_on_read(BlockDriverState *bs)
 {
-    bs->copy_on_read++;
+    atomic_inc(&bs->copy_on_read);
 }
 
 void bdrv_disable_copy_on_read(BlockDriverState *bs)
 {
-    assert(bs->copy_on_read > 0);
-    bs->copy_on_read--;
+    int old = atomic_fetch_dec(&bs->copy_on_read);
+    assert(old >= 1);
 }
 
 /* Check if any requests are in-flight (including throttled requests) */
@@ -241,7 +241,7 @@ void bdrv_drained_begin(BlockDriverState *bs)
         return;
     }
 
-    if (!bs->quiesce_counter++) {
+    if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
         aio_disable_external(bdrv_get_aio_context(bs));
         bdrv_parent_drained_begin(bs);
     }
@@ -252,7 +252,7 @@ void bdrv_drained_begin(BlockDriverState *bs)
 void bdrv_drained_end(BlockDriverState *bs)
 {
     assert(bs->quiesce_counter > 0);
-    if (--bs->quiesce_counter > 0) {
+    if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
         return;
     }
 
@@ -375,11 +375,13 @@ void bdrv_drain_all(void)
 static void tracked_request_end(BdrvTrackedRequest *req)
 {
     if (req->serialising) {
-        req->bs->serialising_in_flight--;
+        atomic_dec(&req->bs->serialising_in_flight);
     }
 
+    qemu_co_mutex_lock(&req->bs->reqs_lock);
     QLIST_REMOVE(req, list);
     qemu_co_queue_restart_all(&req->wait_queue);
+    qemu_co_mutex_unlock(&req->bs->reqs_lock);
 }
 
 /**
@@ -404,7 +406,9 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
 
     qemu_co_queue_init(&req->wait_queue);
 
+    qemu_co_mutex_lock(&bs->reqs_lock);
     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
+    qemu_co_mutex_unlock(&bs->reqs_lock);
 }
 
 static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
@@ -414,7 +418,7 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
                                - overlap_offset;
 
     if (!req->serialising) {
-        req->bs->serialising_in_flight++;
+        atomic_inc(&req->bs->serialising_in_flight);
         req->serialising = true;
     }
 
@@ -501,7 +505,8 @@ static void dummy_bh_cb(void *opaque)
 
 void bdrv_wakeup(BlockDriverState *bs)
 {
-    if (bs->wakeup) {
+    /* The barrier (or an atomic op) is in the caller.  */
+    if (atomic_read(&bs->wakeup)) {
         aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
     }
 }
@@ -519,12 +524,13 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
     bool retry;
     bool waited = false;
 
-    if (!bs->serialising_in_flight) {
+    if (!atomic_read(&bs->serialising_in_flight)) {
         return false;
     }
 
     do {
         retry = false;
+        qemu_co_mutex_lock(&bs->reqs_lock);
         QLIST_FOREACH(req, &bs->tracked_requests, list) {
             if (req == self || (!req->serialising && !self->serialising)) {
                 continue;
@@ -543,7 +549,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
                  * (instead of producing a deadlock in the former case). */
                 if (!req->waiting_for) {
                     self->waiting_for = req;
-                    qemu_co_queue_wait(&req->wait_queue, NULL);
+                    qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
                     self->waiting_for = NULL;
                     retry = true;
                     waited = true;
@@ -551,6 +557,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
                 }
             }
         }
+        qemu_co_mutex_unlock(&bs->reqs_lock);
     } while (retry);
 
     return waited;
@@ -1144,7 +1151,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
     bdrv_inc_in_flight(bs);
 
     /* Don't do copy-on-read if we read data before write operation */
-    if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) {
+    if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) {
         flags |= BDRV_REQ_COPY_ON_READ;
     }
 
@@ -1401,12 +1408,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
     }
     bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
 
-    ++bs->write_gen;
+    atomic_inc(&bs->write_gen);
     bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
 
-    if (bs->wr_highest_offset < offset + bytes) {
-        bs->wr_highest_offset = offset + bytes;
-    }
+    stat64_max(&bs->wr_highest_offset, offset + bytes);
 
     if (ret >= 0) {
         bs->total_sectors = MAX(bs->total_sectors, end_sector);
@@ -2292,14 +2297,17 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
         goto early_exit;
     }
 
-    current_gen = bs->write_gen;
+    qemu_co_mutex_lock(&bs->reqs_lock);
+    current_gen = atomic_read(&bs->write_gen);
 
     /* Wait until any previous flushes are completed */
     while (bs->active_flush_req) {
-        qemu_co_queue_wait(&bs->flush_queue, NULL);
+        qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
     }
 
+    /* Flushes reach this point in nondecreasing current_gen order.  */
     bs->active_flush_req = true;
+    qemu_co_mutex_unlock(&bs->reqs_lock);
 
     /* Write back all layers by calling one driver function */
     if (bs->drv->bdrv_co_flush) {
@@ -2371,9 +2379,12 @@ out:
     if (ret == 0) {
         bs->flushed_gen = current_gen;
     }
+
+    qemu_co_mutex_lock(&bs->reqs_lock);
     bs->active_flush_req = false;
     /* Return value is ignored - it's ok if wait queue is empty */
     qemu_co_queue_next(&bs->flush_queue);
+    qemu_co_mutex_unlock(&bs->reqs_lock);
 
 early_exit:
     bdrv_dec_in_flight(bs);
@@ -2517,7 +2528,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
     }
     ret = 0;
 out:
-    ++bs->write_gen;
+    atomic_inc(&bs->write_gen);
     bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
                    req.bytes >> BDRV_SECTOR_BITS);
     tracked_request_end(&req);
@@ -2644,7 +2655,7 @@ void bdrv_io_plug(BlockDriverState *bs)
         bdrv_io_plug(child->bs);
     }
 
-    if (bs->io_plugged++ == 0) {
+    if (atomic_fetch_inc(&bs->io_plugged) == 0) {
         BlockDriver *drv = bs->drv;
         if (drv && drv->bdrv_io_plug) {
             drv->bdrv_io_plug(bs);
@@ -2657,7 +2668,7 @@ void bdrv_io_unplug(BlockDriverState *bs)
     BdrvChild *child;
 
     assert(bs->io_plugged);
-    if (--bs->io_plugged == 0) {
+    if (atomic_fetch_dec(&bs->io_plugged) == 1) {
         BlockDriver *drv = bs->drv;
         if (drv && drv->bdrv_io_unplug) {
             drv->bdrv_io_unplug(bs);
diff --git a/block/iscsi.c b/block/iscsi.c
index 5daa201181..b5f7a228b9 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1732,6 +1732,10 @@ static QemuOptsList runtime_opts = {
             .name = "timeout",
             .type = QEMU_OPT_NUMBER,
         },
+        {
+            .name = "filename",
+            .type = QEMU_OPT_STRING,
+        },
         { /* end of list */ }
     },
 };
@@ -1747,12 +1751,27 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
     char *initiator_name = NULL;
     QemuOpts *opts;
     Error *local_err = NULL;
-    const char *transport_name, *portal, *target;
+    const char *transport_name, *portal, *target, *filename;
 #if LIBISCSI_API_VERSION >= (20160603)
     enum iscsi_transport_type transport;
 #endif
     int i, ret = 0, timeout = 0, lun;
 
+    /* If we are given a filename, parse the filename, with precedence given to
+     * filename encoded options */
+    filename = qdict_get_try_str(options, "filename");
+    if (filename) {
+        error_report("Warning: 'filename' option specified. "
+                      "This is an unsupported option, and may be deprecated "
+                      "in the future");
+        iscsi_parse_filename(filename, options, &local_err);
+        if (local_err) {
+            ret = -EINVAL;
+            error_propagate(errp, local_err);
+            goto exit;
+        }
+    }
+
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
     qemu_opts_absorb_qdict(opts, options, &local_err);
     if (local_err) {
@@ -1967,6 +1986,7 @@ out:
         }
         memset(iscsilun, 0, sizeof(IscsiLun));
     }
+exit:
     return ret;
 }
 
diff --git a/block/mirror.c b/block/mirror.c
index a2a970301c..19afcc6f1a 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -342,6 +342,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
     int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
                              MAX_IO_SECTORS);
 
+    bdrv_dirty_bitmap_lock(s->dirty_bitmap);
     sector_num = bdrv_dirty_iter_next(s->dbi);
     if (sector_num < 0) {
         bdrv_set_dirty_iter(s->dbi, 0);
@@ -349,6 +350,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
         trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
         assert(sector_num >= 0);
     }
+    bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
 
     first_chunk = sector_num / sectors_per_chunk;
     while (test_bit(first_chunk, s->in_flight_bitmap)) {
@@ -360,12 +362,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
 
     /* Find the number of consective dirty chunks following the first dirty
      * one, and wait for in flight requests in them. */
+    bdrv_dirty_bitmap_lock(s->dirty_bitmap);
     while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) {
         int64_t next_dirty;
         int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk;
         int64_t next_chunk = next_sector / sectors_per_chunk;
         if (next_sector >= end ||
-            !bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
+            !bdrv_get_dirty_locked(source, s->dirty_bitmap, next_sector)) {
             break;
         }
         if (test_bit(next_chunk, s->in_flight_bitmap)) {
@@ -386,8 +389,10 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
      * calling bdrv_get_block_status_above could yield - if some blocks are
      * marked dirty in this window, we need to know.
      */
-    bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num,
-                            nb_chunks * sectors_per_chunk);
+    bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, sector_num,
+                                  nb_chunks * sectors_per_chunk);
+    bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
+
     bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
     while (nb_chunks > 0 && sector_num < end) {
         int64_t ret;
@@ -506,6 +511,8 @@ static void mirror_exit(BlockJob *job, void *opaque)
     BlockDriverState *mirror_top_bs = s->mirror_top_bs;
     Error *local_err = NULL;
 
+    bdrv_release_dirty_bitmap(src, s->dirty_bitmap);
+
     /* Make sure that the source BDS doesn't go away before we called
      * block_job_completed(). */
     bdrv_ref(src);
@@ -904,7 +911,6 @@ immediate_exit:
     g_free(s->cow_bitmap);
     g_free(s->in_flight_bitmap);
     bdrv_dirty_iter_free(s->dbi);
-    bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
 
     data = g_malloc(sizeof(*data));
     data->ret = ret;
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 87d19c7253..d64e775385 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -144,8 +144,8 @@ static int nbd_co_send_request(BlockDriverState *bs,
         qio_channel_set_cork(s->ioc, true);
         rc = nbd_send_request(s->ioc, request);
         if (rc >= 0) {
-            ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
-                               false, NULL);
+            ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, false,
+                          NULL);
             if (ret != request->len) {
                 rc = -EIO;
             }
@@ -173,8 +173,8 @@ static void nbd_co_receive_reply(NBDClientSession *s,
         reply->error = EIO;
     } else {
         if (qiov && reply->error == 0) {
-            ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
-                               true, NULL);
+            ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, true,
+                          NULL);
             if (ret != request->len) {
                 reply->error = EIO;
             }
diff --git a/block/nfs.c b/block/nfs.c
index 848b2c0bb0..18c87d2f25 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -730,7 +730,9 @@ nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
     if (task->ret < 0) {
         error_report("NFS Error: %s", nfs_get_error(nfs));
     }
-    task->complete = 1;
+
+    /* Set task->complete before reading bs->wakeup.  */
+    atomic_mb_set(&task->complete, 1);
     bdrv_wakeup(task->bs);
 }
 
diff --git a/block/qapi.c b/block/qapi.c
index a40922ea26..14b60ae66c 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -441,7 +441,7 @@ static BlockStats *bdrv_query_bds_stats(const BlockDriverState *bs,
         s->node_name = g_strdup(bdrv_get_node_name(bs));
     }
 
-    s->stats->wr_highest_offset = bs->wr_highest_offset;
+    s->stats->wr_highest_offset = stat64_get(&bs->wr_highest_offset);
 
     if (bs->file) {
         s->has_parent = true;
diff --git a/block/rbd.c b/block/rbd.c
index e551639e47..ff44e5f437 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -340,6 +340,10 @@ static QemuOptsList runtime_opts = {
             .type = QEMU_OPT_STRING,
             .help = "Legacy rados key/value option parameters",
         },
+        {
+            .name = "filename",
+            .type = QEMU_OPT_STRING,
+        },
         { /* end of list */ }
     },
 };
@@ -541,12 +545,27 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
 {
     BDRVRBDState *s = bs->opaque;
     const char *pool, *snap, *conf, *user, *image_name, *keypairs;
-    const char *secretid;
+    const char *secretid, *filename;
     QemuOpts *opts;
     Error *local_err = NULL;
     char *mon_host = NULL;
     int r;
 
+    /* If we are given a filename, parse the filename, with precedence given to
+     * filename encoded options */
+    filename = qdict_get_try_str(options, "filename");
+    if (filename) {
+        error_report("Warning: 'filename' option specified. "
+                      "This is an unsupported option, and may be deprecated "
+                      "in the future");
+        qemu_rbd_parse_filename(filename, options, &local_err);
+        if (local_err) {
+            r = -EINVAL;
+            error_propagate(errp, local_err);
+            goto exit;
+        }
+    }
+
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
     qemu_opts_absorb_qdict(opts, options, &local_err);
     if (local_err) {
@@ -665,6 +684,7 @@ failed_shutdown:
 failed_opts:
     qemu_opts_del(opts);
     g_free(mon_host);
+exit:
     return r;
 }
 
diff --git a/block/sheepdog.c b/block/sheepdog.c
index a18315a1ca..5ebf5d9fbb 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -698,7 +698,8 @@ out:
 
     srco->co = NULL;
     srco->ret = ret;
-    srco->finished = true;
+    /* Set srco->finished before reading bs->wakeup.  */
+    atomic_mb_set(&srco->finished, true);
     if (srco->bs) {
         bdrv_wakeup(srco->bs);
     }
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index b73e7a800b..a181cb1dee 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -240,7 +240,7 @@ static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write)
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
     bool must_wait;
 
-    if (blkp->io_limits_disabled) {
+    if (atomic_read(&blkp->io_limits_disabled)) {
         return false;
     }
 
@@ -260,6 +260,25 @@ static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write)
     return must_wait;
 }
 
+/* Start the next pending I/O request for a BlockBackend.  Return whether
+ * any request was actually pending.
+ *
+ * @blk:       the current BlockBackend
+ * @is_write:  the type of operation (read/write)
+ */
+static bool coroutine_fn throttle_group_co_restart_queue(BlockBackend *blk,
+                                                         bool is_write)
+{
+    BlockBackendPublic *blkp = blk_get_public(blk);
+    bool ret;
+
+    qemu_co_mutex_lock(&blkp->throttled_reqs_lock);
+    ret = qemu_co_queue_next(&blkp->throttled_reqs[is_write]);
+    qemu_co_mutex_unlock(&blkp->throttled_reqs_lock);
+
+    return ret;
+}
+
 /* Look for the next pending I/O request and schedule it.
  *
  * This assumes that tg->lock is held.
@@ -287,12 +306,12 @@ static void schedule_next_request(BlockBackend *blk, bool is_write)
     if (!must_wait) {
         /* Give preference to requests from the current blk */
         if (qemu_in_coroutine() &&
-            qemu_co_queue_next(&blkp->throttled_reqs[is_write])) {
+            throttle_group_co_restart_queue(blk, is_write)) {
             token = blk;
         } else {
             ThrottleTimers *tt = &blk_get_public(token)->throttle_timers;
             int64_t now = qemu_clock_get_ns(tt->clock_type);
-            timer_mod(tt->timers[is_write], now + 1);
+            timer_mod(tt->timers[is_write], now);
             tg->any_timer_armed[is_write] = true;
         }
         tg->tokens[is_write] = token;
@@ -326,7 +345,10 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
     if (must_wait || blkp->pending_reqs[is_write]) {
         blkp->pending_reqs[is_write]++;
         qemu_mutex_unlock(&tg->lock);
-        qemu_co_queue_wait(&blkp->throttled_reqs[is_write], NULL);
+        qemu_co_mutex_lock(&blkp->throttled_reqs_lock);
+        qemu_co_queue_wait(&blkp->throttled_reqs[is_write],
+                           &blkp->throttled_reqs_lock);
+        qemu_co_mutex_unlock(&blkp->throttled_reqs_lock);
         qemu_mutex_lock(&tg->lock);
         blkp->pending_reqs[is_write]--;
     }
@@ -340,15 +362,50 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
     qemu_mutex_unlock(&tg->lock);
 }
 
+typedef struct {
+    BlockBackend *blk;
+    bool is_write;
+} RestartData;
+
+static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
+{
+    RestartData *data = opaque;
+    BlockBackend *blk = data->blk;
+    bool is_write = data->is_write;
+    BlockBackendPublic *blkp = blk_get_public(blk);
+    ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
+    bool empty_queue;
+
+    empty_queue = !throttle_group_co_restart_queue(blk, is_write);
+
+    /* If the request queue was empty then we have to take care of
+     * scheduling the next one */
+    if (empty_queue) {
+        qemu_mutex_lock(&tg->lock);
+        schedule_next_request(blk, is_write);
+        qemu_mutex_unlock(&tg->lock);
+    }
+}
+
+static void throttle_group_restart_queue(BlockBackend *blk, bool is_write)
+{
+    Coroutine *co;
+    RestartData rd = {
+        .blk = blk,
+        .is_write = is_write
+    };
+
+    co = qemu_coroutine_create(throttle_group_restart_queue_entry, &rd);
+    aio_co_enter(blk_get_aio_context(blk), co);
+}
+
 void throttle_group_restart_blk(BlockBackend *blk)
 {
     BlockBackendPublic *blkp = blk_get_public(blk);
-    int i;
 
-    for (i = 0; i < 2; i++) {
-        while (qemu_co_enter_next(&blkp->throttled_reqs[i])) {
-            ;
-        }
+    if (blkp->throttle_state) {
+        throttle_group_restart_queue(blk, 0);
+        throttle_group_restart_queue(blk, 1);
     }
 }
 
@@ -376,8 +433,7 @@ void throttle_group_config(BlockBackend *blk, ThrottleConfig *cfg)
     throttle_config(ts, tt, cfg);
     qemu_mutex_unlock(&tg->lock);
 
-    qemu_co_enter_next(&blkp->throttled_reqs[0]);
-    qemu_co_enter_next(&blkp->throttled_reqs[1]);
+    throttle_group_restart_blk(blk);
 }
 
 /* Get the throttle configuration from a particular group. Similar to
@@ -408,7 +464,6 @@ static void timer_cb(BlockBackend *blk, bool is_write)
     BlockBackendPublic *blkp = blk_get_public(blk);
     ThrottleState *ts = blkp->throttle_state;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
-    bool empty_queue;
 
     /* The timer has just been fired, so we can update the flag */
     qemu_mutex_lock(&tg->lock);
@@ -416,17 +471,7 @@ static void timer_cb(BlockBackend *blk, bool is_write)
     qemu_mutex_unlock(&tg->lock);
 
     /* Run the request that was waiting for this timer */
-    aio_context_acquire(blk_get_aio_context(blk));
-    empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
-    aio_context_release(blk_get_aio_context(blk));
-
-    /* If the request queue was empty then we have to take care of
-     * scheduling the next one */
-    if (empty_queue) {
-        qemu_mutex_lock(&tg->lock);
-        schedule_next_request(blk, is_write);
-        qemu_mutex_unlock(&tg->lock);
-    }
+    throttle_group_restart_queue(blk, is_write);
 }
 
 static void read_timer_cb(void *opaque)
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index dd0860f4a6..28f551a7b0 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -27,6 +27,10 @@ typedef struct NBDServerData {
 
 static NBDServerData *nbd_server;
 
+static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
+{
+    nbd_client_put(client);
+}
 
 static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition,
                            gpointer opaque)
@@ -46,7 +50,7 @@ static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition,
     qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
     nbd_client_new(NULL, cioc,
                    nbd_server->tlscreds, NULL,
-                   nbd_client_put);
+                   nbd_blockdev_client_closed);
     object_unref(OBJECT(cioc));
     return TRUE;
 }
diff --git a/blockdev.c b/blockdev.c
index 6472548186..39d6b9712b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -595,7 +595,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
             autostart = 0;
         }
 
-        block_acct_init(blk_get_stats(blk), account_invalid, account_failed);
+        block_acct_setup(blk_get_stats(blk), account_invalid, account_failed);
 
         if (!parse_stats_intervals(blk_get_stats(blk), interval_list, errp)) {
             blk_unref(blk);
@@ -1362,12 +1362,10 @@ out_aio_context:
 static BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
                                                   const char *name,
                                                   BlockDriverState **pbs,
-                                                  AioContext **paio,
                                                   Error **errp)
 {
     BlockDriverState *bs;
     BdrvDirtyBitmap *bitmap;
-    AioContext *aio_context;
 
     if (!node) {
         error_setg(errp, "Node cannot be NULL");
@@ -1383,29 +1381,17 @@ static BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
         return NULL;
     }
 
-    aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
-
     bitmap = bdrv_find_dirty_bitmap(bs, name);
     if (!bitmap) {
         error_setg(errp, "Dirty bitmap '%s' not found", name);
-        goto fail;
+        return NULL;
     }
 
     if (pbs) {
         *pbs = bs;
     }
-    if (paio) {
-        *paio = aio_context;
-    } else {
-        aio_context_release(aio_context);
-    }
 
     return bitmap;
-
- fail:
-    aio_context_release(aio_context);
-    return NULL;
 }
 
 /* New and old BlockDriverState structs for atomic group operations */
@@ -1791,7 +1777,7 @@ static void external_snapshot_commit(BlkActionState *common)
     /* We don't need (or want) to use the transactional
      * bdrv_reopen_multiple() across all the entries at once, because we
      * don't want to abort all of them if one of them fails the reopen */
-    if (!state->old_bs->copy_on_read) {
+    if (!atomic_read(&state->old_bs->copy_on_read)) {
         bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
                     NULL);
     }
@@ -2025,7 +2011,6 @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
     state->bitmap = block_dirty_bitmap_lookup(action->node,
                                               action->name,
                                               &state->bs,
-                                              &state->aio_context,
                                               errp);
     if (!state->bitmap) {
         return;
@@ -2733,7 +2718,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
                                 bool has_granularity, uint32_t granularity,
                                 Error **errp)
 {
-    AioContext *aio_context;
     BlockDriverState *bs;
 
     if (!name || name[0] == '\0') {
@@ -2746,14 +2730,11 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
         return;
     }
 
-    aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
-
     if (has_granularity) {
         if (granularity < 512 || !is_power_of_2(granularity)) {
             error_setg(errp, "Granularity must be power of 2 "
                              "and at least 512");
-            goto out;
+            return;
         }
     } else {
         /* Default to cluster size, if available: */
@@ -2761,19 +2742,15 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
     }
 
     bdrv_create_dirty_bitmap(bs, granularity, name, errp);
-
- out:
-    aio_context_release(aio_context);
 }
 
 void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
                                    Error **errp)
 {
-    AioContext *aio_context;
     BlockDriverState *bs;
     BdrvDirtyBitmap *bitmap;
 
-    bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp);
+    bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
     if (!bitmap || !bs) {
         return;
     }
@@ -2782,13 +2759,10 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
         error_setg(errp,
                    "Bitmap '%s' is currently frozen and cannot be removed",
                    name);
-        goto out;
+        return;
     }
     bdrv_dirty_bitmap_make_anon(bitmap);
     bdrv_release_dirty_bitmap(bs, bitmap);
-
- out:
-    aio_context_release(aio_context);
 }
 
 /**
@@ -2798,11 +2772,10 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
 void qmp_block_dirty_bitmap_clear(const char *node, const char *name,
                                   Error **errp)
 {
-    AioContext *aio_context;
     BdrvDirtyBitmap *bitmap;
     BlockDriverState *bs;
 
-    bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp);
+    bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
     if (!bitmap || !bs) {
         return;
     }
@@ -2811,18 +2784,15 @@ void qmp_block_dirty_bitmap_clear(const char *node, const char *name,
         error_setg(errp,
                    "Bitmap '%s' is currently frozen and cannot be modified",
                    name);
-        goto out;
+        return;
     } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
         error_setg(errp,
                    "Bitmap '%s' is currently disabled and cannot be cleared",
                    name);
-        goto out;
+        return;
     }
 
     bdrv_clear_dirty_bitmap(bitmap, NULL);
-
- out:
-    aio_context_release(aio_context);
 }
 
 void hmp_drive_del(Monitor *mon, const QDict *qdict)
diff --git a/configure b/configure
index b147191ae6..ff0f8b915c 100755
--- a/configure
+++ b/configure
@@ -407,7 +407,7 @@ QEMU_CFLAGS="-fno-strict-aliasing -fno-common -fwrapv $QEMU_CFLAGS"
 QEMU_CFLAGS="-Wall -Wundef -Wwrite-strings -Wmissing-prototypes $QEMU_CFLAGS"
 QEMU_CFLAGS="-Wstrict-prototypes -Wredundant-decls $QEMU_CFLAGS"
 QEMU_CFLAGS="-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE $QEMU_CFLAGS"
-QEMU_INCLUDES="-I. -I\$(SRC_PATH) -I\$(SRC_PATH)/include"
+QEMU_INCLUDES="-I. -I\$(SRC_PATH) -I\$(SRC_PATH)/accel/tcg -I\$(SRC_PATH)/include"
 if test "$debug_info" = "yes"; then
     CFLAGS="-g $CFLAGS"
     LDFLAGS="-g $LDFLAGS"
@@ -2301,14 +2301,14 @@ fi
 # GTK probe
 
 if test "$gtkabi" = ""; then
-    # The GTK ABI was not specified explicitly, so try whether 2.0 is available.
-    # Use 3.0 as a fallback if that is available.
-    if $pkg_config --exists "gtk+-2.0 >= 2.18.0"; then
-        gtkabi=2.0
-    elif $pkg_config --exists "gtk+-3.0 >= 3.0.0"; then
+    # The GTK ABI was not specified explicitly, so try whether 3.0 is available.
+    # Use 2.0 as a fallback if that is available.
+    if $pkg_config --exists "gtk+-3.0 >= 3.0.0"; then
         gtkabi=3.0
-    else
+    elif $pkg_config --exists "gtk+-2.0 >= 2.18.0"; then
         gtkabi=2.0
+    else
+        gtkabi=3.0
     fi
 fi
 
@@ -2331,7 +2331,7 @@ if test "$gtk" != "no"; then
         libs_softmmu="$gtk_libs $libs_softmmu"
         gtk="yes"
     elif test "$gtk" = "yes"; then
-        feature_not_found "gtk" "Install gtk2 or gtk3 devel"
+        feature_not_found "gtk" "Install gtk3-devel"
     else
         gtk="no"
     fi
@@ -2598,12 +2598,12 @@ fi
 # sdl-config even without cross prefix, and favour pkg-config over sdl-config.
 
 if test "$sdlabi" = ""; then
-    if $pkg_config --exists "sdl"; then
-        sdlabi=1.2
-    elif $pkg_config --exists "sdl2"; then
+    if $pkg_config --exists "sdl2"; then
         sdlabi=2.0
-    else
+    elif $pkg_config --exists "sdl"; then
         sdlabi=1.2
+    else
+        sdlabi=2.0
     fi
 fi
 
@@ -2630,7 +2630,7 @@ elif has ${sdl_config}; then
   sdlversion=$($sdlconfig --version)
 else
   if test "$sdl" = "yes" ; then
-    feature_not_found "sdl" "Install SDL devel"
+    feature_not_found "sdl" "Install SDL2-devel"
   fi
   sdl=no
 fi
@@ -6374,7 +6374,7 @@ fi
 
 # build tree in object directory in case the source is not in the current directory
 DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests"
-DIRS="$DIRS docs fsdev"
+DIRS="$DIRS docs docs/interop fsdev"
 DIRS="$DIRS pc-bios/optionrom pc-bios/spapr-rtas pc-bios/s390-ccw"
 DIRS="$DIRS roms/seabios roms/vgabios"
 DIRS="$DIRS qapi-generated"
diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h
index af02a31ebe..53ef222c0b 100644
--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -17,6 +17,7 @@
 #include <stdint.h>
 #include <stdbool.h>
 #include <stddef.h>
+#include <sys/poll.h>
 #include <linux/vhost.h>
 #include "standard-headers/linux/virtio_ring.h"
 
@@ -192,11 +193,11 @@ typedef struct VuVirtq {
 } VuVirtq;
 
 enum VuWatchCondtion {
-    VU_WATCH_IN = 1 << 0,
-    VU_WATCH_OUT = 1 << 1,
-    VU_WATCH_PRI = 1 << 2,
-    VU_WATCH_ERR = 1 << 3,
-    VU_WATCH_HUP = 1 << 4,
+    VU_WATCH_IN = POLLIN,
+    VU_WATCH_OUT = POLLOUT,
+    VU_WATCH_PRI = POLLPRI,
+    VU_WATCH_ERR = POLLERR,
+    VU_WATCH_HUP = POLLHUP,
 };
 
 typedef void (*vu_panic_cb) (VuDev *dev, const char *err);
diff --git a/contrib/vhost-user-scsi/Makefile.objs b/contrib/vhost-user-scsi/Makefile.objs
new file mode 100644
index 0000000000..e83a38a85b
--- /dev/null
+++ b/contrib/vhost-user-scsi/Makefile.objs
@@ -0,0 +1 @@
+vhost-user-scsi-obj-y = vhost-user-scsi.o
diff --git a/contrib/vhost-user-scsi/vhost-user-scsi.c b/contrib/vhost-user-scsi/vhost-user-scsi.c
new file mode 100644
index 0000000000..b5ae02c96f
--- /dev/null
+++ b/contrib/vhost-user-scsi/vhost-user-scsi.c
@@ -0,0 +1,886 @@
+/*
+ * vhost-user-scsi sample application
+ *
+ * Copyright (c) 2016 Nutanix Inc. All rights reserved.
+ *
+ * Author:
+ *  Felipe Franciosi <felipe@nutanix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 only.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "contrib/libvhost-user/libvhost-user.h"
+#include "hw/virtio/virtio-scsi.h"
+#include "iscsi/iscsi.h"
+
+#include <glib.h>
+
+/* Small compat shim from glib 2.32 */
+#ifndef G_SOURCE_CONTINUE
+#define G_SOURCE_CONTINUE TRUE
+#endif
+#ifndef G_SOURCE_REMOVE
+#define G_SOURCE_REMOVE FALSE
+#endif
+
+/* #define VUS_DEBUG 1 */
+
+/** Log helpers **/
+
+#define PPRE                                                          \
+    struct timespec ts;                                               \
+    char   timebuf[64];                                               \
+    struct tm tm;                                                     \
+    (void)clock_gettime(CLOCK_REALTIME, &ts);                         \
+    (void)strftime(timebuf, 64, "%Y%m%d %T", gmtime_r(&ts.tv_sec, &tm))
+
+#define PEXT(lvl, msg, ...) do {                                      \
+    PPRE;                                                             \
+    fprintf(stderr, "%s.%06ld " lvl ": %s:%s():%d: " msg "\n",        \
+            timebuf, ts.tv_nsec / 1000,                               \
+            __FILE__, __func__, __LINE__, ## __VA_ARGS__);            \
+} while (0)
+
+#define PNOR(lvl, msg, ...) do {                                      \
+    PPRE;                                                             \
+    fprintf(stderr, "%s.%06ld " lvl ": " msg "\n",                    \
+            timebuf, ts.tv_nsec / 1000, ## __VA_ARGS__);              \
+} while (0)
+
+#ifdef VUS_DEBUG
+#define PDBG(msg, ...) PEXT("DBG", msg, ## __VA_ARGS__)
+#define PERR(msg, ...) PEXT("ERR", msg, ## __VA_ARGS__)
+#define PLOG(msg, ...) PEXT("LOG", msg, ## __VA_ARGS__)
+#else
+#define PDBG(msg, ...) { }
+#define PERR(msg, ...) PNOR("ERR", msg, ## __VA_ARGS__)
+#define PLOG(msg, ...) PNOR("LOG", msg, ## __VA_ARGS__)
+#endif
+
+/** vhost-user-scsi specific definitions **/
+
+ /* Only 1 LUN and device supported today */
+#define VUS_MAX_LUNS 1
+#define VUS_MAX_DEVS 1
+
+#define VUS_ISCSI_INITIATOR "iqn.2016-11.com.nutanix:vhost-user-scsi"
+
+typedef struct iscsi_lun {
+    struct iscsi_context *iscsi_ctx;
+    int iscsi_lun;
+} iscsi_lun_t;
+
+typedef struct vhost_scsi_dev {
+    VuDev vu_dev;
+    int server_sock;
+    GMainLoop *loop;
+    GTree *fdmap;   /* fd -> gsource context id */
+    iscsi_lun_t luns[VUS_MAX_LUNS];
+} vhost_scsi_dev_t;
+
+static vhost_scsi_dev_t *vhost_scsi_devs[VUS_MAX_DEVS];
+
+/** glib event loop integration for libvhost-user and misc callbacks **/
+
+QEMU_BUILD_BUG_ON((int)G_IO_IN != (int)VU_WATCH_IN);
+QEMU_BUILD_BUG_ON((int)G_IO_OUT != (int)VU_WATCH_OUT);
+QEMU_BUILD_BUG_ON((int)G_IO_PRI != (int)VU_WATCH_PRI);
+QEMU_BUILD_BUG_ON((int)G_IO_ERR != (int)VU_WATCH_ERR);
+QEMU_BUILD_BUG_ON((int)G_IO_HUP != (int)VU_WATCH_HUP);
+
+typedef struct vus_gsrc {
+    GSource parent;
+    vhost_scsi_dev_t *vdev_scsi;
+    GPollFD gfd;
+    vu_watch_cb vu_cb;
+} vus_gsrc_t;
+
+static gint vus_fdmap_compare(gconstpointer a, gconstpointer b)
+{
+    return (b > a) - (b < a);
+}
+
+static gboolean vus_gsrc_prepare(GSource *src, gint *timeout)
+{
+    assert(timeout);
+
+    *timeout = -1;
+    return FALSE;
+}
+
+static gboolean vus_gsrc_check(GSource *src)
+{
+    vus_gsrc_t *vus_src = (vus_gsrc_t *)src;
+
+    assert(vus_src);
+
+    return vus_src->gfd.revents & vus_src->gfd.events;
+}
+
+static gboolean vus_gsrc_dispatch(GSource *src, GSourceFunc cb, gpointer data)
+{
+    vhost_scsi_dev_t *vdev_scsi;
+    vus_gsrc_t *vus_src = (vus_gsrc_t *)src;
+
+    assert(vus_src);
+    assert(!(vus_src->vu_cb && cb));
+
+    vdev_scsi = vus_src->vdev_scsi;
+
+    assert(vdev_scsi);
+
+    if (cb) {
+        return cb(data);
+    }
+    if (vus_src->vu_cb) {
+        vus_src->vu_cb(&vdev_scsi->vu_dev, vus_src->gfd.revents, data);
+    }
+    return G_SOURCE_CONTINUE;
+}
+
+static GSourceFuncs vus_gsrc_funcs = {
+    vus_gsrc_prepare,
+    vus_gsrc_check,
+    vus_gsrc_dispatch,
+    NULL
+};
+
+static int vus_gsrc_new(vhost_scsi_dev_t *vdev_scsi, int fd, GIOCondition cond,
+                        vu_watch_cb vu_cb, GSourceFunc gsrc_cb, gpointer data)
+{
+    GSource *vus_gsrc;
+    vus_gsrc_t *vus_src;
+    guint id;
+
+    assert(vdev_scsi);
+    assert(fd >= 0);
+    assert(vu_cb || gsrc_cb);
+    assert(!(vu_cb && gsrc_cb));
+
+    vus_gsrc = g_source_new(&vus_gsrc_funcs, sizeof(vus_gsrc_t));
+    if (!vus_gsrc) {
+        PERR("Error creating GSource for new watch");
+        return -1;
+    }
+    vus_src = (vus_gsrc_t *)vus_gsrc;
+
+    vus_src->vdev_scsi = vdev_scsi;
+    vus_src->gfd.fd = fd;
+    vus_src->gfd.events = cond;
+    vus_src->vu_cb = vu_cb;
+
+    g_source_add_poll(vus_gsrc, &vus_src->gfd);
+    g_source_set_callback(vus_gsrc, gsrc_cb, data, NULL);
+    id = g_source_attach(vus_gsrc, NULL);
+    assert(id);
+    g_source_unref(vus_gsrc);
+
+    g_tree_insert(vdev_scsi->fdmap, (gpointer)(uintptr_t)fd,
+                                    (gpointer)(uintptr_t)id);
+
+    return 0;
+}
+
+/* from libiscsi's scsi-lowlevel.h **
+ *
+ * nb. We can't directly include scsi-lowlevel.h due to a namespace conflict:
+ *     QEMU's scsi.h also defines "SCSI_XFER_NONE".
+ */
+
+#define SCSI_CDB_MAX_SIZE           16
+
+struct scsi_iovector {
+    struct scsi_iovec *iov;
+    int niov;
+    int nalloc;
+    size_t offset;
+    int consumed;
+};
+
+struct scsi_allocated_memory {
+    struct scsi_allocated_memory *next;
+    char buf[0];
+};
+
+struct scsi_data {
+    int            size;
+    unsigned char *data;
+};
+
+enum scsi_sense_key {
+    SCSI_SENSE_NO_SENSE            = 0x00,
+    SCSI_SENSE_RECOVERED_ERROR     = 0x01,
+    SCSI_SENSE_NOT_READY           = 0x02,
+    SCSI_SENSE_MEDIUM_ERROR        = 0x03,
+    SCSI_SENSE_HARDWARE_ERROR      = 0x04,
+    SCSI_SENSE_ILLEGAL_REQUEST     = 0x05,
+    SCSI_SENSE_UNIT_ATTENTION      = 0x06,
+    SCSI_SENSE_DATA_PROTECTION     = 0x07,
+    SCSI_SENSE_BLANK_CHECK         = 0x08,
+    SCSI_SENSE_VENDOR_SPECIFIC     = 0x09,
+    SCSI_SENSE_COPY_ABORTED        = 0x0a,
+    SCSI_SENSE_COMMAND_ABORTED     = 0x0b,
+    SCSI_SENSE_OBSOLETE_ERROR_CODE = 0x0c,
+    SCSI_SENSE_OVERFLOW_COMMAND    = 0x0d,
+    SCSI_SENSE_MISCOMPARE          = 0x0e
+};
+
+struct scsi_sense {
+    unsigned char       error_type;
+    enum scsi_sense_key key;
+    int                 ascq;
+    unsigned            sense_specific:1;
+    unsigned            ill_param_in_cdb:1;
+    unsigned            bit_pointer_valid:1;
+    unsigned char       bit_pointer;
+    uint16_t            field_pointer;
+};
+
+enum scsi_residual {
+    SCSI_RESIDUAL_NO_RESIDUAL = 0,
+    SCSI_RESIDUAL_UNDERFLOW,
+    SCSI_RESIDUAL_OVERFLOW
+};
+
+struct scsi_task {
+    int status;
+    int cdb_size;
+    int xfer_dir;
+    int expxferlen;
+    unsigned char cdb[SCSI_CDB_MAX_SIZE];
+    enum scsi_residual residual_status;
+    size_t residual;
+    struct scsi_sense sense;
+    struct scsi_data datain;
+    struct scsi_allocated_memory *mem;
+    void *ptr;
+
+    uint32_t itt;
+    uint32_t cmdsn;
+    uint32_t lun;
+
+    struct scsi_iovector iovector_in;
+    struct scsi_iovector iovector_out;
+};
+
+/** libiscsi integration **/
+
+static int iscsi_add_lun(iscsi_lun_t *lun, char *iscsi_uri)
+{
+    struct iscsi_url *iscsi_url;
+    struct iscsi_context *iscsi_ctx;
+    int ret = 0;
+
+    assert(lun);
+    assert(iscsi_uri);
+
+    iscsi_ctx = iscsi_create_context(VUS_ISCSI_INITIATOR);
+    if (!iscsi_ctx) {
+        PERR("Unable to create iSCSI context");
+        return -1;
+    }
+
+    iscsi_url = iscsi_parse_full_url(iscsi_ctx, iscsi_uri);
+    if (!iscsi_url) {
+        PERR("Unable to parse iSCSI URL: %s", iscsi_get_error(iscsi_ctx));
+        goto fail;
+    }
+
+    iscsi_set_session_type(iscsi_ctx, ISCSI_SESSION_NORMAL);
+    iscsi_set_header_digest(iscsi_ctx, ISCSI_HEADER_DIGEST_NONE_CRC32C);
+    if (iscsi_full_connect_sync(iscsi_ctx, iscsi_url->portal, iscsi_url->lun)) {
+        PERR("Unable to login to iSCSI portal: %s", iscsi_get_error(iscsi_ctx));
+        goto fail;
+    }
+
+    lun->iscsi_ctx = iscsi_ctx;
+    lun->iscsi_lun = iscsi_url->lun;
+
+    PDBG("Context %p created for lun 0: %s", iscsi_ctx, iscsi_uri);
+
+out:
+    if (iscsi_url) {
+        iscsi_destroy_url(iscsi_url);
+    }
+    return ret;
+
+fail:
+    (void)iscsi_destroy_context(iscsi_ctx);
+    ret = -1;
+    goto out;
+}
+
+static struct scsi_task *scsi_task_new(int cdb_len, uint8_t *cdb, int dir,
+                                       int xfer_len) {
+    struct scsi_task *task;
+
+    assert(cdb_len > 0);
+    assert(cdb);
+
+    task = calloc(1, sizeof(struct scsi_task));
+    if (!task) {
+        PERR("Error allocating task: %s", strerror(errno));
+        return NULL;
+    }
+
+    memcpy(task->cdb, cdb, cdb_len);
+    task->cdb_size = cdb_len;
+    task->xfer_dir = dir;
+    task->expxferlen = xfer_len;
+
+    return task;
+}
+
+static int get_cdb_len(uint8_t *cdb)
+{
+    assert(cdb);
+
+    switch (cdb[0] >> 5) {
+    case 0: return 6;
+    case 1: /* fall through */
+    case 2: return 10;
+    case 4: return 16;
+    case 5: return 12;
+    }
+    PERR("Unable to determine cdb len (0x%02hhX)", cdb[0] >> 5);
+    return -1;
+}
+
+static int handle_cmd_sync(struct iscsi_context *ctx,
+                           VirtIOSCSICmdReq *req,
+                           struct iovec *out, unsigned int out_len,
+                           VirtIOSCSICmdResp *rsp,
+                           struct iovec *in, unsigned int in_len) {
+    struct scsi_task *task;
+    uint32_t dir;
+    uint32_t len;
+    int cdb_len;
+    int i;
+
+    assert(ctx);
+    assert(req);
+    assert(rsp);
+
+    if (!(!req->lun[1] && req->lun[2] == 0x40 && !req->lun[3])) {
+        /* Ignore anything different than target=0, lun=0 */
+        PDBG("Ignoring unconnected lun (0x%hhX, 0x%hhX)",
+             req->lun[1], req->lun[3]);
+        rsp->status = SCSI_STATUS_CHECK_CONDITION;
+        memset(rsp->sense, 0, sizeof(rsp->sense));
+        rsp->sense_len = 18;
+        rsp->sense[0] = 0x70;
+        rsp->sense[2] = SCSI_SENSE_ILLEGAL_REQUEST;
+        rsp->sense[7] = 10;
+        rsp->sense[12] = 0x24;
+
+        return 0;
+    }
+
+    cdb_len = get_cdb_len(req->cdb);
+    if (cdb_len == -1) {
+        return -1;
+    }
+
+    len = 0;
+    if (!out_len && !in_len) {
+        dir = SCSI_XFER_NONE;
+    } else if (out_len) {
+        dir = SCSI_XFER_TO_DEV;
+        for (i = 0; i < out_len; i++) {
+            len += out[i].iov_len;
+        }
+    } else {
+        dir = SCSI_XFER_FROM_DEV;
+        for (i = 0; i < in_len; i++) {
+            len += in[i].iov_len;
+        }
+    }
+
+    task = scsi_task_new(cdb_len, req->cdb, dir, len);
+    if (!task) {
+        PERR("Unable to create iscsi task");
+        return -1;
+    }
+
+    if (dir == SCSI_XFER_TO_DEV) {
+        task->iovector_out.iov = (struct scsi_iovec *)out;
+        task->iovector_out.niov = out_len;
+    } else if (dir == SCSI_XFER_FROM_DEV) {
+        task->iovector_in.iov = (struct scsi_iovec *)in;
+        task->iovector_in.niov = in_len;
+    }
+
+    PDBG("Sending iscsi cmd (cdb_len=%d, dir=%d, task=%p)",
+         cdb_len, dir, task);
+    if (!iscsi_scsi_command_sync(ctx, 0, task, NULL)) {
+        PERR("Error serving SCSI command");
+        free(task);
+        return -1;
+    }
+
+    memset(rsp, 0, sizeof(*rsp));
+
+    rsp->status = task->status;
+    rsp->resid  = task->residual;
+
+    if (task->status == SCSI_STATUS_CHECK_CONDITION) {
+        rsp->response = VIRTIO_SCSI_S_FAILURE;
+        rsp->sense_len = task->datain.size - 2;
+        memcpy(rsp->sense, &task->datain.data[2], rsp->sense_len);
+    }
+
+    free(task);
+
+    PDBG("Filled in rsp: status=%hhX, resid=%u, response=%hhX, sense_len=%u",
+         rsp->status, rsp->resid, rsp->response, rsp->sense_len);
+
+    return 0;
+}
+
+/** libvhost-user callbacks **/
+
+static vhost_scsi_dev_t *vdev_scsi_find_by_vu(VuDev *vu_dev);
+
+static void vus_panic_cb(VuDev *vu_dev, const char *buf)
+{
+    vhost_scsi_dev_t *vdev_scsi;
+
+    assert(vu_dev);
+
+    vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
+
+    if (buf) {
+        PERR("vu_panic: %s", buf);
+    }
+
+    if (vdev_scsi) {
+        assert(vdev_scsi->loop);
+        g_main_loop_quit(vdev_scsi->loop);
+    }
+}
+
+static void vus_add_watch_cb(VuDev *vu_dev, int fd, int vu_evt, vu_watch_cb cb,
+                             void *pvt) {
+    vhost_scsi_dev_t *vdev_scsi;
+    guint id;
+
+    assert(vu_dev);
+    assert(fd >= 0);
+    assert(cb);
+
+    vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
+    if (!vdev_scsi) {
+        vus_panic_cb(vu_dev, NULL);
+        return;
+    }
+
+    id = (guint)(uintptr_t)g_tree_lookup(vdev_scsi->fdmap,
+                                         (gpointer)(uintptr_t)fd);
+    if (id) {
+        GSource *vus_src = g_main_context_find_source_by_id(NULL, id);
+        assert(vus_src);
+        g_source_destroy(vus_src);
+        (void)g_tree_remove(vdev_scsi->fdmap, (gpointer)(uintptr_t)fd);
+    }
+
+    if (vus_gsrc_new(vdev_scsi, fd, vu_evt, cb, NULL, pvt)) {
+        vus_panic_cb(vu_dev, NULL);
+    }
+}
+
+static void vus_del_watch_cb(VuDev *vu_dev, int fd)
+{
+    vhost_scsi_dev_t *vdev_scsi;
+    guint id;
+
+    assert(vu_dev);
+    assert(fd >= 0);
+
+    vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
+    if (!vdev_scsi) {
+        vus_panic_cb(vu_dev, NULL);
+        return;
+    }
+
+    id = (guint)(uintptr_t)g_tree_lookup(vdev_scsi->fdmap,
+                                         (gpointer)(uintptr_t)fd);
+    if (id) {
+        GSource *vus_src = g_main_context_find_source_by_id(NULL, id);
+        assert(vus_src);
+        g_source_destroy(vus_src);
+        (void)g_tree_remove(vdev_scsi->fdmap, (gpointer)(uintptr_t)fd);
+    }
+}
+
+static void vus_proc_ctl(VuDev *vu_dev, int idx)
+{
+    /* Control VQ not implemented */
+}
+
+static void vus_proc_evt(VuDev *vu_dev, int idx)
+{
+    /* Event VQ not implemented */
+}
+
+static void vus_proc_req(VuDev *vu_dev, int idx)
+{
+    vhost_scsi_dev_t *vdev_scsi;
+    VuVirtq *vq;
+
+    assert(vu_dev);
+
+    vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
+    if (!vdev_scsi) {
+        vus_panic_cb(vu_dev, NULL);
+        return;
+    }
+
+    if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
+        PERR("VQ Index out of range: %d", idx);
+        vus_panic_cb(vu_dev, NULL);
+        return;
+    }
+
+    vq = vu_get_queue(vu_dev, idx);
+    if (!vq) {
+        PERR("Error fetching VQ (dev=%p, idx=%d)", vu_dev, idx);
+        vus_panic_cb(vu_dev, NULL);
+        return;
+    }
+
+    PDBG("Got kicked on vq[%d]@%p", idx, vq);
+
+    while (1) {
+        VuVirtqElement *elem;
+        VirtIOSCSICmdReq *req;
+        VirtIOSCSICmdResp *rsp;
+
+        elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement));
+        if (!elem) {
+            PDBG("No more elements pending on vq[%d]@%p", idx, vq);
+            break;
+        }
+        PDBG("Popped elem@%p", elem);
+
+        assert(!((elem->out_num > 1) && (elem->in_num > 1)));
+        assert((elem->out_num > 0) && (elem->in_num > 0));
+
+        if (elem->out_sg[0].iov_len < sizeof(VirtIOSCSICmdReq)) {
+            PERR("Invalid virtio-scsi req header");
+            vus_panic_cb(vu_dev, NULL);
+            break;
+        }
+        req = (VirtIOSCSICmdReq *)elem->out_sg[0].iov_base;
+
+        if (elem->in_sg[0].iov_len < sizeof(VirtIOSCSICmdResp)) {
+            PERR("Invalid virtio-scsi rsp header");
+            vus_panic_cb(vu_dev, NULL);
+            break;
+        }
+        rsp = (VirtIOSCSICmdResp *)elem->in_sg[0].iov_base;
+
+        if (handle_cmd_sync(vdev_scsi->luns[0].iscsi_ctx,
+                            req, &elem->out_sg[1], elem->out_num - 1,
+                            rsp, &elem->in_sg[1], elem->in_num - 1) != 0) {
+            vus_panic_cb(vu_dev, NULL);
+            break;
+        }
+
+        vu_queue_push(vu_dev, vq, elem, 0);
+        vu_queue_notify(vu_dev, vq);
+
+        free(elem);
+    }
+}
+
+static void vus_queue_set_started(VuDev *vu_dev, int idx, bool started)
+{
+    VuVirtq *vq;
+
+    assert(vu_dev);
+
+    if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
+        PERR("VQ Index out of range: %d", idx);
+        vus_panic_cb(vu_dev, NULL);
+        return;
+    }
+
+    vq = vu_get_queue(vu_dev, idx);
+
+    switch (idx) {
+    case 0:
+        vu_set_queue_handler(vu_dev, vq, started ? vus_proc_ctl : NULL);
+        break;
+    case 1:
+        vu_set_queue_handler(vu_dev, vq, started ? vus_proc_evt : NULL);
+        break;
+    default:
+        vu_set_queue_handler(vu_dev, vq, started ? vus_proc_req : NULL);
+    }
+}
+
+static const VuDevIface vus_iface = {
+    .queue_set_started = vus_queue_set_started,
+};
+
+static gboolean vus_vhost_cb(gpointer data)
+{
+    VuDev *vu_dev = (VuDev *)data;
+
+    assert(vu_dev);
+
+    if (!vu_dispatch(vu_dev) != 0) {
+        PERR("Error processing vhost message");
+        vus_panic_cb(vu_dev, NULL);
+        return G_SOURCE_REMOVE;
+    }
+
+    return G_SOURCE_CONTINUE;
+}
+
+/** misc helpers **/
+
+static int unix_sock_new(char *unix_fn)
+{
+    int sock;
+    struct sockaddr_un un;
+    size_t len;
+
+    assert(unix_fn);
+
+    sock = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (sock <= 0) {
+        perror("socket");
+        return -1;
+    }
+
+    un.sun_family = AF_UNIX;
+    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
+    len = sizeof(un.sun_family) + strlen(un.sun_path);
+
+    (void)unlink(unix_fn);
+    if (bind(sock, (struct sockaddr *)&un, len) < 0) {
+        perror("bind");
+        goto fail;
+    }
+
+    if (listen(sock, 1) < 0) {
+        perror("listen");
+        goto fail;
+    }
+
+    return sock;
+
+fail:
+    (void)close(sock);
+
+    return -1;
+}
+
+/** vhost-user-scsi **/
+
+static vhost_scsi_dev_t *vdev_scsi_find_by_vu(VuDev *vu_dev)
+{
+    int i;
+
+    assert(vu_dev);
+
+    for (i = 0; i < VUS_MAX_DEVS; i++) {
+        if (&vhost_scsi_devs[i]->vu_dev == vu_dev) {
+            return vhost_scsi_devs[i];
+        }
+    }
+
+    PERR("Unknown VuDev %p", vu_dev);
+    return NULL;
+}
+
+static void vdev_scsi_deinit(vhost_scsi_dev_t *vdev_scsi)
+{
+    if (!vdev_scsi) {
+        return;
+    }
+
+    if (vdev_scsi->server_sock >= 0) {
+        struct sockaddr_storage ss;
+        socklen_t sslen = sizeof(ss);
+
+        if (getsockname(vdev_scsi->server_sock, (struct sockaddr *)&ss,
+                        &sslen) == 0) {
+            struct sockaddr_un *su = (struct sockaddr_un *)&ss;
+            (void)unlink(su->sun_path);
+        }
+
+        (void)close(vdev_scsi->server_sock);
+        vdev_scsi->server_sock = -1;
+    }
+
+    if (vdev_scsi->loop) {
+        g_main_loop_unref(vdev_scsi->loop);
+        vdev_scsi->loop = NULL;
+    }
+}
+
+static vhost_scsi_dev_t *vdev_scsi_new(char *unix_fn)
+{
+    vhost_scsi_dev_t *vdev_scsi = NULL;
+
+    assert(unix_fn);
+
+    vdev_scsi = calloc(1, sizeof(vhost_scsi_dev_t));
+    if (!vdev_scsi) {
+        PERR("calloc: %s", strerror(errno));
+        return NULL;
+    }
+
+    vdev_scsi->server_sock = unix_sock_new(unix_fn);
+    if (vdev_scsi->server_sock < 0) {
+        goto err;
+    }
+
+    vdev_scsi->loop = g_main_loop_new(NULL, FALSE);
+    if (!vdev_scsi->loop) {
+        PERR("Error creating glib event loop");
+        goto err;
+    }
+
+    vdev_scsi->fdmap = g_tree_new(vus_fdmap_compare);
+    if (!vdev_scsi->fdmap) {
+        PERR("Error creating glib tree for fdmap");
+        goto err;
+    }
+
+    return vdev_scsi;
+
+err:
+    vdev_scsi_deinit(vdev_scsi);
+    free(vdev_scsi);
+
+    return NULL;
+}
+
+static int vdev_scsi_add_iscsi_lun(vhost_scsi_dev_t *vdev_scsi,
+                                   char *iscsi_uri, uint32_t lun) {
+    assert(vdev_scsi);
+    assert(iscsi_uri);
+    assert(lun < VUS_MAX_LUNS);
+
+    if (vdev_scsi->luns[lun].iscsi_ctx) {
+        PERR("Lun %d already configured", lun);
+        return -1;
+    }
+
+    if (iscsi_add_lun(&vdev_scsi->luns[lun], iscsi_uri) != 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+static int vdev_scsi_run(vhost_scsi_dev_t *vdev_scsi)
+{
+    int cli_sock;
+    int ret = 0;
+
+    assert(vdev_scsi);
+    assert(vdev_scsi->server_sock >= 0);
+    assert(vdev_scsi->loop);
+
+    cli_sock = accept(vdev_scsi->server_sock, (void *)0, (void *)0);
+    if (cli_sock < 0) {
+        perror("accept");
+        return -1;
+    }
+
+    vu_init(&vdev_scsi->vu_dev,
+            cli_sock,
+            vus_panic_cb,
+            vus_add_watch_cb,
+            vus_del_watch_cb,
+            &vus_iface);
+
+    if (vus_gsrc_new(vdev_scsi, cli_sock, G_IO_IN, NULL, vus_vhost_cb,
+                     &vdev_scsi->vu_dev)) {
+        goto fail;
+    }
+
+    g_main_loop_run(vdev_scsi->loop);
+
+out:
+    vu_deinit(&vdev_scsi->vu_dev);
+
+    return ret;
+
+fail:
+    ret = -1;
+    goto out;
+}
+
+int main(int argc, char **argv)
+{
+    vhost_scsi_dev_t *vdev_scsi = NULL;
+    char *unix_fn = NULL;
+    char *iscsi_uri = NULL;
+    int opt, err = EXIT_SUCCESS;
+
+    while ((opt = getopt(argc, argv, "u:i:")) != -1) {
+        switch (opt) {
+        case 'h':
+            goto help;
+        case 'u':
+            unix_fn = strdup(optarg);
+            break;
+        case 'i':
+            iscsi_uri = strdup(optarg);
+            break;
+        default:
+            goto help;
+        }
+    }
+    if (!unix_fn || !iscsi_uri) {
+        goto help;
+    }
+
+    vdev_scsi = vdev_scsi_new(unix_fn);
+    if (!vdev_scsi) {
+        goto err;
+    }
+    vhost_scsi_devs[0] = vdev_scsi;
+
+    if (vdev_scsi_add_iscsi_lun(vdev_scsi, iscsi_uri, 0) != 0) {
+        goto err;
+    }
+
+    if (vdev_scsi_run(vdev_scsi) != 0) {
+        goto err;
+    }
+
+out:
+    if (vdev_scsi) {
+        vdev_scsi_deinit(vdev_scsi);
+        free(vdev_scsi);
+    }
+    if (unix_fn) {
+        free(unix_fn);
+    }
+    if (iscsi_uri) {
+        free(iscsi_uri);
+    }
+
+    return err;
+
+err:
+    err = EXIT_FAILURE;
+    goto out;
+
+help:
+    fprintf(stderr, "Usage: %s [ -u unix_sock_path -i iscsi_uri ] | [ -h ]\n",
+            argv[0]);
+    fprintf(stderr, "          -u path to unix socket\n");
+    fprintf(stderr, "          -i iscsi uri for lun 0\n");
+    fprintf(stderr, "          -h print help and quit\n");
+
+    goto err;
+}
diff --git a/default-configs/pci.mak b/default-configs/pci.mak
index 3bbeb62d9a..53ff10975c 100644
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak
@@ -43,3 +43,4 @@ CONFIG_VGA=y
 CONFIG_VGA_PCI=y
 CONFIG_IVSHMEM=$(CONFIG_EVENTFD)
 CONFIG_ROCKER=y
+CONFIG_VHOST_USER_SCSI=$(CONFIG_LINUX)
diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak
index 18aed56fc0..b227a36179 100644
--- a/default-configs/s390x-softmmu.mak
+++ b/default-configs/s390x-softmmu.mak
@@ -1,5 +1,6 @@
 CONFIG_PCI=y
 CONFIG_VIRTIO_PCI=y
+CONFIG_VHOST_USER_SCSI=$(CONFIG_LINUX)
 CONFIG_VIRTIO=y
 CONFIG_SCLPCONSOLE=y
 CONFIG_TERMINAL3270=y
diff --git a/docs/specs/parallels.txt b/docs/interop/parallels.txt
index e9271eba5d..e9271eba5d 100644
--- a/docs/specs/parallels.txt
+++ b/docs/interop/parallels.txt
diff --git a/docs/specs/qcow2.txt b/docs/interop/qcow2.txt
index 80cdfd0e91..80cdfd0e91 100644
--- a/docs/specs/qcow2.txt
+++ b/docs/interop/qcow2.txt
diff --git a/docs/specs/qed_spec.txt b/docs/interop/qed_spec.txt
index 7982e058b2..7982e058b2 100644
--- a/docs/specs/qed_spec.txt
+++ b/docs/interop/qed_spec.txt
diff --git a/docs/qemu-ga-ref.texi b/docs/interop/qemu-ga-ref.texi
index ddb76ce1c2..ddb76ce1c2 100644
--- a/docs/qemu-ga-ref.texi
+++ b/docs/interop/qemu-ga-ref.texi
diff --git a/docs/qemu-qmp-ref.texi b/docs/interop/qemu-qmp-ref.texi
index bb25758bd0..bb25758bd0 100644
--- a/docs/qemu-qmp-ref.texi
+++ b/docs/interop/qemu-qmp-ref.texi
diff --git a/docs/qmp-intro.txt b/docs/interop/qmp-intro.txt
index 60deafbae6..60deafbae6 100644
--- a/docs/qmp-intro.txt
+++ b/docs/interop/qmp-intro.txt
diff --git a/docs/qmp-spec.txt b/docs/interop/qmp-spec.txt
index f8b5356015..f8b5356015 100644
--- a/docs/qmp-spec.txt
+++ b/docs/interop/qmp-spec.txt
diff --git a/docs/specs/vhost-user.txt b/docs/interop/vhost-user.txt
index 481ab56e35..481ab56e35 100644
--- a/docs/specs/vhost-user.txt
+++ b/docs/interop/vhost-user.txt
diff --git a/docs/vnc-ledstate-Pseudo-encoding.txt b/docs/interop/vnc-ledstate-Pseudo-encoding.txt
index 0f124f68b1..0f124f68b1 100644
--- a/docs/vnc-ledstate-Pseudo-encoding.txt
+++ b/docs/interop/vnc-ledstate-Pseudo-encoding.txt
diff --git a/exec.c b/exec.c
index a93e209625..42ad1eaedd 100644
--- a/exec.c
+++ b/exec.c
@@ -1482,25 +1482,17 @@ static int64_t get_file_size(int fd)
     return size;
 }
 
-static void *file_ram_alloc(RAMBlock *block,
-                            ram_addr_t memory,
-                            const char *path,
-                            Error **errp)
+static int file_ram_open(const char *path,
+                         const char *region_name,
+                         bool *created,
+                         Error **errp)
 {
-    bool unlink_on_error = false;
     char *filename;
     char *sanitized_name;
     char *c;
-    void *area = MAP_FAILED;
     int fd = -1;
-    int64_t file_size;
-
-    if (kvm_enabled() && !kvm_has_sync_mmu()) {
-        error_setg(errp,
-                   "host lacks kvm mmu notifiers, -mem-path unsupported");
-        return NULL;
-    }
 
+    *created = false;
     for (;;) {
         fd = open(path, O_RDWR);
         if (fd >= 0) {
@@ -1511,13 +1503,13 @@ static void *file_ram_alloc(RAMBlock *block,
             /* @path names a file that doesn't exist, create it */
             fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
             if (fd >= 0) {
-                unlink_on_error = true;
+                *created = true;
                 break;
             }
         } else if (errno == EISDIR) {
             /* @path names a directory, create a file there */
             /* Make name safe to use with mkstemp by replacing '/' with '_'. */
-            sanitized_name = g_strdup(memory_region_name(block->mr));
+            sanitized_name = g_strdup(region_name);
             for (c = sanitized_name; *c != '\0'; c++) {
                 if (*c == '/') {
                     *c = '_';
@@ -1540,7 +1532,7 @@ static void *file_ram_alloc(RAMBlock *block,
             error_setg_errno(errp, errno,
                              "can't open backing store %s for guest RAM",
                              path);
-            goto error;
+            return -1;
         }
         /*
          * Try again on EINTR and EEXIST.  The latter happens when
@@ -1548,6 +1540,17 @@ static void *file_ram_alloc(RAMBlock *block,
          */
     }
 
+    return fd;
+}
+
+static void *file_ram_alloc(RAMBlock *block,
+                            ram_addr_t memory,
+                            int fd,
+                            bool truncate,
+                            Error **errp)
+{
+    void *area;
+
     block->page_size = qemu_fd_getpagesize(fd);
     block->mr->align = block->page_size;
 #if defined(__s390x__)
@@ -1556,20 +1559,11 @@ static void *file_ram_alloc(RAMBlock *block,
     }
 #endif
 
-    file_size = get_file_size(fd);
-
     if (memory < block->page_size) {
         error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
                    "or larger than page size 0x%zx",
                    memory, block->page_size);
-        goto error;
-    }
-
-    if (file_size > 0 && file_size < memory) {
-        error_setg(errp, "backing store %s size 0x%" PRIx64
-                   " does not match 'size' option 0x" RAM_ADDR_FMT,
-                   path, file_size, memory);
-        goto error;
+        return NULL;
     }
 
     memory = ROUND_UP(memory, block->page_size);
@@ -1588,7 +1582,7 @@ static void *file_ram_alloc(RAMBlock *block,
      * those labels. Therefore, extending the non-empty backend file
      * is disabled as well.
      */
-    if (!file_size && ftruncate(fd, memory)) {
+    if (truncate && ftruncate(fd, memory)) {
         perror("ftruncate");
     }
 
@@ -1597,30 +1591,19 @@ static void *file_ram_alloc(RAMBlock *block,
     if (area == MAP_FAILED) {
         error_setg_errno(errp, errno,
                          "unable to map backing store for guest RAM");
-        goto error;
+        return NULL;
     }
 
     if (mem_prealloc) {
         os_mem_prealloc(fd, area, memory, smp_cpus, errp);
         if (errp && *errp) {
-            goto error;
+            qemu_ram_munmap(area, memory);
+            return NULL;
         }
     }
 
     block->fd = fd;
     return area;
-
-error:
-    if (area != MAP_FAILED) {
-        qemu_ram_munmap(area, memory);
-    }
-    if (unlink_on_error) {
-        unlink(path);
-    }
-    if (fd != -1) {
-        close(fd);
-    }
-    return NULL;
 }
 #endif
 
@@ -1931,18 +1914,25 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
 }
 
 #ifdef __linux__
-RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
-                                   bool share, const char *mem_path,
-                                   Error **errp)
+RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
+                                 bool share, int fd,
+                                 Error **errp)
 {
     RAMBlock *new_block;
     Error *local_err = NULL;
+    int64_t file_size;
 
     if (xen_enabled()) {
         error_setg(errp, "-mem-path not supported with Xen");
         return NULL;
     }
 
+    if (kvm_enabled() && !kvm_has_sync_mmu()) {
+        error_setg(errp,
+                   "host lacks kvm mmu notifiers, -mem-path unsupported");
+        return NULL;
+    }
+
     if (phys_mem_alloc != qemu_anon_ram_alloc) {
         /*
          * file_ram_alloc() needs to allocate just like
@@ -1955,13 +1945,20 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
     }
 
     size = HOST_PAGE_ALIGN(size);
+    file_size = get_file_size(fd);
+    if (file_size > 0 && file_size < size) {
+        error_setg(errp, "backing store %s size 0x%" PRIx64
+                   " does not match 'size' option 0x" RAM_ADDR_FMT,
+                   mem_path, file_size, size);
+        return NULL;
+    }
+
     new_block = g_malloc0(sizeof(*new_block));
     new_block->mr = mr;
     new_block->used_length = size;
     new_block->max_length = size;
     new_block->flags = share ? RAM_SHARED : 0;
-    new_block->host = file_ram_alloc(new_block, size,
-                                     mem_path, errp);
+    new_block->host = file_ram_alloc(new_block, size, fd, !file_size, errp);
     if (!new_block->host) {
         g_free(new_block);
         return NULL;
@@ -1974,6 +1971,33 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
         return NULL;
     }
     return new_block;
+
+}
+
+
+RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
+                                   bool share, const char *mem_path,
+                                   Error **errp)
+{
+    int fd;
+    bool created;
+    RAMBlock *block;
+
+    fd = file_ram_open(mem_path, memory_region_name(mr), &created, errp);
+    if (fd < 0) {
+        return NULL;
+    }
+
+    block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp);
+    if (!block) {
+        if (created) {
+            unlink(mem_path);
+        }
+        close(fd);
+        return NULL;
+    }
+
+    return block;
 }
 #endif
 
diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h
index 100c8a98bf..de2c5d5702 100644
--- a/fpu/softfloat-specialize.h
+++ b/fpu/softfloat-specialize.h
@@ -111,7 +111,7 @@ float16 float16_default_nan(float_status *status)
 *----------------------------------------------------------------------------*/
 float32 float32_default_nan(float_status *status)
 {
-#if defined(TARGET_SPARC)
+#if defined(TARGET_SPARC) || defined(TARGET_M68K)
     return const_float32(0x7FFFFFFF);
 #elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) || \
       defined(TARGET_XTENSA) || defined(TARGET_S390X) || defined(TARGET_TRICORE)
@@ -136,7 +136,7 @@ float32 float32_default_nan(float_status *status)
 *----------------------------------------------------------------------------*/
 float64 float64_default_nan(float_status *status)
 {
-#if defined(TARGET_SPARC)
+#if defined(TARGET_SPARC) || defined(TARGET_M68K)
     return const_float64(LIT64(0x7FFFFFFFFFFFFFFF));
 #elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) || \
       defined(TARGET_S390X)
@@ -162,7 +162,10 @@ float64 float64_default_nan(float_status *status)
 floatx80 floatx80_default_nan(float_status *status)
 {
     floatx80 r;
-
+#if defined(TARGET_M68K)
+    r.low = LIT64(0xFFFFFFFFFFFFFFFF);
+    r.high = 0x7FFF;
+#else
     if (status->snan_bit_is_one) {
         r.low = LIT64(0xBFFFFFFFFFFFFFFF);
         r.high = 0x7FFF;
@@ -170,6 +173,7 @@ floatx80 floatx80_default_nan(float_status *status)
         r.low = LIT64(0xC000000000000000);
         r.high = 0xFFFF;
     }
+#endif
     return r;
 }
 
@@ -502,6 +506,30 @@ static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
         return 1;
     }
 }
+#elif defined(TARGET_M68K)
+static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
+                   flag aIsLargerSignificand)
+{
+    /* M68000 FAMILY PROGRAMMER'S REFERENCE MANUAL
+     * 3.4 FLOATING-POINT INSTRUCTION DETAILS
+     * If either operand, but not both operands, of an operation is a
+     * nonsignaling NaN, then that NaN is returned as the result. If both
+     * operands are nonsignaling NaNs, then the destination operand
+     * nonsignaling NaN is returned as the result.
+     * If either operand to an operation is a signaling NaN (SNaN), then the
+     * SNaN bit is set in the FPSR EXC byte. If the SNaN exception enable bit
+     * is set in the FPCR ENABLE byte, then the exception is taken and the
+     * destination is not modified. If the SNaN exception enable bit is not
+     * set, setting the SNaN bit in the operand to a one converts the SNaN to
+     * a nonsignaling NaN. The operation then continues as described in the
+     * preceding paragraph for nonsignaling NaNs.
+     */
+    if (aIsQNaN || aIsSNaN) { /* a is the destination operand */
+        return 0; /* return the destination operand */
+    } else {
+        return 1; /* return b */
+    }
+}
 #else
 static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
                     flag aIsLargerSignificand)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 15610b9de8..a9b59bdce5 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -37,24 +37,6 @@
 #include "kvm_i386.h"
 #include "trace.h"
 
-/*#define DEBUG_INTEL_IOMMU*/
-#ifdef DEBUG_INTEL_IOMMU
-enum {
-    DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG,
-    DEBUG_CACHE, DEBUG_IR,
-};
-#define VTD_DBGBIT(x)   (1 << DEBUG_##x)
-static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
-
-#define VTD_DPRINTF(what, fmt, ...) do { \
-    if (vtd_dbgflags & VTD_DBGBIT(what)) { \
-        fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \
-                ## __VA_ARGS__); } \
-    } while (0)
-#else
-#define VTD_DPRINTF(what, fmt, ...) do {} while (0)
-#endif
-
 static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
                             uint64_t wmask, uint64_t w1cmask)
 {
@@ -199,9 +181,10 @@ static void vtd_reset_context_cache(IntelIOMMUState *s)
     GHashTableIter bus_it;
     uint32_t devfn_it;
 
+    trace_vtd_context_cache_reset();
+
     g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);
 
-    VTD_DPRINTF(CACHE, "global context_cache_gen=1");
     while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
         for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) {
             vtd_as = vtd_bus->dev_as[devfn_it];
@@ -291,8 +274,8 @@ static void vtd_generate_interrupt(IntelIOMMUState *s, hwaddr mesg_addr_reg,
     msi.address = vtd_get_long_raw(s, mesg_addr_reg);
     msi.data = vtd_get_long_raw(s, mesg_data_reg);
 
-    VTD_DPRINTF(FLOG, "msi: addr 0x%"PRIx64 " data 0x%"PRIx32,
-                msi.address, msi.data);
+    trace_vtd_irq_generate(msi.address, msi.data);
+
     apic_get_class()->send_msi(&msi);
 }
 
@@ -304,14 +287,14 @@ static void vtd_generate_fault_event(IntelIOMMUState *s, uint32_t pre_fsts)
 {
     if (pre_fsts & VTD_FSTS_PPF || pre_fsts & VTD_FSTS_PFO ||
         pre_fsts & VTD_FSTS_IQE) {
-        VTD_DPRINTF(FLOG, "there are previous interrupt conditions "
-                    "to be serviced by software, fault event is not generated "
-                    "(FSTS_REG 0x%"PRIx32 ")", pre_fsts);
+        trace_vtd_err("There are previous interrupt conditions "
+                      "to be serviced by software, fault event "
+                      "is not generated.");
         return;
     }
     vtd_set_clear_mask_long(s, DMAR_FECTL_REG, 0, VTD_FECTL_IP);
     if (vtd_get_long_raw(s, DMAR_FECTL_REG) & VTD_FECTL_IM) {
-        VTD_DPRINTF(FLOG, "Interrupt Mask set, fault event is not generated");
+        trace_vtd_err("Interrupt Mask set, irq is not generated.");
     } else {
         vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG);
         vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
@@ -348,7 +331,7 @@ static void vtd_update_fsts_ppf(IntelIOMMUState *s)
         }
     }
     vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_PPF, ppf_mask);
-    VTD_DPRINTF(FLOG, "set PPF of FSTS_REG to %d", ppf_mask ? 1 : 0);
+    trace_vtd_fsts_ppf(!!ppf_mask);
 }
 
 static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index)
@@ -380,8 +363,8 @@ static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
     }
     vtd_set_quad_raw(s, frcd_reg_addr, lo);
     vtd_set_quad_raw(s, frcd_reg_addr + 8, hi);
-    VTD_DPRINTF(FLOG, "record to FRCD_REG #%"PRIu16 ": hi 0x%"PRIx64
-                ", lo 0x%"PRIx64, index, hi, lo);
+
+    trace_vtd_frr_new(index, hi, lo);
 }
 
 /* Try to collapse multiple pending faults from the same requester */
@@ -393,7 +376,6 @@ static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id)
 
     for (i = 0; i < DMAR_FRCD_REG_NR; i++) {
         frcd_reg = vtd_get_quad_raw(s, addr);
-        VTD_DPRINTF(FLOG, "frcd_reg #%d 0x%"PRIx64, i, frcd_reg);
         if ((frcd_reg & VTD_FRCD_F) &&
             ((frcd_reg & VTD_FRCD_SID_MASK) == source_id)) {
             return true;
@@ -416,21 +398,24 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
         /* This is not a normal fault reason case. Drop it. */
         return;
     }
-    VTD_DPRINTF(FLOG, "sid 0x%"PRIx16 ", fault %d, addr 0x%"PRIx64
-                ", is_write %d", source_id, fault, addr, is_write);
+
+    trace_vtd_dmar_fault(source_id, fault, addr, is_write);
+
     if (fsts_reg & VTD_FSTS_PFO) {
-        VTD_DPRINTF(FLOG, "new fault is not recorded due to "
-                    "Primary Fault Overflow");
+        trace_vtd_err("New fault is not recorded due to "
+                      "Primary Fault Overflow.");
         return;
     }
+
     if (vtd_try_collapse_fault(s, source_id)) {
-        VTD_DPRINTF(FLOG, "new fault is not recorded due to "
-                    "compression of faults");
+        trace_vtd_err("New fault is not recorded due to "
+                      "compression of faults.");
         return;
     }
+
     if (vtd_is_frcd_set(s, s->next_frcd_reg)) {
-        VTD_DPRINTF(FLOG, "Primary Fault Overflow and "
-                    "new fault is not recorded, set PFO field");
+        trace_vtd_err("Next Fault Recording Reg is used, "
+                      "new fault is not recorded, set PFO field.");
         vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_PFO);
         return;
     }
@@ -438,8 +423,8 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
     vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write);
 
     if (fsts_reg & VTD_FSTS_PPF) {
-        VTD_DPRINTF(FLOG, "there are pending faults already, "
-                    "fault event is not generated");
+        trace_vtd_err("There are pending faults already, "
+                      "fault event is not generated.");
         vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg);
         s->next_frcd_reg++;
         if (s->next_frcd_reg == DMAR_FRCD_REG_NR) {
@@ -702,7 +687,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
     uint64_t access_right_check;
 
     if (!vtd_iova_range_check(iova, ce)) {
-        VTD_DPRINTF(GENERAL, "error: iova 0x%"PRIx64 " exceeds limits", iova);
+        trace_vtd_err_dmar_iova_overflow(iova);
         return -VTD_FR_ADDR_BEYOND_MGAW;
     }
 
@@ -714,9 +699,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
         slpte = vtd_get_slpte(addr, offset);
 
         if (slpte == (uint64_t)-1) {
-            VTD_DPRINTF(GENERAL, "error: fail to access second-level paging "
-                        "entry at level %"PRIu32 " for iova 0x%"PRIx64,
-                        level, iova);
+            trace_vtd_err_dmar_slpte_read_error(iova, level);
             if (level == vtd_ce_get_level(ce)) {
                 /* Invalid programming of context-entry */
                 return -VTD_FR_CONTEXT_ENTRY_INV;
@@ -727,15 +710,11 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
         *reads = (*reads) && (slpte & VTD_SL_R);
         *writes = (*writes) && (slpte & VTD_SL_W);
         if (!(slpte & access_right_check)) {
-            VTD_DPRINTF(GENERAL, "error: lack of %s permission for "
-                        "iova 0x%"PRIx64 " slpte 0x%"PRIx64,
-                        (is_write ? "write" : "read"), iova, slpte);
+            trace_vtd_err_dmar_slpte_perm_error(iova, level, slpte, is_write);
             return is_write ? -VTD_FR_WRITE : -VTD_FR_READ;
         }
         if (vtd_slpte_nonzero_rsvd(slpte, level)) {
-            VTD_DPRINTF(GENERAL, "error: non-zero reserved field in second "
-                        "level paging entry level %"PRIu32 " slpte 0x%"PRIx64,
-                        level, slpte);
+            trace_vtd_err_dmar_slpte_resv_error(iova, level, slpte);
             return -VTD_FR_PAGING_ENTRY_RSVD;
         }
 
@@ -1090,8 +1069,10 @@ out:
  * @devfn: The devfn, which is the  combined of device and function number
  * @is_write: The access is a write operation
  * @entry: IOMMUTLBEntry that contain the addr to be translated and result
+ *
+ * Returns true if translation is successful, otherwise false.
  */
-static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
+static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
                                    uint8_t devfn, hwaddr addr, bool is_write,
                                    IOMMUTLBEntry *entry)
 {
@@ -1125,6 +1106,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
         page_mask = iotlb_entry->mask;
         goto out;
     }
+
     /* Try to fetch context-entry from cache first */
     if (cc_entry->context_cache_gen == s->context_cache_gen) {
         trace_vtd_iotlb_cc_hit(bus_num, devfn, cc_entry->context_entry.hi,
@@ -1142,7 +1124,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
             } else {
                 vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
             }
-            return;
+            goto error;
         }
         /* Update context-cache */
         trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo,
@@ -1157,8 +1139,9 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
      * Also, let's ignore IOTLB caching as well for PT devices.
      */
     if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) {
+        entry->iova = addr & VTD_PAGE_MASK;
         entry->translated_addr = entry->iova;
-        entry->addr_mask = VTD_PAGE_SIZE - 1;
+        entry->addr_mask = VTD_PAGE_MASK;
         entry->perm = IOMMU_RW;
         trace_vtd_translate_pt(source_id, entry->iova);
 
@@ -1173,7 +1156,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
          */
         vtd_pt_enable_fast_path(s, source_id);
 
-        return;
+        return true;
     }
 
     ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level,
@@ -1185,7 +1168,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
         } else {
             vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
         }
-        return;
+        goto error;
     }
 
     page_mask = vtd_slpt_level_page_mask(level);
@@ -1196,6 +1179,14 @@ out:
     entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask;
     entry->addr_mask = ~page_mask;
     entry->perm = IOMMU_ACCESS_FLAG(reads, writes);
+    return true;
+
+error:
+    entry->iova = 0;
+    entry->translated_addr = 0;
+    entry->addr_mask = 0;
+    entry->perm = IOMMU_NONE;
+    return false;
 }
 
 static void vtd_root_table_setup(IntelIOMMUState *s)
@@ -1204,8 +1195,7 @@ static void vtd_root_table_setup(IntelIOMMUState *s)
     s->root_extended = s->root & VTD_RTADDR_RTT;
     s->root &= VTD_RTADDR_ADDR_MASK;
 
-    VTD_DPRINTF(CSR, "root_table addr 0x%"PRIx64 " %s", s->root,
-                (s->root_extended ? "(extended)" : ""));
+    trace_vtd_reg_dmar_root(s->root, s->root_extended);
 }
 
 static void vtd_iec_notify_all(IntelIOMMUState *s, bool global,
@@ -1225,8 +1215,7 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
     /* Notify global invalidation */
     vtd_iec_notify_all(s, true, 0, 0);
 
-    VTD_DPRINTF(CSR, "int remap table addr 0x%"PRIx64 " size %"PRIu32,
-                s->intr_root, s->intr_size);
+    trace_vtd_reg_ir_root(s->intr_root, s->intr_size);
 }
 
 static void vtd_iommu_replay_all(IntelIOMMUState *s)
@@ -1328,11 +1317,8 @@ static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val)
 
     switch (type) {
     case VTD_CCMD_DOMAIN_INVL:
-        VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
-                    (uint16_t)VTD_CCMD_DID(val));
         /* Fall through */
     case VTD_CCMD_GLOBAL_INVL:
-        VTD_DPRINTF(INV, "global invalidation");
         caig = VTD_CCMD_GLOBAL_INVL_A;
         vtd_context_global_invalidate(s);
         break;
@@ -1343,7 +1329,7 @@ static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val)
         break;
 
     default:
-        VTD_DPRINTF(GENERAL, "error: invalid granularity");
+        trace_vtd_err("Context cache invalidate type error.");
         caig = 0;
     }
     return caig;
@@ -1351,7 +1337,7 @@ static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val)
 
 static void vtd_iotlb_global_invalidate(IntelIOMMUState *s)
 {
-    trace_vtd_iotlb_reset("global invalidation recved");
+    trace_vtd_inv_desc_iotlb_global();
     vtd_reset_iotlb(s);
     vtd_iommu_replay_all(s);
 }
@@ -1362,6 +1348,8 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
     VTDContextEntry ce;
     VTDAddressSpace *vtd_as;
 
+    trace_vtd_inv_desc_iotlb_domain(domain_id);
+
     g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
                                 &domain_id);
 
@@ -1407,6 +1395,8 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
 {
     VTDIOTLBPageInvInfo info;
 
+    trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am);
+
     assert(am <= VTD_MAMV);
     info.domain_id = domain_id;
     info.addr = addr;
@@ -1429,15 +1419,12 @@ static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val)
 
     switch (type) {
     case VTD_TLB_GLOBAL_FLUSH:
-        VTD_DPRINTF(INV, "global invalidation");
         iaig = VTD_TLB_GLOBAL_FLUSH_A;
         vtd_iotlb_global_invalidate(s);
         break;
 
     case VTD_TLB_DSI_FLUSH:
         domain_id = VTD_TLB_DID(val);
-        VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
-                    domain_id);
         iaig = VTD_TLB_DSI_FLUSH_A;
         vtd_iotlb_domain_invalidate(s, domain_id);
         break;
@@ -1447,11 +1434,8 @@ static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val)
         addr = vtd_get_quad_raw(s, DMAR_IVA_REG);
         am = VTD_IVA_AM(addr);
         addr = VTD_IVA_ADDR(addr);
-        VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16
-                    " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am);
         if (am > VTD_MAMV) {
-            VTD_DPRINTF(GENERAL, "error: supported max address mask value is "
-                        "%"PRIu8, (uint8_t)VTD_MAMV);
+            trace_vtd_err("IOTLB PSI flush: address mask overflow.");
             iaig = 0;
             break;
         }
@@ -1460,7 +1444,7 @@ static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val)
         break;
 
     default:
-        VTD_DPRINTF(GENERAL, "error: invalid granularity");
+        trace_vtd_err("IOTLB flush: invalid granularity.");
         iaig = 0;
     }
     return iaig;
@@ -1481,21 +1465,19 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
 {
     uint64_t iqa_val = vtd_get_quad_raw(s, DMAR_IQA_REG);
 
-    VTD_DPRINTF(INV, "Queued Invalidation Enable %s", (en ? "on" : "off"));
+    trace_vtd_inv_qi_enable(en);
+
     if (en) {
         if (vtd_queued_inv_enable_check(s)) {
             s->iq = iqa_val & VTD_IQA_IQA_MASK;
             /* 2^(x+8) entries */
             s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8);
             s->qi_enabled = true;
-            VTD_DPRINTF(INV, "DMAR_IQA_REG 0x%"PRIx64, iqa_val);
-            VTD_DPRINTF(INV, "Invalidation Queue addr 0x%"PRIx64 " size %d",
-                        s->iq, s->iq_size);
+            trace_vtd_inv_qi_setup(s->iq, s->iq_size);
             /* Ok - report back to driver */
             vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES);
         } else {
-            VTD_DPRINTF(GENERAL, "error: can't enable Queued Invalidation: "
-                        "tail %"PRIu16, s->iq_tail);
+            trace_vtd_err_qi_enable(s->iq_tail);
         }
     } else {
         if (vtd_queued_inv_disable_check(s)) {
@@ -1506,10 +1488,7 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
             /* Ok - report back to driver */
             vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_QIES, 0);
         } else {
-            VTD_DPRINTF(GENERAL, "error: can't disable Queued Invalidation: "
-                        "head %"PRIu16 ", tail %"PRIu16
-                        ", last_descriptor %"PRIu8,
-                        s->iq_head, s->iq_tail, s->iq_last_desc_type);
+            trace_vtd_err_qi_disable(s->iq_head, s->iq_tail, s->iq_last_desc_type);
         }
     }
 }
@@ -1517,8 +1496,6 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
 /* Set Root Table Pointer */
 static void vtd_handle_gcmd_srtp(IntelIOMMUState *s)
 {
-    VTD_DPRINTF(CSR, "set Root Table Pointer");
-
     vtd_root_table_setup(s);
     /* Ok - report back to driver */
     vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS);
@@ -1527,8 +1504,6 @@ static void vtd_handle_gcmd_srtp(IntelIOMMUState *s)
 /* Set Interrupt Remap Table Pointer */
 static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s)
 {
-    VTD_DPRINTF(CSR, "set Interrupt Remap Table Pointer");
-
     vtd_interrupt_remap_table_setup(s);
     /* Ok - report back to driver */
     vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS);
@@ -1541,7 +1516,7 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
         return;
     }
 
-    VTD_DPRINTF(CSR, "Translation Enable %s", (en ? "on" : "off"));
+    trace_vtd_dmar_enable(en);
 
     if (en) {
         s->dmar_enabled = true;
@@ -1562,7 +1537,7 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
 /* Handle Interrupt Remap Enable/Disable */
 static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en)
 {
-    VTD_DPRINTF(CSR, "Interrupt Remap Enable %s", (en ? "on" : "off"));
+    trace_vtd_ir_enable(en);
 
     if (en) {
         s->intr_enabled = true;
@@ -1582,7 +1557,7 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
     uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG);
     uint32_t changed = status ^ val;
 
-    VTD_DPRINTF(CSR, "value 0x%"PRIx32 " status 0x%"PRIx32, val, status);
+    trace_vtd_reg_write_gcmd(status, val);
     if (changed & VTD_GCMD_TE) {
         /* Translation enable/disable */
         vtd_handle_gcmd_te(s, val & VTD_GCMD_TE);
@@ -1614,8 +1589,8 @@ static void vtd_handle_ccmd_write(IntelIOMMUState *s)
     /* Context-cache invalidation request */
     if (val & VTD_CCMD_ICC) {
         if (s->qi_enabled) {
-            VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, "
-                        "should not use register-based invalidation");
+            trace_vtd_err("Queued Invalidation enabled, "
+                          "should not use register-based invalidation");
             return;
         }
         ret = vtd_context_cache_invalidate(s, val);
@@ -1623,7 +1598,6 @@ static void vtd_handle_ccmd_write(IntelIOMMUState *s)
         vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_ICC, 0ULL);
         ret = vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_CAIG_MASK,
                                       ret);
-        VTD_DPRINTF(INV, "CCMD_REG write-back val: 0x%"PRIx64, ret);
     }
 }
 
@@ -1636,8 +1610,8 @@ static void vtd_handle_iotlb_write(IntelIOMMUState *s)
     /* IOTLB invalidation request */
     if (val & VTD_TLB_IVT) {
         if (s->qi_enabled) {
-            VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, "
-                        "should not use register-based invalidation");
+            trace_vtd_err("Queued Invalidation enabled, "
+                          "should not use register-based invalidation.");
             return;
         }
         ret = vtd_iotlb_flush(s, val);
@@ -1645,7 +1619,6 @@ static void vtd_handle_iotlb_write(IntelIOMMUState *s)
         vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG, VTD_TLB_IVT, 0ULL);
         ret = vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG,
                                       VTD_TLB_FLUSH_GRANU_MASK_A, ret);
-        VTD_DPRINTF(INV, "IOTLB_REG write-back val: 0x%"PRIx64, ret);
     }
 }
 
@@ -1656,11 +1629,9 @@ static bool vtd_get_inv_desc(dma_addr_t base_addr, uint32_t offset,
     dma_addr_t addr = base_addr + offset * sizeof(*inv_desc);
     if (dma_memory_read(&address_space_memory, addr, inv_desc,
         sizeof(*inv_desc))) {
-        VTD_DPRINTF(GENERAL, "error: fail to fetch Invalidation Descriptor "
-                    "base_addr 0x%"PRIx64 " offset %"PRIu32, base_addr, offset);
+        trace_vtd_err("Read INV DESC failed.");
         inv_desc->lo = 0;
         inv_desc->hi = 0;
-
         return false;
     }
     inv_desc->lo = le64_to_cpu(inv_desc->lo);
@@ -1746,13 +1717,11 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
 
     switch (inv_desc->lo & VTD_INV_DESC_IOTLB_G) {
     case VTD_INV_DESC_IOTLB_GLOBAL:
-        trace_vtd_inv_desc_iotlb_global();
         vtd_iotlb_global_invalidate(s);
         break;
 
     case VTD_INV_DESC_IOTLB_DOMAIN:
         domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
-        trace_vtd_inv_desc_iotlb_domain(domain_id);
         vtd_iotlb_domain_invalidate(s, domain_id);
         break;
 
@@ -1760,7 +1729,6 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
         domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
         addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi);
         am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi);
-        trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am);
         if (am > VTD_MAMV) {
             trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
             return false;
@@ -1778,10 +1746,9 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
 static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
                                      VTDInvDesc *inv_desc)
 {
-    VTD_DPRINTF(INV, "inv ir glob %d index %d mask %d",
-                inv_desc->iec.granularity,
-                inv_desc->iec.index,
-                inv_desc->iec.index_mask);
+    trace_vtd_inv_desc_iec(inv_desc->iec.granularity,
+                           inv_desc->iec.index,
+                           inv_desc->iec.index_mask);
 
     vtd_iec_notify_all(s, !inv_desc->iec.granularity,
                        inv_desc->iec.index,
@@ -1810,9 +1777,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
 
     if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
         (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
-        VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Device "
-                    "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
-                    inv_desc->hi, inv_desc->lo);
+        trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
         return false;
     }
 
@@ -1857,7 +1822,7 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
     VTDInvDesc inv_desc;
     uint8_t desc_type;
 
-    VTD_DPRINTF(INV, "iq head %"PRIu16, s->iq_head);
+    trace_vtd_inv_qi_head(s->iq_head);
     if (!vtd_get_inv_desc(s->iq, s->iq_head, &inv_desc)) {
         s->iq_last_desc_type = VTD_INV_DESC_NONE;
         return false;
@@ -1896,8 +1861,7 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
         break;
 
     case VTD_INV_DESC_DEVICE:
-        VTD_DPRINTF(INV, "Device IOTLB Invalidation Descriptor hi 0x%"PRIx64
-                    " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
+        trace_vtd_inv_desc("device", inv_desc.hi, inv_desc.lo);
         if (!vtd_process_device_iotlb_desc(s, &inv_desc)) {
             return false;
         }
@@ -1917,11 +1881,11 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
 /* Try to fetch and process more Invalidation Descriptors */
 static void vtd_fetch_inv_desc(IntelIOMMUState *s)
 {
-    VTD_DPRINTF(INV, "fetch Invalidation Descriptors");
+    trace_vtd_inv_qi_fetch();
+
     if (s->iq_tail >= s->iq_size) {
         /* Detects an invalid Tail pointer */
-        VTD_DPRINTF(GENERAL, "error: iq_tail is %"PRIu16
-                    " while iq_size is %"PRIu16, s->iq_tail, s->iq_size);
+        trace_vtd_err_qi_tail(s->iq_tail, s->iq_size);
         vtd_handle_inv_queue_error(s);
         return;
     }
@@ -1944,7 +1908,8 @@ static void vtd_handle_iqt_write(IntelIOMMUState *s)
     uint64_t val = vtd_get_quad_raw(s, DMAR_IQT_REG);
 
     s->iq_tail = VTD_IQT_QT(val);
-    VTD_DPRINTF(INV, "set iq tail %"PRIu16, s->iq_tail);
+    trace_vtd_inv_qi_tail(s->iq_tail);
+
     if (s->qi_enabled && !(vtd_get_long_raw(s, DMAR_FSTS_REG) & VTD_FSTS_IQE)) {
         /* Process Invalidation Queue here */
         vtd_fetch_inv_desc(s);
@@ -1959,8 +1924,7 @@ static void vtd_handle_fsts_write(IntelIOMMUState *s)
 
     if ((fectl_reg & VTD_FECTL_IP) && !(fsts_reg & status_fields)) {
         vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
-        VTD_DPRINTF(FLOG, "all pending interrupt conditions serviced, clear "
-                    "IP field of FECTL_REG");
+        trace_vtd_fsts_clear_ip();
     }
     /* FIXME: when IQE is Clear, should we try to fetch some Invalidation
      * Descriptors if there are any when Queued Invalidation is enabled?
@@ -1975,11 +1939,12 @@ static void vtd_handle_fectl_write(IntelIOMMUState *s)
      * software clears the IM field? Or just check if the IM field is zero?
      */
     fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG);
+
+    trace_vtd_reg_write_fectl(fectl_reg);
+
     if ((fectl_reg & VTD_FECTL_IP) && !(fectl_reg & VTD_FECTL_IM)) {
         vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG);
         vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
-        VTD_DPRINTF(FLOG, "IM field is cleared, generate "
-                    "fault event interrupt");
     }
 }
 
@@ -1989,9 +1954,8 @@ static void vtd_handle_ics_write(IntelIOMMUState *s)
     uint32_t iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG);
 
     if ((iectl_reg & VTD_IECTL_IP) && !(ics_reg & VTD_ICS_IWC)) {
+        trace_vtd_reg_ics_clear_ip();
         vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
-        VTD_DPRINTF(INV, "pending completion interrupt condition serviced, "
-                    "clear IP field of IECTL_REG");
     }
 }
 
@@ -2003,11 +1967,12 @@ static void vtd_handle_iectl_write(IntelIOMMUState *s)
      * software clears the IM field? Or just check if the IM field is zero?
      */
     iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG);
+
+    trace_vtd_reg_write_iectl(iectl_reg);
+
     if ((iectl_reg & VTD_IECTL_IP) && !(iectl_reg & VTD_IECTL_IM)) {
         vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
         vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
-        VTD_DPRINTF(INV, "IM field is cleared, generate "
-                    "invalidation event interrupt");
     }
 }
 
@@ -2016,10 +1981,10 @@ static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
     IntelIOMMUState *s = opaque;
     uint64_t val;
 
+    trace_vtd_reg_read(addr, size);
+
     if (addr + size > DMAR_REG_SIZE) {
-        VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64
-                    ", got 0x%"PRIx64 " %d",
-                    (uint64_t)DMAR_REG_SIZE, addr, size);
+        trace_vtd_err("Read MMIO over range.");
         return (uint64_t)-1;
     }
 
@@ -2058,8 +2023,7 @@ static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
             val = vtd_get_quad(s, addr);
         }
     }
-    VTD_DPRINTF(CSR, "addr 0x%"PRIx64 " size %d val 0x%"PRIx64,
-                addr, size, val);
+
     return val;
 }
 
@@ -2068,26 +2032,22 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
 {
     IntelIOMMUState *s = opaque;
 
+    trace_vtd_reg_write(addr, size, val);
+
     if (addr + size > DMAR_REG_SIZE) {
-        VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64
-                    ", got 0x%"PRIx64 " %d",
-                    (uint64_t)DMAR_REG_SIZE, addr, size);
+        trace_vtd_err("Write MMIO over range.");
         return;
     }
 
     switch (addr) {
     /* Global Command Register, 32-bit */
     case DMAR_GCMD_REG:
-        VTD_DPRINTF(CSR, "DMAR_GCMD_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         vtd_set_long(s, addr, val);
         vtd_handle_gcmd_write(s);
         break;
 
     /* Context Command Register, 64-bit */
     case DMAR_CCMD_REG:
-        VTD_DPRINTF(CSR, "DMAR_CCMD_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         if (size == 4) {
             vtd_set_long(s, addr, val);
         } else {
@@ -2097,8 +2057,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
         break;
 
     case DMAR_CCMD_REG_HI:
-        VTD_DPRINTF(CSR, "DMAR_CCMD_REG_HI write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         vtd_handle_ccmd_write(s);
@@ -2106,8 +2064,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
 
     /* IOTLB Invalidation Register, 64-bit */
     case DMAR_IOTLB_REG:
-        VTD_DPRINTF(INV, "DMAR_IOTLB_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         if (size == 4) {
             vtd_set_long(s, addr, val);
         } else {
@@ -2117,8 +2073,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
         break;
 
     case DMAR_IOTLB_REG_HI:
-        VTD_DPRINTF(INV, "DMAR_IOTLB_REG_HI write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         vtd_handle_iotlb_write(s);
@@ -2126,8 +2080,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
 
     /* Invalidate Address Register, 64-bit */
     case DMAR_IVA_REG:
-        VTD_DPRINTF(INV, "DMAR_IVA_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         if (size == 4) {
             vtd_set_long(s, addr, val);
         } else {
@@ -2136,16 +2088,12 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
         break;
 
     case DMAR_IVA_REG_HI:
-        VTD_DPRINTF(INV, "DMAR_IVA_REG_HI write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         break;
 
     /* Fault Status Register, 32-bit */
     case DMAR_FSTS_REG:
-        VTD_DPRINTF(FLOG, "DMAR_FSTS_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         vtd_handle_fsts_write(s);
@@ -2153,8 +2101,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
 
     /* Fault Event Control Register, 32-bit */
     case DMAR_FECTL_REG:
-        VTD_DPRINTF(FLOG, "DMAR_FECTL_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         vtd_handle_fectl_write(s);
@@ -2162,40 +2108,30 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
 
     /* Fault Event Data Register, 32-bit */
     case DMAR_FEDATA_REG:
-        VTD_DPRINTF(FLOG, "DMAR_FEDATA_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         break;
 
     /* Fault Event Address Register, 32-bit */
     case DMAR_FEADDR_REG:
-        VTD_DPRINTF(FLOG, "DMAR_FEADDR_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         break;
 
     /* Fault Event Upper Address Register, 32-bit */
     case DMAR_FEUADDR_REG:
-        VTD_DPRINTF(FLOG, "DMAR_FEUADDR_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         break;
 
     /* Protected Memory Enable Register, 32-bit */
     case DMAR_PMEN_REG:
-        VTD_DPRINTF(CSR, "DMAR_PMEN_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         break;
 
     /* Root Table Address Register, 64-bit */
     case DMAR_RTADDR_REG:
-        VTD_DPRINTF(CSR, "DMAR_RTADDR_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         if (size == 4) {
             vtd_set_long(s, addr, val);
         } else {
@@ -2204,16 +2140,12 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
         break;
 
     case DMAR_RTADDR_REG_HI:
-        VTD_DPRINTF(CSR, "DMAR_RTADDR_REG_HI write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         break;
 
     /* Invalidation Queue Tail Register, 64-bit */
     case DMAR_IQT_REG:
-        VTD_DPRINTF(INV, "DMAR_IQT_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         if (size == 4) {
             vtd_set_long(s, addr, val);
         } else {
@@ -2223,8 +2155,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
         break;
 
     case DMAR_IQT_REG_HI:
-        VTD_DPRINTF(INV, "DMAR_IQT_REG_HI write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         /* 19:63 of IQT_REG is RsvdZ, do nothing here */
@@ -2232,8 +2162,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
 
     /* Invalidation Queue Address Register, 64-bit */
     case DMAR_IQA_REG:
-        VTD_DPRINTF(INV, "DMAR_IQA_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         if (size == 4) {
             vtd_set_long(s, addr, val);
         } else {
@@ -2242,16 +2170,12 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
         break;
 
     case DMAR_IQA_REG_HI:
-        VTD_DPRINTF(INV, "DMAR_IQA_REG_HI write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         break;
 
     /* Invalidation Completion Status Register, 32-bit */
     case DMAR_ICS_REG:
-        VTD_DPRINTF(INV, "DMAR_ICS_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         vtd_handle_ics_write(s);
@@ -2259,8 +2183,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
 
     /* Invalidation Event Control Register, 32-bit */
     case DMAR_IECTL_REG:
-        VTD_DPRINTF(INV, "DMAR_IECTL_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         vtd_handle_iectl_write(s);
@@ -2268,32 +2190,24 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
 
     /* Invalidation Event Data Register, 32-bit */
     case DMAR_IEDATA_REG:
-        VTD_DPRINTF(INV, "DMAR_IEDATA_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         break;
 
     /* Invalidation Event Address Register, 32-bit */
     case DMAR_IEADDR_REG:
-        VTD_DPRINTF(INV, "DMAR_IEADDR_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         break;
 
     /* Invalidation Event Upper Address Register, 32-bit */
     case DMAR_IEUADDR_REG:
-        VTD_DPRINTF(INV, "DMAR_IEUADDR_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         break;
 
     /* Fault Recording Registers, 128-bit */
     case DMAR_FRCD_REG_0_0:
-        VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_0 write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         if (size == 4) {
             vtd_set_long(s, addr, val);
         } else {
@@ -2302,15 +2216,11 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
         break;
 
     case DMAR_FRCD_REG_0_1:
-        VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_1 write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         break;
 
     case DMAR_FRCD_REG_0_2:
-        VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_2 write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         if (size == 4) {
             vtd_set_long(s, addr, val);
         } else {
@@ -2321,8 +2231,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
         break;
 
     case DMAR_FRCD_REG_0_3:
-        VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_3 write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         /* May clear bit 127 (Fault), update PPF */
@@ -2330,8 +2238,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
         break;
 
     case DMAR_IRTA_REG:
-        VTD_DPRINTF(IR, "DMAR_IRTA_REG write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         if (size == 4) {
             vtd_set_long(s, addr, val);
         } else {
@@ -2340,15 +2246,11 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
         break;
 
     case DMAR_IRTA_REG_HI:
-        VTD_DPRINTF(IR, "DMAR_IRTA_REG_HI write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         assert(size == 4);
         vtd_set_long(s, addr, val);
         break;
 
     default:
-        VTD_DPRINTF(GENERAL, "error: unhandled reg write addr 0x%"PRIx64
-                    ", size %d, val 0x%"PRIx64, addr, size, val);
         if (size == 4) {
             vtd_set_long(s, addr, val);
         } else {
@@ -2362,31 +2264,38 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
 {
     VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
     IntelIOMMUState *s = vtd_as->iommu_state;
-    IOMMUTLBEntry ret = {
+    IOMMUTLBEntry iotlb = {
+        /* We'll fill in the rest later. */
         .target_as = &address_space_memory,
-        .iova = addr,
-        .translated_addr = 0,
-        .addr_mask = ~(hwaddr)0,
-        .perm = IOMMU_NONE,
     };
+    bool success;
 
-    if (!s->dmar_enabled) {
+    if (likely(s->dmar_enabled)) {
+        success = vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn,
+                                         addr, flag & IOMMU_WO, &iotlb);
+    } else {
         /* DMAR disabled, passthrough, use 4k-page*/
-        ret.iova = addr & VTD_PAGE_MASK_4K;
-        ret.translated_addr = addr & VTD_PAGE_MASK_4K;
-        ret.addr_mask = ~VTD_PAGE_MASK_4K;
-        ret.perm = IOMMU_RW;
-        return ret;
+        iotlb.iova = addr & VTD_PAGE_MASK_4K;
+        iotlb.translated_addr = addr & VTD_PAGE_MASK_4K;
+        iotlb.addr_mask = ~VTD_PAGE_MASK_4K;
+        iotlb.perm = IOMMU_RW;
+        success = true;
     }
 
-    vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr,
-                           flag & IOMMU_WO, &ret);
-    VTD_DPRINTF(MMU,
-                "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8
-                " iova 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus),
-                VTD_PCI_SLOT(vtd_as->devfn), VTD_PCI_FUNC(vtd_as->devfn),
-                vtd_as->devfn, addr, ret.translated_addr);
-    return ret;
+    if (likely(success)) {
+        trace_vtd_dmar_translate(pci_bus_num(vtd_as->bus),
+                                 VTD_PCI_SLOT(vtd_as->devfn),
+                                 VTD_PCI_FUNC(vtd_as->devfn),
+                                 iotlb.iova, iotlb.translated_addr,
+                                 iotlb.addr_mask);
+    } else {
+        trace_vtd_err_dmar_translate(pci_bus_num(vtd_as->bus),
+                                     VTD_PCI_SLOT(vtd_as->devfn),
+                                     VTD_PCI_FUNC(vtd_as->devfn),
+                                     iotlb.iova);
+    }
+
+    return iotlb;
 }
 
 static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu,
@@ -2484,25 +2393,23 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
     addr = iommu->intr_root + index * sizeof(*entry);
     if (dma_memory_read(&address_space_memory, addr, entry,
                         sizeof(*entry))) {
-        VTD_DPRINTF(GENERAL, "error: fail to access IR root at 0x%"PRIx64
-                    " + %"PRIu16, iommu->intr_root, index);
+        trace_vtd_err("Memory read failed for IRTE.");
         return -VTD_FR_IR_ROOT_INVAL;
     }
 
+    trace_vtd_ir_irte_get(index, le64_to_cpu(entry->data[1]),
+                          le64_to_cpu(entry->data[0]));
+
     if (!entry->irte.present) {
-        VTD_DPRINTF(GENERAL, "error: present flag not set in IRTE"
-                    " entry index %u value 0x%"PRIx64 " 0x%"PRIx64,
-                    index, le64_to_cpu(entry->data[1]),
-                    le64_to_cpu(entry->data[0]));
+        trace_vtd_err_irte(index, le64_to_cpu(entry->data[1]),
+                           le64_to_cpu(entry->data[0]));
         return -VTD_FR_IR_ENTRY_P;
     }
 
     if (entry->irte.__reserved_0 || entry->irte.__reserved_1 ||
         entry->irte.__reserved_2) {
-        VTD_DPRINTF(GENERAL, "error: IRTE entry index %"PRIu16
-                    " reserved fields non-zero: 0x%"PRIx64 " 0x%"PRIx64,
-                    index, le64_to_cpu(entry->data[1]),
-                    le64_to_cpu(entry->data[0]));
+        trace_vtd_err_irte(index, le64_to_cpu(entry->data[1]),
+                           le64_to_cpu(entry->data[0]));
         return -VTD_FR_IR_IRTE_RSVD;
     }
 
@@ -2511,15 +2418,12 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
         source_id = le32_to_cpu(entry->irte.source_id);
         switch (entry->irte.sid_vtype) {
         case VTD_SVT_NONE:
-            VTD_DPRINTF(IR, "No SID validation for IRTE index %d", index);
             break;
 
         case VTD_SVT_ALL:
             mask = vtd_svt_mask[entry->irte.sid_q];
             if ((source_id & mask) != (sid & mask)) {
-                VTD_DPRINTF(GENERAL, "SID validation for IRTE index "
-                            "%d failed (reqid 0x%04x sid 0x%04x)", index,
-                            sid, source_id);
+                trace_vtd_err_irte_sid(index, sid, source_id);
                 return -VTD_FR_IR_SID_ERR;
             }
             break;
@@ -2529,16 +2433,13 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
             bus_min = source_id & 0xff;
             bus = sid >> 8;
             if (bus > bus_max || bus < bus_min) {
-                VTD_DPRINTF(GENERAL, "SID validation for IRTE index %d "
-                            "failed (bus %d outside %d-%d)", index, bus,
-                            bus_min, bus_max);
+                trace_vtd_err_irte_sid_bus(index, bus, bus_min, bus_max);
                 return -VTD_FR_IR_SID_ERR;
             }
             break;
 
         default:
-            VTD_DPRINTF(GENERAL, "Invalid SVT bits (0x%x) in IRTE index "
-                        "%d", entry->irte.sid_vtype, index);
+            trace_vtd_err_irte_svt(index, entry->irte.sid_vtype);
             /* Take this as verification failure. */
             return -VTD_FR_IR_SID_ERR;
             break;
@@ -2573,10 +2474,8 @@ static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index,
     irq->dest_mode = irte.irte.dest_mode;
     irq->redir_hint = irte.irte.redir_hint;
 
-    VTD_DPRINTF(IR, "remapping interrupt index %d: trig:%u,vec:%u,"
-                "deliver:%u,dest:%u,dest_mode:%u", index,
-                irq->trigger_mode, irq->vector, irq->delivery_mode,
-                irq->dest, irq->dest_mode);
+    trace_vtd_ir_remap(index, irq->trigger_mode, irq->vector,
+                       irq->delivery_mode, irq->dest, irq->dest_mode);
 
     return 0;
 }
@@ -2618,28 +2517,29 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
 
     assert(origin && translated);
 
+    trace_vtd_ir_remap_msi_req(origin->address, origin->data);
+
     if (!iommu || !iommu->intr_enabled) {
-        goto do_not_translate;
+        memcpy(translated, origin, sizeof(*origin));
+        goto out;
     }
 
     if (origin->address & VTD_MSI_ADDR_HI_MASK) {
-        VTD_DPRINTF(GENERAL, "error: MSI addr high 32 bits nonzero"
-                    " during interrupt remapping: 0x%"PRIx32,
-                    (uint32_t)((origin->address & VTD_MSI_ADDR_HI_MASK) >> \
-                    VTD_MSI_ADDR_HI_SHIFT));
+        trace_vtd_err("MSI address high 32 bits non-zero when "
+                      "Interrupt Remapping enabled.");
         return -VTD_FR_IR_REQ_RSVD;
     }
 
     addr.data = origin->address & VTD_MSI_ADDR_LO_MASK;
     if (addr.addr.__head != 0xfee) {
-        VTD_DPRINTF(GENERAL, "error: MSI addr low 32 bits invalid: "
-                    "0x%"PRIx32, addr.data);
+        trace_vtd_err("MSI addr low 32 bit invalid.");
         return -VTD_FR_IR_REQ_RSVD;
     }
 
     /* This is compatible mode. */
     if (addr.addr.int_mode != VTD_IR_INT_FORMAT_REMAP) {
-        goto do_not_translate;
+        memcpy(translated, origin, sizeof(*origin));
+        goto out;
     }
 
     index = addr.addr.index_h << 15 | le16_to_cpu(addr.addr.index_l);
@@ -2658,34 +2558,28 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
     }
 
     if (addr.addr.sub_valid) {
-        VTD_DPRINTF(IR, "received MSI interrupt");
+        trace_vtd_ir_remap_type("MSI");
         if (origin->data & VTD_IR_MSI_DATA_RESERVED) {
-            VTD_DPRINTF(GENERAL, "error: MSI data bits non-zero for "
-                        "interrupt remappable entry: 0x%"PRIx32,
-                        origin->data);
+            trace_vtd_err_ir_msi_invalid(sid, origin->address, origin->data);
             return -VTD_FR_IR_REQ_RSVD;
         }
     } else {
         uint8_t vector = origin->data & 0xff;
         uint8_t trigger_mode = (origin->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
 
-        VTD_DPRINTF(IR, "received IOAPIC interrupt");
+        trace_vtd_ir_remap_type("IOAPIC");
         /* IOAPIC entry vector should be aligned with IRTE vector
          * (see vt-d spec 5.1.5.1). */
         if (vector != irq.vector) {
-            VTD_DPRINTF(GENERAL, "IOAPIC vector inconsistent: "
-                        "entry: %d, IRTE: %d, index: %d",
-                        vector, irq.vector, index);
+            trace_vtd_warn_ir_vector(sid, index, vector, irq.vector);
         }
 
         /* The Trigger Mode field must match the Trigger Mode in the IRTE.
          * (see vt-d spec 5.1.5.1). */
         if (trigger_mode != irq.trigger_mode) {
-            VTD_DPRINTF(GENERAL, "IOAPIC trigger mode inconsistent: "
-                        "entry: %u, IRTE: %u, index: %d",
-                        trigger_mode, irq.trigger_mode, index);
+            trace_vtd_warn_ir_trigger(sid, index, trigger_mode,
+                                      irq.trigger_mode);
         }
-
     }
 
     /*
@@ -2697,13 +2591,9 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
     /* Translate VTDIrq to MSI message */
     vtd_generate_msi_message(&irq, translated);
 
-    VTD_DPRINTF(IR, "mapping MSI 0x%"PRIx64":0x%"PRIx32 " -> "
-                "0x%"PRIx64":0x%"PRIx32, origin->address, origin->data,
-                translated->address, translated->data);
-    return 0;
-
-do_not_translate:
-    memcpy(translated, origin, sizeof(*origin));
+out:
+    trace_vtd_ir_remap_msi(origin->address, origin->data,
+                           translated->address, translated->data);
     return 0;
 }
 
@@ -2740,16 +2630,10 @@ static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
     ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid);
     if (ret) {
         /* TODO: report error */
-        VTD_DPRINTF(GENERAL, "int remap fail for addr 0x%"PRIx64
-                    " data 0x%"PRIx32, from.address, from.data);
         /* Drop this interrupt */
         return MEMTX_ERROR;
     }
 
-    VTD_DPRINTF(IR, "delivering MSI 0x%"PRIx64":0x%"PRIx32
-                " for device sid 0x%04x",
-                to.address, to.data, sid);
-
     apic_get_class()->send_msi(&to);
 
     return MEMTX_OK;
@@ -3052,7 +2936,6 @@ static void vtd_reset(DeviceState *dev)
 {
     IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
 
-    VTD_DPRINTF(GENERAL, "");
     vtd_init(s);
 
     /*
@@ -3125,7 +3008,6 @@ static void vtd_realize(DeviceState *dev, Error **errp)
     }
 
     bus = pcms->bus;
-    VTD_DPRINTF(GENERAL, "");
     x86_iommu->type = TYPE_INTEL;
 
     if (!vtd_decide_config(s, errp)) {
@@ -3173,7 +3055,6 @@ static const TypeInfo vtd_info = {
 
 static void vtd_register_types(void)
 {
-    VTD_DPRINTF(GENERAL, "");
     type_register_static(&vtd_info);
 }
 
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 0e73a65bf2..f50ecd8b73 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -384,6 +384,7 @@ typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo;
 /* Pagesize of VTD paging structures, including root and context tables */
 #define VTD_PAGE_SHIFT              12
 #define VTD_PAGE_SIZE               (1ULL << VTD_PAGE_SHIFT)
+#define VTD_PAGE_MASK               (VTD_PAGE_SIZE - 1)
 
 #define VTD_PAGE_SHIFT_4K           12
 #define VTD_PAGE_MASK_4K            (~((1ULL << VTD_PAGE_SHIFT_4K) - 1))
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 5b8c6fbbea..db41cca063 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1692,6 +1692,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
     PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
     MemoryRegion *mr = ddc->get_memory_region(dimm);
     uint64_t align = TARGET_PAGE_SIZE;
+    bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
 
     if (memory_region_get_alignment(mr) && pcmc->enforce_aligned_dimm) {
         align = memory_region_get_alignment(mr);
@@ -1703,17 +1704,18 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
         goto out;
     }
 
+    if (is_nvdimm && !pcms->acpi_nvdimm_state.is_enabled) {
+        error_setg(&local_err,
+                   "nvdimm is not enabled: missing 'nvdimm' in '-M'");
+        goto out;
+    }
+
     pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err);
     if (local_err) {
         goto out;
     }
 
-    if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
-        if (!pcms->acpi_nvdimm_state.is_enabled) {
-            error_setg(&local_err,
-                       "nvdimm is not enabled: missing 'nvdimm' in '-M'");
-            goto out;
-        }
+    if (is_nvdimm) {
         nvdimm_plug(&pcms->acpi_nvdimm_state);
     }
 
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 72556dad48..5f111d6dde 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -19,6 +19,13 @@ vtd_inv_desc_wait_sw(uint64_t addr, uint32_t data) "wait invalidate status write
 vtd_inv_desc_wait_irq(const char *msg) "%s"
 vtd_inv_desc_wait_invalid(uint64_t hi, uint64_t lo) "invalid wait desc hi 0x%"PRIx64" lo 0x%"PRIx64
 vtd_inv_desc_wait_write_fail(uint64_t hi, uint64_t lo) "write fail for wait desc hi 0x%"PRIx64" lo 0x%"PRIx64
+vtd_inv_desc_iec(uint32_t granularity, uint32_t index, uint32_t mask) "granularity 0x%"PRIx32" index 0x%"PRIx32" mask 0x%"PRIx32
+vtd_inv_qi_enable(bool enable) "enabled %d"
+vtd_inv_qi_setup(uint64_t addr, int size) "addr 0x%"PRIx64" size %d"
+vtd_inv_qi_head(uint16_t head) "read head %d"
+vtd_inv_qi_tail(uint16_t head) "write tail %d"
+vtd_inv_qi_fetch(void) ""
+vtd_context_cache_reset(void) ""
 vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
 vtd_re_invalid(uint64_t hi, uint64_t lo) "invalid root entry hi 0x%"PRIx64" lo 0x%"PRIx64
 vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present"
@@ -40,6 +47,43 @@ vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device
 vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64
 vtd_translate_pt(uint16_t sid, uint64_t addr) "source id 0x%"PRIu16", iova 0x%"PRIx64
 vtd_pt_enable_fast_path(uint16_t sid, bool success) "sid 0x%"PRIu16" %d"
+vtd_irq_generate(uint64_t addr, uint64_t data) "addr 0x%"PRIx64" data 0x%"PRIx64
+vtd_reg_read(uint64_t addr, uint64_t size) "addr 0x%"PRIx64" size 0x%"PRIx64
+vtd_reg_write(uint64_t addr, uint64_t size, uint64_t val) "addr 0x%"PRIx64" size 0x%"PRIx64" value 0x%"PRIx64
+vtd_reg_dmar_root(uint64_t addr, bool extended) "addr 0x%"PRIx64" extended %d"
+vtd_reg_ir_root(uint64_t addr, uint32_t size) "addr 0x%"PRIx64" size 0x%"PRIx32
+vtd_reg_write_gcmd(uint32_t status, uint32_t val) "status 0x%"PRIx32" value 0x%"PRIx32
+vtd_reg_write_fectl(uint32_t value) "value 0x%"PRIx32
+vtd_reg_write_iectl(uint32_t value) "value 0x%"PRIx32
+vtd_reg_ics_clear_ip(void) ""
+vtd_dmar_translate(uint8_t bus, uint8_t slot, uint8_t func, uint64_t iova, uint64_t gpa, uint64_t mask) "dev %02x:%02x.%02x iova 0x%"PRIx64" -> gpa 0x%"PRIx64" mask 0x%"PRIx64
+vtd_dmar_enable(bool en) "enable %d"
+vtd_dmar_fault(uint16_t sid, int fault, uint64_t addr, bool is_write) "sid 0x%"PRIx16" fault %d addr 0x%"PRIx64" write %d"
+vtd_ir_enable(bool en) "enable %d"
+vtd_ir_irte_get(int index, uint64_t lo, uint64_t hi) "index %d low 0x%"PRIx64" high 0x%"PRIx64
+vtd_ir_remap(int index, int tri, int vec, int deliver, uint32_t dest, int dest_mode) "index %d trigger %d vector %d deliver %d dest 0x%"PRIx32" mode %d"
+vtd_ir_remap_type(const char *type) "%s"
+vtd_ir_remap_msi(uint64_t addr, uint64_t data, uint64_t addr2, uint64_t data2) "(addr 0x%"PRIx64", data 0x%"PRIx64") -> (addr 0x%"PRIx64", data 0x%"PRIx64")"
+vtd_ir_remap_msi_req(uint64_t addr, uint64_t data) "addr 0x%"PRIx64" data 0x%"PRIx64
+vtd_fsts_ppf(bool set) "FSTS PPF bit set to %d"
+vtd_fsts_clear_ip(void) ""
+vtd_frr_new(int index, uint64_t hi, uint64_t lo) "index %d high 0x%"PRIx64" low 0x%"PRIx64
+vtd_err(const char *str) "%s"
+vtd_err_dmar_iova_overflow(uint64_t iova) "iova 0x%"PRIx64
+vtd_err_dmar_slpte_read_error(uint64_t iova, int level) "iova 0x%"PRIx64" level %d"
+vtd_err_dmar_slpte_perm_error(uint64_t iova, int level, uint64_t slpte, bool is_write) "iova 0x%"PRIx64" level %d slpte 0x%"PRIx64" write %d"
+vtd_err_dmar_slpte_resv_error(uint64_t iova, int level, uint64_t slpte) "iova 0x%"PRIx64" level %d slpte 0x%"PRIx64
+vtd_err_dmar_translate(uint8_t bus, uint8_t slot, uint8_t func, uint64_t iova) "dev %02x:%02x.%02x iova 0x%"PRIx64
+vtd_err_qi_enable(uint16_t tail) "tail 0x%"PRIx16
+vtd_err_qi_disable(uint16_t head, uint16_t tail, int type) "head 0x%"PRIx16" tail 0x%"PRIx16" last_desc_type %d"
+vtd_err_qi_tail(uint16_t tail, uint16_t size) "tail 0x%"PRIx16" size 0x%"PRIx16
+vtd_err_irte(int index, uint64_t lo, uint64_t hi) "index %d low 0x%"PRIx64" high 0x%"PRIx64
+vtd_err_irte_sid(int index, uint16_t req, uint16_t target) "index %d SVT_ALL sid 0x%"PRIx16" (should be: 0x%"PRIx16")"
+vtd_err_irte_sid_bus(int index, uint8_t bus, uint8_t min, uint8_t max) "index %d SVT_BUS bus 0x%"PRIx8" (should be: 0x%"PRIx8"-0x%"PRIx8")"
+vtd_err_irte_svt(int index, int type) "index %d SVT type %d"
+vtd_err_ir_msi_invalid(uint16_t sid, uint64_t addr, uint64_t data) "sid 0x%"PRIx16" addr 0x%"PRIx64" data 0x%"PRIx64
+vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target) "sid 0x%"PRIx16" index %d vec %d (should be: %d)"
+vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target) "sid 0x%"PRIx16" index %d trigger %d (should be: %d)"
 
 # hw/i386/amd_iommu.c
 amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" +  offset 0x%"PRIx32
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 6367d041f0..2f0819d977 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -491,9 +491,9 @@ static void setup_interrupt(IVShmemState *s, int vector, Error **errp)
 
 static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
 {
+    Error *local_err = NULL;
     struct stat buf;
     size_t size;
-    void *ptr;
 
     if (s->ivshmem_bar2) {
         error_setg(errp, "server sent unexpected shared memory message");
@@ -522,15 +522,13 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
     }
 
     /* mmap the region and map into the BAR2 */
-    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-    if (ptr == MAP_FAILED) {
-        error_setg_errno(errp, errno, "Failed to mmap shared memory");
-        close(fd);
+    memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s),
+                                   "ivshmem.bar2", size, true, fd, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
         return;
     }
-    memory_region_init_ram_ptr(&s->server_bar2, OBJECT(s),
-                               "ivshmem.bar2", size, ptr);
-    memory_region_set_fd(&s->server_bar2, fd);
+
     s->ivshmem_bar2 = &s->server_bar2;
 }
 
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index cd5c49616e..28cb97b60f 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -134,7 +134,7 @@ static void q35_host_get_mmcfg_size(Object *obj, Visitor *v, const char *name,
     visit_type_uint32(v, name, &value, errp);
 }
 
-static Property mch_props[] = {
+static Property q35_host_props[] = {
     DEFINE_PROP_UINT64(PCIE_HOST_MCFG_BASE, Q35PCIHost, parent_obj.base_addr,
                         MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT),
     DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, Q35PCIHost,
@@ -154,7 +154,7 @@ static void q35_host_class_init(ObjectClass *klass, void *data)
 
     hc->root_bus_path = q35_host_root_bus_path;
     dc->realize = q35_host_realize;
-    dc->props = mch_props;
+    dc->props = q35_host_props;
     /* Reason: needs to be wired up by pc_q35_init */
     dc->user_creatable = false;
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
@@ -369,7 +369,7 @@ static void mch_update_smram(MCHPCIState *mch)
             tseg_size = 1024 * 1024 * 8;
             break;
         default:
-            tseg_size = 0;
+            tseg_size = 1024 * 1024 * (uint32_t)mch->ext_tseg_mbytes;
             break;
         }
     } else {
@@ -392,6 +392,17 @@ static void mch_update_smram(MCHPCIState *mch)
     memory_region_transaction_commit();
 }
 
+static void mch_update_ext_tseg_mbytes(MCHPCIState *mch)
+{
+    PCIDevice *pd = PCI_DEVICE(mch);
+    uint8_t *reg = pd->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES;
+
+    if (mch->ext_tseg_mbytes > 0 &&
+        pci_get_word(reg) == MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY) {
+        pci_set_word(reg, mch->ext_tseg_mbytes);
+    }
+}
+
 static void mch_write_config(PCIDevice *d,
                               uint32_t address, uint32_t val, int len)
 {
@@ -413,6 +424,11 @@ static void mch_write_config(PCIDevice *d,
                        MCH_HOST_BRIDGE_SMRAM_SIZE)) {
         mch_update_smram(mch);
     }
+
+    if (ranges_overlap(address, len, MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
+                       MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_SIZE)) {
+        mch_update_ext_tseg_mbytes(mch);
+    }
 }
 
 static void mch_update(MCHPCIState *mch)
@@ -420,6 +436,7 @@ static void mch_update(MCHPCIState *mch)
     mch_update_pciexbar(mch);
     mch_update_pam(mch);
     mch_update_smram(mch);
+    mch_update_ext_tseg_mbytes(mch);
 }
 
 static int mch_post_load(void *opaque, int version_id)
@@ -457,6 +474,11 @@ static void mch_reset(DeviceState *qdev)
     d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK;
     d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK;
 
+    if (mch->ext_tseg_mbytes > 0) {
+        pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
+                     MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY);
+    }
+
     mch_update(mch);
 }
 
@@ -465,6 +487,12 @@ static void mch_realize(PCIDevice *d, Error **errp)
     int i;
     MCHPCIState *mch = MCH_PCI_DEVICE(d);
 
+    if (mch->ext_tseg_mbytes > MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_MAX) {
+        error_setg(errp, "invalid extended-tseg-mbytes value: %" PRIu16,
+                   mch->ext_tseg_mbytes);
+        return;
+    }
+
     /* setup pci memory mapping */
     pc_pci_as_mapping_init(OBJECT(mch), mch->system_memory,
                            mch->pci_address_space);
@@ -530,6 +558,12 @@ uint64_t mch_mcfg_base(void)
     return MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT;
 }
 
+static Property mch_props[] = {
+    DEFINE_PROP_UINT16("extended-tseg-mbytes", MCHPCIState, ext_tseg_mbytes,
+                       16),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void mch_class_init(ObjectClass *klass, void *data)
 {
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
@@ -538,6 +572,7 @@ static void mch_class_init(ObjectClass *klass, void *data)
     k->realize = mch_realize;
     k->config_write = mch_write_config;
     dc->reset = mch_reset;
+    dc->props = mch_props;
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
     dc->desc = "Host bridge";
     dc->vmsd = &vmstate_mch;
diff --git a/hw/scsi/Makefile.objs b/hw/scsi/Makefile.objs
index 54d8754e9a..b188f7242b 100644
--- a/hw/scsi/Makefile.objs
+++ b/hw/scsi/Makefile.objs
@@ -11,4 +11,5 @@ obj-$(CONFIG_PSERIES) += spapr_vscsi.o
 ifeq ($(CONFIG_VIRTIO),y)
 obj-y += virtio-scsi.o virtio-scsi-dataplane.o
 obj-$(CONFIG_VHOST_SCSI) += vhost-scsi-common.o vhost-scsi.o
+obj-$(CONFIG_VHOST_USER_SCSI) += vhost-scsi-common.o vhost-user-scsi.o
 endif
diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index 804122ab05..734fdaef90 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -63,6 +63,7 @@ typedef struct MegasasCmd {
 
     hwaddr pa;
     hwaddr pa_size;
+    uint32_t dcmd_opcode;
     union mfi_frame *frame;
     SCSIRequest *req;
     QEMUSGList qsg;
@@ -309,9 +310,11 @@ static int megasas_build_sense(MegasasCmd *cmd, uint8_t *sense_ptr,
     PCIDevice *pcid = PCI_DEVICE(cmd->state);
     uint32_t pa_hi = 0, pa_lo;
     hwaddr pa;
+    int frame_sense_len;
 
-    if (sense_len > cmd->frame->header.sense_len) {
-        sense_len = cmd->frame->header.sense_len;
+    frame_sense_len = cmd->frame->header.sense_len;
+    if (sense_len > frame_sense_len) {
+        sense_len = frame_sense_len;
     }
     if (sense_len) {
         pa_lo = le32_to_cpu(cmd->frame->pass.sense_addr_lo);
@@ -511,6 +514,7 @@ static MegasasCmd *megasas_enqueue_frame(MegasasState *s,
         cmd->context &= (uint64_t)0xFFFFFFFF;
     }
     cmd->count = count;
+    cmd->dcmd_opcode = -1;
     s->busy++;
 
     if (s->consumer_pa) {
@@ -605,6 +609,9 @@ static void megasas_reset_frames(MegasasState *s)
 static void megasas_abort_command(MegasasCmd *cmd)
 {
     /* Never abort internal commands.  */
+    if (cmd->dcmd_opcode != -1) {
+        return;
+    }
     if (cmd->req != NULL) {
         scsi_req_cancel(cmd->req);
     }
@@ -673,15 +680,16 @@ out:
 static int megasas_map_dcmd(MegasasState *s, MegasasCmd *cmd)
 {
     dma_addr_t iov_pa, iov_size;
+    int iov_count;
 
     cmd->flags = le16_to_cpu(cmd->frame->header.flags);
-    if (!cmd->frame->header.sge_count) {
+    iov_count = cmd->frame->header.sge_count;
+    if (!iov_count) {
         trace_megasas_dcmd_zero_sge(cmd->index);
         cmd->iov_size = 0;
         return 0;
-    } else if (cmd->frame->header.sge_count > 1) {
-        trace_megasas_dcmd_invalid_sge(cmd->index,
-                                       cmd->frame->header.sge_count);
+    } else if (iov_count > 1) {
+        trace_megasas_dcmd_invalid_sge(cmd->index, iov_count);
         cmd->iov_size = 0;
         return -EINVAL;
     }
@@ -1012,7 +1020,6 @@ static int megasas_pd_get_info_submit(SCSIDevice *sdev, int lun,
     uint64_t pd_size;
     uint16_t pd_id = ((sdev->id & 0xFF) << 8) | (lun & 0xFF);
     uint8_t cmdbuf[6];
-    SCSIRequest *req;
     size_t len, resid;
 
     if (!cmd->iov_buf) {
@@ -1021,8 +1028,8 @@ static int megasas_pd_get_info_submit(SCSIDevice *sdev, int lun,
         info->inquiry_data[0] = 0x7f; /* Force PQual 0x3, PType 0x1f */
         info->vpd_page83[0] = 0x7f;
         megasas_setup_inquiry(cmdbuf, 0, sizeof(info->inquiry_data));
-        req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd);
-        if (!req) {
+        cmd->req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd);
+        if (!cmd->req) {
             trace_megasas_dcmd_req_alloc_failed(cmd->index,
                                                 "PD get info std inquiry");
             g_free(cmd->iov_buf);
@@ -1031,26 +1038,26 @@ static int megasas_pd_get_info_submit(SCSIDevice *sdev, int lun,
         }
         trace_megasas_dcmd_internal_submit(cmd->index,
                                            "PD get info std inquiry", lun);
-        len = scsi_req_enqueue(req);
+        len = scsi_req_enqueue(cmd->req);
         if (len > 0) {
             cmd->iov_size = len;
-            scsi_req_continue(req);
+            scsi_req_continue(cmd->req);
         }
         return MFI_STAT_INVALID_STATUS;
     } else if (info->inquiry_data[0] != 0x7f && info->vpd_page83[0] == 0x7f) {
         megasas_setup_inquiry(cmdbuf, 0x83, sizeof(info->vpd_page83));
-        req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd);
-        if (!req) {
+        cmd->req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd);
+        if (!cmd->req) {
             trace_megasas_dcmd_req_alloc_failed(cmd->index,
                                                 "PD get info vpd inquiry");
             return MFI_STAT_FLASH_ALLOC_FAIL;
         }
         trace_megasas_dcmd_internal_submit(cmd->index,
                                            "PD get info vpd inquiry", lun);
-        len = scsi_req_enqueue(req);
+        len = scsi_req_enqueue(cmd->req);
         if (len > 0) {
             cmd->iov_size = len;
-            scsi_req_continue(req);
+            scsi_req_continue(cmd->req);
         }
         return MFI_STAT_INVALID_STATUS;
     }
@@ -1212,7 +1219,6 @@ static int megasas_ld_get_info_submit(SCSIDevice *sdev, int lun,
     struct mfi_ld_info *info = cmd->iov_buf;
     size_t dcmd_size = sizeof(struct mfi_ld_info);
     uint8_t cdb[6];
-    SCSIRequest *req;
     ssize_t len, resid;
     uint16_t sdev_id = ((sdev->id & 0xFF) << 8) | (lun & 0xFF);
     uint64_t ld_size;
@@ -1221,8 +1227,8 @@ static int megasas_ld_get_info_submit(SCSIDevice *sdev, int lun,
         cmd->iov_buf = g_malloc0(dcmd_size);
         info = cmd->iov_buf;
         megasas_setup_inquiry(cdb, 0x83, sizeof(info->vpd_page83));
-        req = scsi_req_new(sdev, cmd->index, lun, cdb, cmd);
-        if (!req) {
+        cmd->req = scsi_req_new(sdev, cmd->index, lun, cdb, cmd);
+        if (!cmd->req) {
             trace_megasas_dcmd_req_alloc_failed(cmd->index,
                                                 "LD get info vpd inquiry");
             g_free(cmd->iov_buf);
@@ -1231,10 +1237,10 @@ static int megasas_ld_get_info_submit(SCSIDevice *sdev, int lun,
         }
         trace_megasas_dcmd_internal_submit(cmd->index,
                                            "LD get info vpd inquiry", lun);
-        len = scsi_req_enqueue(req);
+        len = scsi_req_enqueue(cmd->req);
         if (len > 0) {
             cmd->iov_size = len;
-            scsi_req_continue(req);
+            scsi_req_continue(cmd->req);
         }
         return MFI_STAT_INVALID_STATUS;
     }
@@ -1559,22 +1565,21 @@ static const struct dcmd_cmd_tbl_t {
 
 static int megasas_handle_dcmd(MegasasState *s, MegasasCmd *cmd)
 {
-    int opcode;
     int retval = 0;
     size_t len;
     const struct dcmd_cmd_tbl_t *cmdptr = dcmd_cmd_tbl;
 
-    opcode = le32_to_cpu(cmd->frame->dcmd.opcode);
-    trace_megasas_handle_dcmd(cmd->index, opcode);
+    cmd->dcmd_opcode = le32_to_cpu(cmd->frame->dcmd.opcode);
+    trace_megasas_handle_dcmd(cmd->index, cmd->dcmd_opcode);
     if (megasas_map_dcmd(s, cmd) < 0) {
         return MFI_STAT_MEMORY_NOT_AVAILABLE;
     }
-    while (cmdptr->opcode != -1 && cmdptr->opcode != opcode) {
+    while (cmdptr->opcode != -1 && cmdptr->opcode != cmd->dcmd_opcode) {
         cmdptr++;
     }
     len = cmd->iov_size;
     if (cmdptr->opcode == -1) {
-        trace_megasas_dcmd_unhandled(cmd->index, opcode, len);
+        trace_megasas_dcmd_unhandled(cmd->index, cmd->dcmd_opcode, len);
         retval = megasas_dcmd_dummy(s, cmd);
     } else {
         trace_megasas_dcmd_enter(cmd->index, cmdptr->desc, len);
@@ -1587,15 +1592,14 @@ static int megasas_handle_dcmd(MegasasState *s, MegasasCmd *cmd)
 }
 
 static int megasas_finish_internal_dcmd(MegasasCmd *cmd,
-                                        SCSIRequest *req)
+                                        SCSIRequest *req, size_t resid)
 {
-    int opcode;
     int retval = MFI_STAT_OK;
     int lun = req->lun;
 
-    opcode = le32_to_cpu(cmd->frame->dcmd.opcode);
-    trace_megasas_dcmd_internal_finish(cmd->index, opcode, lun);
-    switch (opcode) {
+    trace_megasas_dcmd_internal_finish(cmd->index, cmd->dcmd_opcode, lun);
+    cmd->iov_size -= resid;
+    switch (cmd->dcmd_opcode) {
     case MFI_DCMD_PD_GET_INFO:
         retval = megasas_pd_get_info_submit(req->dev, lun, cmd);
         break;
@@ -1603,7 +1607,7 @@ static int megasas_finish_internal_dcmd(MegasasCmd *cmd,
         retval = megasas_ld_get_info_submit(req->dev, lun, cmd);
         break;
     default:
-        trace_megasas_dcmd_internal_invalid(cmd->index, opcode);
+        trace_megasas_dcmd_internal_invalid(cmd->index, cmd->dcmd_opcode);
         retval = MFI_STAT_INVALID_DCMD;
         break;
     }
@@ -1647,43 +1651,42 @@ static int megasas_enqueue_req(MegasasCmd *cmd, bool is_write)
 }
 
 static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd,
-                               bool is_logical)
+                               int frame_cmd)
 {
     uint8_t *cdb;
+    int target_id, lun_id, cdb_len;
     bool is_write;
     struct SCSIDevice *sdev = NULL;
+    bool is_logical = (frame_cmd == MFI_CMD_LD_SCSI_IO);
 
     cdb = cmd->frame->pass.cdb;
+    target_id = cmd->frame->header.target_id;
+    lun_id = cmd->frame->header.lun_id;
+    cdb_len = cmd->frame->header.cdb_len;
 
     if (is_logical) {
-        if (cmd->frame->header.target_id >= MFI_MAX_LD ||
-            cmd->frame->header.lun_id != 0) {
+        if (target_id >= MFI_MAX_LD || lun_id != 0) {
             trace_megasas_scsi_target_not_present(
-                mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical,
-                cmd->frame->header.target_id, cmd->frame->header.lun_id);
+                mfi_frame_desc[frame_cmd], is_logical, target_id, lun_id);
             return MFI_STAT_DEVICE_NOT_FOUND;
         }
     }
-    sdev = scsi_device_find(&s->bus, 0, cmd->frame->header.target_id,
-                            cmd->frame->header.lun_id);
+    sdev = scsi_device_find(&s->bus, 0, target_id, lun_id);
 
     cmd->iov_size = le32_to_cpu(cmd->frame->header.data_len);
-    trace_megasas_handle_scsi(mfi_frame_desc[cmd->frame->header.frame_cmd],
-                              is_logical, cmd->frame->header.target_id,
-                              cmd->frame->header.lun_id, sdev, cmd->iov_size);
+    trace_megasas_handle_scsi(mfi_frame_desc[frame_cmd], is_logical,
+                              target_id, lun_id, sdev, cmd->iov_size);
 
     if (!sdev || (megasas_is_jbod(s) && is_logical)) {
         trace_megasas_scsi_target_not_present(
-            mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical,
-            cmd->frame->header.target_id, cmd->frame->header.lun_id);
+            mfi_frame_desc[frame_cmd], is_logical, target_id, lun_id);
         return MFI_STAT_DEVICE_NOT_FOUND;
     }
 
-    if (cmd->frame->header.cdb_len > 16) {
+    if (cdb_len > 16) {
         trace_megasas_scsi_invalid_cdb_len(
-                mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical,
-                cmd->frame->header.target_id, cmd->frame->header.lun_id,
-                cmd->frame->header.cdb_len);
+                mfi_frame_desc[frame_cmd], is_logical,
+                target_id, lun_id, cdb_len);
         megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE));
         cmd->frame->header.scsi_status = CHECK_CONDITION;
         s->event_count++;
@@ -1697,12 +1700,10 @@ static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd,
         return MFI_STAT_SCSI_DONE_WITH_ERROR;
     }
 
-    cmd->req = scsi_req_new(sdev, cmd->index,
-                            cmd->frame->header.lun_id, cdb, cmd);
+    cmd->req = scsi_req_new(sdev, cmd->index, lun_id, cdb, cmd);
     if (!cmd->req) {
         trace_megasas_scsi_req_alloc_failed(
-                mfi_frame_desc[cmd->frame->header.frame_cmd],
-                cmd->frame->header.target_id, cmd->frame->header.lun_id);
+                mfi_frame_desc[frame_cmd], target_id, lun_id);
         megasas_write_sense(cmd, SENSE_CODE(NO_SENSE));
         cmd->frame->header.scsi_status = BUSY;
         s->event_count++;
@@ -1723,43 +1724,41 @@ static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd,
     return MFI_STAT_INVALID_STATUS;
 }
 
-static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd)
+static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd)
 {
     uint32_t lba_count, lba_start_hi, lba_start_lo;
     uint64_t lba_start;
-    bool is_write = (cmd->frame->header.frame_cmd == MFI_CMD_LD_WRITE);
+    bool is_write = (frame_cmd == MFI_CMD_LD_WRITE);
     uint8_t cdb[16];
     int len;
     struct SCSIDevice *sdev = NULL;
+    int target_id, lun_id, cdb_len;
 
     lba_count = le32_to_cpu(cmd->frame->io.header.data_len);
     lba_start_lo = le32_to_cpu(cmd->frame->io.lba_lo);
     lba_start_hi = le32_to_cpu(cmd->frame->io.lba_hi);
     lba_start = ((uint64_t)lba_start_hi << 32) | lba_start_lo;
 
-    if (cmd->frame->header.target_id < MFI_MAX_LD &&
-        cmd->frame->header.lun_id == 0) {
-        sdev = scsi_device_find(&s->bus, 0, cmd->frame->header.target_id,
-                                cmd->frame->header.lun_id);
+    target_id = cmd->frame->header.target_id;
+    lun_id = cmd->frame->header.lun_id;
+    cdb_len = cmd->frame->header.cdb_len;
+
+    if (target_id < MFI_MAX_LD && lun_id == 0) {
+        sdev = scsi_device_find(&s->bus, 0, target_id, lun_id);
     }
 
     trace_megasas_handle_io(cmd->index,
-                            mfi_frame_desc[cmd->frame->header.frame_cmd],
-                            cmd->frame->header.target_id,
-                            cmd->frame->header.lun_id,
+                            mfi_frame_desc[frame_cmd], target_id, lun_id,
                             (unsigned long)lba_start, (unsigned long)lba_count);
     if (!sdev) {
         trace_megasas_io_target_not_present(cmd->index,
-            mfi_frame_desc[cmd->frame->header.frame_cmd],
-            cmd->frame->header.target_id, cmd->frame->header.lun_id);
+            mfi_frame_desc[frame_cmd], target_id, lun_id);
         return MFI_STAT_DEVICE_NOT_FOUND;
     }
 
-    if (cmd->frame->header.cdb_len > 16) {
+    if (cdb_len > 16) {
         trace_megasas_scsi_invalid_cdb_len(
-            mfi_frame_desc[cmd->frame->header.frame_cmd], 1,
-            cmd->frame->header.target_id, cmd->frame->header.lun_id,
-            cmd->frame->header.cdb_len);
+            mfi_frame_desc[frame_cmd], 1, target_id, lun_id, cdb_len);
         megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE));
         cmd->frame->header.scsi_status = CHECK_CONDITION;
         s->event_count++;
@@ -1776,11 +1775,10 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd)
 
     megasas_encode_lba(cdb, lba_start, lba_count, is_write);
     cmd->req = scsi_req_new(sdev, cmd->index,
-                            cmd->frame->header.lun_id, cdb, cmd);
+                            lun_id, cdb, cmd);
     if (!cmd->req) {
         trace_megasas_scsi_req_alloc_failed(
-            mfi_frame_desc[cmd->frame->header.frame_cmd],
-            cmd->frame->header.target_id, cmd->frame->header.lun_id);
+            mfi_frame_desc[frame_cmd], target_id, lun_id);
         megasas_write_sense(cmd, SENSE_CODE(NO_SENSE));
         cmd->frame->header.scsi_status = BUSY;
         s->event_count++;
@@ -1797,23 +1795,11 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd)
     return MFI_STAT_INVALID_STATUS;
 }
 
-static int megasas_finish_internal_command(MegasasCmd *cmd,
-                                           SCSIRequest *req, size_t resid)
-{
-    int retval = MFI_STAT_INVALID_CMD;
-
-    if (cmd->frame->header.frame_cmd == MFI_CMD_DCMD) {
-        cmd->iov_size -= resid;
-        retval = megasas_finish_internal_dcmd(cmd, req);
-    }
-    return retval;
-}
-
 static QEMUSGList *megasas_get_sg_list(SCSIRequest *req)
 {
     MegasasCmd *cmd = req->hba_private;
 
-    if (cmd->frame->header.frame_cmd == MFI_CMD_DCMD) {
+    if (cmd->dcmd_opcode != -1) {
         return NULL;
     } else {
         return &cmd->qsg;
@@ -1824,18 +1810,16 @@ static void megasas_xfer_complete(SCSIRequest *req, uint32_t len)
 {
     MegasasCmd *cmd = req->hba_private;
     uint8_t *buf;
-    uint32_t opcode;
 
     trace_megasas_io_complete(cmd->index, len);
 
-    if (cmd->frame->header.frame_cmd != MFI_CMD_DCMD) {
+    if (cmd->dcmd_opcode != -1) {
         scsi_req_continue(req);
         return;
     }
 
     buf = scsi_req_get_buf(req);
-    opcode = le32_to_cpu(cmd->frame->dcmd.opcode);
-    if (opcode == MFI_DCMD_PD_GET_INFO && cmd->iov_buf) {
+    if (cmd->dcmd_opcode == MFI_DCMD_PD_GET_INFO && cmd->iov_buf) {
         struct mfi_pd_info *info = cmd->iov_buf;
 
         if (info->inquiry_data[0] == 0x7f) {
@@ -1846,7 +1830,7 @@ static void megasas_xfer_complete(SCSIRequest *req, uint32_t len)
             memcpy(info->vpd_page83, buf, len);
         }
         scsi_req_continue(req);
-    } else if (opcode == MFI_DCMD_LD_GET_INFO) {
+    } else if (cmd->dcmd_opcode == MFI_DCMD_LD_GET_INFO) {
         struct mfi_ld_info *info = cmd->iov_buf;
 
         if (cmd->iov_buf) {
@@ -1868,11 +1852,11 @@ static void megasas_command_complete(SCSIRequest *req, uint32_t status,
         return;
     }
 
-    if (cmd->req == NULL) {
+    if (cmd->dcmd_opcode != -1) {
         /*
          * Internal command complete
          */
-        cmd_status = megasas_finish_internal_command(cmd, req, resid);
+        cmd_status = megasas_finish_internal_dcmd(cmd, req, resid);
         if (cmd_status == MFI_STAT_INVALID_STATUS) {
             return;
         }
@@ -1943,6 +1927,7 @@ static void megasas_handle_frame(MegasasState *s, uint64_t frame_addr,
 {
     uint8_t frame_status = MFI_STAT_INVALID_CMD;
     uint64_t frame_context;
+    int frame_cmd;
     MegasasCmd *cmd;
 
     /*
@@ -1961,7 +1946,8 @@ static void megasas_handle_frame(MegasasState *s, uint64_t frame_addr,
         s->event_count++;
         return;
     }
-    switch (cmd->frame->header.frame_cmd) {
+    frame_cmd = cmd->frame->header.frame_cmd;
+    switch (frame_cmd) {
     case MFI_CMD_INIT:
         frame_status = megasas_init_firmware(s, cmd);
         break;
@@ -1972,18 +1958,15 @@ static void megasas_handle_frame(MegasasState *s, uint64_t frame_addr,
         frame_status = megasas_handle_abort(s, cmd);
         break;
     case MFI_CMD_PD_SCSI_IO:
-        frame_status = megasas_handle_scsi(s, cmd, 0);
-        break;
     case MFI_CMD_LD_SCSI_IO:
-        frame_status = megasas_handle_scsi(s, cmd, 1);
+        frame_status = megasas_handle_scsi(s, cmd, frame_cmd);
         break;
     case MFI_CMD_LD_READ:
     case MFI_CMD_LD_WRITE:
-        frame_status = megasas_handle_io(s, cmd);
+        frame_status = megasas_handle_io(s, cmd, frame_cmd);
         break;
     default:
-        trace_megasas_unhandled_frame_cmd(cmd->index,
-                                          cmd->frame->header.frame_cmd);
+        trace_megasas_unhandled_frame_cmd(cmd->index, frame_cmd);
         s->event_count++;
         break;
     }
diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c
index e41c0314db..d434b3e99a 100644
--- a/hw/scsi/vhost-scsi-common.c
+++ b/hw/scsi/vhost-scsi-common.c
@@ -16,7 +16,6 @@
  */
 
 #include "qemu/osdep.h"
-#include <linux/vhost.h>
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "migration/migration.h"
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
new file mode 100644
index 0000000000..500fa6a067
--- /dev/null
+++ b/hw/scsi/vhost-user-scsi.c
@@ -0,0 +1,205 @@
+/*
+ * vhost-user-scsi host device
+ *
+ * Copyright (c) 2016 Nutanix Inc. All rights reserved.
+ *
+ * Author:
+ *  Felipe Franciosi <felipe@nutanix.com>
+ *
+ * This work is largely based on the "vhost-scsi" implementation by:
+ *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com>
+ *  Nicholas Bellinger <nab@risingtidesystems.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/typedefs.h"
+#include "qom/object.h"
+#include "hw/fw-path-provider.h"
+#include "hw/qdev-core.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-backend.h"
+#include "hw/virtio/vhost-user-scsi.h"
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-access.h"
+#include "chardev/char-fe.h"
+
+/* Features supported by the host application */
+static const int user_feature_bits[] = {
+    VIRTIO_F_NOTIFY_ON_EMPTY,
+    VIRTIO_RING_F_INDIRECT_DESC,
+    VIRTIO_RING_F_EVENT_IDX,
+    VIRTIO_SCSI_F_HOTPLUG,
+    VHOST_INVALID_FEATURE_BIT
+};
+
+static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
+{
+    VHostUserSCSI *s = (VHostUserSCSI *)vdev;
+    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
+    bool start = (status & VIRTIO_CONFIG_S_DRIVER_OK) && vdev->vm_running;
+
+    if (vsc->dev.started == start) {
+        return;
+    }
+
+    if (start) {
+        int ret;
+
+        ret = vhost_scsi_common_start(vsc);
+        if (ret < 0) {
+            error_report("unable to start vhost-user-scsi: %s", strerror(-ret));
+            exit(1);
+        }
+    } else {
+        vhost_scsi_common_stop(vsc);
+    }
+}
+
+static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+}
+
+static void vhost_user_scsi_realize(DeviceState *dev, Error **errp)
+{
+    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
+    VHostUserSCSI *s = VHOST_USER_SCSI(dev);
+    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
+    Error *err = NULL;
+    int ret;
+
+    if (!vs->conf.chardev.chr) {
+        error_setg(errp, "vhost-user-scsi: missing chardev");
+        return;
+    }
+
+    virtio_scsi_common_realize(dev, vhost_dummy_handle_output,
+                               vhost_dummy_handle_output,
+                               vhost_dummy_handle_output, &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    vsc->dev.nvqs = 2 + vs->conf.num_queues;
+    vsc->dev.vqs = g_new(struct vhost_virtqueue, vsc->dev.nvqs);
+    vsc->dev.vq_index = 0;
+    vsc->dev.backend_features = 0;
+
+    ret = vhost_dev_init(&vsc->dev, (void *)&vs->conf.chardev,
+                         VHOST_BACKEND_TYPE_USER, 0);
+    if (ret < 0) {
+        error_setg(errp, "vhost-user-scsi: vhost initialization failed: %s",
+                   strerror(-ret));
+        return;
+    }
+
+    /* Channel and lun both are 0 for bootable vhost-user-scsi disk */
+    vsc->channel = 0;
+    vsc->lun = 0;
+    vsc->target = vs->conf.boot_tpgt;
+}
+
+static void vhost_user_scsi_unrealize(DeviceState *dev, Error **errp)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserSCSI *s = VHOST_USER_SCSI(dev);
+    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
+
+    /* This will stop the vhost backend. */
+    vhost_user_scsi_set_status(vdev, 0);
+
+    vhost_dev_cleanup(&vsc->dev);
+    g_free(vsc->dev.vqs);
+
+    virtio_scsi_common_unrealize(dev, errp);
+}
+
+static uint64_t vhost_user_scsi_get_features(VirtIODevice *vdev,
+                                             uint64_t features, Error **errp)
+{
+    VHostUserSCSI *s = VHOST_USER_SCSI(vdev);
+
+    /* Turn on predefined features supported by this device */
+    features |= s->host_features;
+
+    return vhost_scsi_common_get_features(vdev, features, errp);
+}
+
+static Property vhost_user_scsi_properties[] = {
+    DEFINE_PROP_CHR("chardev", VirtIOSCSICommon, conf.chardev),
+    DEFINE_PROP_UINT32("boot_tpgt", VirtIOSCSICommon, conf.boot_tpgt, 0),
+    DEFINE_PROP_UINT32("num_queues", VirtIOSCSICommon, conf.num_queues, 1),
+    DEFINE_PROP_UINT32("max_sectors", VirtIOSCSICommon, conf.max_sectors,
+                       0xFFFF),
+    DEFINE_PROP_UINT32("cmd_per_lun", VirtIOSCSICommon, conf.cmd_per_lun, 128),
+    DEFINE_PROP_BIT64("hotplug", VHostUserSCSI, host_features,
+                                                VIRTIO_SCSI_F_HOTPLUG,
+                                                true),
+    DEFINE_PROP_BIT64("param_change", VHostUserSCSI, host_features,
+                                                     VIRTIO_SCSI_F_CHANGE,
+                                                     true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static const VMStateDescription vmstate_vhost_scsi = {
+    .name = "virtio-scsi",
+    .minimum_version_id = 1,
+    .version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_VIRTIO_DEVICE,
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static void vhost_user_scsi_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+    FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(klass);
+
+    dc->props = vhost_user_scsi_properties;
+    dc->vmsd = &vmstate_vhost_scsi;
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+    vdc->realize = vhost_user_scsi_realize;
+    vdc->unrealize = vhost_user_scsi_unrealize;
+    vdc->get_features = vhost_user_scsi_get_features;
+    vdc->set_config = vhost_scsi_common_set_config;
+    vdc->set_status = vhost_user_scsi_set_status;
+    fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path;
+}
+
+static void vhost_user_scsi_instance_init(Object *obj)
+{
+    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(obj);
+
+    vsc->feature_bits = user_feature_bits;
+
+    /* Add the bootindex property for this object */
+    device_add_bootindex_property(obj, &vsc->bootindex, "bootindex", NULL,
+                                  DEVICE(vsc), NULL);
+}
+
+static const TypeInfo vhost_user_scsi_info = {
+    .name = TYPE_VHOST_USER_SCSI,
+    .parent = TYPE_VHOST_SCSI_COMMON,
+    .instance_size = sizeof(VHostUserSCSI),
+    .class_init = vhost_user_scsi_class_init,
+    .instance_init = vhost_user_scsi_instance_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_FW_PATH_PROVIDER },
+        { }
+    },
+};
+
+static void virtio_register_types(void)
+{
+    type_register_static(&vhost_user_scsi_info);
+}
+
+type_init(virtio_register_types)
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index f9b7244808..20d6a08616 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -2135,6 +2135,61 @@ static const TypeInfo vhost_scsi_pci_info = {
 };
 #endif
 
+#ifdef CONFIG_LINUX
+/* vhost-user-scsi-pci */
+static Property vhost_user_scsi_pci_properties[] = {
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
+                       DEV_NVECTORS_UNSPECIFIED),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vhost_user_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VHostUserSCSIPCI *dev = VHOST_USER_SCSI_PCI(vpci_dev);
+    DeviceState *vdev = DEVICE(&dev->vdev);
+    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev);
+
+    if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
+        vpci_dev->nvectors = vs->conf.num_queues + 3;
+    }
+
+    qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));
+    object_property_set_bool(OBJECT(vdev), true, "realized", errp);
+}
+
+static void vhost_user_scsi_pci_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
+    PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
+    k->realize = vhost_user_scsi_pci_realize;
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+    dc->props = vhost_user_scsi_pci_properties;
+    pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_SCSI;
+    pcidev_k->revision = 0x00;
+    pcidev_k->class_id = PCI_CLASS_STORAGE_SCSI;
+}
+
+static void vhost_user_scsi_pci_instance_init(Object *obj)
+{
+    VHostUserSCSIPCI *dev = VHOST_USER_SCSI_PCI(obj);
+
+    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+                                TYPE_VHOST_USER_SCSI);
+    object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev),
+                              "bootindex", &error_abort);
+}
+
+static const TypeInfo vhost_user_scsi_pci_info = {
+    .name          = TYPE_VHOST_USER_SCSI_PCI,
+    .parent        = TYPE_VIRTIO_PCI,
+    .instance_size = sizeof(VHostUserSCSIPCI),
+    .instance_init = vhost_user_scsi_pci_instance_init,
+    .class_init    = vhost_user_scsi_pci_class_init,
+};
+#endif
+
 /* vhost-vsock-pci */
 
 #ifdef CONFIG_VHOST_VSOCK
@@ -2612,6 +2667,9 @@ static void virtio_pci_register_types(void)
 #ifdef CONFIG_VHOST_SCSI
     type_register_static(&vhost_scsi_pci_info);
 #endif
+#ifdef CONFIG_LINUX
+    type_register_static(&vhost_user_scsi_pci_info);
+#endif
 #ifdef CONFIG_VHOST_VSOCK
     type_register_static(&vhost_vsock_pci_info);
 #endif
diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index b095dfc6d9..69f5959623 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -26,6 +26,7 @@
 #include "hw/virtio/virtio-input.h"
 #include "hw/virtio/virtio-gpu.h"
 #include "hw/virtio/virtio-crypto.h"
+#include "hw/virtio/vhost-user-scsi.h"
 
 #ifdef CONFIG_VIRTFS
 #include "hw/9pfs/virtio-9p.h"
@@ -44,6 +45,7 @@ typedef struct VirtIOBalloonPCI VirtIOBalloonPCI;
 typedef struct VirtIOSerialPCI VirtIOSerialPCI;
 typedef struct VirtIONetPCI VirtIONetPCI;
 typedef struct VHostSCSIPCI VHostSCSIPCI;
+typedef struct VHostUserSCSIPCI VHostUserSCSIPCI;
 typedef struct VirtIORngPCI VirtIORngPCI;
 typedef struct VirtIOInputPCI VirtIOInputPCI;
 typedef struct VirtIOInputHIDPCI VirtIOInputHIDPCI;
@@ -230,6 +232,15 @@ struct VHostSCSIPCI {
 };
 #endif
 
+#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci"
+#define VHOST_USER_SCSI_PCI(obj) \
+        OBJECT_CHECK(VHostUserSCSIPCI, (obj), TYPE_VHOST_USER_SCSI_PCI)
+
+struct VHostUserSCSIPCI {
+    VirtIOPCIProxy parent_obj;
+    VHostUserSCSI vdev;
+};
+
 /*
  * virtio-blk-pci: This extends VirtioPCIProxy.
  */
diff --git a/include/block/accounting.h b/include/block/accounting.h
index 20891639d5..b833d26d6c 100644
--- a/include/block/accounting.h
+++ b/include/block/accounting.h
@@ -26,8 +26,10 @@
 #define BLOCK_ACCOUNTING_H
 
 #include "qemu/timed-average.h"
+#include "qemu/thread.h"
 
 typedef struct BlockAcctTimedStats BlockAcctTimedStats;
+typedef struct BlockAcctStats BlockAcctStats;
 
 enum BlockAcctType {
     BLOCK_ACCT_READ,
@@ -37,12 +39,14 @@ enum BlockAcctType {
 };
 
 struct BlockAcctTimedStats {
+    BlockAcctStats *stats;
     TimedAverage latency[BLOCK_MAX_IOTYPE];
     unsigned interval_length; /* in seconds */
     QSLIST_ENTRY(BlockAcctTimedStats) entries;
 };
 
-typedef struct BlockAcctStats {
+struct BlockAcctStats {
+    QemuMutex lock;
     uint64_t nr_bytes[BLOCK_MAX_IOTYPE];
     uint64_t nr_ops[BLOCK_MAX_IOTYPE];
     uint64_t invalid_ops[BLOCK_MAX_IOTYPE];
@@ -53,7 +57,7 @@ typedef struct BlockAcctStats {
     QSLIST_HEAD(, BlockAcctTimedStats) intervals;
     bool account_invalid;
     bool account_failed;
-} BlockAcctStats;
+};
 
 typedef struct BlockAcctCookie {
     int64_t bytes;
@@ -61,7 +65,8 @@ typedef struct BlockAcctCookie {
     enum BlockAcctType type;
 } BlockAcctCookie;
 
-void block_acct_init(BlockAcctStats *stats, bool account_invalid,
+void block_acct_init(BlockAcctStats *stats);
+void block_acct_setup(BlockAcctStats *stats, bool account_invalid,
                      bool account_failed);
 void block_acct_cleanup(BlockAcctStats *stats);
 void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length);
diff --git a/include/block/block.h b/include/block/block.h
index 9b355e92d8..a4f09df95a 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -402,7 +402,8 @@ void bdrv_drain_all(void);
          * block_job_defer_to_main_loop for how to do it). \
          */                                                \
         assert(!bs_->wakeup);                              \
-        bs_->wakeup = true;                                \
+        /* Set bs->wakeup before evaluating cond.  */      \
+        atomic_mb_set(&bs_->wakeup, true);                 \
         while (busy_) {                                    \
             if ((cond)) {                                  \
                 waited_ = busy_ = true;                    \
@@ -414,7 +415,7 @@ void bdrv_drain_all(void);
                 waited_ |= busy_;                          \
             }                                              \
         }                                                  \
-        bs_->wakeup = false;                               \
+        atomic_set(&bs_->wakeup, false);                   \
     }                                                      \
     waited_; })
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index cb78c4fa82..748970055e 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -29,6 +29,7 @@
 #include "qemu/option.h"
 #include "qemu/queue.h"
 #include "qemu/coroutine.h"
+#include "qemu/stats64.h"
 #include "qemu/timer.h"
 #include "qapi-types.h"
 #include "qemu/hbitmap.h"
@@ -595,11 +596,6 @@ struct BlockDriverState {
 
     /* Protected by AioContext lock */
 
-    /* If true, copy read backing sectors into image.  Can be >1 if more
-     * than one client has requested copy-on-read.
-     */
-    int copy_on_read;
-
     /* If we are reading a disk image, give its size in sectors.
      * Generally read-only; it is written to by load_snapshot and
      * save_snaphost, but the block layer is quiescent during those.
@@ -609,34 +605,57 @@ struct BlockDriverState {
     /* Callback before write request is processed */
     NotifierWithReturnList before_write_notifiers;
 
-    /* number of in-flight requests; overall and serialising */
-    unsigned int in_flight;
-    unsigned int serialising_in_flight;
+    /* threshold limit for writes, in bytes. "High water mark". */
+    uint64_t write_threshold_offset;
+    NotifierWithReturn write_threshold_notifier;
 
-    bool wakeup;
+    /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
+     * Reading from the list can be done with either the BQL or the
+     * dirty_bitmap_mutex.  Modifying a bitmap only requires
+     * dirty_bitmap_mutex.  */
+    QemuMutex dirty_bitmap_mutex;
+    QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
 
     /* Offset after the highest byte written to */
-    uint64_t wr_highest_offset;
+    Stat64 wr_highest_offset;
 
-    /* threshold limit for writes, in bytes. "High water mark". */
-    uint64_t write_threshold_offset;
-    NotifierWithReturn write_threshold_notifier;
+    /* If true, copy read backing sectors into image.  Can be >1 if more
+     * than one client has requested copy-on-read.  Accessed with atomic
+     * ops.
+     */
+    int copy_on_read;
 
-    /* counter for nested bdrv_io_plug */
-    unsigned io_plugged;
+    /* number of in-flight requests; overall and serialising.
+     * Accessed with atomic ops.
+     */
+    unsigned int in_flight;
+    unsigned int serialising_in_flight;
 
-    QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
-    CoQueue flush_queue;                  /* Serializing flush queue */
-    bool active_flush_req;                /* Flush request in flight? */
-    unsigned int write_gen;               /* Current data generation */
-    unsigned int flushed_gen;             /* Flushed write generation */
+    /* Internal to BDRV_POLL_WHILE and bdrv_wakeup.  Accessed with atomic
+     * ops.
+     */
+    bool wakeup;
 
-    QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
+    /* counter for nested bdrv_io_plug.
+     * Accessed with atomic ops.
+    */
+    unsigned io_plugged;
 
     /* do we need to tell the quest if we have a volatile write cache? */
     int enable_write_cache;
 
+    /* Accessed with atomic ops.  */
     int quiesce_counter;
+    unsigned int write_gen;               /* Current data generation */
+
+    /* Protected by reqs_lock.  */
+    CoMutex reqs_lock;
+    QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
+    CoQueue flush_queue;                  /* Serializing flush queue */
+    bool active_flush_req;                /* Flush request in flight? */
+
+    /* Only read/written by whoever has set active_flush_req to true.  */
+    unsigned int flushed_gen;             /* Flushed write generation */
 };
 
 struct BlockBackendRootState {
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 9dea14ba03..ad6558af56 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -36,8 +36,6 @@ bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
 const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap);
 int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap);
 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
-int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
-                   int64_t sector);
 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                            int64_t cur_sector, int64_t nr_sectors);
 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
@@ -45,6 +43,9 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
 int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
                                BdrvDirtyBitmap *bitmap, int64_t sector,
                                int nb_sectors);
+int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs,
+                                      BdrvDirtyBitmap *bitmap, int64_t sector,
+                                      int nb_sectors);
 void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs,
                                   BdrvDirtyBitmap *bitmap, int64_t sector,
                                   int nb_sectors);
@@ -52,11 +53,6 @@ BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap);
 BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap,
                                          uint64_t first_sector);
 void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter);
-int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter);
-void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num);
-int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
-int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap);
-void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
 
 uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap,
                                               uint64_t start, uint64_t count);
@@ -72,4 +68,19 @@ void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
                                           bool finish);
 void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
 
+/* Functions that require manual locking.  */
+void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap);
+void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap);
+int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                          int64_t sector);
+void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
+                                  int64_t cur_sector, int64_t nr_sectors);
+void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
+                                    int64_t cur_sector, int64_t nr_sectors);
+int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter);
+void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num);
+int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
+int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap);
+void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
+
 #endif
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 416257abca..6d75d5a670 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -123,12 +123,8 @@ enum {
  * aren't overflowing some other buffer. */
 #define NBD_MAX_NAME_SIZE 256
 
-ssize_t nbd_wr_syncv(QIOChannel *ioc,
-                     struct iovec *iov,
-                     size_t niov,
-                     size_t length,
-                     bool do_read,
-                     Error **errp);
+ssize_t nbd_rwv(QIOChannel *ioc, struct iovec *iov, size_t niov, size_t length,
+                bool do_read, Error **errp);
 int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
                           QCryptoTLSCreds *tlscreds, const char *hostname,
                           QIOChannel **outioc,
@@ -162,7 +158,7 @@ void nbd_client_new(NBDExport *exp,
                     QIOChannelSocket *sioc,
                     QCryptoTLSCreds *tlscreds,
                     const char *tlsaclname,
-                    void (*close)(NBDClient *));
+                    void (*close_fn)(NBDClient *, bool));
 void nbd_client_get(NBDClient *client);
 void nbd_client_put(NBDClient *client);
 
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 80e605a96a..37f8e78e71 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -456,6 +456,26 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
                                       bool share,
                                       const char *path,
                                       Error **errp);
+
+/**
+ * memory_region_init_ram_from_fd:  Initialize RAM memory region with a
+ *                                  mmap-ed backend.
+ *
+ * @mr: the #MemoryRegion to be initialized.
+ * @owner: the object that tracks the region's reference count
+ * @name: the name of the region.
+ * @size: size of the region.
+ * @share: %true if memory must be mmaped with the MAP_SHARED flag
+ * @fd: the fd to mmap.
+ * @errp: pointer to Error*, to store an error if it happens.
+ */
+void memory_region_init_ram_from_fd(MemoryRegion *mr,
+                                    struct Object *owner,
+                                    const char *name,
+                                    uint64_t size,
+                                    bool share,
+                                    int fd,
+                                    Error **errp);
 #endif
 
 /**
@@ -805,17 +825,6 @@ static inline bool memory_region_is_rom(MemoryRegion *mr)
 int memory_region_get_fd(MemoryRegion *mr);
 
 /**
- * memory_region_set_fd: Mark a RAM memory region as backed by a
- * file descriptor.
- *
- * This function is typically used after memory_region_init_ram_ptr().
- *
- * @mr: the memory region being queried.
- * @fd: the file descriptor that backs @mr.
- */
-void memory_region_set_fd(MemoryRegion *mr, int fd);
-
-/**
  * memory_region_from_host: Convert a pointer into a RAM memory region
  * and an offset within it.
  *
diff --git a/include/exec/poison.h b/include/exec/poison.h
index 3ca7929cce..5ffed4d56e 100644
--- a/include/exec/poison.h
+++ b/include/exec/poison.h
@@ -12,17 +12,28 @@
 #pragma GCC poison TARGET_CRIS
 #pragma GCC poison TARGET_LM32
 #pragma GCC poison TARGET_M68K
+#pragma GCC poison TARGET_MICROBLAZE
 #pragma GCC poison TARGET_MIPS
+#pragma GCC poison TARGET_ABI_MIPSO32
 #pragma GCC poison TARGET_MIPS64
+#pragma GCC poison TARGET_ABI_MIPSN64
+#pragma GCC poison TARGET_MOXIE
+#pragma GCC poison TARGET_NIOS2
 #pragma GCC poison TARGET_OPENRISC
 #pragma GCC poison TARGET_PPC
 #pragma GCC poison TARGET_PPCEMB
 #pragma GCC poison TARGET_PPC64
 #pragma GCC poison TARGET_ABI32
+#pragma GCC poison TARGET_S390X
 #pragma GCC poison TARGET_SH4
 #pragma GCC poison TARGET_SPARC
 #pragma GCC poison TARGET_SPARC64
+#pragma GCC poison TARGET_TRICORE
+#pragma GCC poison TARGET_UNICORE32
+#pragma GCC poison TARGET_XTENSA
 
+#pragma GCC poison TARGET_NAME
+#pragma GCC poison TARGET_SUPPORTS_MTTCG
 #pragma GCC poison TARGET_WORDS_BIGENDIAN
 #pragma GCC poison BSWAP_NEEDED
 
@@ -50,5 +61,25 @@
 #pragma GCC poison CPU_INTERRUPT_TGT_INT_1
 #pragma GCC poison CPU_INTERRUPT_TGT_INT_2
 
+#pragma GCC poison CONFIG_ALPHA_DIS
+#pragma GCC poison CONFIG_ARM_A64_DIS
+#pragma GCC poison CONFIG_ARM_DIS
+#pragma GCC poison CONFIG_CRIS_DIS
+#pragma GCC poison CONFIG_I386_DIS
+#pragma GCC poison CONFIG_LM32_DIS
+#pragma GCC poison CONFIG_M68K_DIS
+#pragma GCC poison CONFIG_MICROBLAZE_DIS
+#pragma GCC poison CONFIG_MIPS_DIS
+#pragma GCC poison CONFIG_MOXIE_DIS
+#pragma GCC poison CONFIG_NIOS2_DIS
+#pragma GCC poison CONFIG_PPC_DIS
+#pragma GCC poison CONFIG_S390_DIS
+#pragma GCC poison CONFIG_SH4_DIS
+#pragma GCC poison CONFIG_SPARC_DIS
+#pragma GCC poison CONFIG_XTENSA_DIS
+
+#pragma GCC poison CONFIG_LINUX_USER
+#pragma GCC poison CONFIG_VHOST_NET
+
 #endif
 #endif
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 140efa840c..73d1bea8b6 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -65,6 +65,9 @@ unsigned long last_ram_page(void);
 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                    bool share, const char *mem_path,
                                    Error **errp);
+RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
+                                 bool share, int fd,
+                                 Error **errp);
 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr, Error **errp);
 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp);
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index d071c9c0e9..233216abdc 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -384,6 +384,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
 
 #define PC_COMPAT_2_9 \
     HW_COMPAT_2_9 \
+    {\
+        .driver   = "mch",\
+        .property = "extended-tseg-mbytes",\
+        .value    = stringify(0),\
+    },\
 
 #define PC_COMPAT_2_8 \
     HW_COMPAT_2_8 \
diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h
index 53b6760c16..58983c00b3 100644
--- a/include/hw/pci-host/q35.h
+++ b/include/hw/pci-host/q35.h
@@ -60,6 +60,7 @@ typedef struct MCHPCIState {
     uint64_t above_4g_mem_size;
     uint64_t pci_hole64_size;
     uint32_t short_root_bus;
+    uint16_t ext_tseg_mbytes;
 } MCHPCIState;
 
 typedef struct Q35PCIHost {
@@ -91,6 +92,11 @@ typedef struct Q35PCIHost {
 /* D0:F0 configuration space */
 #define MCH_HOST_BRIDGE_REVISION_DEFAULT       0x0
 
+#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES        0x50
+#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_SIZE   2
+#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY  0xffff
+#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_MAX    0xfff
+
 #define MCH_HOST_BRIDGE_PCIEXBAR               0x60    /* 64bit register */
 #define MCH_HOST_BRIDGE_PCIEXBAR_SIZE          8       /* 64bit register */
 #define MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT       0xb0000000
diff --git a/include/hw/virtio/vhost-user-scsi.h b/include/hw/virtio/vhost-user-scsi.h
new file mode 100644
index 0000000000..01861f78d0
--- /dev/null
+++ b/include/hw/virtio/vhost-user-scsi.h
@@ -0,0 +1,35 @@
+/*
+ * vhost-user-scsi host device
+ *
+ * Copyright (c) 2016 Nutanix Inc. All rights reserved.
+ *
+ * Author:
+ *  Felipe Franciosi <felipe@nutanix.com>
+ *
+ * This file is largely based on "vhost-scsi.h" by:
+ *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef VHOST_USER_SCSI_H
+#define VHOST_USER_SCSI_H
+
+#include "qemu-common.h"
+#include "hw/qdev.h"
+#include "hw/virtio/virtio-scsi.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-scsi-common.h"
+
+#define TYPE_VHOST_USER_SCSI "vhost-user-scsi"
+#define VHOST_USER_SCSI(obj) \
+        OBJECT_CHECK(VHostUserSCSI, (obj), TYPE_VHOST_USER_SCSI)
+
+typedef struct VHostUserSCSI {
+    VHostSCSICommon parent_obj;
+    uint64_t host_features;
+} VHostUserSCSI;
+
+#endif /* VHOST_USER_SCSI_H */
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index eac2013ddd..de6ae5a9f6 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -21,6 +21,7 @@
 #include "hw/virtio/virtio.h"
 #include "hw/pci/pci.h"
 #include "hw/scsi/scsi.h"
+#include "chardev/char-fe.h"
 #include "sysemu/iothread.h"
 
 #define TYPE_VIRTIO_SCSI_COMMON "virtio-scsi-common"
@@ -53,6 +54,7 @@ struct VirtIOSCSIConf {
     char *vhostfd;
     char *wwpn;
 #endif
+    CharBackend chardev;
     uint32_t boot_tpgt;
     IOThread *iothread;
 };
diff --git a/include/qemu/stats64.h b/include/qemu/stats64.h
new file mode 100644
index 0000000000..4a357b3e9d
--- /dev/null
+++ b/include/qemu/stats64.h
@@ -0,0 +1,193 @@
+/*
+ * Atomic operations on 64-bit quantities.
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_STATS64_H
+#define QEMU_STATS64_H 1
+
+#include "qemu/atomic.h"
+
+/* This provides atomic operations on 64-bit type, using a reader-writer
+ * spinlock on architectures that do not have 64-bit accesses.  Even on
+ * those architectures, it tries hard not to take the lock.
+ */
+
+typedef struct Stat64 {
+#ifdef CONFIG_ATOMIC64
+    uint64_t value;
+#else
+    uint32_t low, high;
+    uint32_t lock;
+#endif
+} Stat64;
+
+#ifdef CONFIG_ATOMIC64
+static inline void stat64_init(Stat64 *s, uint64_t value)
+{
+    /* This is not guaranteed to be atomic! */
+    *s = (Stat64) { value };
+}
+
+static inline uint64_t stat64_get(const Stat64 *s)
+{
+    return atomic_read__nocheck(&s->value);
+}
+
+static inline void stat64_add(Stat64 *s, uint64_t value)
+{
+    atomic_add(&s->value, value);
+}
+
+static inline void stat64_min(Stat64 *s, uint64_t value)
+{
+    uint64_t orig = atomic_read__nocheck(&s->value);
+    while (orig > value) {
+        orig = atomic_cmpxchg__nocheck(&s->value, orig, value);
+    }
+}
+
+static inline void stat64_max(Stat64 *s, uint64_t value)
+{
+    uint64_t orig = atomic_read__nocheck(&s->value);
+    while (orig < value) {
+        orig = atomic_cmpxchg__nocheck(&s->value, orig, value);
+    }
+}
+#else
+uint64_t stat64_get(const Stat64 *s);
+bool stat64_min_slow(Stat64 *s, uint64_t value);
+bool stat64_max_slow(Stat64 *s, uint64_t value);
+bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high);
+
+static inline void stat64_init(Stat64 *s, uint64_t value)
+{
+    /* This is not guaranteed to be atomic! */
+    *s = (Stat64) { .low = value, .high = value >> 32, .lock = 0 };
+}
+
+static inline void stat64_add(Stat64 *s, uint64_t value)
+{
+    uint32_t low, high;
+    high = value >> 32;
+    low = (uint32_t) value;
+    if (!low) {
+        if (high) {
+            atomic_add(&s->high, high);
+        }
+        return;
+    }
+
+    for (;;) {
+        uint32_t orig = s->low;
+        uint32_t result = orig + low;
+        uint32_t old;
+
+        if (result < low || high) {
+            /* If the high part is affected, take the lock.  */
+            if (stat64_add32_carry(s, low, high)) {
+                return;
+            }
+            continue;
+        }
+
+        /* No carry, try with a 32-bit cmpxchg.  The result is independent of
+         * the high 32 bits, so it can race just fine with stat64_add32_carry
+         * and even stat64_get!
+         */
+        old = atomic_cmpxchg(&s->low, orig, result);
+        if (orig == old) {
+            return;
+        }
+    }
+}
+
+static inline void stat64_min(Stat64 *s, uint64_t value)
+{
+    uint32_t low, high;
+    uint32_t orig_low, orig_high;
+
+    high = value >> 32;
+    low = (uint32_t) value;
+    do {
+        orig_high = atomic_read(&s->high);
+        if (orig_high < high) {
+            return;
+        }
+
+        if (orig_high == high) {
+            /* High 32 bits are equal.  Read low after high, otherwise we
+             * can get a false positive (e.g. 0x1235,0x0000 changes to
+             * 0x1234,0x8000 and we read it as 0x1234,0x0000). Pairs with
+             * the write barrier in stat64_min_slow.
+             */
+            smp_rmb();
+            orig_low = atomic_read(&s->low);
+            if (orig_low <= low) {
+                return;
+            }
+
+            /* See if we were lucky and a writer raced against us.  The
+             * barrier is theoretically unnecessary, but if we remove it
+             * we may miss being lucky.
+             */
+            smp_rmb();
+            orig_high = atomic_read(&s->high);
+            if (orig_high < high) {
+                return;
+            }
+        }
+
+        /* If the value changes in any way, we have to take the lock.  */
+    } while (!stat64_min_slow(s, value));
+}
+
+static inline void stat64_max(Stat64 *s, uint64_t value)
+{
+    uint32_t low, high;
+    uint32_t orig_low, orig_high;
+
+    high = value >> 32;
+    low = (uint32_t) value;
+    do {
+        orig_high = atomic_read(&s->high);
+        if (orig_high > high) {
+            return;
+        }
+
+        if (orig_high == high) {
+            /* High 32 bits are equal.  Read low after high, otherwise we
+             * can get a false positive (e.g. 0x1234,0x8000 changes to
+             * 0x1235,0x0000 and we read it as 0x1235,0x8000). Pairs with
+             * the write barrier in stat64_max_slow.
+             */
+            smp_rmb();
+            orig_low = atomic_read(&s->low);
+            if (orig_low >= low) {
+                return;
+            }
+
+            /* See if we were lucky and a writer raced against us.  The
+             * barrier is theoretically unnecessary, but if we remove it
+             * we may miss being lucky.
+             */
+            smp_rmb();
+            orig_high = atomic_read(&s->high);
+            if (orig_high > high) {
+                return;
+            }
+        }
+
+        /* If the value changes in any way, we have to take the lock.  */
+    } while (!stat64_max_slow(s, value));
+}
+
+#endif
+
+#endif
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 840ad6134c..999eb2333a 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -72,15 +72,13 @@ typedef struct BlockDevOps {
  * fields that must be public. This is in particular for QLIST_ENTRY() and
  * friends so that BlockBackends can be kept in lists outside block-backend.c */
 typedef struct BlockBackendPublic {
-    /* I/O throttling has its own locking, but also some fields are
-     * protected by the AioContext lock.
-     */
-
-    /* Protected by AioContext lock.  */
+    /* throttled_reqs_lock protects the CoQueues for throttled requests.  */
+    CoMutex      throttled_reqs_lock;
     CoQueue      throttled_reqs[2];
 
     /* Nonzero if the I/O limits are currently being ignored; generally
-     * it is zero.  */
+     * it is zero.  Accessed with atomic operations.
+     */
     unsigned int io_limits_disabled;
 
     /* The following fields are protected by the ThrottleGroup lock.
diff --git a/include/ui/spice-display.h b/include/ui/spice-display.h
index 184d4c373a..4ba9444dba 100644
--- a/include/ui/spice-display.h
+++ b/include/ui/spice-display.h
@@ -140,6 +140,8 @@ struct SimpleSpiceCursor {
     QXLCursor cursor;
 };
 
+extern bool spice_opengl;
+
 int qemu_spice_rect_is_empty(const QXLRect* r);
 void qemu_spice_rect_union(QXLRect *dest, const QXLRect *r);
 
diff --git a/memory.c b/memory.c
index 0ddc4cc28d..e08fa0ae6c 100644
--- a/memory.c
+++ b/memory.c
@@ -1397,6 +1397,22 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
     mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, errp);
     mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
 }
+
+void memory_region_init_ram_from_fd(MemoryRegion *mr,
+                                    struct Object *owner,
+                                    const char *name,
+                                    uint64_t size,
+                                    bool share,
+                                    int fd,
+                                    Error **errp)
+{
+    memory_region_init(mr, owner, name, size);
+    mr->ram = true;
+    mr->terminates = true;
+    mr->destructor = memory_region_destructor_ram;
+    mr->ram_block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp);
+    mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
+}
 #endif
 
 void memory_region_init_ram_ptr(MemoryRegion *mr,
@@ -1835,16 +1851,6 @@ int memory_region_get_fd(MemoryRegion *mr)
     return fd;
 }
 
-void memory_region_set_fd(MemoryRegion *mr, int fd)
-{
-    rcu_read_lock();
-    while (mr->alias) {
-        mr = mr->alias;
-    }
-    mr->ram_block->fd = fd;
-    rcu_read_unlock();
-}
-
 void *memory_region_get_ram_ptr(MemoryRegion *mr)
 {
     void *ptr;
diff --git a/migration/block.c b/migration/block.c
index 3aae5a375e..7674ae1078 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -341,10 +341,8 @@ static int set_dirty_tracking(void)
     int ret;
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        aio_context_acquire(blk_get_aio_context(bmds->blk));
         bmds->dirty_bitmap = bdrv_create_dirty_bitmap(blk_bs(bmds->blk),
                                                       BLOCK_SIZE, NULL, NULL);
-        aio_context_release(blk_get_aio_context(bmds->blk));
         if (!bmds->dirty_bitmap) {
             ret = -errno;
             goto fail;
@@ -355,9 +353,7 @@ static int set_dirty_tracking(void)
 fail:
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
         if (bmds->dirty_bitmap) {
-            aio_context_acquire(blk_get_aio_context(bmds->blk));
             bdrv_release_dirty_bitmap(blk_bs(bmds->blk), bmds->dirty_bitmap);
-            aio_context_release(blk_get_aio_context(bmds->blk));
         }
     }
     return ret;
@@ -370,9 +366,7 @@ static void unset_dirty_tracking(void)
     BlkMigDevState *bmds;
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        aio_context_acquire(blk_get_aio_context(bmds->blk));
         bdrv_release_dirty_bitmap(blk_bs(bmds->blk), bmds->dirty_bitmap);
-        aio_context_release(blk_get_aio_context(bmds->blk));
     }
 }
 
@@ -531,13 +525,16 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
         } else {
             blk_mig_unlock();
         }
-        if (bdrv_get_dirty(bs, bmds->dirty_bitmap, sector)) {
-
+        bdrv_dirty_bitmap_lock(bmds->dirty_bitmap);
+        if (bdrv_get_dirty_locked(bs, bmds->dirty_bitmap, sector)) {
             if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                 nr_sectors = total_sectors - sector;
             } else {
                 nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
             }
+            bdrv_reset_dirty_bitmap_locked(bmds->dirty_bitmap, sector, nr_sectors);
+            bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap);
+
             blk = g_new(BlkMigBlock, 1);
             blk->buf = g_malloc(BLOCK_SIZE);
             blk->bmds = bmds;
@@ -570,12 +567,12 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
                 g_free(blk);
             }
 
-            bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, sector, nr_sectors);
             sector += nr_sectors;
             bmds->cur_dirty = sector;
-
             break;
         }
+
+        bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap);
         sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
         bmds->cur_dirty = sector;
     }
diff --git a/nbd/client.c b/nbd/client.c
index 595d99ed30..b97143fa60 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -86,32 +86,6 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
 
 */
 
-/* Discard length bytes from channel.  Return -errno on failure and 0 on
- * success*/
-static int drop_sync(QIOChannel *ioc, size_t size, Error **errp)
-{
-    ssize_t ret = 0;
-    char small[1024];
-    char *buffer;
-
-    buffer = sizeof(small) >= size ? small : g_malloc(MIN(65536, size));
-    while (size > 0) {
-        ssize_t count = MIN(65536, size);
-        ret = read_sync(ioc, buffer, MIN(65536, size), errp);
-
-        if (ret < 0) {
-            goto cleanup;
-        }
-        size -= count;
-    }
-
- cleanup:
-    if (buffer != small) {
-        g_free(buffer);
-    }
-    return ret;
-}
-
 /* Send an option request.
  *
  * The request is for option @opt, with @data containing @len bytes of
@@ -135,12 +109,12 @@ static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt,
     stl_be_p(&req.option, opt);
     stl_be_p(&req.length, len);
 
-    if (write_sync(ioc, &req, sizeof(req), errp) < 0) {
+    if (nbd_write(ioc, &req, sizeof(req), errp) < 0) {
         error_prepend(errp, "Failed to send option request header");
         return -1;
     }
 
-    if (len && write_sync(ioc, (char *) data, len, errp) < 0) {
+    if (len && nbd_write(ioc, (char *) data, len, errp) < 0) {
         error_prepend(errp, "Failed to send option request data");
         return -1;
     }
@@ -169,7 +143,7 @@ static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt,
                                     nbd_opt_reply *reply, Error **errp)
 {
     QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
-    if (read_sync(ioc, reply, sizeof(*reply), errp) < 0) {
+    if (nbd_read(ioc, reply, sizeof(*reply), errp) < 0) {
         error_prepend(errp, "failed to read option reply");
         nbd_send_opt_abort(ioc);
         return -1;
@@ -218,7 +192,7 @@ static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply,
             goto cleanup;
         }
         msg = g_malloc(reply->length + 1);
-        if (read_sync(ioc, msg, reply->length, errp) < 0) {
+        if (nbd_read(ioc, msg, reply->length, errp) < 0) {
             error_prepend(errp, "failed to read option error message");
             goto cleanup;
         }
@@ -320,7 +294,7 @@ static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match,
         nbd_send_opt_abort(ioc);
         return -1;
     }
-    if (read_sync(ioc, &namelen, sizeof(namelen), errp) < 0) {
+    if (nbd_read(ioc, &namelen, sizeof(namelen), errp) < 0) {
         error_prepend(errp, "failed to read option name length");
         nbd_send_opt_abort(ioc);
         return -1;
@@ -333,7 +307,7 @@ static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match,
         return -1;
     }
     if (namelen != strlen(want)) {
-        if (drop_sync(ioc, len, errp) < 0) {
+        if (nbd_drop(ioc, len, errp) < 0) {
             error_prepend(errp, "failed to skip export name with wrong length");
             nbd_send_opt_abort(ioc);
             return -1;
@@ -342,14 +316,14 @@ static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match,
     }
 
     assert(namelen < sizeof(name));
-    if (read_sync(ioc, name, namelen, errp) < 0) {
+    if (nbd_read(ioc, name, namelen, errp) < 0) {
         error_prepend(errp, "failed to read export name");
         nbd_send_opt_abort(ioc);
         return -1;
     }
     name[namelen] = '\0';
     len -= namelen;
-    if (drop_sync(ioc, len, errp) < 0) {
+    if (nbd_drop(ioc, len, errp) < 0) {
         error_prepend(errp, "failed to read export description");
         nbd_send_opt_abort(ioc);
         return -1;
@@ -476,7 +450,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
         goto fail;
     }
 
-    if (read_sync(ioc, buf, 8, errp) < 0) {
+    if (nbd_read(ioc, buf, 8, errp) < 0) {
         error_prepend(errp, "Failed to read data");
         goto fail;
     }
@@ -502,7 +476,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
         goto fail;
     }
 
-    if (read_sync(ioc, &magic, sizeof(magic), errp) < 0) {
+    if (nbd_read(ioc, &magic, sizeof(magic), errp) < 0) {
         error_prepend(errp, "Failed to read magic");
         goto fail;
     }
@@ -514,7 +488,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
         uint16_t globalflags;
         bool fixedNewStyle = false;
 
-        if (read_sync(ioc, &globalflags, sizeof(globalflags), errp) < 0) {
+        if (nbd_read(ioc, &globalflags, sizeof(globalflags), errp) < 0) {
             error_prepend(errp, "Failed to read server flags");
             goto fail;
         }
@@ -532,7 +506,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
         }
         /* client requested flags */
         clientflags = cpu_to_be32(clientflags);
-        if (write_sync(ioc, &clientflags, sizeof(clientflags), errp) < 0) {
+        if (nbd_write(ioc, &clientflags, sizeof(clientflags), errp) < 0) {
             error_prepend(errp, "Failed to send clientflags field");
             goto fail;
         }
@@ -570,13 +544,13 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
         }
 
         /* Read the response */
-        if (read_sync(ioc, &s, sizeof(s), errp) < 0) {
+        if (nbd_read(ioc, &s, sizeof(s), errp) < 0) {
             error_prepend(errp, "Failed to read export length");
             goto fail;
         }
         *size = be64_to_cpu(s);
 
-        if (read_sync(ioc, flags, sizeof(*flags), errp) < 0) {
+        if (nbd_read(ioc, flags, sizeof(*flags), errp) < 0) {
             error_prepend(errp, "Failed to read export flags");
             goto fail;
         }
@@ -593,14 +567,14 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
             goto fail;
         }
 
-        if (read_sync(ioc, &s, sizeof(s), errp) < 0) {
+        if (nbd_read(ioc, &s, sizeof(s), errp) < 0) {
             error_prepend(errp, "Failed to read export length");
             goto fail;
         }
         *size = be64_to_cpu(s);
         TRACE("Size is %" PRIu64, *size);
 
-        if (read_sync(ioc, &oldflags, sizeof(oldflags), errp) < 0) {
+        if (nbd_read(ioc, &oldflags, sizeof(oldflags), errp) < 0) {
             error_prepend(errp, "Failed to read export flags");
             goto fail;
         }
@@ -616,7 +590,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
     }
 
     TRACE("Size is %" PRIu64 ", export flags %" PRIx16, *size, *flags);
-    if (zeroes && drop_sync(ioc, 124, errp) < 0) {
+    if (zeroes && nbd_drop(ioc, 124, errp) < 0) {
         error_prepend(errp, "Failed to read reserved block");
         goto fail;
     }
@@ -759,7 +733,7 @@ ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request)
     stq_be_p(buf + 16, request->from);
     stl_be_p(buf + 24, request->len);
 
-    return write_sync(ioc, buf, sizeof(buf), NULL);
+    return nbd_write(ioc, buf, sizeof(buf), NULL);
 }
 
 ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
@@ -768,7 +742,7 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
     uint32_t magic;
     ssize_t ret;
 
-    ret = read_sync_eof(ioc, buf, sizeof(buf), errp);
+    ret = nbd_read_eof(ioc, buf, sizeof(buf), errp);
     if (ret <= 0) {
         return ret;
     }
diff --git a/nbd/common.c b/nbd/common.c
index bd81637ab9..6b5c1b7b02 100644
--- a/nbd/common.c
+++ b/nbd/common.c
@@ -24,12 +24,8 @@
  * The function may be called from coroutine or from non-coroutine context.
  * When called from non-coroutine context @ioc must be in blocking mode.
  */
-ssize_t nbd_wr_syncv(QIOChannel *ioc,
-                     struct iovec *iov,
-                     size_t niov,
-                     size_t length,
-                     bool do_read,
-                     Error **errp)
+ssize_t nbd_rwv(QIOChannel *ioc, struct iovec *iov, size_t niov, size_t length,
+                bool do_read, Error **errp)
 {
     ssize_t done = 0;
     struct iovec *local_iov = g_new(struct iovec, niov);
@@ -69,6 +65,32 @@ ssize_t nbd_wr_syncv(QIOChannel *ioc,
     return done;
 }
 
+/* Discard length bytes from channel.  Return -errno on failure and 0 on
+ * success */
+int nbd_drop(QIOChannel *ioc, size_t size, Error **errp)
+{
+    ssize_t ret = 0;
+    char small[1024];
+    char *buffer;
+
+    buffer = sizeof(small) >= size ? small : g_malloc(MIN(65536, size));
+    while (size > 0) {
+        ssize_t count = MIN(65536, size);
+        ret = nbd_read(ioc, buffer, MIN(65536, size), errp);
+
+        if (ret < 0) {
+            goto cleanup;
+        }
+        size -= count;
+    }
+
+ cleanup:
+    if (buffer != small) {
+        g_free(buffer);
+    }
+    return ret;
+}
+
 
 void nbd_tls_handshake(QIOTask *task,
                        void *opaque)
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index d6071640a0..39bfed177c 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -94,14 +94,14 @@
 #define NBD_ENOSPC     28
 #define NBD_ESHUTDOWN  108
 
-/* read_sync_eof
+/* nbd_read_eof
  * Tries to read @size bytes from @ioc. Returns number of bytes actually read.
  * May return a value >= 0 and < size only on EOF, i.e. when iteratively called
- * qio_channel_readv() returns 0. So, there are no needs to call read_sync_eof
+ * qio_channel_readv() returns 0. So, there is no need to call nbd_read_eof
  * iteratively.
  */
-static inline ssize_t read_sync_eof(QIOChannel *ioc, void *buffer, size_t size,
-                                    Error **errp)
+static inline ssize_t nbd_read_eof(QIOChannel *ioc, void *buffer, size_t size,
+                                   Error **errp)
 {
     struct iovec iov = { .iov_base = buffer, .iov_len = size };
     /* Sockets are kept in blocking mode in the negotiation phase.  After
@@ -109,16 +109,16 @@ static inline ssize_t read_sync_eof(QIOChannel *ioc, void *buffer, size_t size,
      * our request/reply.  Synchronization is done with recv_coroutine, so
      * that this is coroutine-safe.
      */
-    return nbd_wr_syncv(ioc, &iov, 1, size, true, errp);
+    return nbd_rwv(ioc, &iov, 1, size, true, errp);
 }
 
-/* read_sync
+/* nbd_read
  * Reads @size bytes from @ioc. Returns 0 on success.
  */
-static inline int read_sync(QIOChannel *ioc, void *buffer, size_t size,
-                            Error **errp)
+static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size,
+                           Error **errp)
 {
-    ssize_t ret = read_sync_eof(ioc, buffer, size, errp);
+    ssize_t ret = nbd_read_eof(ioc, buffer, size, errp);
 
     if (ret >= 0 && ret != size) {
         ret = -EINVAL;
@@ -128,15 +128,15 @@ static inline int read_sync(QIOChannel *ioc, void *buffer, size_t size,
     return ret < 0 ? ret : 0;
 }
 
-/* write_sync
+/* nbd_write
  * Writes @size bytes to @ioc. Returns 0 on success.
  */
-static inline int write_sync(QIOChannel *ioc, const void *buffer, size_t size,
-                             Error **errp)
+static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size,
+                            Error **errp)
 {
     struct iovec iov = { .iov_base = (void *) buffer, .iov_len = size };
 
-    ssize_t ret = nbd_wr_syncv(ioc, &iov, 1, size, false, errp);
+    ssize_t ret = nbd_rwv(ioc, &iov, 1, size, false, errp);
 
     assert(ret < 0 || ret == size);
 
@@ -153,4 +153,6 @@ struct NBDTLSHandshakeData {
 void nbd_tls_handshake(QIOTask *task,
                        void *opaque);
 
+int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);
+
 #endif
diff --git a/nbd/server.c b/nbd/server.c
index 49b55f6ede..8a70c054a6 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -81,7 +81,7 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
 
 struct NBDClient {
     int refcount;
-    void (*close)(NBDClient *client);
+    void (*close_fn)(NBDClient *client, bool negotiated);
 
     bool no_zeroes;
     NBDExport *exp;
@@ -104,69 +104,6 @@ struct NBDClient {
 
 static void nbd_client_receive_next_request(NBDClient *client);
 
-static gboolean nbd_negotiate_continue(QIOChannel *ioc,
-                                       GIOCondition condition,
-                                       void *opaque)
-{
-    qemu_coroutine_enter(opaque);
-    return TRUE;
-}
-
-static int nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size)
-{
-    ssize_t ret;
-    guint watch;
-
-    assert(qemu_in_coroutine());
-    /* Negotiation are always in main loop. */
-    watch = qio_channel_add_watch(ioc,
-                                  G_IO_IN,
-                                  nbd_negotiate_continue,
-                                  qemu_coroutine_self(),
-                                  NULL);
-    ret = read_sync(ioc, buffer, size, NULL);
-    g_source_remove(watch);
-    return ret;
-
-}
-
-static int nbd_negotiate_write(QIOChannel *ioc, const void *buffer, size_t size)
-{
-    ssize_t ret;
-    guint watch;
-
-    assert(qemu_in_coroutine());
-    /* Negotiation are always in main loop. */
-    watch = qio_channel_add_watch(ioc,
-                                  G_IO_OUT,
-                                  nbd_negotiate_continue,
-                                  qemu_coroutine_self(),
-                                  NULL);
-    ret = write_sync(ioc, buffer, size, NULL);
-    g_source_remove(watch);
-    return ret;
-}
-
-static int nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size)
-{
-    ssize_t ret;
-    uint8_t *buffer = g_malloc(MIN(65536, size));
-
-    while (size > 0) {
-        size_t count = MIN(65536, size);
-        ret = nbd_negotiate_read(ioc, buffer, count);
-        if (ret < 0) {
-            g_free(buffer);
-            return ret;
-        }
-
-        size -= count;
-    }
-
-    g_free(buffer);
-    return 0;
-}
-
 /* Basic flow for negotiation
 
    Server         Client
@@ -205,22 +142,22 @@ static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type,
           type, opt, len);
 
     magic = cpu_to_be64(NBD_REP_MAGIC);
-    if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) < 0) {
+    if (nbd_write(ioc, &magic, sizeof(magic), NULL) < 0) {
         LOG("write failed (rep magic)");
         return -EINVAL;
     }
     opt = cpu_to_be32(opt);
-    if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) < 0) {
+    if (nbd_write(ioc, &opt, sizeof(opt), NULL) < 0) {
         LOG("write failed (rep opt)");
         return -EINVAL;
     }
     type = cpu_to_be32(type);
-    if (nbd_negotiate_write(ioc, &type, sizeof(type)) < 0) {
+    if (nbd_write(ioc, &type, sizeof(type), NULL) < 0) {
         LOG("write failed (rep type)");
         return -EINVAL;
     }
     len = cpu_to_be32(len);
-    if (nbd_negotiate_write(ioc, &len, sizeof(len)) < 0) {
+    if (nbd_write(ioc, &len, sizeof(len), NULL) < 0) {
         LOG("write failed (rep data length)");
         return -EINVAL;
     }
@@ -255,7 +192,7 @@ nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type,
     if (ret < 0) {
         goto out;
     }
-    if (nbd_negotiate_write(ioc, msg, len) < 0) {
+    if (nbd_write(ioc, msg, len, NULL) < 0) {
         LOG("write failed (error message)");
         ret = -EIO;
     } else {
@@ -274,27 +211,27 @@ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp)
     uint32_t len;
     const char *name = exp->name ? exp->name : "";
     const char *desc = exp->description ? exp->description : "";
-    int rc;
+    int ret;
 
     TRACE("Advertising export name '%s' description '%s'", name, desc);
     name_len = strlen(name);
     desc_len = strlen(desc);
     len = name_len + desc_len + sizeof(len);
-    rc = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len);
-    if (rc < 0) {
-        return rc;
+    ret = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len);
+    if (ret < 0) {
+        return ret;
     }
 
     len = cpu_to_be32(name_len);
-    if (nbd_negotiate_write(ioc, &len, sizeof(len)) < 0) {
+    if (nbd_write(ioc, &len, sizeof(len), NULL) < 0) {
         LOG("write failed (name length)");
         return -EINVAL;
     }
-    if (nbd_negotiate_write(ioc, name, name_len) < 0) {
+    if (nbd_write(ioc, name, name_len, NULL) < 0) {
         LOG("write failed (name buffer)");
         return -EINVAL;
     }
-    if (nbd_negotiate_write(ioc, desc, desc_len) < 0) {
+    if (nbd_write(ioc, desc, desc_len, NULL) < 0) {
         LOG("write failed (description buffer)");
         return -EINVAL;
     }
@@ -308,7 +245,7 @@ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length)
     NBDExport *exp;
 
     if (length) {
-        if (nbd_negotiate_drop_sync(client->ioc, length) < 0) {
+        if (nbd_drop(client->ioc, length, NULL) < 0) {
             return -EIO;
         }
         return nbd_negotiate_send_rep_err(client->ioc,
@@ -328,7 +265,6 @@ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length)
 
 static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length)
 {
-    int rc = -EINVAL;
     char name[NBD_MAX_NAME_SIZE + 1];
 
     /* Client sends:
@@ -337,11 +273,11 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length)
     TRACE("Checking length");
     if (length >= sizeof(name)) {
         LOG("Bad length received");
-        goto fail;
+        return -EINVAL;
     }
-    if (nbd_negotiate_read(client->ioc, name, length) < 0) {
+    if (nbd_read(client->ioc, name, length, NULL) < 0) {
         LOG("read failed");
-        goto fail;
+        return -EINVAL;
     }
     name[length] = '\0';
 
@@ -350,14 +286,13 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length)
     client->exp = nbd_export_find(name);
     if (!client->exp) {
         LOG("export not found");
-        goto fail;
+        return -EINVAL;
     }
 
     QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
     nbd_export_get(client->exp);
-    rc = 0;
-fail:
-    return rc;
+
+    return 0;
 }
 
 /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
@@ -372,7 +307,7 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
     TRACE("Setting up TLS");
     ioc = client->ioc;
     if (length) {
-        if (nbd_negotiate_drop_sync(ioc, length) < 0) {
+        if (nbd_drop(ioc, length, NULL) < 0) {
             return NULL;
         }
         nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
@@ -436,7 +371,7 @@ static int nbd_negotiate_options(NBDClient *client)
         ...           Rest of request
     */
 
-    if (nbd_negotiate_read(client->ioc, &flags, sizeof(flags)) < 0) {
+    if (nbd_read(client->ioc, &flags, sizeof(flags), NULL) < 0) {
         LOG("read failed");
         return -EIO;
     }
@@ -462,7 +397,7 @@ static int nbd_negotiate_options(NBDClient *client)
         uint32_t clientflags, length;
         uint64_t magic;
 
-        if (nbd_negotiate_read(client->ioc, &magic, sizeof(magic)) < 0) {
+        if (nbd_read(client->ioc, &magic, sizeof(magic), NULL) < 0) {
             LOG("read failed");
             return -EINVAL;
         }
@@ -472,15 +407,15 @@ static int nbd_negotiate_options(NBDClient *client)
             return -EINVAL;
         }
 
-        if (nbd_negotiate_read(client->ioc, &clientflags,
-                               sizeof(clientflags)) < 0)
+        if (nbd_read(client->ioc, &clientflags,
+                      sizeof(clientflags), NULL) < 0)
         {
             LOG("read failed");
             return -EINVAL;
         }
         clientflags = be32_to_cpu(clientflags);
 
-        if (nbd_negotiate_read(client->ioc, &length, sizeof(length)) < 0) {
+        if (nbd_read(client->ioc, &length, sizeof(length), NULL) < 0) {
             LOG("read failed");
             return -EINVAL;
         }
@@ -510,7 +445,7 @@ static int nbd_negotiate_options(NBDClient *client)
                 return -EINVAL;
 
             default:
-                if (nbd_negotiate_drop_sync(client->ioc, length) < 0) {
+                if (nbd_drop(client->ioc, length, NULL) < 0) {
                     return -EIO;
                 }
                 ret = nbd_negotiate_send_rep_err(client->ioc,
@@ -548,7 +483,7 @@ static int nbd_negotiate_options(NBDClient *client)
                 return nbd_negotiate_handle_export_name(client, length);
 
             case NBD_OPT_STARTTLS:
-                if (nbd_negotiate_drop_sync(client->ioc, length) < 0) {
+                if (nbd_drop(client->ioc, length, NULL) < 0) {
                     return -EIO;
                 }
                 if (client->tlscreds) {
@@ -567,7 +502,7 @@ static int nbd_negotiate_options(NBDClient *client)
                 }
                 break;
             default:
-                if (nbd_negotiate_drop_sync(client->ioc, length) < 0) {
+                if (nbd_drop(client->ioc, length, NULL) < 0) {
                     return -EIO;
                 }
                 ret = nbd_negotiate_send_rep_err(client->ioc,
@@ -598,16 +533,10 @@ static int nbd_negotiate_options(NBDClient *client)
     }
 }
 
-typedef struct {
-    NBDClient *client;
-    Coroutine *co;
-} NBDClientNewData;
-
-static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
+static coroutine_fn int nbd_negotiate(NBDClient *client)
 {
-    NBDClient *client = data->client;
     char buf[8 + 8 + 8 + 128];
-    int rc;
+    int ret;
     const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
                               NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
                               NBD_FLAG_SEND_WRITE_ZEROES);
@@ -633,7 +562,6 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
      */
 
     qio_channel_set_blocking(client->ioc, false, NULL);
-    rc = -EINVAL;
 
     TRACE("Beginning negotiation.");
     memset(buf, 0, sizeof(buf));
@@ -654,21 +582,21 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
     if (oldStyle) {
         if (client->tlscreds) {
             TRACE("TLS cannot be enabled with oldstyle protocol");
-            goto fail;
+            return -EINVAL;
         }
-        if (nbd_negotiate_write(client->ioc, buf, sizeof(buf)) < 0) {
+        if (nbd_write(client->ioc, buf, sizeof(buf), NULL) < 0) {
             LOG("write failed");
-            goto fail;
+            return -EINVAL;
         }
     } else {
-        if (nbd_negotiate_write(client->ioc, buf, 18) < 0) {
+        if (nbd_write(client->ioc, buf, 18, NULL) < 0) {
             LOG("write failed");
-            goto fail;
+            return -EINVAL;
         }
-        rc = nbd_negotiate_options(client);
-        if (rc != 0) {
+        ret = nbd_negotiate_options(client);
+        if (ret != 0) {
             LOG("option negotiation failed");
-            goto fail;
+            return ret;
         }
 
         TRACE("advertising size %" PRIu64 " and flags %x",
@@ -676,25 +604,25 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
         stq_be_p(buf + 18, client->exp->size);
         stw_be_p(buf + 26, client->exp->nbdflags | myflags);
         len = client->no_zeroes ? 10 : sizeof(buf) - 18;
-        if (nbd_negotiate_write(client->ioc, buf + 18, len) < 0) {
+        ret = nbd_write(client->ioc, buf + 18, len, NULL);
+        if (ret < 0) {
             LOG("write failed");
-            goto fail;
+            return ret;
         }
     }
 
     TRACE("Negotiation succeeded.");
-    rc = 0;
-fail:
-    return rc;
+
+    return 0;
 }
 
-static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request)
+static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request)
 {
     uint8_t buf[NBD_REQUEST_SIZE];
     uint32_t magic;
-    ssize_t ret;
+    int ret;
 
-    ret = read_sync(ioc, buf, sizeof(buf), NULL);
+    ret = nbd_read(ioc, buf, sizeof(buf), NULL);
     if (ret < 0) {
         return ret;
     }
@@ -726,7 +654,7 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request)
     return 0;
 }
 
-static ssize_t nbd_send_reply(QIOChannel *ioc, NBDReply *reply)
+static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply)
 {
     uint8_t buf[NBD_REPLY_SIZE];
 
@@ -745,7 +673,7 @@ static ssize_t nbd_send_reply(QIOChannel *ioc, NBDReply *reply)
     stl_be_p(buf + 4, reply->error);
     stq_be_p(buf + 8, reply->handle);
 
-    return write_sync(ioc, buf, sizeof(buf), NULL);
+    return nbd_write(ioc, buf, sizeof(buf), NULL);
 }
 
 #define MAX_NBD_REQUESTS 16
@@ -778,7 +706,7 @@ void nbd_client_put(NBDClient *client)
     }
 }
 
-static void client_close(NBDClient *client)
+static void client_close(NBDClient *client, bool negotiated)
 {
     if (client->closing) {
         return;
@@ -793,8 +721,8 @@ static void client_close(NBDClient *client)
                          NULL);
 
     /* Also tell the client, so that they release their reference.  */
-    if (client->close) {
-        client->close(client);
+    if (client->close_fn) {
+        client->close_fn(client, negotiated);
     }
 }
 
@@ -975,7 +903,7 @@ void nbd_export_close(NBDExport *exp)
 
     nbd_export_get(exp);
     QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
-        client_close(client);
+        client_close(client, true);
     }
     nbd_export_set_name(exp, NULL);
     nbd_export_set_description(exp, NULL);
@@ -1032,25 +960,24 @@ void nbd_export_close_all(void)
     }
 }
 
-static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply,
-                                 int len)
+static int nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len)
 {
     NBDClient *client = req->client;
-    ssize_t rc, ret;
+    int ret;
 
     g_assert(qemu_in_coroutine());
     qemu_co_mutex_lock(&client->send_lock);
     client->send_coroutine = qemu_coroutine_self();
 
     if (!len) {
-        rc = nbd_send_reply(client->ioc, reply);
+        ret = nbd_send_reply(client->ioc, reply);
     } else {
         qio_channel_set_cork(client->ioc, true);
-        rc = nbd_send_reply(client->ioc, reply);
-        if (rc >= 0) {
-            ret = write_sync(client->ioc, req->data, len, NULL);
+        ret = nbd_send_reply(client->ioc, reply);
+        if (ret == 0) {
+            ret = nbd_write(client->ioc, req->data, len, NULL);
             if (ret < 0) {
-                rc = -EIO;
+                ret = -EIO;
             }
         }
         qio_channel_set_cork(client->ioc, false);
@@ -1058,28 +985,23 @@ static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply,
 
     client->send_coroutine = NULL;
     qemu_co_mutex_unlock(&client->send_lock);
-    return rc;
+    return ret;
 }
 
-/* Collect a client request.  Return 0 if request looks valid, -EAGAIN
- * to keep trying the collection, -EIO to drop connection right away,
- * and any other negative value to report an error to the client
- * (although the caller may still need to disconnect after reporting
- * the error).  */
-static ssize_t nbd_co_receive_request(NBDRequestData *req,
-                                      NBDRequest *request)
+/* nbd_co_receive_request
+ * Collect a client request. Return 0 if request looks valid, -EIO to drop
+ * connection right away, and any other negative value to report an error to
+ * the client (although the caller may still need to disconnect after reporting
+ * the error).
+ */
+static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request)
 {
     NBDClient *client = req->client;
-    ssize_t rc;
 
     g_assert(qemu_in_coroutine());
     assert(client->recv_coroutine == qemu_coroutine_self());
-    rc = nbd_receive_request(client->ioc, request);
-    if (rc < 0) {
-        if (rc != -EAGAIN) {
-            rc = -EIO;
-        }
-        goto out;
+    if (nbd_receive_request(client->ioc, request) < 0) {
+        return -EIO;
     }
 
     TRACE("Decoding type");
@@ -1093,8 +1015,7 @@ static ssize_t nbd_co_receive_request(NBDRequestData *req,
         /* Special case: we're going to disconnect without a reply,
          * whether or not flags, from, or len are bogus */
         TRACE("Request type is DISCONNECT");
-        rc = -EIO;
-        goto out;
+        return -EIO;
     }
 
     /* Check for sanity in the parameters, part 1.  Defer as many
@@ -1102,31 +1023,27 @@ static ssize_t nbd_co_receive_request(NBDRequestData *req,
      * payload, so we can try and keep the connection alive.  */
     if ((request->from + request->len) < request->from) {
         LOG("integer overflow detected, you're probably being attacked");
-        rc = -EINVAL;
-        goto out;
+        return -EINVAL;
     }
 
     if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) {
         if (request->len > NBD_MAX_BUFFER_SIZE) {
             LOG("len (%" PRIu32" ) is larger than max len (%u)",
                 request->len, NBD_MAX_BUFFER_SIZE);
-            rc = -EINVAL;
-            goto out;
+            return -EINVAL;
         }
 
         req->data = blk_try_blockalign(client->exp->blk, request->len);
         if (req->data == NULL) {
-            rc = -ENOMEM;
-            goto out;
+            return -ENOMEM;
         }
     }
     if (request->type == NBD_CMD_WRITE) {
         TRACE("Reading %" PRIu32 " byte(s)", request->len);
 
-        if (read_sync(client->ioc, req->data, request->len, NULL) < 0) {
+        if (nbd_read(client->ioc, req->data, request->len, NULL) < 0) {
             LOG("reading from socket failed");
-            rc = -EIO;
-            goto out;
+            return -EIO;
         }
         req->complete = true;
     }
@@ -1136,28 +1053,19 @@ static ssize_t nbd_co_receive_request(NBDRequestData *req,
         LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
             ", Size: %" PRIu64, request->from, request->len,
             (uint64_t)client->exp->size);
-        rc = request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
-        goto out;
+        return request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
     }
     if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
         LOG("unsupported flags (got 0x%x)", request->flags);
-        rc = -EINVAL;
-        goto out;
+        return -EINVAL;
     }
     if (request->type != NBD_CMD_WRITE_ZEROES &&
         (request->flags & NBD_CMD_FLAG_NO_HOLE)) {
         LOG("unexpected flags (got 0x%x)", request->flags);
-        rc = -EINVAL;
-        goto out;
+        return -EINVAL;
     }
 
-    rc = 0;
-
-out:
-    client->recv_coroutine = NULL;
-    nbd_client_receive_next_request(client);
-
-    return rc;
+    return 0;
 }
 
 /* Owns a reference to the NBDClient passed as opaque.  */
@@ -1168,8 +1076,9 @@ static coroutine_fn void nbd_trip(void *opaque)
     NBDRequestData *req;
     NBDRequest request = { 0 };    /* GCC thinks it can be used uninitialized */
     NBDReply reply;
-    ssize_t ret;
+    int ret;
     int flags;
+    int reply_data_len = 0;
 
     TRACE("Reading request.");
     if (client->closing) {
@@ -1179,11 +1088,10 @@ static coroutine_fn void nbd_trip(void *opaque)
 
     req = nbd_request_get(client);
     ret = nbd_co_receive_request(req, &request);
-    if (ret == -EAGAIN) {
-        goto done;
-    }
+    client->recv_coroutine = NULL;
+    nbd_client_receive_next_request(client);
     if (ret == -EIO) {
-        goto out;
+        goto disconnect;
     }
 
     reply.handle = request.handle;
@@ -1191,7 +1099,7 @@ static coroutine_fn void nbd_trip(void *opaque)
 
     if (ret < 0) {
         reply.error = -ret;
-        goto error_reply;
+        goto reply;
     }
 
     if (client->closing) {
@@ -1212,7 +1120,7 @@ static coroutine_fn void nbd_trip(void *opaque)
             if (ret < 0) {
                 LOG("flush failed");
                 reply.error = -ret;
-                goto error_reply;
+                break;
             }
         }
 
@@ -1221,12 +1129,12 @@ static coroutine_fn void nbd_trip(void *opaque)
         if (ret < 0) {
             LOG("reading from file failed");
             reply.error = -ret;
-            goto error_reply;
+            break;
         }
 
+        reply_data_len = request.len;
         TRACE("Read %" PRIu32" byte(s)", request.len);
-        if (nbd_co_send_reply(req, &reply, request.len) < 0)
-            goto out;
+
         break;
     case NBD_CMD_WRITE:
         TRACE("Request type is WRITE");
@@ -1234,7 +1142,7 @@ static coroutine_fn void nbd_trip(void *opaque)
         if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
             TRACE("Server is read-only, return error");
             reply.error = EROFS;
-            goto error_reply;
+            break;
         }
 
         TRACE("Writing to device");
@@ -1248,21 +1156,16 @@ static coroutine_fn void nbd_trip(void *opaque)
         if (ret < 0) {
             LOG("writing to file failed");
             reply.error = -ret;
-            goto error_reply;
         }
 
-        if (nbd_co_send_reply(req, &reply, 0) < 0) {
-            goto out;
-        }
         break;
-
     case NBD_CMD_WRITE_ZEROES:
         TRACE("Request type is WRITE_ZEROES");
 
         if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
             TRACE("Server is read-only, return error");
             reply.error = EROFS;
-            goto error_reply;
+            break;
         }
 
         TRACE("Writing to device");
@@ -1279,14 +1182,9 @@ static coroutine_fn void nbd_trip(void *opaque)
         if (ret < 0) {
             LOG("writing to file failed");
             reply.error = -ret;
-            goto error_reply;
         }
 
-        if (nbd_co_send_reply(req, &reply, 0) < 0) {
-            goto out;
-        }
         break;
-
     case NBD_CMD_DISC:
         /* unreachable, thanks to special case in nbd_co_receive_request() */
         abort();
@@ -1299,9 +1197,7 @@ static coroutine_fn void nbd_trip(void *opaque)
             LOG("flush failed");
             reply.error = -ret;
         }
-        if (nbd_co_send_reply(req, &reply, 0) < 0) {
-            goto out;
-        }
+
         break;
     case NBD_CMD_TRIM:
         TRACE("Request type is TRIM");
@@ -1311,21 +1207,19 @@ static coroutine_fn void nbd_trip(void *opaque)
             LOG("discard failed");
             reply.error = -ret;
         }
-        if (nbd_co_send_reply(req, &reply, 0) < 0) {
-            goto out;
-        }
+
         break;
     default:
         LOG("invalid request type (%" PRIu32 ") received", request.type);
         reply.error = EINVAL;
-    error_reply:
-        /* We must disconnect after NBD_CMD_WRITE if we did not
-         * read the payload.
-         */
-        if (nbd_co_send_reply(req, &reply, 0) < 0 || !req->complete) {
-            goto out;
-        }
-        break;
+    }
+
+reply:
+    /* We must disconnect after NBD_CMD_WRITE if we did not
+     * read the payload.
+     */
+    if (nbd_co_send_reply(req, &reply, reply_data_len) < 0 || !req->complete) {
+        goto disconnect;
     }
 
     TRACE("Request/Reply complete");
@@ -1335,9 +1229,9 @@ done:
     nbd_client_put(client);
     return;
 
-out:
+disconnect:
     nbd_request_put(req);
-    client_close(client);
+    client_close(client, true);
     nbd_client_put(client);
 }
 
@@ -1352,8 +1246,7 @@ static void nbd_client_receive_next_request(NBDClient *client)
 
 static coroutine_fn void nbd_co_client_start(void *opaque)
 {
-    NBDClientNewData *data = opaque;
-    NBDClient *client = data->client;
+    NBDClient *client = opaque;
     NBDExport *exp = client->exp;
 
     if (exp) {
@@ -1362,25 +1255,28 @@ static coroutine_fn void nbd_co_client_start(void *opaque)
     }
     qemu_co_mutex_init(&client->send_lock);
 
-    if (nbd_negotiate(data)) {
-        client_close(client);
-        goto out;
+    if (nbd_negotiate(client)) {
+        client_close(client, false);
+        return;
     }
 
     nbd_client_receive_next_request(client);
-
-out:
-    g_free(data);
 }
 
+/*
+ * Create a new client listener on the given export @exp, using the
+ * given channel @sioc.  Begin servicing it in a coroutine.  When the
+ * connection closes, call @close_fn with an indication of whether the
+ * client completed negotiation.
+ */
 void nbd_client_new(NBDExport *exp,
                     QIOChannelSocket *sioc,
                     QCryptoTLSCreds *tlscreds,
                     const char *tlsaclname,
-                    void (*close_fn)(NBDClient *))
+                    void (*close_fn)(NBDClient *, bool))
 {
     NBDClient *client;
-    NBDClientNewData *data = g_new(NBDClientNewData, 1);
+    Coroutine *co;
 
     client = g_malloc0(sizeof(NBDClient));
     client->refcount = 1;
@@ -1394,9 +1290,8 @@ void nbd_client_new(NBDExport *exp,
     object_ref(OBJECT(client->sioc));
     client->ioc = QIO_CHANNEL(sioc);
     object_ref(OBJECT(client->ioc));
-    client->close = close_fn;
+    client->close_fn = close_fn;
 
-    data->client = client;
-    data->co = qemu_coroutine_create(nbd_co_client_start, data);
-    qemu_coroutine_enter(data->co);
+    co = qemu_coroutine_create(nbd_co_client_start, client);
+    qemu_coroutine_enter(co);
 }
diff --git a/qemu-doc.texi b/qemu-doc.texi
index 965ba5929e..21079fd675 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -1,11 +1,12 @@
 \input texinfo @c -*- texinfo -*-
 @c %**start of header
 @setfilename qemu-doc.info
+@include version.texi
 
 @documentlanguage en
 @documentencoding UTF-8
 
-@settitle QEMU Emulator User Documentation
+@settitle QEMU version @value{VERSION} User Documentation
 @exampleindent 0
 @paragraphindent 0
 @c %**end of header
@@ -19,7 +20,7 @@
 @iftex
 @titlepage
 @sp 7
-@center @titlefont{QEMU Emulator}
+@center @titlefont{QEMU version @value{VERSION}}
 @sp 1
 @center @titlefont{User Documentation}
 @sp 3
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 651f85ecc1..4dd3fd4732 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -336,10 +336,10 @@ static void nbd_export_closed(NBDExport *exp)
 
 static void nbd_update_server_watch(void);
 
-static void nbd_client_closed(NBDClient *client)
+static void nbd_client_closed(NBDClient *client, bool negotiated)
 {
     nb_fds--;
-    if (nb_fds == 0 && !persistent && state == RUNNING) {
+    if (negotiated && nb_fds == 0 && !persistent && state == RUNNING) {
         state = TERMINATE;
     }
     nbd_update_server_watch();
@@ -581,6 +581,10 @@ int main(int argc, char **argv)
     sa_sigterm.sa_handler = termsig_handler;
     sigaction(SIGTERM, &sa_sigterm, NULL);
 
+#ifdef CONFIG_POSIX
+    signal(SIGPIPE, SIG_IGN);
+#endif
+
     module_call_init(MODULE_INIT_TRACE);
     qcrypto_init(&error_fatal);
 
diff --git a/rules.mak b/rules.mak
index 2a2fb72e85..6e943335f3 100644
--- a/rules.mak
+++ b/rules.mak
@@ -377,7 +377,7 @@ define unnest-vars
 endef
 
 TEXI2MAN = $(call quiet-command, \
-	perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl -I docs $< $@.pod && \
+	perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $(TEXI2PODFLAGS) $< $@.pod && \
 	$(POD2MAN) --section=$(subst .,,$(suffix $@)) --center=" " --release=" " $@.pod > $@, \
 	"GEN","$@")
 
diff --git a/target/i386/hax-all.c b/target/i386/hax-all.c
index 097db5cae1..ba6117d7de 100644
--- a/target/i386/hax-all.c
+++ b/target/i386/hax-all.c
@@ -514,9 +514,10 @@ static int hax_vcpu_hax_exec(CPUArchState *env)
         hax_vcpu_interrupt(env);
 
         qemu_mutex_unlock_iothread();
+        cpu_exec_start(cpu);
         hax_ret = hax_vcpu_run(vcpu);
+        cpu_exec_end(cpu);
         qemu_mutex_lock_iothread();
-        current_cpu = cpu;
 
         /* Simply continue the vcpu_run if system call interrupted */
         if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
diff --git a/target/m68k/Makefile.objs b/target/m68k/Makefile.objs
index 02cf616a78..39141ab93d 100644
--- a/target/m68k/Makefile.objs
+++ b/target/m68k/Makefile.objs
@@ -1,3 +1,3 @@
 obj-y += m68k-semi.o
-obj-y += translate.o op_helper.o helper.o cpu.o
+obj-y += translate.o op_helper.o helper.o cpu.o fpu_helper.o
 obj-y += gdbstub.o
diff --git a/target/m68k/fpu_helper.c b/target/m68k/fpu_helper.c
new file mode 100644
index 0000000000..5bf2576c2b
--- /dev/null
+++ b/target/m68k/fpu_helper.c
@@ -0,0 +1,112 @@
+/*
+ *  m68k FPU helpers
+ *
+ *  Copyright (c) 2006-2007 CodeSourcery
+ *  Written by Paul Brook
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/helper-proto.h"
+
+uint32_t HELPER(f64_to_i32)(CPUM68KState *env, float64 val)
+{
+    return float64_to_int32(val, &env->fp_status);
+}
+
+float32 HELPER(f64_to_f32)(CPUM68KState *env, float64 val)
+{
+    return float64_to_float32(val, &env->fp_status);
+}
+
+float64 HELPER(i32_to_f64)(CPUM68KState *env, uint32_t val)
+{
+    return int32_to_float64(val, &env->fp_status);
+}
+
+float64 HELPER(f32_to_f64)(CPUM68KState *env, float32 val)
+{
+    return float32_to_float64(val, &env->fp_status);
+}
+
+float64 HELPER(iround_f64)(CPUM68KState *env, float64 val)
+{
+    return float64_round_to_int(val, &env->fp_status);
+}
+
+float64 HELPER(itrunc_f64)(CPUM68KState *env, float64 val)
+{
+    return float64_trunc_to_int(val, &env->fp_status);
+}
+
+float64 HELPER(sqrt_f64)(CPUM68KState *env, float64 val)
+{
+    return float64_sqrt(val, &env->fp_status);
+}
+
+float64 HELPER(abs_f64)(float64 val)
+{
+    return float64_abs(val);
+}
+
+float64 HELPER(chs_f64)(float64 val)
+{
+    return float64_chs(val);
+}
+
+float64 HELPER(add_f64)(CPUM68KState *env, float64 a, float64 b)
+{
+    return float64_add(a, b, &env->fp_status);
+}
+
+float64 HELPER(sub_f64)(CPUM68KState *env, float64 a, float64 b)
+{
+    return float64_sub(a, b, &env->fp_status);
+}
+
+float64 HELPER(mul_f64)(CPUM68KState *env, float64 a, float64 b)
+{
+    return float64_mul(a, b, &env->fp_status);
+}
+
+float64 HELPER(div_f64)(CPUM68KState *env, float64 a, float64 b)
+{
+    return float64_div(a, b, &env->fp_status);
+}
+
+float64 HELPER(sub_cmp_f64)(CPUM68KState *env, float64 a, float64 b)
+{
+    /* ??? This may incorrectly raise exceptions.  */
+    /* ??? Should flush denormals to zero.  */
+    float64 res;
+    res = float64_sub(a, b, &env->fp_status);
+    if (float64_is_quiet_nan(res, &env->fp_status)) {
+        /* +/-inf compares equal against itself, but sub returns nan.  */
+        if (!float64_is_quiet_nan(a, &env->fp_status)
+            && !float64_is_quiet_nan(b, &env->fp_status)) {
+            res = float64_zero;
+            if (float64_lt_quiet(a, res, &env->fp_status)) {
+                res = float64_chs(res);
+            }
+        }
+    }
+    return res;
+}
+
+uint32_t HELPER(compare_f64)(CPUM68KState *env, float64 val)
+{
+    return float64_compare_quiet(val, float64_zero, &env->fp_status);
+}
diff --git a/target/m68k/helper.c b/target/m68k/helper.c
index f750d3dbaa..5ca9911657 100644
--- a/target/m68k/helper.c
+++ b/target/m68k/helper.c
@@ -284,94 +284,6 @@ void HELPER(set_sr)(CPUM68KState *env, uint32_t val)
     m68k_switch_sp(env);
 }
 
-/* FPU helpers.  */
-uint32_t HELPER(f64_to_i32)(CPUM68KState *env, float64 val)
-{
-    return float64_to_int32(val, &env->fp_status);
-}
-
-float32 HELPER(f64_to_f32)(CPUM68KState *env, float64 val)
-{
-    return float64_to_float32(val, &env->fp_status);
-}
-
-float64 HELPER(i32_to_f64)(CPUM68KState *env, uint32_t val)
-{
-    return int32_to_float64(val, &env->fp_status);
-}
-
-float64 HELPER(f32_to_f64)(CPUM68KState *env, float32 val)
-{
-    return float32_to_float64(val, &env->fp_status);
-}
-
-float64 HELPER(iround_f64)(CPUM68KState *env, float64 val)
-{
-    return float64_round_to_int(val, &env->fp_status);
-}
-
-float64 HELPER(itrunc_f64)(CPUM68KState *env, float64 val)
-{
-    return float64_trunc_to_int(val, &env->fp_status);
-}
-
-float64 HELPER(sqrt_f64)(CPUM68KState *env, float64 val)
-{
-    return float64_sqrt(val, &env->fp_status);
-}
-
-float64 HELPER(abs_f64)(float64 val)
-{
-    return float64_abs(val);
-}
-
-float64 HELPER(chs_f64)(float64 val)
-{
-    return float64_chs(val);
-}
-
-float64 HELPER(add_f64)(CPUM68KState *env, float64 a, float64 b)
-{
-    return float64_add(a, b, &env->fp_status);
-}
-
-float64 HELPER(sub_f64)(CPUM68KState *env, float64 a, float64 b)
-{
-    return float64_sub(a, b, &env->fp_status);
-}
-
-float64 HELPER(mul_f64)(CPUM68KState *env, float64 a, float64 b)
-{
-    return float64_mul(a, b, &env->fp_status);
-}
-
-float64 HELPER(div_f64)(CPUM68KState *env, float64 a, float64 b)
-{
-    return float64_div(a, b, &env->fp_status);
-}
-
-float64 HELPER(sub_cmp_f64)(CPUM68KState *env, float64 a, float64 b)
-{
-    /* ??? This may incorrectly raise exceptions.  */
-    /* ??? Should flush denormals to zero.  */
-    float64 res;
-    res = float64_sub(a, b, &env->fp_status);
-    if (float64_is_quiet_nan(res, &env->fp_status)) {
-        /* +/-inf compares equal against itself, but sub returns nan.  */
-        if (!float64_is_quiet_nan(a, &env->fp_status)
-            && !float64_is_quiet_nan(b, &env->fp_status)) {
-            res = float64_zero;
-            if (float64_lt_quiet(a, res, &env->fp_status))
-                res = float64_chs(res);
-        }
-    }
-    return res;
-}
-
-uint32_t HELPER(compare_f64)(CPUM68KState *env, float64 val)
-{
-    return float64_compare_quiet(val, float64_zero, &env->fp_status);
-}
 
 /* MAC unit.  */
 /* FIXME: The MAC unit implementation is a bit of a mess.  Some helpers
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index ad4d4efb8d..dfecfb6e5f 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -565,7 +565,7 @@ static void gen_flush_flags(DisasContext *s)
         t1 = tcg_temp_new();
         tcg_gen_add_i32(t0, QREG_CC_N, QREG_CC_V);
         gen_ext(t0, t0, s->cc_op - CC_OP_SUBB, 1);
-        tcg_gen_xor_i32(t1, QREG_CC_N, QREG_CC_V);
+        tcg_gen_xor_i32(t1, QREG_CC_N, t0);
         tcg_gen_xor_i32(QREG_CC_V, QREG_CC_V, t0);
         tcg_temp_free(t0);
         tcg_gen_and_i32(QREG_CC_V, QREG_CC_V, t1);
@@ -669,6 +669,21 @@ static inline int insn_opsize(int insn)
     }
 }
 
+static inline int ext_opsize(int ext, int pos)
+{
+    switch ((ext >> pos) & 7) {
+    case 0: return OS_LONG;
+    case 1: return OS_SINGLE;
+    case 2: return OS_EXTENDED;
+    case 3: return OS_PACKED;
+    case 4: return OS_WORD;
+    case 5: return OS_DOUBLE;
+    case 6: return OS_BYTE;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 /* Assign value to a register.  If the width is less than the register width
    only the low part of the register is set.  */
 static void gen_partset_reg(int opsize, TCGv reg, TCGv val)
@@ -4111,20 +4126,19 @@ DISAS_INSN(fpu)
         tmp32 = tcg_temp_new_i32();
         /* fmove */
         /* ??? TODO: Proper behavior on overflow.  */
-        switch ((ext >> 10) & 7) {
-        case 0:
-            opsize = OS_LONG;
+
+        opsize = ext_opsize(ext, 10);
+        switch (opsize) {
+        case OS_LONG:
             gen_helper_f64_to_i32(tmp32, cpu_env, src);
             break;
-        case 1:
-            opsize = OS_SINGLE;
+        case OS_SINGLE:
             gen_helper_f64_to_f32(tmp32, cpu_env, src);
             break;
-        case 4:
-            opsize = OS_WORD;
+        case OS_WORD:
             gen_helper_f64_to_i32(tmp32, cpu_env, src);
             break;
-        case 5: /* OS_DOUBLE */
+        case OS_DOUBLE:
             tcg_gen_mov_i32(tmp32, AREG(insn, 0));
             switch ((insn >> 3) & 7) {
             case 2:
@@ -4153,8 +4167,7 @@ DISAS_INSN(fpu)
             }
             tcg_temp_free_i32(tmp32);
             return;
-        case 6:
-            opsize = OS_BYTE;
+        case OS_BYTE:
             gen_helper_f64_to_i32(tmp32, cpu_env, src);
             break;
         default:
@@ -4227,15 +4240,7 @@ DISAS_INSN(fpu)
     }
     if (ext & (1 << 14)) {
         /* Source effective address.  */
-        switch ((ext >> 10) & 7) {
-        case 0: opsize = OS_LONG; break;
-        case 1: opsize = OS_SINGLE; break;
-        case 4: opsize = OS_WORD; break;
-        case 5: opsize = OS_DOUBLE; break;
-        case 6: opsize = OS_BYTE; break;
-        default:
-            goto undef;
-        }
+        opsize = ext_opsize(ext, 10);
         if (opsize == OS_DOUBLE) {
             tmp32 = tcg_temp_new_i32();
             tcg_gen_mov_i32(tmp32, AREG(insn, 0));
diff --git a/tcg-runtime.c b/tcg/tcg-runtime.c
index ec3a34e461..ec3a34e461 100644
--- a/tcg-runtime.c
+++ b/tcg/tcg-runtime.c
diff --git a/tci.c b/tcg/tci.c
index 4bdc645f2a..4bdc645f2a 100644
--- a/tci.c
+++ b/tcg/tci.c
diff --git a/tests/Makefile.include b/tests/Makefile.include
index f42f3dfa72..77da9b7f4b 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -205,6 +205,8 @@ check-qtest-pci-y += tests/intel-hda-test$(EXESUF)
 gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c
 check-qtest-pci-$(CONFIG_EVENTFD) += tests/ivshmem-test$(EXESUF)
 gcov-files-pci-y += hw/misc/ivshmem.c
+check-qtest-pci-y += tests/megasas-test$(EXESUF)
+gcov-files-pci-y += hw/scsi/megasas.c
 
 check-qtest-i386-y = tests/endianness-test$(EXESUF)
 check-qtest-i386-y += tests/fdc-test$(EXESUF)
@@ -755,6 +757,7 @@ tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y)
 tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y)
 tests/test-x86-cpuid-compat$(EXESUF): tests/test-x86-cpuid-compat.o $(qtest-obj-y)
 tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y) $(libqos-spapr-obj-y)
+tests/megasas-test$(EXESUF): tests/megasas-test.o $(libqos-spapr-obj-y) $(libqos-pc-obj-y)
 tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o contrib/libvhost-user/libvhost-user.o $(test-util-obj-y)
 tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y)
 tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o
diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include
index 03eda37bf4..0ed8c3d323 100644
--- a/tests/docker/Makefile.include
+++ b/tests/docker/Makefile.include
@@ -126,7 +126,7 @@ docker-run: docker-qemu-src
 			"  COPYING $(EXECUTABLE) to $(IMAGE)"))
 	$(call quiet-command,						\
 		$(SRC_PATH)/tests/docker/docker.py run 			\
-			-t 						\
+			$(if $(NOUSER),,-u $(shell id -u)) -t 		\
 			$(if $V,,--rm) 					\
 			$(if $(DEBUG),-i,--net=none) 			\
 			-e TARGET_LIST=$(TARGET_LIST) 			\
diff --git a/tests/docker/dockerfiles/centos6.docker b/tests/docker/dockerfiles/centos6.docker
index 34e0d3b91e..17a4d24d54 100644
--- a/tests/docker/dockerfiles/centos6.docker
+++ b/tests/docker/dockerfiles/centos6.docker
@@ -1,7 +1,7 @@
 FROM centos:6
 RUN yum install -y epel-release
 ENV PACKAGES libfdt-devel ccache \
-    tar git make gcc g++ \
+    tar git make gcc g++ flex bison \
     zlib-devel glib2-devel SDL-devel pixman-devel \
     epel-release
 RUN yum install -y $PACKAGES
diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker
index c4f80ad3d8..4eaa8ed2a5 100644
--- a/tests/docker/dockerfiles/fedora.docker
+++ b/tests/docker/dockerfiles/fedora.docker
@@ -1,8 +1,8 @@
 FROM fedora:latest
 ENV PACKAGES \
-    ccache git tar PyYAML sparse flex bison python2 \
+    ccache git tar PyYAML sparse flex bison python2 bzip2 hostname \
     glib2-devel pixman-devel zlib-devel SDL-devel libfdt-devel \
-    gcc gcc-c++ clang make perl which bc findutils \
+    gcc gcc-c++ clang make perl which bc findutils libaio-devel \
     mingw32-pixman mingw32-glib2 mingw32-gmp mingw32-SDL mingw32-pkg-config \
     mingw32-gtk2 mingw32-gtk3 mingw32-gnutls mingw32-nettle mingw32-libtasn1 \
     mingw32-libjpeg-turbo mingw32-libpng mingw32-curl mingw32-libssh2 \
diff --git a/tests/megasas-test.c b/tests/megasas-test.c
new file mode 100644
index 0000000000..ce960e7f81
--- /dev/null
+++ b/tests/megasas-test.c
@@ -0,0 +1,86 @@
+/*
+ * QTest testcase for LSI MegaRAID
+ *
+ * Copyright (c) 2017 Red Hat Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+#include "qemu/bswap.h"
+#include "libqos/libqos-pc.h"
+#include "libqos/libqos-spapr.h"
+
+static QOSState *qmegasas_start(const char *extra_opts)
+{
+    const char *arch = qtest_get_arch();
+    const char *cmd = "-drive id=hd0,if=none,file=null-co://,format=raw "
+                      "-device megasas,id=scsi0,addr=04.0 "
+                      "-device scsi-hd,bus=scsi0.0,drive=hd0 %s";
+
+    if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
+        return qtest_pc_boot(cmd, extra_opts ? : "");
+    }
+
+    g_printerr("virtio-scsi tests are only available on x86 or ppc64\n");
+    exit(EXIT_FAILURE);
+}
+
+static void qmegasas_stop(QOSState *qs)
+{
+    qtest_shutdown(qs);
+}
+
+/* Tests only initialization so far. TODO: Replace with functional tests */
+static void pci_nop(void)
+{
+    QOSState *qs;
+
+    qs = qmegasas_start(NULL);
+    qmegasas_stop(qs);
+}
+
+/* This used to cause a NULL pointer dereference.  */
+static void megasas_pd_get_info_fuzz(void)
+{
+    QPCIDevice *dev;
+    QOSState *qs;
+    QPCIBar bar;
+    uint32_t context[256];
+    uint64_t context_pa;
+    int i;
+
+    qs = qmegasas_start(NULL);
+    dev = qpci_device_find(qs->pcibus, QPCI_DEVFN(4,0));
+    g_assert(dev != NULL);
+
+    qpci_device_enable(dev);
+    bar = qpci_iomap(dev, 0, NULL);
+
+    memset(context, 0, sizeof(context));
+    context[0] = cpu_to_le32(0x05050505);
+    context[1] = cpu_to_le32(0x01010101);
+    for (i = 2; i < ARRAY_SIZE(context); i++) {
+        context[i] = cpu_to_le32(0x41414141);
+    }
+    context[6] = cpu_to_le32(0x02020000);
+    context[7] = cpu_to_le32(0);
+
+    context_pa = qmalloc(qs, sizeof(context));
+    memwrite(context_pa, context, sizeof(context));
+    qpci_io_writel(dev, bar, 0x40, context_pa);
+
+    g_free(dev);
+    qmegasas_stop(qs);
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+    qtest_add_func("/megasas/pci/nop", pci_nop);
+    qtest_add_func("/megasas/dcmd/pd-get-info/fuzz", megasas_pd_get_info_fuzz);
+
+    return g_test_run();
+}
diff --git a/tests/q35-test.c b/tests/q35-test.c
index cc58f3ecf4..f98bed7a2d 100644
--- a/tests/q35-test.c
+++ b/tests/q35-test.c
@@ -15,6 +15,48 @@
 #include "libqos/pci-pc.h"
 #include "hw/pci-host/q35.h"
 
+#define TSEG_SIZE_TEST_GUEST_RAM_MBYTES 128
+
+/* @esmramc_tseg_sz: ESMRAMC.TSEG_SZ bitmask for selecting the requested TSEG
+ *                   size. Must be a subset of
+ *                   MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK.
+ *
+ * @extended_tseg_mbytes: Size of the extended TSEG. Only consulted if
+ *                        @esmramc_tseg_sz equals
+ *                        MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK precisely.
+ *
+ * @expected_tseg_mbytes: Expected guest-visible TSEG size in megabytes,
+ *                        matching @esmramc_tseg_sz and @extended_tseg_mbytes
+ *                        above.
+ */
+struct TsegSizeArgs {
+    uint8_t esmramc_tseg_sz;
+    uint16_t extended_tseg_mbytes;
+    uint16_t expected_tseg_mbytes;
+};
+typedef struct TsegSizeArgs TsegSizeArgs;
+
+static const TsegSizeArgs tseg_1mb = {
+    .esmramc_tseg_sz      = MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_1MB,
+    .extended_tseg_mbytes = 0,
+    .expected_tseg_mbytes = 1,
+};
+static const TsegSizeArgs tseg_2mb = {
+    .esmramc_tseg_sz      = MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_2MB,
+    .extended_tseg_mbytes = 0,
+    .expected_tseg_mbytes = 2,
+};
+static const TsegSizeArgs tseg_8mb = {
+    .esmramc_tseg_sz      = MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_8MB,
+    .extended_tseg_mbytes = 0,
+    .expected_tseg_mbytes = 8,
+};
+static const TsegSizeArgs tseg_ext_16mb = {
+    .esmramc_tseg_sz      = MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK,
+    .extended_tseg_mbytes = 16,
+    .expected_tseg_mbytes = 16,
+};
+
 static void smram_set_bit(QPCIDevice *pcidev, uint8_t mask, bool enabled)
 {
     uint8_t smram;
@@ -42,6 +84,8 @@ static void test_smram_lock(void)
     QPCIDevice *pcidev;
     QDict *response;
 
+    qtest_start("-M q35");
+
     pcibus = qpci_init_pc(NULL);
     g_assert(pcibus != NULL);
 
@@ -74,19 +118,86 @@ static void test_smram_lock(void)
 
     g_free(pcidev);
     qpci_free_pc(pcibus);
+
+    qtest_end();
 }
 
-int main(int argc, char **argv)
+static void test_tseg_size(const void *data)
 {
-    int ret;
+    const TsegSizeArgs *args = data;
+    char *cmdline;
+    QPCIBus *pcibus;
+    QPCIDevice *pcidev;
+    uint8_t smram_val;
+    uint8_t esmramc_val;
+    uint32_t ram_offs;
+
+    if (args->esmramc_tseg_sz == MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK) {
+        cmdline = g_strdup_printf("-M q35 -m %uM "
+                                  "-global mch.extended-tseg-mbytes=%u",
+                                  TSEG_SIZE_TEST_GUEST_RAM_MBYTES,
+                                  args->extended_tseg_mbytes);
+    } else {
+        cmdline = g_strdup_printf("-M q35 -m %uM",
+                                  TSEG_SIZE_TEST_GUEST_RAM_MBYTES);
+    }
+    qtest_start(cmdline);
+    g_free(cmdline);
 
-    g_test_init(&argc, &argv, NULL);
+    /* locate the DRAM controller */
+    pcibus = qpci_init_pc(NULL);
+    g_assert(pcibus != NULL);
+    pcidev = qpci_device_find(pcibus, 0);
+    g_assert(pcidev != NULL);
 
-    qtest_add_func("/q35/smram/lock", test_smram_lock);
+    /* Set TSEG size. Restrict TSEG visibility to SMM by setting T_EN. */
+    esmramc_val = qpci_config_readb(pcidev, MCH_HOST_BRIDGE_ESMRAMC);
+    esmramc_val &= ~MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK;
+    esmramc_val |= args->esmramc_tseg_sz;
+    esmramc_val |= MCH_HOST_BRIDGE_ESMRAMC_T_EN;
+    qpci_config_writeb(pcidev, MCH_HOST_BRIDGE_ESMRAMC, esmramc_val);
+
+    /* Enable TSEG by setting G_SMRAME. Close TSEG by setting D_CLS. */
+    smram_val = qpci_config_readb(pcidev, MCH_HOST_BRIDGE_SMRAM);
+    smram_val &= ~(MCH_HOST_BRIDGE_SMRAM_D_OPEN |
+                   MCH_HOST_BRIDGE_SMRAM_D_LCK);
+    smram_val |= (MCH_HOST_BRIDGE_SMRAM_D_CLS |
+                  MCH_HOST_BRIDGE_SMRAM_G_SMRAME);
+    qpci_config_writeb(pcidev, MCH_HOST_BRIDGE_SMRAM, smram_val);
+
+    /* lock TSEG */
+    smram_val |= MCH_HOST_BRIDGE_SMRAM_D_LCK;
+    qpci_config_writeb(pcidev, MCH_HOST_BRIDGE_SMRAM, smram_val);
+
+    /* Now check that the byte right before the TSEG is r/w, and that the first
+     * byte in the TSEG always reads as 0xff.
+     */
+    ram_offs = (TSEG_SIZE_TEST_GUEST_RAM_MBYTES - args->expected_tseg_mbytes) *
+               1024 * 1024 - 1;
+    g_assert_cmpint(readb(ram_offs), ==, 0);
+    writeb(ram_offs, 1);
+    g_assert_cmpint(readb(ram_offs), ==, 1);
+
+    ram_offs++;
+    g_assert_cmpint(readb(ram_offs), ==, 0xff);
+    writeb(ram_offs, 1);
+    g_assert_cmpint(readb(ram_offs), ==, 0xff);
 
-    qtest_start("-M q35");
-    ret = g_test_run();
+    g_free(pcidev);
+    qpci_free_pc(pcibus);
     qtest_end();
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+
+    qtest_add_func("/q35/smram/lock", test_smram_lock);
 
-    return ret;
+    qtest_add_data_func("/q35/tseg-size/1mb", &tseg_1mb, test_tseg_size);
+    qtest_add_data_func("/q35/tseg-size/2mb", &tseg_2mb, test_tseg_size);
+    qtest_add_data_func("/q35/tseg-size/8mb", &tseg_8mb, test_tseg_size);
+    qtest_add_data_func("/q35/tseg-size/ext/16mb", &tseg_ext_16mb,
+                        test_tseg_size);
+    return g_test_run();
 }
diff --git a/trace-events b/trace-events
index fd83087e39..bae63fdb1d 100644
--- a/trace-events
+++ b/trace-events
@@ -55,28 +55,6 @@ dma_complete(void *dbs, int ret, void *cb) "dbs=%p ret=%d cb=%p"
 dma_blk_cb(void *dbs, int ret) "dbs=%p ret=%d"
 dma_map_wait(void *dbs) "dbs=%p"
 
-# kvm-all.c
-kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
-kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"
-kvm_vcpu_ioctl(int cpu_index, int type, void *arg) "cpu_index %d, type 0x%x, arg %p"
-kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d"
-kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
-kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
-kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
-kvm_irqchip_commit_routes(void) ""
-kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
-kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
-kvm_irqchip_release_virq(int virq) "virq %d"
-
-# TCG related tracing (mostly disabled by default)
-# cpu-exec.c
-disable exec_tb(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
-disable exec_tb_nocache(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
-disable exec_tb_exit(void *last_tb, unsigned int flags) "tb:%p flags=%x"
-
-# translate-all.c
-translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p"
-
 # memory.c
 memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
 memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
diff --git a/ui/cocoa.m b/ui/cocoa.m
index 004ec2711c..1f010d3ae7 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -52,6 +52,8 @@
 /* macOS 10.12 deprecated many constants, #define the new names for older SDKs */
 #if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_12
 #define NSEventMaskAny                  NSAnyEventMask
+#define NSEventModifierFlagCapsLock     NSAlphaShiftKeyMask
+#define NSEventModifierFlagShift        NSShiftKeyMask
 #define NSEventModifierFlagCommand      NSCommandKeyMask
 #define NSEventModifierFlagControl      NSControlKeyMask
 #define NSEventModifierFlagOption       NSAlternateKeyMask
@@ -268,7 +270,7 @@ static void handleAnyDeviceErrors(Error * err)
     NSWindow *fullScreenWindow;
     float cx,cy,cw,ch,cdx,cdy;
     CGDataProviderRef dataProviderRef;
-    int modifiers_state[256];
+    BOOL modifiers_state[256];
     BOOL isMouseGrabbed;
     BOOL isFullscreen;
     BOOL isAbsoluteEnabled;
@@ -536,18 +538,59 @@ QemuCocoaView *cocoaView;
     }
 }
 
+- (void) toggleModifier: (int)keycode {
+    // Toggle the stored state.
+    modifiers_state[keycode] = !modifiers_state[keycode];
+    // Send a keyup or keydown depending on the state.
+    qemu_input_event_send_key_qcode(dcl->con, keycode, modifiers_state[keycode]);
+}
+
+- (void) toggleStatefulModifier: (int)keycode {
+    // Toggle the stored state.
+    modifiers_state[keycode] = !modifiers_state[keycode];
+    // Generate keydown and keyup.
+    qemu_input_event_send_key_qcode(dcl->con, keycode, true);
+    qemu_input_event_send_key_qcode(dcl->con, keycode, false);
+}
+
 - (void) handleEvent:(NSEvent *)event
 {
     COCOA_DEBUG("QemuCocoaView: handleEvent\n");
 
     int buttons = 0;
-    int keycode;
+    int keycode = 0;
     bool mouse_event = false;
     NSPoint p = [event locationInWindow];
 
     switch ([event type]) {
         case NSEventTypeFlagsChanged:
-            keycode = cocoa_keycode_to_qemu([event keyCode]);
+            if ([event keyCode] == 0) {
+                // When the Cocoa keyCode is zero that means keys should be
+                // synthesized based on the values in in the eventModifiers
+                // bitmask.
+
+                if (qemu_console_is_graphic(NULL)) {
+                    NSEventModifierFlags modifiers = [event modifierFlags];
+
+                    if (!!(modifiers & NSEventModifierFlagCapsLock) != !!modifiers_state[Q_KEY_CODE_CAPS_LOCK]) {
+                        [self toggleStatefulModifier:Q_KEY_CODE_CAPS_LOCK];
+                    }
+                    if (!!(modifiers & NSEventModifierFlagShift) != !!modifiers_state[Q_KEY_CODE_SHIFT]) {
+                        [self toggleModifier:Q_KEY_CODE_SHIFT];
+                    }
+                    if (!!(modifiers & NSEventModifierFlagControl) != !!modifiers_state[Q_KEY_CODE_CTRL]) {
+                        [self toggleModifier:Q_KEY_CODE_CTRL];
+                    }
+                    if (!!(modifiers & NSEventModifierFlagOption) != !!modifiers_state[Q_KEY_CODE_ALT]) {
+                        [self toggleModifier:Q_KEY_CODE_ALT];
+                    }
+                    if (!!(modifiers & NSEventModifierFlagCommand) != !!modifiers_state[Q_KEY_CODE_META_L]) {
+                        [self toggleModifier:Q_KEY_CODE_META_L];
+                    }
+                }
+            } else {
+                keycode = cocoa_keycode_to_qemu([event keyCode]);
+            }
 
             if ((keycode == Q_KEY_CODE_META_L || keycode == Q_KEY_CODE_META_R)
                && !isMouseGrabbed) {
@@ -559,16 +602,9 @@ QemuCocoaView *cocoaView;
                 // emulate caps lock and num lock keydown and keyup
                 if (keycode == Q_KEY_CODE_CAPS_LOCK ||
                     keycode == Q_KEY_CODE_NUM_LOCK) {
-                    qemu_input_event_send_key_qcode(dcl->con, keycode, true);
-                    qemu_input_event_send_key_qcode(dcl->con, keycode, false);
+                    [self toggleStatefulModifier:keycode];
                 } else if (qemu_console_is_graphic(NULL)) {
-                    if (modifiers_state[keycode] == 0) { // keydown
-                        qemu_input_event_send_key_qcode(dcl->con, keycode, true);
-                        modifiers_state[keycode] = 1;
-                    } else { // keyup
-                        qemu_input_event_send_key_qcode(dcl->con, keycode, false);
-                        modifiers_state[keycode] = 0;
-                    }
+                  [self toggleModifier:keycode];
                 }
             }
 
diff --git a/ui/spice-core.c b/ui/spice-core.c
index a087ad5da9..182f550f1f 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -847,6 +847,7 @@ void qemu_spice_init(void)
             exit(1);
         }
         display_opengl = 1;
+        spice_opengl = 1;
     }
 #endif
 }
diff --git a/ui/spice-display.c b/ui/spice-display.c
index b353445f58..042292cc90 100644
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -27,6 +27,7 @@
 #include "ui/spice-display.h"
 
 static int debug = 0;
+bool spice_opengl;
 
 static void GCC_FMT_ATTR(2, 3) dprint(int level, const char *fmt, ...)
 {
@@ -1013,7 +1014,7 @@ static void qemu_spice_display_init_one(QemuConsole *con)
 
     ssd->dcl.ops = &display_listener_ops;
 #ifdef HAVE_SPICE_GL
-    if (display_opengl) {
+    if (spice_opengl) {
         ssd->dcl.ops = &display_listener_gl_ops;
         ssd->gl_unblock_bh = qemu_bh_new(qemu_spice_gl_unblock_bh, ssd);
         ssd->gl_unblock_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
diff --git a/ui/spice-input.c b/ui/spice-input.c
index 86293dd2ce..918580239d 100644
--- a/ui/spice-input.c
+++ b/ui/spice-input.c
@@ -87,7 +87,7 @@ static void kbd_leds(void *opaque, int ledstate)
     if (ledstate & QEMU_CAPS_LOCK_LED) {
         kbd->ledstate |= SPICE_KEYBOARD_MODIFIER_FLAGS_CAPS_LOCK;
     }
-    spice_server_kbd_leds(&kbd->sin, ledstate);
+    spice_server_kbd_leds(&kbd->sin, kbd->ledstate);
 }
 
 /* mouse bits */
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 94d9477209..50a55ecc75 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -43,4 +43,5 @@ util-obj-y += log.o
 util-obj-y += qdist.o
 util-obj-y += qht.o
 util-obj-y += range.o
+util-obj-y += stats64.o
 util-obj-y += systemd.o
diff --git a/util/stats64.c b/util/stats64.c
new file mode 100644
index 0000000000..9968fcceac
--- /dev/null
+++ b/util/stats64.c
@@ -0,0 +1,137 @@
+/*
+ * Atomic operations on 64-bit quantities.
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/atomic.h"
+#include "qemu/stats64.h"
+#include "qemu/processor.h"
+
+#ifndef CONFIG_ATOMIC64
+static inline void stat64_rdlock(Stat64 *s)
+{
+    /* Keep out incoming writers to avoid them starving us. */
+    atomic_add(&s->lock, 2);
+
+    /* If there is a concurrent writer, wait for it.  */
+    while (atomic_read(&s->lock) & 1) {
+        cpu_relax();
+    }
+}
+
+static inline void stat64_rdunlock(Stat64 *s)
+{
+    atomic_sub(&s->lock, 2);
+}
+
+static inline bool stat64_wrtrylock(Stat64 *s)
+{
+    return atomic_cmpxchg(&s->lock, 0, 1) == 0;
+}
+
+static inline void stat64_wrunlock(Stat64 *s)
+{
+    atomic_dec(&s->lock);
+}
+
+uint64_t stat64_get(const Stat64 *s)
+{
+    uint32_t high, low;
+
+    stat64_rdlock((Stat64 *)s);
+
+    /* 64-bit writes always take the lock, so we can read in
+     * any order.
+     */
+    high = atomic_read(&s->high);
+    low = atomic_read(&s->low);
+    stat64_rdunlock((Stat64 *)s);
+
+    return ((uint64_t)high << 32) | low;
+}
+
+bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high)
+{
+    uint32_t old;
+
+    if (!stat64_wrtrylock(s)) {
+        cpu_relax();
+        return false;
+    }
+
+    /* 64-bit reads always take the lock, so they don't care about the
+     * order of our update.  By updating s->low first, we can check
+     * whether we have to carry into s->high.
+     */
+    old = atomic_fetch_add(&s->low, low);
+    high += (old + low) < old;
+    atomic_add(&s->high, high);
+    stat64_wrunlock(s);
+    return true;
+}
+
+bool stat64_min_slow(Stat64 *s, uint64_t value)
+{
+    uint32_t high, low;
+    uint64_t orig;
+
+    if (!stat64_wrtrylock(s)) {
+        cpu_relax();
+        return false;
+    }
+
+    high = atomic_read(&s->high);
+    low = atomic_read(&s->low);
+
+    orig = ((uint64_t)high << 32) | low;
+    if (orig < value) {
+        /* We have to set low before high, just like stat64_min reads
+         * high before low.  The value may become higher temporarily, but
+         * stat64_get does not notice (it takes the lock) and the only ill
+         * effect on stat64_min is that the slow path may be triggered
+         * unnecessarily.
+         */
+        atomic_set(&s->low, (uint32_t)value);
+        smp_wmb();
+        atomic_set(&s->high, value >> 32);
+    }
+    stat64_wrunlock(s);
+    return true;
+}
+
+bool stat64_max_slow(Stat64 *s, uint64_t value)
+{
+    uint32_t high, low;
+    uint64_t orig;
+
+    if (!stat64_wrtrylock(s)) {
+        cpu_relax();
+        return false;
+    }
+
+    high = atomic_read(&s->high);
+    low = atomic_read(&s->low);
+
+    orig = ((uint64_t)high << 32) | low;
+    if (orig > value) {
+        /* We have to set low before high, just like stat64_max reads
+         * high before low.  The value may become lower temporarily, but
+         * stat64_get does not notice (it takes the lock) and the only ill
+         * effect on stat64_max is that the slow path may be triggered
+         * unnecessarily.
+         */
+        atomic_set(&s->low, (uint32_t)value);
+        smp_wmb();
+        atomic_set(&s->high, value >> 32);
+    }
+    stat64_wrunlock(s);
+    return true;
+}
+#endif
diff --git a/vl.c b/vl.c
index 32db19e3b9..59fea15488 100644
--- a/vl.c
+++ b/vl.c
@@ -3757,21 +3757,18 @@ int main(int argc, char **argv, char **envp)
                 qdev_prop_register_global(&kvm_pit_lost_tick_policy);
                 break;
             }
-            case QEMU_OPTION_accel: {
-                QemuOpts *accel_opts;
-
+            case QEMU_OPTION_accel:
                 accel_opts = qemu_opts_parse_noisily(qemu_find_opts("accel"),
                                                      optarg, true);
                 optarg = qemu_opt_get(accel_opts, "accel");
                 if (!optarg || is_help_option(optarg)) {
                     error_printf("Possible accelerators: kvm, xen, hax, tcg\n");
-                    exit(1);
+                    exit(0);
                 }
-                accel_opts = qemu_opts_create(qemu_find_opts("machine"), NULL,
-                                              false, &error_abort);
-                qemu_opt_set(accel_opts, "accel", optarg, &error_abort);
+                opts = qemu_opts_create(qemu_find_opts("machine"), NULL,
+                                        false, &error_abort);
+                qemu_opt_set(opts, "accel", optarg, &error_abort);
                 break;
-            }
             case QEMU_OPTION_usb:
                 olist = qemu_find_opts("machine");
                 qemu_opts_parse_noisily(olist, "usb=on", false);