-rw-r--r--  .gitignore | 17
-rw-r--r--  Makefile | 77
-rw-r--r--  Makefile.objs | 7
-rw-r--r--  Makefile.target | 14
-rw-r--r--  accel/Makefile.objs | 4
-rw-r--r--  accel/accel.c (renamed from accel.c) | 27
-rw-r--r--  accel/kvm/Makefile.objs | 1
-rw-r--r--  accel/kvm/kvm-all.c (renamed from kvm-all.c) | 4
-rw-r--r--  accel/kvm/trace-events | 15
-rw-r--r--  accel/stubs/Makefile.objs | 1
-rw-r--r--  accel/stubs/kvm-stub.c (renamed from kvm-stub.c) | 0
-rw-r--r--  accel/tcg/Makefile.objs | 3
-rw-r--r--  accel/tcg/cpu-exec-common.c (renamed from cpu-exec-common.c) | 0
-rw-r--r--  accel/tcg/cpu-exec.c (renamed from cpu-exec.c) | 2
-rw-r--r--  accel/tcg/cputlb.c (renamed from cputlb.c) | 0
-rw-r--r--  accel/tcg/tcg-all.c | 61
-rw-r--r--  accel/tcg/trace-events | 10
-rw-r--r--  accel/tcg/translate-all.c (renamed from translate-all.c) | 2
-rw-r--r--  accel/tcg/translate-all.h (renamed from translate-all.h) | 0
-rw-r--r--  accel/tcg/translate-common.c (renamed from translate-common.c) | 0
-rw-r--r--  block.c | 10
-rw-r--r--  block/accounting.c | 78
-rw-r--r--  block/block-backend.c | 6
-rw-r--r--  block/dirty-bitmap.c | 112
-rw-r--r--  block/io.c | 51
-rw-r--r--  block/iscsi.c | 22
-rw-r--r--  block/mirror.c | 14
-rw-r--r--  block/nbd-client.c | 8
-rw-r--r--  block/nfs.c | 4
-rw-r--r--  block/qapi.c | 2
-rw-r--r--  block/rbd.c | 22
-rw-r--r--  block/sheepdog.c | 3
-rw-r--r--  block/throttle-groups.c | 91
-rw-r--r--  blockdev-nbd.c | 6
-rw-r--r--  blockdev.c | 48
-rwxr-xr-x  configure | 28
-rw-r--r--  contrib/libvhost-user/libvhost-user.h | 11
-rw-r--r--  contrib/vhost-user-scsi/Makefile.objs | 1
-rw-r--r--  contrib/vhost-user-scsi/vhost-user-scsi.c | 886
-rw-r--r--  default-configs/pci.mak | 1
-rw-r--r--  default-configs/s390x-softmmu.mak | 1
-rw-r--r--  docs/interop/parallels.txt (renamed from docs/specs/parallels.txt) | 0
-rw-r--r--  docs/interop/qcow2.txt (renamed from docs/specs/qcow2.txt) | 0
-rw-r--r--  docs/interop/qed_spec.txt (renamed from docs/specs/qed_spec.txt) | 0
-rw-r--r--  docs/interop/qemu-ga-ref.texi (renamed from docs/qemu-ga-ref.texi) | 0
-rw-r--r--  docs/interop/qemu-qmp-ref.texi (renamed from docs/qemu-qmp-ref.texi) | 0
-rw-r--r--  docs/interop/qmp-intro.txt (renamed from docs/qmp-intro.txt) | 0
-rw-r--r--  docs/interop/qmp-spec.txt (renamed from docs/qmp-spec.txt) | 0
-rw-r--r--  docs/interop/vhost-user.txt (renamed from docs/specs/vhost-user.txt) | 0
-rw-r--r--  docs/interop/vnc-ledstate-Pseudo-encoding.txt (renamed from docs/vnc-ledstate-Pseudo-encoding.txt) | 0
-rw-r--r--  exec.c | 116
-rw-r--r--  fpu/softfloat-specialize.h | 34
-rw-r--r--  hw/i386/intel_iommu.c | 421
-rw-r--r--  hw/i386/intel_iommu_internal.h | 1
-rw-r--r--  hw/i386/pc.c | 14
-rw-r--r--  hw/i386/trace-events | 44
-rw-r--r--  hw/misc/ivshmem.c | 14
-rw-r--r--  hw/pci-host/q35.c | 41
-rw-r--r--  hw/scsi/Makefile.objs | 1
-rw-r--r--  hw/scsi/megasas.c | 175
-rw-r--r--  hw/scsi/vhost-scsi-common.c | 1
-rw-r--r--  hw/scsi/vhost-user-scsi.c | 205
-rw-r--r--  hw/virtio/virtio-pci.c | 58
-rw-r--r--  hw/virtio/virtio-pci.h | 11
-rw-r--r--  include/block/accounting.h | 11
-rw-r--r--  include/block/block.h | 5
-rw-r--r--  include/block/block_int.h | 61
-rw-r--r--  include/block/dirty-bitmap.h | 25
-rw-r--r--  include/block/nbd.h | 10
-rw-r--r--  include/exec/memory.h | 31
-rw-r--r--  include/exec/poison.h | 31
-rw-r--r--  include/exec/ram_addr.h | 3
-rw-r--r--  include/hw/i386/pc.h | 5
-rw-r--r--  include/hw/pci-host/q35.h | 6
-rw-r--r--  include/hw/virtio/vhost-user-scsi.h | 35
-rw-r--r--  include/hw/virtio/virtio-scsi.h | 2
-rw-r--r--  include/qemu/stats64.h | 193
-rw-r--r--  include/sysemu/block-backend.h | 10
-rw-r--r--  include/ui/spice-display.h | 2
-rw-r--r--  memory.c | 26
-rw-r--r--  migration/block.c | 17
-rw-r--r--  nbd/client.c | 64
-rw-r--r--  nbd/common.c | 34
-rw-r--r--  nbd/nbd-internal.h | 28
-rw-r--r--  nbd/server.c | 343
-rw-r--r--  qemu-doc.texi | 5
-rw-r--r--  qemu-nbd.c | 8
-rw-r--r--  rules.mak | 2
-rw-r--r--  target/i386/hax-all.c | 3
-rw-r--r--  target/m68k/Makefile.objs | 2
-rw-r--r--  target/m68k/fpu_helper.c | 112
-rw-r--r--  target/m68k/helper.c | 88
-rw-r--r--  target/m68k/translate.c | 45
-rw-r--r--  tcg/tcg-runtime.c (renamed from tcg-runtime.c) | 0
-rw-r--r--  tcg/tci.c (renamed from tci.c) | 0
-rw-r--r--  tests/Makefile.include | 3
-rw-r--r--  tests/docker/Makefile.include | 2
-rw-r--r--  tests/docker/dockerfiles/centos6.docker | 2
-rw-r--r--  tests/docker/dockerfiles/fedora.docker | 4
-rw-r--r--  tests/megasas-test.c | 86
-rw-r--r--  tests/q35-test.c | 125
-rw-r--r--  trace-events | 22
-rw-r--r--  ui/cocoa.m | 60
-rw-r--r--  ui/spice-core.c | 1
-rw-r--r--  ui/spice-display.c | 3
-rw-r--r--  ui/spice-input.c | 2
-rw-r--r--  util/Makefile.objs | 1
-rw-r--r--  util/stats64.c | 137
-rw-r--r--  vl.c | 13
109 files changed, 3242 insertions, 1212 deletions
diff --git a/.gitignore b/.gitignore
index 55a001e3b8..09c2363acf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,6 +50,7 @@
/qemu-version.h.tmp
/module_block.h
/vscclient
+/vhost-user-scsi
/fsdev/virtfs-proxy-helper
*.[1-9]
*.a
@@ -99,14 +100,14 @@
/pc-bios/optionrom/kvmvapic.img
/pc-bios/s390-ccw/s390-ccw.elf
/pc-bios/s390-ccw/s390-ccw.img
-/docs/qemu-ga-qapi.texi
-/docs/qemu-ga-ref.html
-/docs/qemu-ga-ref.info*
-/docs/qemu-ga-ref.txt
-/docs/qemu-qmp-qapi.texi
-/docs/qemu-qmp-ref.html
-/docs/qemu-qmp-ref.info*
-/docs/qemu-qmp-ref.txt
+/docs/interop/qemu-ga-qapi.texi
+/docs/interop/qemu-ga-ref.html
+/docs/interop/qemu-ga-ref.info*
+/docs/interop/qemu-ga-ref.txt
+/docs/interop/qemu-qmp-qapi.texi
+/docs/interop/qemu-qmp-ref.html
+/docs/interop/qemu-qmp-ref.info*
+/docs/interop/qemu-qmp-ref.txt
/docs/version.texi
*.tps
.stgit-*
diff --git a/Makefile b/Makefile
index c830d7a46c..16a0430c6c 100644
--- a/Makefile
+++ b/Makefile
@@ -207,8 +207,8 @@ HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF)
ifdef BUILD_DOCS
DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
-DOCS+=docs/qemu-qmp-ref.html docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.7
-DOCS+=docs/qemu-ga-ref.html docs/qemu-ga-ref.txt docs/qemu-ga-ref.7
+DOCS+=docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7
+DOCS+=docs/interop/qemu-ga-ref.html docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7
ifdef CONFIG_VIRTFS
DOCS+=fsdev/virtfs-proxy-helper.1
endif
@@ -269,6 +269,7 @@ dummy := $(call unnest-vars,, \
ivshmem-client-obj-y \
ivshmem-server-obj-y \
libvhost-user-obj-y \
+ vhost-user-scsi-obj-y \
qga-vss-dll-obj-y \
block-obj-y \
block-obj-m \
@@ -473,6 +474,8 @@ ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) $(COMMON_LDADDS)
$(call LINK, $^)
ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) $(COMMON_LDADDS)
$(call LINK, $^)
+vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y)
+ $(call LINK, $^)
module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak
$(call quiet-command,$(PYTHON) $< $@ \
@@ -519,11 +522,12 @@ distclean: clean
rm -f qemu-doc.vr qemu-doc.txt
rm -f config.log
rm -f linux-headers/asm
- rm -f docs/qemu-ga-qapi.texi docs/qemu-qmp-qapi.texi docs/version.texi
- rm -f docs/qemu-qmp-ref.7 docs/qemu-ga-ref.7
- rm -f docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
- rm -f docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
- rm -f docs/qemu-qmp-ref.html docs/qemu-ga-ref.html
+ rm -f docs/version.texi
+ rm -f docs/interop/qemu-ga-qapi.texi docs/interop/qemu-qmp-qapi.texi
+ rm -f docs/interop/qemu-qmp-ref.7 docs/interop/qemu-ga-ref.7
+ rm -f docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt
+ rm -f docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf
+ rm -f docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html
for d in $(TARGET_DIRS); do \
rm -rf $$d || exit 1 ; \
done
@@ -562,13 +566,13 @@ install-doc: $(DOCS)
$(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) qemu-doc.html "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) qemu-doc.txt "$(DESTDIR)$(qemu_docdir)"
- $(INSTALL_DATA) docs/qemu-qmp-ref.html "$(DESTDIR)$(qemu_docdir)"
- $(INSTALL_DATA) docs/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)"
+ $(INSTALL_DATA) docs/interop/qemu-qmp-ref.html "$(DESTDIR)$(qemu_docdir)"
+ $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)"
ifdef CONFIG_POSIX
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1"
$(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1"
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7"
- $(INSTALL_DATA) docs/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7"
+ $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7"
ifneq ($(TOOLS),)
$(INSTALL_DATA) qemu-img.1 "$(DESTDIR)$(mandir)/man1"
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8"
@@ -576,9 +580,9 @@ ifneq ($(TOOLS),)
endif
ifneq (,$(findstring qemu-ga,$(TOOLS)))
$(INSTALL_DATA) qemu-ga.8 "$(DESTDIR)$(mandir)/man8"
- $(INSTALL_DATA) docs/qemu-ga-ref.html "$(DESTDIR)$(qemu_docdir)"
- $(INSTALL_DATA) docs/qemu-ga-ref.txt "$(DESTDIR)$(qemu_docdir)"
- $(INSTALL_DATA) docs/qemu-ga-ref.7 "$(DESTDIR)$(mandir)/man7"
+ $(INSTALL_DATA) docs/interop/qemu-ga-ref.html "$(DESTDIR)$(qemu_docdir)"
+ $(INSTALL_DATA) docs/interop/qemu-ga-ref.txt "$(DESTDIR)$(qemu_docdir)"
+ $(INSTALL_DATA) docs/interop/qemu-ga-ref.7 "$(DESTDIR)$(mandir)/man7"
endif
endif
ifdef CONFIG_VIRTFS
@@ -666,28 +670,27 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
# documentation
MAKEINFO=makeinfo
-MAKEINFOFLAGS=--no-split --number-sections -I docs
-TEXIFLAG=$(if $(V),,--quiet)
+MAKEINFOINCLUDES= -I docs -I $(<D) -I $(@D)
+MAKEINFOFLAGS=--no-split --number-sections $(MAKEINFOINCLUDES)
+TEXI2PODFLAGS=$(MAKEINFOINCLUDES) "-DVERSION=$(VERSION)"
+TEXI2PDFFLAGS=$(if $(V),,--quiet) -I $(SRC_PATH) $(MAKEINFOINCLUDES)
docs/version.texi: $(SRC_PATH)/VERSION
$(call quiet-command,echo "@set VERSION $(VERSION)" > $@,"GEN","$@")
-%.html: %.texi
+%.html: %.texi docs/version.texi
$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
--html $< -o $@,"GEN","$@")
-%.info: %.texi
+%.info: %.texi docs/version.texi
$(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@")
-%.txt: %.texi
+%.txt: %.texi docs/version.texi
$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
--plaintext $< -o $@,"GEN","$@")
-%.pdf: %.texi
- $(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I docs $< -o $@,"GEN","$@")
-
-docs/qemu-ga-ref.html docs/qemu-ga-ref.info docs/qemu-ga-ref.txt docs/qemu-ga-ref.pdf docs/qemu-ga-ref.7.pod: docs/version.texi
-docs/qemu-qmp-ref.html docs/qemu-qmp-ref.info docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.pdf docs/qemu-qmp-ref.pod: docs/version.texi
+%.pdf: %.texi docs/version.texi
+ $(call quiet-command,texi2pdf $(TEXI2PDFFLAGS) $< -o $@,"GEN","$@")
qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")
@@ -701,12 +704,12 @@ qemu-monitor-info.texi: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxt
qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")
-docs/qemu-qmp-qapi.texi docs/qemu-ga-qapi.texi: $(SRC_PATH)/scripts/qapi2texi.py $(qapi-py)
+docs/interop/qemu-qmp-qapi.texi docs/interop/qemu-ga-qapi.texi: $(SRC_PATH)/scripts/qapi2texi.py $(qapi-py)
-docs/qemu-qmp-qapi.texi: $(qapi-modules)
+docs/interop/qemu-qmp-qapi.texi: $(qapi-modules)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
-docs/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json
+docs/interop/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi
@@ -716,21 +719,25 @@ fsdev/virtfs-proxy-helper.1: fsdev/virtfs-proxy-helper.texi
qemu-nbd.8: qemu-nbd.texi qemu-option-trace.texi
qemu-ga.8: qemu-ga.texi
-html: qemu-doc.html docs/qemu-qmp-ref.html docs/qemu-ga-ref.html
-info: qemu-doc.info docs/qemu-qmp-ref.info docs/qemu-ga-ref.info
-pdf: qemu-doc.pdf docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
-txt: qemu-doc.txt docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
+html: qemu-doc.html docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html
+info: qemu-doc.info docs/interop/qemu-qmp-ref.info docs/interop/qemu-ga-ref.info
+pdf: qemu-doc.pdf docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf
+txt: qemu-doc.txt docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt
qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \
qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \
qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \
qemu-monitor-info.texi
-docs/qemu-ga-ref.dvi docs/qemu-ga-ref.html docs/qemu-ga-ref.info docs/qemu-ga-ref.pdf docs/qemu-ga-ref.txt docs/qemu-ga-ref.7: \
-docs/qemu-ga-ref.texi docs/qemu-ga-qapi.texi
+docs/interop/qemu-ga-ref.dvi docs/interop/qemu-ga-ref.html \
+ docs/interop/qemu-ga-ref.info docs/interop/qemu-ga-ref.pdf \
+ docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7: \
+ docs/interop/qemu-ga-ref.texi docs/interop/qemu-ga-qapi.texi
-docs/qemu-qmp-ref.dvi docs/qemu-qmp-ref.html docs/qemu-qmp-ref.info docs/qemu-qmp-ref.pdf docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.7: \
-docs/qemu-qmp-ref.texi docs/qemu-qmp-qapi.texi
+docs/interop/qemu-qmp-ref.dvi docs/interop/qemu-qmp-ref.html \
+ docs/interop/qemu-qmp-ref.info docs/interop/qemu-qmp-ref.pdf \
+ docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7: \
+ docs/interop/qemu-qmp-ref.texi docs/interop/qemu-qmp-qapi.texi
ifdef CONFIG_WIN32
@@ -791,9 +798,11 @@ endif # CONFIG_WIN
# Add a dependency on the generated files, so that they are always
# rebuilt before other object files
+ifneq ($(wildcard config-host.mak),)
ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
Makefile: $(GENERATED_FILES)
endif
+endif
.SECONDARY: $(TRACE_HEADERS) $(TRACE_HEADERS:%=%-timestamp) \
$(TRACE_SOURCES) $(TRACE_SOURCES:%=%-timestamp) \
diff --git a/Makefile.objs b/Makefile.objs
index 0575802440..b2e6322ef0 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -52,7 +52,6 @@ common-obj-y += migration/
common-obj-y += audio/
common-obj-y += hw/
-common-obj-y += accel.o
common-obj-y += replay/
@@ -111,6 +110,10 @@ qga-vss-dll-obj-y = qga/
ivshmem-client-obj-y = contrib/ivshmem-client/
ivshmem-server-obj-y = contrib/ivshmem-server/
libvhost-user-obj-y = contrib/libvhost-user/
+vhost-user-scsi.o-cflags := $(LIBISCSI_CFLAGS)
+vhost-user-scsi.o-libs := $(LIBISCSI_LIBS)
+vhost-user-scsi-obj-y = contrib/vhost-user-scsi/
+vhost-user-scsi-obj-y += contrib/libvhost-user/libvhost-user.o
######################################################################
trace-events-subdirs =
@@ -163,6 +166,8 @@ trace-events-subdirs += target/ppc
trace-events-subdirs += qom
trace-events-subdirs += linux-user
trace-events-subdirs += qapi
+trace-events-subdirs += accel/tcg
+trace-events-subdirs += accel/kvm
trace-events-files = $(SRC_PATH)/trace-events $(trace-events-subdirs:%=$(SRC_PATH)/%/trace-events)
diff --git a/Makefile.target b/Makefile.target
index ce8dfe44a8..0066579090 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -88,20 +88,17 @@ all: $(PROGS) stap
#########################################################
# cpu emulator library
-obj-y = exec.o translate-all.o cpu-exec.o
-obj-y += translate-common.o
-obj-y += cpu-exec-common.o
+obj-y += exec.o
+obj-y += accel/
obj-y += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
-obj-$(CONFIG_TCG_INTERPRETER) += tci.o
-obj-y += tcg/tcg-common.o
+obj-y += tcg/tcg-common.o tcg/tcg-runtime.o
+obj-$(CONFIG_TCG_INTERPRETER) += tcg/tci.o
obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
obj-y += fpu/softfloat.o
obj-y += target/$(TARGET_BASE_ARCH)/
obj-y += disas.o
-obj-y += tcg-runtime.o
obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
obj-$(call lnot,$(CONFIG_HAX)) += hax-stub.o
-obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decContext.o
obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decNumber.o
@@ -142,8 +139,7 @@ ifdef CONFIG_SOFTMMU
obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
obj-y += qtest.o bootdevice.o
obj-y += hw/
-obj-$(CONFIG_KVM) += kvm-all.o
-obj-y += memory.o cputlb.o
+obj-y += memory.o
obj-y += memory_mapping.o
obj-y += dump.o
obj-y += migration/ram.o
diff --git a/accel/Makefile.objs b/accel/Makefile.objs
new file mode 100644
index 0000000000..cd5702f347
--- /dev/null
+++ b/accel/Makefile.objs
@@ -0,0 +1,4 @@
+obj-$(CONFIG_SOFTMMU) += accel.o
+obj-y += kvm/
+obj-y += tcg/
+obj-y += stubs/
diff --git a/accel.c b/accel/accel.c
index 664bb88422..7c079a5611 100644
--- a/accel.c
+++ b/accel/accel.c
@@ -34,15 +34,6 @@
#include "hw/xen/xen.h"
#include "qom/object.h"
-int tcg_tb_size;
-static bool tcg_allowed = true;
-
-static int tcg_init(MachineState *ms)
-{
- tcg_exec_init(tcg_tb_size * 1024 * 1024);
- return 0;
-}
-
static const TypeInfo accel_type = {
.name = TYPE_ACCEL,
.parent = TYPE_OBJECT,
@@ -129,27 +120,9 @@ void configure_accelerator(MachineState *ms)
}
}
-
-static void tcg_accel_class_init(ObjectClass *oc, void *data)
-{
- AccelClass *ac = ACCEL_CLASS(oc);
- ac->name = "tcg";
- ac->init_machine = tcg_init;
- ac->allowed = &tcg_allowed;
-}
-
-#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg")
-
-static const TypeInfo tcg_accel_type = {
- .name = TYPE_TCG_ACCEL,
- .parent = TYPE_ACCEL,
- .class_init = tcg_accel_class_init,
-};
-
static void register_accel_types(void)
{
type_register_static(&accel_type);
- type_register_static(&tcg_accel_type);
}
type_init(register_accel_types);
diff --git a/accel/kvm/Makefile.objs b/accel/kvm/Makefile.objs
new file mode 100644
index 0000000000..85351e7de7
--- /dev/null
+++ b/accel/kvm/Makefile.objs
@@ -0,0 +1 @@
+obj-$(CONFIG_KVM) += kvm-all.o
diff --git a/kvm-all.c b/accel/kvm/kvm-all.c
index ab8262f672..75feffa504 100644
--- a/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -36,7 +36,7 @@
#include "exec/ram_addr.h"
#include "exec/address-spaces.h"
#include "qemu/event_notifier.h"
-#include "trace-root.h"
+#include "trace.h"
#include "hw/irq.h"
#include "hw/boards.h"
@@ -1977,6 +1977,7 @@ int kvm_cpu_exec(CPUState *cpu)
}
qemu_mutex_unlock_iothread();
+ cpu_exec_start(cpu);
do {
MemTxAttrs attrs;
@@ -2106,6 +2107,7 @@ int kvm_cpu_exec(CPUState *cpu)
}
} while (ret == 0);
+ cpu_exec_end(cpu);
qemu_mutex_lock_iothread();
if (ret < 0) {
diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
new file mode 100644
index 0000000000..f89ba5578d
--- /dev/null
+++ b/accel/kvm/trace-events
@@ -0,0 +1,15 @@
+# Trace events for debugging and performance instrumentation
+
+# kvm-all.c
+kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
+kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"
+kvm_vcpu_ioctl(int cpu_index, int type, void *arg) "cpu_index %d, type 0x%x, arg %p"
+kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d"
+kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
+kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
+kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
+kvm_irqchip_commit_routes(void) ""
+kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
+kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
+kvm_irqchip_release_virq(int virq) "virq %d"
+
diff --git a/accel/stubs/Makefile.objs b/accel/stubs/Makefile.objs
new file mode 100644
index 0000000000..bd5794f222
--- /dev/null
+++ b/accel/stubs/Makefile.objs
@@ -0,0 +1 @@
+obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
diff --git a/kvm-stub.c b/accel/stubs/kvm-stub.c
index ef0c7346af..ef0c7346af 100644
--- a/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs
new file mode 100644
index 0000000000..f173cd5397
--- /dev/null
+++ b/accel/tcg/Makefile.objs
@@ -0,0 +1,3 @@
+obj-$(CONFIG_SOFTMMU) += tcg-all.o
+obj-$(CONFIG_SOFTMMU) += cputlb.o
+obj-y += cpu-exec.o cpu-exec-common.o translate-all.o translate-common.o
diff --git a/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c
index e81da276bb..e81da276bb 100644
--- a/cpu-exec-common.c
+++ b/accel/tcg/cpu-exec-common.c
diff --git a/cpu-exec.c b/accel/tcg/cpu-exec.c
index 5b181c18ed..3581618bc0 100644
--- a/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -18,7 +18,7 @@
*/
#include "qemu/osdep.h"
#include "cpu.h"
-#include "trace-root.h"
+#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg.h"
diff --git a/cputlb.c b/accel/tcg/cputlb.c
index 743776ae19..743776ae19 100644
--- a/cputlb.c
+++ b/accel/tcg/cputlb.c
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
new file mode 100644
index 0000000000..dba99315e3
--- /dev/null
+++ b/accel/tcg/tcg-all.c
@@ -0,0 +1,61 @@
+/*
+ * QEMU System Emulator, accelerator interfaces
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/accel.h"
+#include "sysemu/sysemu.h"
+#include "qom/object.h"
+
+int tcg_tb_size;
+static bool tcg_allowed = true;
+
+static int tcg_init(MachineState *ms)
+{
+ tcg_exec_init(tcg_tb_size * 1024 * 1024);
+ return 0;
+}
+
+static void tcg_accel_class_init(ObjectClass *oc, void *data)
+{
+ AccelClass *ac = ACCEL_CLASS(oc);
+ ac->name = "tcg";
+ ac->init_machine = tcg_init;
+ ac->allowed = &tcg_allowed;
+}
+
+#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg")
+
+static const TypeInfo tcg_accel_type = {
+ .name = TYPE_TCG_ACCEL,
+ .parent = TYPE_ACCEL,
+ .class_init = tcg_accel_class_init,
+};
+
+static void register_accel_types(void)
+{
+ type_register_static(&tcg_accel_type);
+}
+
+type_init(register_accel_types);
diff --git a/accel/tcg/trace-events b/accel/tcg/trace-events
new file mode 100644
index 0000000000..2de8359670
--- /dev/null
+++ b/accel/tcg/trace-events
@@ -0,0 +1,10 @@
+# Trace events for debugging and performance instrumentation
+
+# TCG related tracing (mostly disabled by default)
+# cpu-exec.c
+disable exec_tb(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
+disable exec_tb_nocache(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
+disable exec_tb_exit(void *last_tb, unsigned int flags) "tb:%p flags=%x"
+
+# translate-all.c
+translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p"
diff --git a/translate-all.c b/accel/tcg/translate-all.c
index d4f364d6ff..f6ad46b613 100644
--- a/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -25,7 +25,7 @@
#include "qemu-common.h"
#define NO_CPU_IO_DEFS
#include "cpu.h"
-#include "trace-root.h"
+#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg.h"
diff --git a/translate-all.h b/accel/tcg/translate-all.h
index ba8e4d63c4..ba8e4d63c4 100644
--- a/translate-all.h
+++ b/accel/tcg/translate-all.h
diff --git a/translate-common.c b/accel/tcg/translate-common.c
index 40fe5a19bb..40fe5a19bb 100644
--- a/translate-common.c
+++ b/accel/tcg/translate-common.c
diff --git a/block.c b/block.c
index fa1d06d846..694396281b 100644
--- a/block.c
+++ b/block.c
@@ -320,6 +320,8 @@ BlockDriverState *bdrv_new(void)
QLIST_INIT(&bs->op_blockers[i]);
}
notifier_with_return_list_init(&bs->before_write_notifiers);
+ qemu_co_mutex_init(&bs->reqs_lock);
+ qemu_mutex_init(&bs->dirty_bitmap_mutex);
bs->refcnt = 1;
bs->aio_context = qemu_get_aio_context();
@@ -1300,7 +1302,9 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
goto fail_opts;
}
- assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
+ /* bdrv_new() and bdrv_close() make it so */
+ assert(atomic_read(&bs->copy_on_read) == 0);
+
if (bs->open_flags & BDRV_O_COPY_ON_READ) {
if (!bs->read_only) {
bdrv_enable_copy_on_read(bs);
@@ -3063,7 +3067,7 @@ static void bdrv_close(BlockDriverState *bs)
g_free(bs->opaque);
bs->opaque = NULL;
- bs->copy_on_read = 0;
+ atomic_set(&bs->copy_on_read, 0);
bs->backing_file[0] = '\0';
bs->backing_format[0] = '\0';
bs->total_sectors = 0;
@@ -3422,7 +3426,7 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, Error **errp)
ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
bdrv_dirty_bitmap_truncate(bs);
bdrv_parent_cb_resize(bs);
- ++bs->write_gen;
+ atomic_inc(&bs->write_gen);
}
return ret;
}
diff --git a/block/accounting.c b/block/accounting.c
index 3f457c4e73..87ef5bbfaa 100644
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -32,23 +32,28 @@
static QEMUClockType clock_type = QEMU_CLOCK_REALTIME;
static const int qtest_latency_ns = NANOSECONDS_PER_SECOND / 1000;
-void block_acct_init(BlockAcctStats *stats, bool account_invalid,
- bool account_failed)
+void block_acct_init(BlockAcctStats *stats)
{
- stats->account_invalid = account_invalid;
- stats->account_failed = account_failed;
-
+ qemu_mutex_init(&stats->lock);
if (qtest_enabled()) {
clock_type = QEMU_CLOCK_VIRTUAL;
}
}
+void block_acct_setup(BlockAcctStats *stats, bool account_invalid,
+ bool account_failed)
+{
+ stats->account_invalid = account_invalid;
+ stats->account_failed = account_failed;
+}
+
void block_acct_cleanup(BlockAcctStats *stats)
{
BlockAcctTimedStats *s, *next;
QSLIST_FOREACH_SAFE(s, &stats->intervals, entries, next) {
g_free(s);
}
+ qemu_mutex_destroy(&stats->lock);
}
void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
@@ -58,12 +63,15 @@ void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
s = g_new0(BlockAcctTimedStats, 1);
s->interval_length = interval_length;
+ s->stats = stats;
+ qemu_mutex_lock(&stats->lock);
QSLIST_INSERT_HEAD(&stats->intervals, s, entries);
for (i = 0; i < BLOCK_MAX_IOTYPE; i++) {
timed_average_init(&s->latency[i], clock_type,
(uint64_t) interval_length * NANOSECONDS_PER_SECOND);
}
+ qemu_mutex_unlock(&stats->lock);
}
BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats,
@@ -86,7 +94,8 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
cookie->type = type;
}
-void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
+static void block_account_one_io(BlockAcctStats *stats, BlockAcctCookie *cookie,
+ bool failed)
{
BlockAcctTimedStats *s;
int64_t time_ns = qemu_clock_get_ns(clock_type);
@@ -98,31 +107,16 @@ void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
assert(cookie->type < BLOCK_MAX_IOTYPE);
- stats->nr_bytes[cookie->type] += cookie->bytes;
- stats->nr_ops[cookie->type]++;
- stats->total_time_ns[cookie->type] += latency_ns;
- stats->last_access_time_ns = time_ns;
+ qemu_mutex_lock(&stats->lock);
- QSLIST_FOREACH(s, &stats->intervals, entries) {
- timed_average_account(&s->latency[cookie->type], latency_ns);
+ if (failed) {
+ stats->failed_ops[cookie->type]++;
+ } else {
+ stats->nr_bytes[cookie->type] += cookie->bytes;
+ stats->nr_ops[cookie->type]++;
}
-}
-
-void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
-{
- assert(cookie->type < BLOCK_MAX_IOTYPE);
-
- stats->failed_ops[cookie->type]++;
-
- if (stats->account_failed) {
- BlockAcctTimedStats *s;
- int64_t time_ns = qemu_clock_get_ns(clock_type);
- int64_t latency_ns = time_ns - cookie->start_time_ns;
-
- if (qtest_enabled()) {
- latency_ns = qtest_latency_ns;
- }
+ if (!failed || stats->account_failed) {
stats->total_time_ns[cookie->type] += latency_ns;
stats->last_access_time_ns = time_ns;
@@ -130,29 +124,45 @@ void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
timed_average_account(&s->latency[cookie->type], latency_ns);
}
}
+
+ qemu_mutex_unlock(&stats->lock);
+}
+
+void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
+{
+ block_account_one_io(stats, cookie, false);
+}
+
+void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
+{
+ block_account_one_io(stats, cookie, true);
}
void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type)
{
assert(type < BLOCK_MAX_IOTYPE);
- /* block_acct_done() and block_acct_failed() update
- * total_time_ns[], but this one does not. The reason is that
- * invalid requests are accounted during their submission,
- * therefore there's no actual I/O involved. */
-
+ /* block_account_one_io() updates total_time_ns[], but this one does
+ * not. The reason is that invalid requests are accounted during their
+ * submission, therefore there's no actual I/O involved.
+ */
+ qemu_mutex_lock(&stats->lock);
stats->invalid_ops[type]++;
if (stats->account_invalid) {
stats->last_access_time_ns = qemu_clock_get_ns(clock_type);
}
+ qemu_mutex_unlock(&stats->lock);
}
void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
int num_requests)
{
assert(type < BLOCK_MAX_IOTYPE);
+
+ qemu_mutex_lock(&stats->lock);
stats->merged[type] += num_requests;
+ qemu_mutex_unlock(&stats->lock);
}
int64_t block_acct_idle_time_ns(BlockAcctStats *stats)
@@ -167,7 +177,9 @@ double block_acct_queue_depth(BlockAcctTimedStats *stats,
assert(type < BLOCK_MAX_IOTYPE);
+ qemu_mutex_lock(&stats->stats->lock);
sum = timed_average_sum(&stats->latency[type], &elapsed);
+ qemu_mutex_unlock(&stats->stats->lock);
return (double) sum / elapsed;
}
diff --git a/block/block-backend.c b/block/block-backend.c
index 7d7f3697d1..a2bbae90b1 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -216,8 +216,10 @@ BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
blk->shared_perm = shared_perm;
blk_set_enable_write_cache(blk, true);
+ qemu_co_mutex_init(&blk->public.throttled_reqs_lock);
qemu_co_queue_init(&blk->public.throttled_reqs[0]);
qemu_co_queue_init(&blk->public.throttled_reqs[1]);
+ block_acct_init(&blk->stats);
notifier_list_init(&blk->remove_bs_notifiers);
notifier_list_init(&blk->insert_bs_notifiers);
@@ -1953,7 +1955,7 @@ static void blk_root_drained_begin(BdrvChild *child)
/* Note that blk->root may not be accessible here yet if we are just
* attaching to a BlockDriverState that is drained. Use child instead. */
- if (blk->public.io_limits_disabled++ == 0) {
+ if (atomic_fetch_inc(&blk->public.io_limits_disabled) == 0) {
throttle_group_restart_blk(blk);
}
}
@@ -1964,7 +1966,7 @@ static void blk_root_drained_end(BdrvChild *child)
assert(blk->quiesce_counter);
assert(blk->public.io_limits_disabled);
- --blk->public.io_limits_disabled;
+ atomic_dec(&blk->public.io_limits_disabled);
if (--blk->quiesce_counter == 0) {
if (blk->dev_ops && blk->dev_ops->drained_end) {
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 519737c8d3..a04c6e4154 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -37,6 +37,7 @@
* or enabled. A frozen bitmap can only abdicate() or reclaim().
*/
struct BdrvDirtyBitmap {
+ QemuMutex *mutex;
HBitmap *bitmap; /* Dirty sector bitmap implementation */
HBitmap *meta; /* Meta dirty bitmap */
BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
@@ -52,6 +53,27 @@ struct BdrvDirtyBitmapIter {
BdrvDirtyBitmap *bitmap;
};
+static inline void bdrv_dirty_bitmaps_lock(BlockDriverState *bs)
+{
+ qemu_mutex_lock(&bs->dirty_bitmap_mutex);
+}
+
+static inline void bdrv_dirty_bitmaps_unlock(BlockDriverState *bs)
+{
+ qemu_mutex_unlock(&bs->dirty_bitmap_mutex);
+}
+
+void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap)
+{
+ qemu_mutex_lock(bitmap->mutex);
+}
+
+void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap)
+{
+ qemu_mutex_unlock(bitmap->mutex);
+}
+
+/* Called with BQL or dirty_bitmap lock taken. */
BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
{
BdrvDirtyBitmap *bm;
@@ -65,6 +87,7 @@ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
return NULL;
}
+/* Called with BQL taken. */
void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
{
assert(!bdrv_dirty_bitmap_frozen(bitmap));
@@ -72,6 +95,7 @@ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
bitmap->name = NULL;
}
+/* Called with BQL taken. */
BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
uint32_t granularity,
const char *name,
@@ -96,11 +120,14 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
return NULL;
}
bitmap = g_new0(BdrvDirtyBitmap, 1);
+ bitmap->mutex = &bs->dirty_bitmap_mutex;
bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
bitmap->size = bitmap_size;
bitmap->name = g_strdup(name);
bitmap->disabled = false;
+ bdrv_dirty_bitmaps_lock(bs);
QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
+ bdrv_dirty_bitmaps_unlock(bs);
return bitmap;
}
@@ -119,20 +146,24 @@ void bdrv_create_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int chunk_size)
{
assert(!bitmap->meta);
+ qemu_mutex_lock(bitmap->mutex);
bitmap->meta = hbitmap_create_meta(bitmap->bitmap,
chunk_size * BITS_PER_BYTE);
+ qemu_mutex_unlock(bitmap->mutex);
}
void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
assert(bitmap->meta);
+ qemu_mutex_lock(bitmap->mutex);
hbitmap_free_meta(bitmap->bitmap);
bitmap->meta = NULL;
+ qemu_mutex_unlock(bitmap->mutex);
}
-int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
- BdrvDirtyBitmap *bitmap, int64_t sector,
- int nb_sectors)
+int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap, int64_t sector,
+ int nb_sectors)
{
uint64_t i;
int sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta);
@@ -147,11 +178,26 @@ int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
return false;
}
+int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap, int64_t sector,
+ int nb_sectors)
+{
+ bool dirty;
+
+ qemu_mutex_lock(bitmap->mutex);
+ dirty = bdrv_dirty_bitmap_get_meta_locked(bs, bitmap, sector, nb_sectors);
+ qemu_mutex_unlock(bitmap->mutex);
+
+ return dirty;
+}
+
void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap, int64_t sector,
int nb_sectors)
{
+ qemu_mutex_lock(bitmap->mutex);
hbitmap_reset(bitmap->meta, sector, nb_sectors);
+ qemu_mutex_unlock(bitmap->mutex);
}
int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap)
@@ -164,16 +210,19 @@ const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap)
return bitmap->name;
}
+/* Called with BQL taken. */
bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
{
return bitmap->successor;
}
+/* Called with BQL taken. */
bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
{
return !(bitmap->disabled || bitmap->successor);
}
+/* Called with BQL taken. */
DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
{
if (bdrv_dirty_bitmap_frozen(bitmap)) {
@@ -188,6 +237,7 @@ DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
/**
* Create a successor bitmap destined to replace this bitmap after an operation.
* Requires that the bitmap is not frozen and has no successor.
+ * Called with BQL taken.
*/
int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap, Error **errp)
@@ -220,6 +270,7 @@ int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
/**
* For a bitmap with a successor, yield our name to the successor,
* delete the old bitmap, and return a handle to the new bitmap.
+ * Called with BQL taken.
*/
BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
@@ -247,6 +298,7 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
* In cases of failure where we can no longer safely delete the parent,
* we may wish to re-join the parent and child/successor.
* The merged parent will be un-frozen, but not explicitly re-enabled.
+ * Called with BQL taken.
*/
BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *parent,
@@ -271,25 +323,30 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
/**
* Truncates _all_ bitmaps attached to a BDS.
+ * Called with BQL taken.
*/
void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
{
BdrvDirtyBitmap *bitmap;
uint64_t size = bdrv_nb_sectors(bs);
+ bdrv_dirty_bitmaps_lock(bs);
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
assert(!bdrv_dirty_bitmap_frozen(bitmap));
assert(!bitmap->active_iterators);
hbitmap_truncate(bitmap->bitmap, size);
bitmap->size = size;
}
+ bdrv_dirty_bitmaps_unlock(bs);
}
+/* Called with BQL taken. */
static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
bool only_named)
{
BdrvDirtyBitmap *bm, *next;
+ bdrv_dirty_bitmaps_lock(bs);
QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
assert(!bm->active_iterators);
@@ -301,15 +358,19 @@ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
g_free(bm);
if (bitmap) {
- return;
+ goto out;
}
}
}
if (bitmap) {
abort();
}
+
+out:
+ bdrv_dirty_bitmaps_unlock(bs);
}
+/* Called with BQL taken. */
void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
@@ -318,18 +379,21 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
/**
* Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
* There must not be any frozen bitmaps attached.
+ * Called with BQL taken.
*/
void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
{
bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
}
+/* Called with BQL taken. */
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
assert(!bdrv_dirty_bitmap_frozen(bitmap));
bitmap->disabled = true;
}
+/* Called with BQL taken. */
void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
assert(!bdrv_dirty_bitmap_frozen(bitmap));
@@ -342,6 +406,7 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
BlockDirtyInfoList *list = NULL;
BlockDirtyInfoList **plist = &list;
+ bdrv_dirty_bitmaps_lock(bs);
QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
@@ -354,12 +419,14 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
*plist = entry;
plist = &entry->next;
}
+ bdrv_dirty_bitmaps_unlock(bs);
return list;
}
-int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
- int64_t sector)
+/* Called within bdrv_dirty_bitmap_lock..unlock */
+int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+ int64_t sector)
{
if (bitmap) {
return hbitmap_get(bitmap->bitmap, sector);
@@ -432,23 +499,42 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
return hbitmap_iter_next(&iter->hbi);
}
+/* Called within bdrv_dirty_bitmap_lock..unlock */
+void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int64_t nr_sectors)
+{
+ assert(bdrv_dirty_bitmap_enabled(bitmap));
+ hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t cur_sector, int64_t nr_sectors)
{
+ bdrv_dirty_bitmap_lock(bitmap);
+ bdrv_set_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
+ bdrv_dirty_bitmap_unlock(bitmap);
+}
+
+/* Called within bdrv_dirty_bitmap_lock..unlock */
+void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int64_t nr_sectors)
+{
assert(bdrv_dirty_bitmap_enabled(bitmap));
- hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+ hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
}
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t cur_sector, int64_t nr_sectors)
{
- assert(bdrv_dirty_bitmap_enabled(bitmap));
- hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
+ bdrv_dirty_bitmap_lock(bitmap);
+ bdrv_reset_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
+ bdrv_dirty_bitmap_unlock(bitmap);
}
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
+ bdrv_dirty_bitmap_lock(bitmap);
if (!out) {
hbitmap_reset_all(bitmap->bitmap);
} else {
@@ -457,6 +543,7 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
hbitmap_granularity(backup));
*out = backup;
}
+ bdrv_dirty_bitmap_unlock(bitmap);
}
void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
@@ -508,12 +595,19 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
int64_t nr_sectors)
{
BdrvDirtyBitmap *bitmap;
+
+ if (QLIST_EMPTY(&bs->dirty_bitmaps)) {
+ return;
+ }
+
+ bdrv_dirty_bitmaps_lock(bs);
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
if (!bdrv_dirty_bitmap_enabled(bitmap)) {
continue;
}
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
+ bdrv_dirty_bitmaps_unlock(bs);
}
/**
diff --git a/block/io.c b/block/io.c
index ed31810c0a..91611ffb2a 100644
--- a/block/io.c
+++ b/block/io.c
@@ -130,13 +130,13 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
*/
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
- bs->copy_on_read++;
+ atomic_inc(&bs->copy_on_read);
}
void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
- assert(bs->copy_on_read > 0);
- bs->copy_on_read--;
+ int old = atomic_fetch_dec(&bs->copy_on_read);
+ assert(old >= 1);
}
/* Check if any requests are in-flight (including throttled requests) */
@@ -241,7 +241,7 @@ void bdrv_drained_begin(BlockDriverState *bs)
return;
}
- if (!bs->quiesce_counter++) {
+ if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
aio_disable_external(bdrv_get_aio_context(bs));
bdrv_parent_drained_begin(bs);
}
@@ -252,7 +252,7 @@ void bdrv_drained_begin(BlockDriverState *bs)
void bdrv_drained_end(BlockDriverState *bs)
{
assert(bs->quiesce_counter > 0);
- if (--bs->quiesce_counter > 0) {
+ if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
return;
}
@@ -375,11 +375,13 @@ void bdrv_drain_all(void)
static void tracked_request_end(BdrvTrackedRequest *req)
{
if (req->serialising) {
- req->bs->serialising_in_flight--;
+ atomic_dec(&req->bs->serialising_in_flight);
}
+ qemu_co_mutex_lock(&req->bs->reqs_lock);
QLIST_REMOVE(req, list);
qemu_co_queue_restart_all(&req->wait_queue);
+ qemu_co_mutex_unlock(&req->bs->reqs_lock);
}
/**
@@ -404,7 +406,9 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
qemu_co_queue_init(&req->wait_queue);
+ qemu_co_mutex_lock(&bs->reqs_lock);
QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
+ qemu_co_mutex_unlock(&bs->reqs_lock);
}
static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
@@ -414,7 +418,7 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
- overlap_offset;
if (!req->serialising) {
- req->bs->serialising_in_flight++;
+ atomic_inc(&req->bs->serialising_in_flight);
req->serialising = true;
}
@@ -501,7 +505,8 @@ static void dummy_bh_cb(void *opaque)
void bdrv_wakeup(BlockDriverState *bs)
{
- if (bs->wakeup) {
+ /* The barrier (or an atomic op) is in the caller. */
+ if (atomic_read(&bs->wakeup)) {
aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
}
}
@@ -519,12 +524,13 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
bool retry;
bool waited = false;
- if (!bs->serialising_in_flight) {
+ if (!atomic_read(&bs->serialising_in_flight)) {
return false;
}
do {
retry = false;
+ qemu_co_mutex_lock(&bs->reqs_lock);
QLIST_FOREACH(req, &bs->tracked_requests, list) {
if (req == self || (!req->serialising && !self->serialising)) {
continue;
@@ -543,7 +549,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
* (instead of producing a deadlock in the former case). */
if (!req->waiting_for) {
self->waiting_for = req;
- qemu_co_queue_wait(&req->wait_queue, NULL);
+ qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
self->waiting_for = NULL;
retry = true;
waited = true;
@@ -551,6 +557,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
}
}
}
+ qemu_co_mutex_unlock(&bs->reqs_lock);
} while (retry);
return waited;
@@ -1144,7 +1151,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
bdrv_inc_in_flight(bs);
/* Don't do copy-on-read if we read data before write operation */
- if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) {
+ if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) {
flags |= BDRV_REQ_COPY_ON_READ;
}
@@ -1401,12 +1408,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
}
bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
- ++bs->write_gen;
+ atomic_inc(&bs->write_gen);
bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
- if (bs->wr_highest_offset < offset + bytes) {
- bs->wr_highest_offset = offset + bytes;
- }
+ stat64_max(&bs->wr_highest_offset, offset + bytes);
if (ret >= 0) {
bs->total_sectors = MAX(bs->total_sectors, end_sector);
@@ -2292,14 +2297,17 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
goto early_exit;
}
- current_gen = bs->write_gen;
+ qemu_co_mutex_lock(&bs->reqs_lock);
+ current_gen = atomic_read(&bs->write_gen);
/* Wait until any previous flushes are completed */
while (bs->active_flush_req) {
- qemu_co_queue_wait(&bs->flush_queue, NULL);
+ qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
}
+ /* Flushes reach this point in nondecreasing current_gen order. */
bs->active_flush_req = true;
+ qemu_co_mutex_unlock(&bs->reqs_lock);
/* Write back all layers by calling one driver function */
if (bs->drv->bdrv_co_flush) {
@@ -2371,9 +2379,12 @@ out:
if (ret == 0) {
bs->flushed_gen = current_gen;
}
+
+ qemu_co_mutex_lock(&bs->reqs_lock);
bs->active_flush_req = false;
/* Return value is ignored - it's ok if wait queue is empty */
qemu_co_queue_next(&bs->flush_queue);
+ qemu_co_mutex_unlock(&bs->reqs_lock);
early_exit:
bdrv_dec_in_flight(bs);
@@ -2517,7 +2528,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
}
ret = 0;
out:
- ++bs->write_gen;
+ atomic_inc(&bs->write_gen);
bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
req.bytes >> BDRV_SECTOR_BITS);
tracked_request_end(&req);
@@ -2644,7 +2655,7 @@ void bdrv_io_plug(BlockDriverState *bs)
bdrv_io_plug(child->bs);
}
- if (bs->io_plugged++ == 0) {
+ if (atomic_fetch_inc(&bs->io_plugged) == 0) {
BlockDriver *drv = bs->drv;
if (drv && drv->bdrv_io_plug) {
drv->bdrv_io_plug(bs);
@@ -2657,7 +2668,7 @@ void bdrv_io_unplug(BlockDriverState *bs)
BdrvChild *child;
assert(bs->io_plugged);
- if (--bs->io_plugged == 0) {
+ if (atomic_fetch_dec(&bs->io_plugged) == 1) {
BlockDriver *drv = bs->drv;
if (drv && drv->bdrv_io_unplug) {
drv->bdrv_io_unplug(bs);
diff --git a/block/iscsi.c b/block/iscsi.c
index 5daa201181..b5f7a228b9 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1732,6 +1732,10 @@ static QemuOptsList runtime_opts = {
.name = "timeout",
.type = QEMU_OPT_NUMBER,
},
+ {
+ .name = "filename",
+ .type = QEMU_OPT_STRING,
+ },
{ /* end of list */ }
},
};
@@ -1747,12 +1751,27 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
char *initiator_name = NULL;
QemuOpts *opts;
Error *local_err = NULL;
- const char *transport_name, *portal, *target;
+ const char *transport_name, *portal, *target, *filename;
#if LIBISCSI_API_VERSION >= (20160603)
enum iscsi_transport_type transport;
#endif
int i, ret = 0, timeout = 0, lun;
+ /* If we are given a filename, parse the filename, with precedence given to
+ * filename encoded options */
+ filename = qdict_get_try_str(options, "filename");
+ if (filename) {
+ error_report("Warning: 'filename' option specified. "
+ "This is an unsupported option, and may be deprecated "
+ "in the future");
+ iscsi_parse_filename(filename, options, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
+ error_propagate(errp, local_err);
+ goto exit;
+ }
+ }
+
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
@@ -1967,6 +1986,7 @@ out:
}
memset(iscsilun, 0, sizeof(IscsiLun));
}
+exit:
return ret;
}
diff --git a/block/mirror.c b/block/mirror.c
index a2a970301c..19afcc6f1a 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -342,6 +342,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
MAX_IO_SECTORS);
+ bdrv_dirty_bitmap_lock(s->dirty_bitmap);
sector_num = bdrv_dirty_iter_next(s->dbi);
if (sector_num < 0) {
bdrv_set_dirty_iter(s->dbi, 0);
@@ -349,6 +350,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
assert(sector_num >= 0);
}
+ bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
first_chunk = sector_num / sectors_per_chunk;
while (test_bit(first_chunk, s->in_flight_bitmap)) {
@@ -360,12 +362,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
/* Find the number of consective dirty chunks following the first dirty
* one, and wait for in flight requests in them. */
+ bdrv_dirty_bitmap_lock(s->dirty_bitmap);
while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) {
int64_t next_dirty;
int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk;
int64_t next_chunk = next_sector / sectors_per_chunk;
if (next_sector >= end ||
- !bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
+ !bdrv_get_dirty_locked(source, s->dirty_bitmap, next_sector)) {
break;
}
if (test_bit(next_chunk, s->in_flight_bitmap)) {
@@ -386,8 +389,10 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
* calling bdrv_get_block_status_above could yield - if some blocks are
* marked dirty in this window, we need to know.
*/
- bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num,
- nb_chunks * sectors_per_chunk);
+ bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, sector_num,
+ nb_chunks * sectors_per_chunk);
+ bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
+
bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
while (nb_chunks > 0 && sector_num < end) {
int64_t ret;
@@ -506,6 +511,8 @@ static void mirror_exit(BlockJob *job, void *opaque)
BlockDriverState *mirror_top_bs = s->mirror_top_bs;
Error *local_err = NULL;
+ bdrv_release_dirty_bitmap(src, s->dirty_bitmap);
+
/* Make sure that the source BDS doesn't go away before we called
* block_job_completed(). */
bdrv_ref(src);
@@ -904,7 +911,6 @@ immediate_exit:
g_free(s->cow_bitmap);
g_free(s->in_flight_bitmap);
bdrv_dirty_iter_free(s->dbi);
- bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
data = g_malloc(sizeof(*data));
data->ret = ret;
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 87d19c7253..d64e775385 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -144,8 +144,8 @@ static int nbd_co_send_request(BlockDriverState *bs,
qio_channel_set_cork(s->ioc, true);
rc = nbd_send_request(s->ioc, request);
if (rc >= 0) {
- ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
- false, NULL);
+ ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, false,
+ NULL);
if (ret != request->len) {
rc = -EIO;
}
@@ -173,8 +173,8 @@ static void nbd_co_receive_reply(NBDClientSession *s,
reply->error = EIO;
} else {
if (qiov && reply->error == 0) {
- ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
- true, NULL);
+ ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, true,
+ NULL);
if (ret != request->len) {
reply->error = EIO;
}
diff --git a/block/nfs.c b/block/nfs.c
index 848b2c0bb0..18c87d2f25 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -730,7 +730,9 @@ nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
if (task->ret < 0) {
error_report("NFS Error: %s", nfs_get_error(nfs));
}
- task->complete = 1;
+
+ /* Set task->complete before reading bs->wakeup. */
+ atomic_mb_set(&task->complete, 1);
bdrv_wakeup(task->bs);
}
diff --git a/block/qapi.c b/block/qapi.c
index a40922ea26..14b60ae66c 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -441,7 +441,7 @@ static BlockStats *bdrv_query_bds_stats(const BlockDriverState *bs,
s->node_name = g_strdup(bdrv_get_node_name(bs));
}
- s->stats->wr_highest_offset = bs->wr_highest_offset;
+ s->stats->wr_highest_offset = stat64_get(&bs->wr_highest_offset);
if (bs->file) {
s->has_parent = true;
diff --git a/block/rbd.c b/block/rbd.c
index e551639e47..ff44e5f437 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -340,6 +340,10 @@ static QemuOptsList runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "Legacy rados key/value option parameters",
},
+ {
+ .name = "filename",
+ .type = QEMU_OPT_STRING,
+ },
{ /* end of list */ }
},
};
@@ -541,12 +545,27 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
{
BDRVRBDState *s = bs->opaque;
const char *pool, *snap, *conf, *user, *image_name, *keypairs;
- const char *secretid;
+ const char *secretid, *filename;
QemuOpts *opts;
Error *local_err = NULL;
char *mon_host = NULL;
int r;
+ /* If we are given a filename, parse the filename, with precedence given to
+ * filename encoded options */
+ filename = qdict_get_try_str(options, "filename");
+ if (filename) {
+ error_report("Warning: 'filename' option specified. "
+ "This is an unsupported option, and may be deprecated "
+ "in the future");
+ qemu_rbd_parse_filename(filename, options, &local_err);
+ if (local_err) {
+ r = -EINVAL;
+ error_propagate(errp, local_err);
+ goto exit;
+ }
+ }
+
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
@@ -665,6 +684,7 @@ failed_shutdown:
failed_opts:
qemu_opts_del(opts);
g_free(mon_host);
+exit:
return r;
}
diff --git a/block/sheepdog.c b/block/sheepdog.c
index a18315a1ca..5ebf5d9fbb 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -698,7 +698,8 @@ out:
srco->co = NULL;
srco->ret = ret;
- srco->finished = true;
+ /* Set srco->finished before reading bs->wakeup. */
+ atomic_mb_set(&srco->finished, true);
if (srco->bs) {
bdrv_wakeup(srco->bs);
}
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index b73e7a800b..a181cb1dee 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -240,7 +240,7 @@ static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write)
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
bool must_wait;
- if (blkp->io_limits_disabled) {
+ if (atomic_read(&blkp->io_limits_disabled)) {
return false;
}
@@ -260,6 +260,25 @@ static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write)
return must_wait;
}
+/* Start the next pending I/O request for a BlockBackend. Return whether
+ * any request was actually pending.
+ *
+ * @blk: the current BlockBackend
+ * @is_write: the type of operation (read/write)
+ */
+static bool coroutine_fn throttle_group_co_restart_queue(BlockBackend *blk,
+ bool is_write)
+{
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ bool ret;
+
+ qemu_co_mutex_lock(&blkp->throttled_reqs_lock);
+ ret = qemu_co_queue_next(&blkp->throttled_reqs[is_write]);
+ qemu_co_mutex_unlock(&blkp->throttled_reqs_lock);
+
+ return ret;
+}
+
/* Look for the next pending I/O request and schedule it.
*
* This assumes that tg->lock is held.
@@ -287,12 +306,12 @@ static void schedule_next_request(BlockBackend *blk, bool is_write)
if (!must_wait) {
/* Give preference to requests from the current blk */
if (qemu_in_coroutine() &&
- qemu_co_queue_next(&blkp->throttled_reqs[is_write])) {
+ throttle_group_co_restart_queue(blk, is_write)) {
token = blk;
} else {
ThrottleTimers *tt = &blk_get_public(token)->throttle_timers;
int64_t now = qemu_clock_get_ns(tt->clock_type);
- timer_mod(tt->timers[is_write], now + 1);
+ timer_mod(tt->timers[is_write], now);
tg->any_timer_armed[is_write] = true;
}
tg->tokens[is_write] = token;
@@ -326,7 +345,10 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
if (must_wait || blkp->pending_reqs[is_write]) {
blkp->pending_reqs[is_write]++;
qemu_mutex_unlock(&tg->lock);
- qemu_co_queue_wait(&blkp->throttled_reqs[is_write], NULL);
+ qemu_co_mutex_lock(&blkp->throttled_reqs_lock);
+ qemu_co_queue_wait(&blkp->throttled_reqs[is_write],
+ &blkp->throttled_reqs_lock);
+ qemu_co_mutex_unlock(&blkp->throttled_reqs_lock);
qemu_mutex_lock(&tg->lock);
blkp->pending_reqs[is_write]--;
}
@@ -340,15 +362,50 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
qemu_mutex_unlock(&tg->lock);
}
+typedef struct {
+ BlockBackend *blk;
+ bool is_write;
+} RestartData;
+
+static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
+{
+ RestartData *data = opaque;
+ BlockBackend *blk = data->blk;
+ bool is_write = data->is_write;
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
+ bool empty_queue;
+
+ empty_queue = !throttle_group_co_restart_queue(blk, is_write);
+
+ /* If the request queue was empty then we have to take care of
+ * scheduling the next one */
+ if (empty_queue) {
+ qemu_mutex_lock(&tg->lock);
+ schedule_next_request(blk, is_write);
+ qemu_mutex_unlock(&tg->lock);
+ }
+}
+
+static void throttle_group_restart_queue(BlockBackend *blk, bool is_write)
+{
+ Coroutine *co;
+ RestartData rd = {
+ .blk = blk,
+ .is_write = is_write
+ };
+
+ co = qemu_coroutine_create(throttle_group_restart_queue_entry, &rd);
+ aio_co_enter(blk_get_aio_context(blk), co);
+}
+
void throttle_group_restart_blk(BlockBackend *blk)
{
BlockBackendPublic *blkp = blk_get_public(blk);
- int i;
- for (i = 0; i < 2; i++) {
- while (qemu_co_enter_next(&blkp->throttled_reqs[i])) {
- ;
- }
+ if (blkp->throttle_state) {
+ throttle_group_restart_queue(blk, 0);
+ throttle_group_restart_queue(blk, 1);
}
}
@@ -376,8 +433,7 @@ void throttle_group_config(BlockBackend *blk, ThrottleConfig *cfg)
throttle_config(ts, tt, cfg);
qemu_mutex_unlock(&tg->lock);
- qemu_co_enter_next(&blkp->throttled_reqs[0]);
- qemu_co_enter_next(&blkp->throttled_reqs[1]);
+ throttle_group_restart_blk(blk);
}
/* Get the throttle configuration from a particular group. Similar to
@@ -408,7 +464,6 @@ static void timer_cb(BlockBackend *blk, bool is_write)
BlockBackendPublic *blkp = blk_get_public(blk);
ThrottleState *ts = blkp->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
- bool empty_queue;
/* The timer has just been fired, so we can update the flag */
qemu_mutex_lock(&tg->lock);
@@ -416,17 +471,7 @@ static void timer_cb(BlockBackend *blk, bool is_write)
qemu_mutex_unlock(&tg->lock);
/* Run the request that was waiting for this timer */
- aio_context_acquire(blk_get_aio_context(blk));
- empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
- aio_context_release(blk_get_aio_context(blk));
-
- /* If the request queue was empty then we have to take care of
- * scheduling the next one */
- if (empty_queue) {
- qemu_mutex_lock(&tg->lock);
- schedule_next_request(blk, is_write);
- qemu_mutex_unlock(&tg->lock);
- }
+ throttle_group_restart_queue(blk, is_write);
}
static void read_timer_cb(void *opaque)
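The throttle-groups changes above move every manipulation of throttled_reqs[] under the new throttled_reqs_lock CoMutex and restart queued requests from a dedicated coroutine. Reduced to a hedged sketch with illustrative names (the real code operates on BlockBackendPublic), the wait/wake pattern is:

    /* Sketch of the CoQueue-under-CoMutex pattern; 'ThrottledWaiters',
     * wait_turn() and wake_next() are illustrative names only. */
    typedef struct {
        CoMutex lock;
        CoQueue queue;
    } ThrottledWaiters;

    static void coroutine_fn wait_turn(ThrottledWaiters *w)
    {
        qemu_co_mutex_lock(&w->lock);
        /* Drops the mutex while sleeping, re-takes it when woken. */
        qemu_co_queue_wait(&w->queue, &w->lock);
        qemu_co_mutex_unlock(&w->lock);
    }

    static bool coroutine_fn wake_next(ThrottledWaiters *w)
    {
        bool woke;

        qemu_co_mutex_lock(&w->lock);
        woke = qemu_co_queue_next(&w->queue);  /* false if queue was empty */
        qemu_co_mutex_unlock(&w->lock);
        return woke;
    }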
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index dd0860f4a6..28f551a7b0 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -27,6 +27,10 @@ typedef struct NBDServerData {
static NBDServerData *nbd_server;
+static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
+{
+ nbd_client_put(client);
+}
static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition,
gpointer opaque)
@@ -46,7 +50,7 @@ static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition,
qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
nbd_client_new(NULL, cioc,
nbd_server->tlscreds, NULL,
- nbd_client_put);
+ nbd_blockdev_client_closed);
object_unref(OBJECT(cioc));
return TRUE;
}
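The blockdev-nbd.c hunk adapts to a changed close-callback signature: nbd_client_new() now expects a callback taking the client plus a flag, so the bare nbd_client_put reference is replaced by a wrapper that simply drops the extra argument. The assumed shape, inferred from the wrapper rather than quoted from nbd.h:

    /* Assumed prototype, inferred from nbd_blockdev_client_closed() above. */
    typedef void (*NBDClientClosedFn)(NBDClient *client, bool flag);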
diff --git a/blockdev.c b/blockdev.c
index 6472548186..39d6b9712b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -595,7 +595,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
autostart = 0;
}
- block_acct_init(blk_get_stats(blk), account_invalid, account_failed);
+ block_acct_setup(blk_get_stats(blk), account_invalid, account_failed);
if (!parse_stats_intervals(blk_get_stats(blk), interval_list, errp)) {
blk_unref(blk);
@@ -1362,12 +1362,10 @@ out_aio_context:
static BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
const char *name,
BlockDriverState **pbs,
- AioContext **paio,
Error **errp)
{
BlockDriverState *bs;
BdrvDirtyBitmap *bitmap;
- AioContext *aio_context;
if (!node) {
error_setg(errp, "Node cannot be NULL");
@@ -1383,29 +1381,17 @@ static BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
return NULL;
}
- aio_context = bdrv_get_aio_context(bs);
- aio_context_acquire(aio_context);
-
bitmap = bdrv_find_dirty_bitmap(bs, name);
if (!bitmap) {
error_setg(errp, "Dirty bitmap '%s' not found", name);
- goto fail;
+ return NULL;
}
if (pbs) {
*pbs = bs;
}
- if (paio) {
- *paio = aio_context;
- } else {
- aio_context_release(aio_context);
- }
return bitmap;
-
- fail:
- aio_context_release(aio_context);
- return NULL;
}
/* New and old BlockDriverState structs for atomic group operations */
@@ -1791,7 +1777,7 @@ static void external_snapshot_commit(BlkActionState *common)
/* We don't need (or want) to use the transactional
* bdrv_reopen_multiple() across all the entries at once, because we
* don't want to abort all of them if one of them fails the reopen */
- if (!state->old_bs->copy_on_read) {
+ if (!atomic_read(&state->old_bs->copy_on_read)) {
bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
NULL);
}
@@ -2025,7 +2011,6 @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
state->bitmap = block_dirty_bitmap_lookup(action->node,
action->name,
&state->bs,
- &state->aio_context,
errp);
if (!state->bitmap) {
return;
@@ -2733,7 +2718,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
bool has_granularity, uint32_t granularity,
Error **errp)
{
- AioContext *aio_context;
BlockDriverState *bs;
if (!name || name[0] == '\0') {
@@ -2746,14 +2730,11 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
return;
}
- aio_context = bdrv_get_aio_context(bs);
- aio_context_acquire(aio_context);
-
if (has_granularity) {
if (granularity < 512 || !is_power_of_2(granularity)) {
error_setg(errp, "Granularity must be power of 2 "
"and at least 512");
- goto out;
+ return;
}
} else {
/* Default to cluster size, if available: */
@@ -2761,19 +2742,15 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
}
bdrv_create_dirty_bitmap(bs, granularity, name, errp);
-
- out:
- aio_context_release(aio_context);
}
void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
Error **errp)
{
- AioContext *aio_context;
BlockDriverState *bs;
BdrvDirtyBitmap *bitmap;
- bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp);
+ bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
if (!bitmap || !bs) {
return;
}
@@ -2782,13 +2759,10 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
error_setg(errp,
"Bitmap '%s' is currently frozen and cannot be removed",
name);
- goto out;
+ return;
}
bdrv_dirty_bitmap_make_anon(bitmap);
bdrv_release_dirty_bitmap(bs, bitmap);
-
- out:
- aio_context_release(aio_context);
}
/**
@@ -2798,11 +2772,10 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
void qmp_block_dirty_bitmap_clear(const char *node, const char *name,
Error **errp)
{
- AioContext *aio_context;
BdrvDirtyBitmap *bitmap;
BlockDriverState *bs;
- bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp);
+ bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
if (!bitmap || !bs) {
return;
}
@@ -2811,18 +2784,15 @@ void qmp_block_dirty_bitmap_clear(const char *node, const char *name,
error_setg(errp,
"Bitmap '%s' is currently frozen and cannot be modified",
name);
- goto out;
+ return;
} else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
error_setg(errp,
"Bitmap '%s' is currently disabled and cannot be cleared",
name);
- goto out;
+ return;
}
bdrv_clear_dirty_bitmap(bitmap, NULL);
-
- out:
- aio_context_release(aio_context);
}
void hmp_drive_del(Monitor *mon, const QDict *qdict)
diff --git a/configure b/configure
index b147191ae6..ff0f8b915c 100755
--- a/configure
+++ b/configure
@@ -407,7 +407,7 @@ QEMU_CFLAGS="-fno-strict-aliasing -fno-common -fwrapv $QEMU_CFLAGS"
QEMU_CFLAGS="-Wall -Wundef -Wwrite-strings -Wmissing-prototypes $QEMU_CFLAGS"
QEMU_CFLAGS="-Wstrict-prototypes -Wredundant-decls $QEMU_CFLAGS"
QEMU_CFLAGS="-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE $QEMU_CFLAGS"
-QEMU_INCLUDES="-I. -I\$(SRC_PATH) -I\$(SRC_PATH)/include"
+QEMU_INCLUDES="-I. -I\$(SRC_PATH) -I\$(SRC_PATH)/accel/tcg -I\$(SRC_PATH)/include"
if test "$debug_info" = "yes"; then
CFLAGS="-g $CFLAGS"
LDFLAGS="-g $LDFLAGS"
@@ -2301,14 +2301,14 @@ fi
# GTK probe
if test "$gtkabi" = ""; then
- # The GTK ABI was not specified explicitly, so try whether 2.0 is available.
- # Use 3.0 as a fallback if that is available.
- if $pkg_config --exists "gtk+-2.0 >= 2.18.0"; then
- gtkabi=2.0
- elif $pkg_config --exists "gtk+-3.0 >= 3.0.0"; then
+ # The GTK ABI was not specified explicitly, so try whether 3.0 is available.
+ # Use 2.0 as a fallback if that is available.
+ if $pkg_config --exists "gtk+-3.0 >= 3.0.0"; then
gtkabi=3.0
- else
+ elif $pkg_config --exists "gtk+-2.0 >= 2.18.0"; then
gtkabi=2.0
+ else
+ gtkabi=3.0
fi
fi
@@ -2331,7 +2331,7 @@ if test "$gtk" != "no"; then
libs_softmmu="$gtk_libs $libs_softmmu"
gtk="yes"
elif test "$gtk" = "yes"; then
- feature_not_found "gtk" "Install gtk2 or gtk3 devel"
+ feature_not_found "gtk" "Install gtk3-devel"
else
gtk="no"
fi
@@ -2598,12 +2598,12 @@ fi
# sdl-config even without cross prefix, and favour pkg-config over sdl-config.
if test "$sdlabi" = ""; then
- if $pkg_config --exists "sdl"; then
- sdlabi=1.2
- elif $pkg_config --exists "sdl2"; then
+ if $pkg_config --exists "sdl2"; then
sdlabi=2.0
- else
+ elif $pkg_config --exists "sdl"; then
sdlabi=1.2
+ else
+ sdlabi=2.0
fi
fi
@@ -2630,7 +2630,7 @@ elif has ${sdl_config}; then
sdlversion=$($sdlconfig --version)
else
if test "$sdl" = "yes" ; then
- feature_not_found "sdl" "Install SDL devel"
+ feature_not_found "sdl" "Install SDL2-devel"
fi
sdl=no
fi
@@ -6374,7 +6374,7 @@ fi
# build tree in object directory in case the source is not in the current directory
DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests"
-DIRS="$DIRS docs fsdev"
+DIRS="$DIRS docs docs/interop fsdev"
DIRS="$DIRS pc-bios/optionrom pc-bios/spapr-rtas pc-bios/s390-ccw"
DIRS="$DIRS roms/seabios roms/vgabios"
DIRS="$DIRS qapi-generated"
diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h
index af02a31ebe..53ef222c0b 100644
--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -17,6 +17,7 @@
#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>
+#include <sys/poll.h>
#include <linux/vhost.h>
#include "standard-headers/linux/virtio_ring.h"
@@ -192,11 +193,11 @@ typedef struct VuVirtq {
} VuVirtq;
enum VuWatchCondtion {
- VU_WATCH_IN = 1 << 0,
- VU_WATCH_OUT = 1 << 1,
- VU_WATCH_PRI = 1 << 2,
- VU_WATCH_ERR = 1 << 3,
- VU_WATCH_HUP = 1 << 4,
+ VU_WATCH_IN = POLLIN,
+ VU_WATCH_OUT = POLLOUT,
+ VU_WATCH_PRI = POLLPRI,
+ VU_WATCH_ERR = POLLERR,
+ VU_WATCH_HUP = POLLHUP,
};
typedef void (*vu_panic_cb) (VuDev *dev, const char *err);
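Redefining VU_WATCH_* as the corresponding POLL* bits lets a libvhost-user front end pass the condition mask straight into poll()/GPollFD event fields (the new sample below additionally build-asserts that they match G_IO_*). A small sketch under that assumption; my_set_watch() is a hypothetical callback, not taken from the sample:

    #include <poll.h>

    static void my_set_watch(VuDev *dev, int fd, int vu_evt,
                             vu_watch_cb cb, void *pvt)
    {
        struct pollfd pfd = {
            .fd = fd,
            .events = vu_evt,   /* VU_WATCH_IN == POLLIN, etc. */
        };
        /* ... hand pfd to the application's event loop, remember cb/pvt ... */
        (void)dev; (void)pfd; (void)cb; (void)pvt;
    }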
diff --git a/contrib/vhost-user-scsi/Makefile.objs b/contrib/vhost-user-scsi/Makefile.objs
new file mode 100644
index 0000000000..e83a38a85b
--- /dev/null
+++ b/contrib/vhost-user-scsi/Makefile.objs
@@ -0,0 +1 @@
+vhost-user-scsi-obj-y = vhost-user-scsi.o
diff --git a/contrib/vhost-user-scsi/vhost-user-scsi.c b/contrib/vhost-user-scsi/vhost-user-scsi.c
new file mode 100644
index 0000000000..b5ae02c96f
--- /dev/null
+++ b/contrib/vhost-user-scsi/vhost-user-scsi.c
@@ -0,0 +1,886 @@
+/*
+ * vhost-user-scsi sample application
+ *
+ * Copyright (c) 2016 Nutanix Inc. All rights reserved.
+ *
+ * Author:
+ * Felipe Franciosi <felipe@nutanix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 only.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "contrib/libvhost-user/libvhost-user.h"
+#include "hw/virtio/virtio-scsi.h"
+#include "iscsi/iscsi.h"
+
+#include <glib.h>
+
+/* Small compat shim for glib older than 2.32 */
+#ifndef G_SOURCE_CONTINUE
+#define G_SOURCE_CONTINUE TRUE
+#endif
+#ifndef G_SOURCE_REMOVE
+#define G_SOURCE_REMOVE FALSE
+#endif
+
+/* #define VUS_DEBUG 1 */
+
+/** Log helpers **/
+
+#define PPRE \
+ struct timespec ts; \
+ char timebuf[64]; \
+ struct tm tm; \
+ (void)clock_gettime(CLOCK_REALTIME, &ts); \
+ (void)strftime(timebuf, 64, "%Y%m%d %T", gmtime_r(&ts.tv_sec, &tm))
+
+#define PEXT(lvl, msg, ...) do { \
+ PPRE; \
+ fprintf(stderr, "%s.%06ld " lvl ": %s:%s():%d: " msg "\n", \
+ timebuf, ts.tv_nsec / 1000, \
+ __FILE__, __func__, __LINE__, ## __VA_ARGS__); \
+} while (0)
+
+#define PNOR(lvl, msg, ...) do { \
+ PPRE; \
+ fprintf(stderr, "%s.%06ld " lvl ": " msg "\n", \
+ timebuf, ts.tv_nsec / 1000, ## __VA_ARGS__); \
+} while (0)
+
+#ifdef VUS_DEBUG
+#define PDBG(msg, ...) PEXT("DBG", msg, ## __VA_ARGS__)
+#define PERR(msg, ...) PEXT("ERR", msg, ## __VA_ARGS__)
+#define PLOG(msg, ...) PEXT("LOG", msg, ## __VA_ARGS__)
+#else
+#define PDBG(msg, ...) { }
+#define PERR(msg, ...) PNOR("ERR", msg, ## __VA_ARGS__)
+#define PLOG(msg, ...) PNOR("LOG", msg, ## __VA_ARGS__)
+#endif
+
+/** vhost-user-scsi specific definitions **/
+
+ /* Only 1 LUN and device supported today */
+#define VUS_MAX_LUNS 1
+#define VUS_MAX_DEVS 1
+
+#define VUS_ISCSI_INITIATOR "iqn.2016-11.com.nutanix:vhost-user-scsi"
+
+typedef struct iscsi_lun {
+ struct iscsi_context *iscsi_ctx;
+ int iscsi_lun;
+} iscsi_lun_t;
+
+typedef struct vhost_scsi_dev {
+ VuDev vu_dev;
+ int server_sock;
+ GMainLoop *loop;
+ GTree *fdmap; /* fd -> gsource context id */
+ iscsi_lun_t luns[VUS_MAX_LUNS];
+} vhost_scsi_dev_t;
+
+static vhost_scsi_dev_t *vhost_scsi_devs[VUS_MAX_DEVS];
+
+/** glib event loop integration for libvhost-user and misc callbacks **/
+
+QEMU_BUILD_BUG_ON((int)G_IO_IN != (int)VU_WATCH_IN);
+QEMU_BUILD_BUG_ON((int)G_IO_OUT != (int)VU_WATCH_OUT);
+QEMU_BUILD_BUG_ON((int)G_IO_PRI != (int)VU_WATCH_PRI);
+QEMU_BUILD_BUG_ON((int)G_IO_ERR != (int)VU_WATCH_ERR);
+QEMU_BUILD_BUG_ON((int)G_IO_HUP != (int)VU_WATCH_HUP);
+
+typedef struct vus_gsrc {
+ GSource parent;
+ vhost_scsi_dev_t *vdev_scsi;
+ GPollFD gfd;
+ vu_watch_cb vu_cb;
+} vus_gsrc_t;
+
+static gint vus_fdmap_compare(gconstpointer a, gconstpointer b)
+{
+ return (b > a) - (b < a);
+}
+
+static gboolean vus_gsrc_prepare(GSource *src, gint *timeout)
+{
+ assert(timeout);
+
+ *timeout = -1;
+ return FALSE;
+}
+
+static gboolean vus_gsrc_check(GSource *src)
+{
+ vus_gsrc_t *vus_src = (vus_gsrc_t *)src;
+
+ assert(vus_src);
+
+ return vus_src->gfd.revents & vus_src->gfd.events;
+}
+
+static gboolean vus_gsrc_dispatch(GSource *src, GSourceFunc cb, gpointer data)
+{
+ vhost_scsi_dev_t *vdev_scsi;
+ vus_gsrc_t *vus_src = (vus_gsrc_t *)src;
+
+ assert(vus_src);
+ assert(!(vus_src->vu_cb && cb));
+
+ vdev_scsi = vus_src->vdev_scsi;
+
+ assert(vdev_scsi);
+
+ if (cb) {
+ return cb(data);
+ }
+ if (vus_src->vu_cb) {
+ vus_src->vu_cb(&vdev_scsi->vu_dev, vus_src->gfd.revents, data);
+ }
+ return G_SOURCE_CONTINUE;
+}
+
+static GSourceFuncs vus_gsrc_funcs = {
+ vus_gsrc_prepare,
+ vus_gsrc_check,
+ vus_gsrc_dispatch,
+ NULL
+};
+
+static int vus_gsrc_new(vhost_scsi_dev_t *vdev_scsi, int fd, GIOCondition cond,
+ vu_watch_cb vu_cb, GSourceFunc gsrc_cb, gpointer data)
+{
+ GSource *vus_gsrc;
+ vus_gsrc_t *vus_src;
+ guint id;
+
+ assert(vdev_scsi);
+ assert(fd >= 0);
+ assert(vu_cb || gsrc_cb);
+ assert(!(vu_cb && gsrc_cb));
+
+ vus_gsrc = g_source_new(&vus_gsrc_funcs, sizeof(vus_gsrc_t));
+ if (!vus_gsrc) {
+ PERR("Error creating GSource for new watch");
+ return -1;
+ }
+ vus_src = (vus_gsrc_t *)vus_gsrc;
+
+ vus_src->vdev_scsi = vdev_scsi;
+ vus_src->gfd.fd = fd;
+ vus_src->gfd.events = cond;
+ vus_src->vu_cb = vu_cb;
+
+ g_source_add_poll(vus_gsrc, &vus_src->gfd);
+ g_source_set_callback(vus_gsrc, gsrc_cb, data, NULL);
+ id = g_source_attach(vus_gsrc, NULL);
+ assert(id);
+ g_source_unref(vus_gsrc);
+
+ g_tree_insert(vdev_scsi->fdmap, (gpointer)(uintptr_t)fd,
+ (gpointer)(uintptr_t)id);
+
+ return 0;
+}
+
+/* from libiscsi's scsi-lowlevel.h **
+ *
+ * nb. We can't directly include scsi-lowlevel.h due to a namespace conflict:
+ * QEMU's scsi.h also defines "SCSI_XFER_NONE".
+ */
+
+#define SCSI_CDB_MAX_SIZE 16
+
+struct scsi_iovector {
+ struct scsi_iovec *iov;
+ int niov;
+ int nalloc;
+ size_t offset;
+ int consumed;
+};
+
+struct scsi_allocated_memory {
+ struct scsi_allocated_memory *next;
+ char buf[0];
+};
+
+struct scsi_data {
+ int size;
+ unsigned char *data;
+};
+
+enum scsi_sense_key {
+ SCSI_SENSE_NO_SENSE = 0x00,
+ SCSI_SENSE_RECOVERED_ERROR = 0x01,
+ SCSI_SENSE_NOT_READY = 0x02,
+ SCSI_SENSE_MEDIUM_ERROR = 0x03,
+ SCSI_SENSE_HARDWARE_ERROR = 0x04,
+ SCSI_SENSE_ILLEGAL_REQUEST = 0x05,
+ SCSI_SENSE_UNIT_ATTENTION = 0x06,
+ SCSI_SENSE_DATA_PROTECTION = 0x07,
+ SCSI_SENSE_BLANK_CHECK = 0x08,
+ SCSI_SENSE_VENDOR_SPECIFIC = 0x09,
+ SCSI_SENSE_COPY_ABORTED = 0x0a,
+ SCSI_SENSE_COMMAND_ABORTED = 0x0b,
+ SCSI_SENSE_OBSOLETE_ERROR_CODE = 0x0c,
+ SCSI_SENSE_OVERFLOW_COMMAND = 0x0d,
+ SCSI_SENSE_MISCOMPARE = 0x0e
+};
+
+struct scsi_sense {
+ unsigned char error_type;
+ enum scsi_sense_key key;
+ int ascq;
+ unsigned sense_specific:1;
+ unsigned ill_param_in_cdb:1;
+ unsigned bit_pointer_valid:1;
+ unsigned char bit_pointer;
+ uint16_t field_pointer;
+};
+
+enum scsi_residual {
+ SCSI_RESIDUAL_NO_RESIDUAL = 0,
+ SCSI_RESIDUAL_UNDERFLOW,
+ SCSI_RESIDUAL_OVERFLOW
+};
+
+struct scsi_task {
+ int status;
+ int cdb_size;
+ int xfer_dir;
+ int expxferlen;
+ unsigned char cdb[SCSI_CDB_MAX_SIZE];
+ enum scsi_residual residual_status;
+ size_t residual;
+ struct scsi_sense sense;
+ struct scsi_data datain;
+ struct scsi_allocated_memory *mem;
+ void *ptr;
+
+ uint32_t itt;
+ uint32_t cmdsn;
+ uint32_t lun;
+
+ struct scsi_iovector iovector_in;
+ struct scsi_iovector iovector_out;
+};
+
+/** libiscsi integration **/
+
+static int iscsi_add_lun(iscsi_lun_t *lun, char *iscsi_uri)
+{
+ struct iscsi_url *iscsi_url;
+ struct iscsi_context *iscsi_ctx;
+ int ret = 0;
+
+ assert(lun);
+ assert(iscsi_uri);
+
+ iscsi_ctx = iscsi_create_context(VUS_ISCSI_INITIATOR);
+ if (!iscsi_ctx) {
+ PERR("Unable to create iSCSI context");
+ return -1;
+ }
+
+ iscsi_url = iscsi_parse_full_url(iscsi_ctx, iscsi_uri);
+ if (!iscsi_url) {
+ PERR("Unable to parse iSCSI URL: %s", iscsi_get_error(iscsi_ctx));
+ goto fail;
+ }
+
+ iscsi_set_session_type(iscsi_ctx, ISCSI_SESSION_NORMAL);
+ iscsi_set_header_digest(iscsi_ctx, ISCSI_HEADER_DIGEST_NONE_CRC32C);
+ if (iscsi_full_connect_sync(iscsi_ctx, iscsi_url->portal, iscsi_url->lun)) {
+ PERR("Unable to login to iSCSI portal: %s", iscsi_get_error(iscsi_ctx));
+ goto fail;
+ }
+
+ lun->iscsi_ctx = iscsi_ctx;
+ lun->iscsi_lun = iscsi_url->lun;
+
+ PDBG("Context %p created for lun 0: %s", iscsi_ctx, iscsi_uri);
+
+out:
+ if (iscsi_url) {
+ iscsi_destroy_url(iscsi_url);
+ }
+ return ret;
+
+fail:
+ (void)iscsi_destroy_context(iscsi_ctx);
+ ret = -1;
+ goto out;
+}
+
+static struct scsi_task *scsi_task_new(int cdb_len, uint8_t *cdb, int dir,
+ int xfer_len) {
+ struct scsi_task *task;
+
+ assert(cdb_len > 0);
+ assert(cdb);
+
+ task = calloc(1, sizeof(struct scsi_task));
+ if (!task) {
+ PERR("Error allocating task: %s", strerror(errno));
+ return NULL;
+ }
+
+ memcpy(task->cdb, cdb, cdb_len);
+ task->cdb_size = cdb_len;
+ task->xfer_dir = dir;
+ task->expxferlen = xfer_len;
+
+ return task;
+}
+
+static int get_cdb_len(uint8_t *cdb)
+{
+ assert(cdb);
+
+ switch (cdb[0] >> 5) {
+ case 0: return 6;
+ case 1: /* fall through */
+ case 2: return 10;
+ case 4: return 16;
+ case 5: return 12;
+ }
+ PERR("Unable to determine cdb len (0x%02hhX)", cdb[0] >> 5);
+ return -1;
+}
+
+static int handle_cmd_sync(struct iscsi_context *ctx,
+ VirtIOSCSICmdReq *req,
+ struct iovec *out, unsigned int out_len,
+ VirtIOSCSICmdResp *rsp,
+ struct iovec *in, unsigned int in_len) {
+ struct scsi_task *task;
+ uint32_t dir;
+ uint32_t len;
+ int cdb_len;
+ int i;
+
+ assert(ctx);
+ assert(req);
+ assert(rsp);
+
+ if (!(!req->lun[1] && req->lun[2] == 0x40 && !req->lun[3])) {
+ /* Ignore anything different than target=0, lun=0 */
+ PDBG("Ignoring unconnected lun (0x%hhX, 0x%hhX)",
+ req->lun[1], req->lun[3]);
+ rsp->status = SCSI_STATUS_CHECK_CONDITION;
+ memset(rsp->sense, 0, sizeof(rsp->sense));
+ rsp->sense_len = 18;
+ rsp->sense[0] = 0x70;
+ rsp->sense[2] = SCSI_SENSE_ILLEGAL_REQUEST;
+ rsp->sense[7] = 10;
+ rsp->sense[12] = 0x24;
+
+ return 0;
+ }
+
+ cdb_len = get_cdb_len(req->cdb);
+ if (cdb_len == -1) {
+ return -1;
+ }
+
+ len = 0;
+ if (!out_len && !in_len) {
+ dir = SCSI_XFER_NONE;
+ } else if (out_len) {
+ dir = SCSI_XFER_TO_DEV;
+ for (i = 0; i < out_len; i++) {
+ len += out[i].iov_len;
+ }
+ } else {
+ dir = SCSI_XFER_FROM_DEV;
+ for (i = 0; i < in_len; i++) {
+ len += in[i].iov_len;
+ }
+ }
+
+ task = scsi_task_new(cdb_len, req->cdb, dir, len);
+ if (!task) {
+ PERR("Unable to create iscsi task");
+ return -1;
+ }
+
+ if (dir == SCSI_XFER_TO_DEV) {
+ task->iovector_out.iov = (struct scsi_iovec *)out;
+ task->iovector_out.niov = out_len;
+ } else if (dir == SCSI_XFER_FROM_DEV) {
+ task->iovector_in.iov = (struct scsi_iovec *)in;
+ task->iovector_in.niov = in_len;
+ }
+
+ PDBG("Sending iscsi cmd (cdb_len=%d, dir=%d, task=%p)",
+ cdb_len, dir, task);
+ if (!iscsi_scsi_command_sync(ctx, 0, task, NULL)) {
+ PERR("Error serving SCSI command");
+ free(task);
+ return -1;
+ }
+
+ memset(rsp, 0, sizeof(*rsp));
+
+ rsp->status = task->status;
+ rsp->resid = task->residual;
+
+ if (task->status == SCSI_STATUS_CHECK_CONDITION) {
+ rsp->response = VIRTIO_SCSI_S_FAILURE;
+ rsp->sense_len = task->datain.size - 2;
+ memcpy(rsp->sense, &task->datain.data[2], rsp->sense_len);
+ }
+
+ free(task);
+
+ PDBG("Filled in rsp: status=%hhX, resid=%u, response=%hhX, sense_len=%u",
+ rsp->status, rsp->resid, rsp->response, rsp->sense_len);
+
+ return 0;
+}
+
+/** libvhost-user callbacks **/
+
+static vhost_scsi_dev_t *vdev_scsi_find_by_vu(VuDev *vu_dev);
+
+static void vus_panic_cb(VuDev *vu_dev, const char *buf)
+{
+ vhost_scsi_dev_t *vdev_scsi;
+
+ assert(vu_dev);
+
+ vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
+
+ if (buf) {
+ PERR("vu_panic: %s", buf);
+ }
+
+ if (vdev_scsi) {
+ assert(vdev_scsi->loop);
+ g_main_loop_quit(vdev_scsi->loop);
+ }
+}
+
+static void vus_add_watch_cb(VuDev *vu_dev, int fd, int vu_evt, vu_watch_cb cb,
+ void *pvt) {
+ vhost_scsi_dev_t *vdev_scsi;
+ guint id;
+
+ assert(vu_dev);
+ assert(fd >= 0);
+ assert(cb);
+
+ vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
+ if (!vdev_scsi) {
+ vus_panic_cb(vu_dev, NULL);
+ return;
+ }
+
+ id = (guint)(uintptr_t)g_tree_lookup(vdev_scsi->fdmap,
+ (gpointer)(uintptr_t)fd);
+ if (id) {
+ GSource *vus_src = g_main_context_find_source_by_id(NULL, id);
+ assert(vus_src);
+ g_source_destroy(vus_src);
+ (void)g_tree_remove(vdev_scsi->fdmap, (gpointer)(uintptr_t)fd);
+ }
+
+ if (vus_gsrc_new(vdev_scsi, fd, vu_evt, cb, NULL, pvt)) {
+ vus_panic_cb(vu_dev, NULL);
+ }
+}
+
+static void vus_del_watch_cb(VuDev *vu_dev, int fd)
+{
+ vhost_scsi_dev_t *vdev_scsi;
+ guint id;
+
+ assert(vu_dev);
+ assert(fd >= 0);
+
+ vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
+ if (!vdev_scsi) {
+ vus_panic_cb(vu_dev, NULL);
+ return;
+ }
+
+ id = (guint)(uintptr_t)g_tree_lookup(vdev_scsi->fdmap,
+ (gpointer)(uintptr_t)fd);
+ if (id) {
+ GSource *vus_src = g_main_context_find_source_by_id(NULL, id);
+ assert(vus_src);
+ g_source_destroy(vus_src);
+ (void)g_tree_remove(vdev_scsi->fdmap, (gpointer)(uintptr_t)fd);
+ }
+}
+
+static void vus_proc_ctl(VuDev *vu_dev, int idx)
+{
+ /* Control VQ not implemented */
+}
+
+static void vus_proc_evt(VuDev *vu_dev, int idx)
+{
+ /* Event VQ not implemented */
+}
+
+static void vus_proc_req(VuDev *vu_dev, int idx)
+{
+ vhost_scsi_dev_t *vdev_scsi;
+ VuVirtq *vq;
+
+ assert(vu_dev);
+
+ vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
+ if (!vdev_scsi) {
+ vus_panic_cb(vu_dev, NULL);
+ return;
+ }
+
+ if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
+ PERR("VQ Index out of range: %d", idx);
+ vus_panic_cb(vu_dev, NULL);
+ return;
+ }
+
+ vq = vu_get_queue(vu_dev, idx);
+ if (!vq) {
+ PERR("Error fetching VQ (dev=%p, idx=%d)", vu_dev, idx);
+ vus_panic_cb(vu_dev, NULL);
+ return;
+ }
+
+ PDBG("Got kicked on vq[%d]@%p", idx, vq);
+
+ while (1) {
+ VuVirtqElement *elem;
+ VirtIOSCSICmdReq *req;
+ VirtIOSCSICmdResp *rsp;
+
+ elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement));
+ if (!elem) {
+ PDBG("No more elements pending on vq[%d]@%p", idx, vq);
+ break;
+ }
+ PDBG("Popped elem@%p", elem);
+
+ assert(!((elem->out_num > 1) && (elem->in_num > 1)));
+ assert((elem->out_num > 0) && (elem->in_num > 0));
+
+ if (elem->out_sg[0].iov_len < sizeof(VirtIOSCSICmdReq)) {
+ PERR("Invalid virtio-scsi req header");
+ vus_panic_cb(vu_dev, NULL);
+ break;
+ }
+ req = (VirtIOSCSICmdReq *)elem->out_sg[0].iov_base;
+
+ if (elem->in_sg[0].iov_len < sizeof(VirtIOSCSICmdResp)) {
+ PERR("Invalid virtio-scsi rsp header");
+ vus_panic_cb(vu_dev, NULL);
+ break;
+ }
+ rsp = (VirtIOSCSICmdResp *)elem->in_sg[0].iov_base;
+
+ if (handle_cmd_sync(vdev_scsi->luns[0].iscsi_ctx,
+ req, &elem->out_sg[1], elem->out_num - 1,
+ rsp, &elem->in_sg[1], elem->in_num - 1) != 0) {
+ vus_panic_cb(vu_dev, NULL);
+ break;
+ }
+
+ vu_queue_push(vu_dev, vq, elem, 0);
+ vu_queue_notify(vu_dev, vq);
+
+ free(elem);
+ }
+}
+
+static void vus_queue_set_started(VuDev *vu_dev, int idx, bool started)
+{
+ VuVirtq *vq;
+
+ assert(vu_dev);
+
+ if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
+ PERR("VQ Index out of range: %d", idx);
+ vus_panic_cb(vu_dev, NULL);
+ return;
+ }
+
+ vq = vu_get_queue(vu_dev, idx);
+
+ switch (idx) {
+ case 0:
+ vu_set_queue_handler(vu_dev, vq, started ? vus_proc_ctl : NULL);
+ break;
+ case 1:
+ vu_set_queue_handler(vu_dev, vq, started ? vus_proc_evt : NULL);
+ break;
+ default:
+ vu_set_queue_handler(vu_dev, vq, started ? vus_proc_req : NULL);
+ }
+}
+
+static const VuDevIface vus_iface = {
+ .queue_set_started = vus_queue_set_started,
+};
+
+static gboolean vus_vhost_cb(gpointer data)
+{
+ VuDev *vu_dev = (VuDev *)data;
+
+ assert(vu_dev);
+
+ if (!vu_dispatch(vu_dev)) {
+ PERR("Error processing vhost message");
+ vus_panic_cb(vu_dev, NULL);
+ return G_SOURCE_REMOVE;
+ }
+
+ return G_SOURCE_CONTINUE;
+}
+
+/** misc helpers **/
+
+static int unix_sock_new(char *unix_fn)
+{
+ int sock;
+ struct sockaddr_un un;
+ size_t len;
+
+ assert(unix_fn);
+
+ sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (sock <= 0) {
+ perror("socket");
+ return -1;
+ }
+
+ un.sun_family = AF_UNIX;
+ (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
+ len = sizeof(un.sun_family) + strlen(un.sun_path);
+
+ (void)unlink(unix_fn);
+ if (bind(sock, (struct sockaddr *)&un, len) < 0) {
+ perror("bind");
+ goto fail;
+ }
+
+ if (listen(sock, 1) < 0) {
+ perror("listen");
+ goto fail;
+ }
+
+ return sock;
+
+fail:
+ (void)close(sock);
+
+ return -1;
+}
+
+/** vhost-user-scsi **/
+
+static vhost_scsi_dev_t *vdev_scsi_find_by_vu(VuDev *vu_dev)
+{
+ int i;
+
+ assert(vu_dev);
+
+ for (i = 0; i < VUS_MAX_DEVS; i++) {
+ if (&vhost_scsi_devs[i]->vu_dev == vu_dev) {
+ return vhost_scsi_devs[i];
+ }
+ }
+
+ PERR("Unknown VuDev %p", vu_dev);
+ return NULL;
+}
+
+static void vdev_scsi_deinit(vhost_scsi_dev_t *vdev_scsi)
+{
+ if (!vdev_scsi) {
+ return;
+ }
+
+ if (vdev_scsi->server_sock >= 0) {
+ struct sockaddr_storage ss;
+ socklen_t sslen = sizeof(ss);
+
+ if (getsockname(vdev_scsi->server_sock, (struct sockaddr *)&ss,
+ &sslen) == 0) {
+ struct sockaddr_un *su = (struct sockaddr_un *)&ss;
+ (void)unlink(su->sun_path);
+ }
+
+ (void)close(vdev_scsi->server_sock);
+ vdev_scsi->server_sock = -1;
+ }
+
+ if (vdev_scsi->loop) {
+ g_main_loop_unref(vdev_scsi->loop);
+ vdev_scsi->loop = NULL;
+ }
+}
+
+static vhost_scsi_dev_t *vdev_scsi_new(char *unix_fn)
+{
+ vhost_scsi_dev_t *vdev_scsi = NULL;
+
+ assert(unix_fn);
+
+ vdev_scsi = calloc(1, sizeof(vhost_scsi_dev_t));
+ if (!vdev_scsi) {
+ PERR("calloc: %s", strerror(errno));
+ return NULL;
+ }
+
+ vdev_scsi->server_sock = unix_sock_new(unix_fn);
+ if (vdev_scsi->server_sock < 0) {
+ goto err;
+ }
+
+ vdev_scsi->loop = g_main_loop_new(NULL, FALSE);
+ if (!vdev_scsi->loop) {
+ PERR("Error creating glib event loop");
+ goto err;
+ }
+
+ vdev_scsi->fdmap = g_tree_new(vus_fdmap_compare);
+ if (!vdev_scsi->fdmap) {
+ PERR("Error creating glib tree for fdmap");
+ goto err;
+ }
+
+ return vdev_scsi;
+
+err:
+ vdev_scsi_deinit(vdev_scsi);
+ free(vdev_scsi);
+
+ return NULL;
+}
+
+static int vdev_scsi_add_iscsi_lun(vhost_scsi_dev_t *vdev_scsi,
+ char *iscsi_uri, uint32_t lun) {
+ assert(vdev_scsi);
+ assert(iscsi_uri);
+ assert(lun < VUS_MAX_LUNS);
+
+ if (vdev_scsi->luns[lun].iscsi_ctx) {
+ PERR("Lun %d already configured", lun);
+ return -1;
+ }
+
+ if (iscsi_add_lun(&vdev_scsi->luns[lun], iscsi_uri) != 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+static int vdev_scsi_run(vhost_scsi_dev_t *vdev_scsi)
+{
+ int cli_sock;
+ int ret = 0;
+
+ assert(vdev_scsi);
+ assert(vdev_scsi->server_sock >= 0);
+ assert(vdev_scsi->loop);
+
+ cli_sock = accept(vdev_scsi->server_sock, (void *)0, (void *)0);
+ if (cli_sock < 0) {
+ perror("accept");
+ return -1;
+ }
+
+ vu_init(&vdev_scsi->vu_dev,
+ cli_sock,
+ vus_panic_cb,
+ vus_add_watch_cb,
+ vus_del_watch_cb,
+ &vus_iface);
+
+ if (vus_gsrc_new(vdev_scsi, cli_sock, G_IO_IN, NULL, vus_vhost_cb,
+ &vdev_scsi->vu_dev)) {
+ goto fail;
+ }
+
+ g_main_loop_run(vdev_scsi->loop);
+
+out:
+ vu_deinit(&vdev_scsi->vu_dev);
+
+ return ret;
+
+fail:
+ ret = -1;
+ goto out;
+}
+
+int main(int argc, char **argv)
+{
+ vhost_scsi_dev_t *vdev_scsi = NULL;
+ char *unix_fn = NULL;
+ char *iscsi_uri = NULL;
+ int opt, err = EXIT_SUCCESS;
+
+ while ((opt = getopt(argc, argv, "u:i:h")) != -1) {
+ switch (opt) {
+ case 'h':
+ goto help;
+ case 'u':
+ unix_fn = strdup(optarg);
+ break;
+ case 'i':
+ iscsi_uri = strdup(optarg);
+ break;
+ default:
+ goto help;
+ }
+ }
+ if (!unix_fn || !iscsi_uri) {
+ goto help;
+ }
+
+ vdev_scsi = vdev_scsi_new(unix_fn);
+ if (!vdev_scsi) {
+ goto err;
+ }
+ vhost_scsi_devs[0] = vdev_scsi;
+
+ if (vdev_scsi_add_iscsi_lun(vdev_scsi, iscsi_uri, 0) != 0) {
+ goto err;
+ }
+
+ if (vdev_scsi_run(vdev_scsi) != 0) {
+ goto err;
+ }
+
+out:
+ if (vdev_scsi) {
+ vdev_scsi_deinit(vdev_scsi);
+ free(vdev_scsi);
+ }
+ if (unix_fn) {
+ free(unix_fn);
+ }
+ if (iscsi_uri) {
+ free(iscsi_uri);
+ }
+
+ return err;
+
+err:
+ err = EXIT_FAILURE;
+ goto out;
+
+help:
+ fprintf(stderr, "Usage: %s [ -u unix_sock_path -i iscsi_uri ] | [ -h ]\n",
+ argv[0]);
+ fprintf(stderr, " -u path to unix socket\n");
+ fprintf(stderr, " -i iscsi uri for lun 0\n");
+ fprintf(stderr, " -h print help and quit\n");
+
+ goto err;
+}
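Going by its own option parsing and help text, the sample is started with something like "vhost-user-scsi -u /tmp/vus.sock -i iscsi://127.0.0.1/iqn.2016-11.example:target/0", which exports LUN 0 of that iSCSI target over the vhost-user socket. A guest would then attach through the vhost-user-scsi-pci device added elsewhere in this series, presumably along the lines of "-chardev socket,id=vus0,path=/tmp/vus.sock -device vhost-user-scsi-pci,chardev=vus0", together with shared guest memory (e.g. a memory-backend-file with share=on) as vhost-user generally requires; the exact QEMU command line is not part of this patch.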
diff --git a/default-configs/pci.mak b/default-configs/pci.mak
index 3bbeb62d9a..53ff10975c 100644
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak
@@ -43,3 +43,4 @@ CONFIG_VGA=y
CONFIG_VGA_PCI=y
CONFIG_IVSHMEM=$(CONFIG_EVENTFD)
CONFIG_ROCKER=y
+CONFIG_VHOST_USER_SCSI=$(CONFIG_LINUX)
diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak
index 18aed56fc0..b227a36179 100644
--- a/default-configs/s390x-softmmu.mak
+++ b/default-configs/s390x-softmmu.mak
@@ -1,5 +1,6 @@
CONFIG_PCI=y
CONFIG_VIRTIO_PCI=y
+CONFIG_VHOST_USER_SCSI=$(CONFIG_LINUX)
CONFIG_VIRTIO=y
CONFIG_SCLPCONSOLE=y
CONFIG_TERMINAL3270=y
diff --git a/docs/specs/parallels.txt b/docs/interop/parallels.txt
index e9271eba5d..e9271eba5d 100644
--- a/docs/specs/parallels.txt
+++ b/docs/interop/parallels.txt
diff --git a/docs/specs/qcow2.txt b/docs/interop/qcow2.txt
index 80cdfd0e91..80cdfd0e91 100644
--- a/docs/specs/qcow2.txt
+++ b/docs/interop/qcow2.txt
diff --git a/docs/specs/qed_spec.txt b/docs/interop/qed_spec.txt
index 7982e058b2..7982e058b2 100644
--- a/docs/specs/qed_spec.txt
+++ b/docs/interop/qed_spec.txt
diff --git a/docs/qemu-ga-ref.texi b/docs/interop/qemu-ga-ref.texi
index ddb76ce1c2..ddb76ce1c2 100644
--- a/docs/qemu-ga-ref.texi
+++ b/docs/interop/qemu-ga-ref.texi
diff --git a/docs/qemu-qmp-ref.texi b/docs/interop/qemu-qmp-ref.texi
index bb25758bd0..bb25758bd0 100644
--- a/docs/qemu-qmp-ref.texi
+++ b/docs/interop/qemu-qmp-ref.texi
diff --git a/docs/qmp-intro.txt b/docs/interop/qmp-intro.txt
index 60deafbae6..60deafbae6 100644
--- a/docs/qmp-intro.txt
+++ b/docs/interop/qmp-intro.txt
diff --git a/docs/qmp-spec.txt b/docs/interop/qmp-spec.txt
index f8b5356015..f8b5356015 100644
--- a/docs/qmp-spec.txt
+++ b/docs/interop/qmp-spec.txt
diff --git a/docs/specs/vhost-user.txt b/docs/interop/vhost-user.txt
index 481ab56e35..481ab56e35 100644
--- a/docs/specs/vhost-user.txt
+++ b/docs/interop/vhost-user.txt
diff --git a/docs/vnc-ledstate-Pseudo-encoding.txt b/docs/interop/vnc-ledstate-Pseudo-encoding.txt
index 0f124f68b1..0f124f68b1 100644
--- a/docs/vnc-ledstate-Pseudo-encoding.txt
+++ b/docs/interop/vnc-ledstate-Pseudo-encoding.txt
diff --git a/exec.c b/exec.c
index a93e209625..42ad1eaedd 100644
--- a/exec.c
+++ b/exec.c
@@ -1482,25 +1482,17 @@ static int64_t get_file_size(int fd)
return size;
}
-static void *file_ram_alloc(RAMBlock *block,
- ram_addr_t memory,
- const char *path,
- Error **errp)
+static int file_ram_open(const char *path,
+ const char *region_name,
+ bool *created,
+ Error **errp)
{
- bool unlink_on_error = false;
char *filename;
char *sanitized_name;
char *c;
- void *area = MAP_FAILED;
int fd = -1;
- int64_t file_size;
-
- if (kvm_enabled() && !kvm_has_sync_mmu()) {
- error_setg(errp,
- "host lacks kvm mmu notifiers, -mem-path unsupported");
- return NULL;
- }
+ *created = false;
for (;;) {
fd = open(path, O_RDWR);
if (fd >= 0) {
@@ -1511,13 +1503,13 @@ static void *file_ram_alloc(RAMBlock *block,
/* @path names a file that doesn't exist, create it */
fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
if (fd >= 0) {
- unlink_on_error = true;
+ *created = true;
break;
}
} else if (errno == EISDIR) {
/* @path names a directory, create a file there */
/* Make name safe to use with mkstemp by replacing '/' with '_'. */
- sanitized_name = g_strdup(memory_region_name(block->mr));
+ sanitized_name = g_strdup(region_name);
for (c = sanitized_name; *c != '\0'; c++) {
if (*c == '/') {
*c = '_';
@@ -1540,7 +1532,7 @@ static void *file_ram_alloc(RAMBlock *block,
error_setg_errno(errp, errno,
"can't open backing store %s for guest RAM",
path);
- goto error;
+ return -1;
}
/*
* Try again on EINTR and EEXIST. The latter happens when
@@ -1548,6 +1540,17 @@ static void *file_ram_alloc(RAMBlock *block,
*/
}
+ return fd;
+}
+
+static void *file_ram_alloc(RAMBlock *block,
+ ram_addr_t memory,
+ int fd,
+ bool truncate,
+ Error **errp)
+{
+ void *area;
+
block->page_size = qemu_fd_getpagesize(fd);
block->mr->align = block->page_size;
#if defined(__s390x__)
@@ -1556,20 +1559,11 @@ static void *file_ram_alloc(RAMBlock *block,
}
#endif
- file_size = get_file_size(fd);
-
if (memory < block->page_size) {
error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
"or larger than page size 0x%zx",
memory, block->page_size);
- goto error;
- }
-
- if (file_size > 0 && file_size < memory) {
- error_setg(errp, "backing store %s size 0x%" PRIx64
- " does not match 'size' option 0x" RAM_ADDR_FMT,
- path, file_size, memory);
- goto error;
+ return NULL;
}
memory = ROUND_UP(memory, block->page_size);
@@ -1588,7 +1582,7 @@ static void *file_ram_alloc(RAMBlock *block,
* those labels. Therefore, extending the non-empty backend file
* is disabled as well.
*/
- if (!file_size && ftruncate(fd, memory)) {
+ if (truncate && ftruncate(fd, memory)) {
perror("ftruncate");
}
@@ -1597,30 +1591,19 @@ static void *file_ram_alloc(RAMBlock *block,
if (area == MAP_FAILED) {
error_setg_errno(errp, errno,
"unable to map backing store for guest RAM");
- goto error;
+ return NULL;
}
if (mem_prealloc) {
os_mem_prealloc(fd, area, memory, smp_cpus, errp);
if (errp && *errp) {
- goto error;
+ qemu_ram_munmap(area, memory);
+ return NULL;
}
}
block->fd = fd;
return area;
-
-error:
- if (area != MAP_FAILED) {
- qemu_ram_munmap(area, memory);
- }
- if (unlink_on_error) {
- unlink(path);
- }
- if (fd != -1) {
- close(fd);
- }
- return NULL;
}
#endif
@@ -1931,18 +1914,25 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
}
#ifdef __linux__
-RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
- bool share, const char *mem_path,
- Error **errp)
+RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
+ bool share, int fd,
+ Error **errp)
{
RAMBlock *new_block;
Error *local_err = NULL;
+ int64_t file_size;
if (xen_enabled()) {
error_setg(errp, "-mem-path not supported with Xen");
return NULL;
}
+ if (kvm_enabled() && !kvm_has_sync_mmu()) {
+ error_setg(errp,
+ "host lacks kvm mmu notifiers, -mem-path unsupported");
+ return NULL;
+ }
+
if (phys_mem_alloc != qemu_anon_ram_alloc) {
/*
* file_ram_alloc() needs to allocate just like
@@ -1955,13 +1945,20 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
}
size = HOST_PAGE_ALIGN(size);
+ file_size = get_file_size(fd);
+ if (file_size > 0 && file_size < size) {
+ error_setg(errp, "backing store %s size 0x%" PRIx64
+ " does not match 'size' option 0x" RAM_ADDR_FMT,
+ mem_path, file_size, size);
+ return NULL;
+ }
+
new_block = g_malloc0(sizeof(*new_block));
new_block->mr = mr;
new_block->used_length = size;
new_block->max_length = size;
new_block->flags = share ? RAM_SHARED : 0;
- new_block->host = file_ram_alloc(new_block, size,
- mem_path, errp);
+ new_block->host = file_ram_alloc(new_block, size, fd, !file_size, errp);
if (!new_block->host) {
g_free(new_block);
return NULL;
@@ -1974,6 +1971,33 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
return NULL;
}
return new_block;
+
+}
+
+
+RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
+ bool share, const char *mem_path,
+ Error **errp)
+{
+ int fd;
+ bool created;
+ RAMBlock *block;
+
+ fd = file_ram_open(mem_path, memory_region_name(mr), &created, errp);
+ if (fd < 0) {
+ return NULL;
+ }
+
+ block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp);
+ if (!block) {
+ if (created) {
+ unlink(mem_path);
+ }
+ close(fd);
+ return NULL;
+ }
+
+ return block;
}
#endif
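With file_ram_open() split out, qemu_ram_alloc_from_fd() lets guest RAM be backed by a descriptor the caller obtained by other means, and qemu_ram_alloc_from_file() becomes the open-then-allocate composition shown above. A hedged sketch of a caller that already holds an fd (ram_from_existing_fd() is hypothetical; error handling mirrors the code above, where the fd still belongs to the caller on failure):

    /* Sketch only: ram_from_existing_fd() is a hypothetical caller. */
    static RAMBlock *ram_from_existing_fd(ram_addr_t size, MemoryRegion *mr,
                                          int fd, Error **errp)
    {
        RAMBlock *block = qemu_ram_alloc_from_fd(size, mr, true /* share */,
                                                 fd, errp);
        if (!block) {
            /* Unlike qemu_ram_alloc_from_file(), nothing to unlink here;
             * the caller decides whether to close(fd). */
            return NULL;
        }
        return block;
    }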
diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h
index 100c8a98bf..de2c5d5702 100644
--- a/fpu/softfloat-specialize.h
+++ b/fpu/softfloat-specialize.h
@@ -111,7 +111,7 @@ float16 float16_default_nan(float_status *status)
*----------------------------------------------------------------------------*/
float32 float32_default_nan(float_status *status)
{
-#if defined(TARGET_SPARC)
+#if defined(TARGET_SPARC) || defined(TARGET_M68K)
return const_float32(0x7FFFFFFF);
#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) || \
defined(TARGET_XTENSA) || defined(TARGET_S390X) || defined(TARGET_TRICORE)
@@ -136,7 +136,7 @@ float32 float32_default_nan(float_status *status)
*----------------------------------------------------------------------------*/
float64 float64_default_nan(float_status *status)
{
-#if defined(TARGET_SPARC)
+#if defined(TARGET_SPARC) || defined(TARGET_M68K)
return const_float64(LIT64(0x7FFFFFFFFFFFFFFF));
#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) || \
defined(TARGET_S390X)
@@ -162,7 +162,10 @@ float64 float64_default_nan(float_status *status)
floatx80 floatx80_default_nan(float_status *status)
{
floatx80 r;
-
+#if defined(TARGET_M68K)
+ r.low = LIT64(0xFFFFFFFFFFFFFFFF);
+ r.high = 0x7FFF;
+#else
if (status->snan_bit_is_one) {
r.low = LIT64(0xBFFFFFFFFFFFFFFF);
r.high = 0x7FFF;
@@ -170,6 +173,7 @@ floatx80 floatx80_default_nan(float_status *status)
r.low = LIT64(0xC000000000000000);
r.high = 0xFFFF;
}
+#endif
return r;
}
@@ -502,6 +506,30 @@ static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
return 1;
}
}
+#elif defined(TARGET_M68K)
+static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
+ flag aIsLargerSignificand)
+{
+ /* M68000 FAMILY PROGRAMMER'S REFERENCE MANUAL
+ * 3.4 FLOATING-POINT INSTRUCTION DETAILS
+ * If either operand, but not both operands, of an operation is a
+ * nonsignaling NaN, then that NaN is returned as the result. If both
+ * operands are nonsignaling NaNs, then the destination operand
+ * nonsignaling NaN is returned as the result.
+ * If either operand to an operation is a signaling NaN (SNaN), then the
+ * SNaN bit is set in the FPSR EXC byte. If the SNaN exception enable bit
+ * is set in the FPCR ENABLE byte, then the exception is taken and the
+ * destination is not modified. If the SNaN exception enable bit is not
+ * set, setting the SNaN bit in the operand to a one converts the SNaN to
+ * a nonsignaling NaN. The operation then continues as described in the
+ * preceding paragraph for nonsignaling NaNs.
+ */
+ if (aIsQNaN || aIsSNaN) { /* a is the destination operand */
+ return 0; /* return the destination operand */
+ } else {
+ return 1; /* return b */
+ }
+}
#else
static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
flag aIsLargerSignificand)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 15610b9de8..a9b59bdce5 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -37,24 +37,6 @@
#include "kvm_i386.h"
#include "trace.h"
-/*#define DEBUG_INTEL_IOMMU*/
-#ifdef DEBUG_INTEL_IOMMU
-enum {
- DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG,
- DEBUG_CACHE, DEBUG_IR,
-};
-#define VTD_DBGBIT(x) (1 << DEBUG_##x)
-static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
-
-#define VTD_DPRINTF(what, fmt, ...) do { \
- if (vtd_dbgflags & VTD_DBGBIT(what)) { \
- fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \
- ## __VA_ARGS__); } \
- } while (0)
-#else
-#define VTD_DPRINTF(what, fmt, ...) do {} while (0)
-#endif
-
static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
uint64_t wmask, uint64_t w1cmask)
{
@@ -199,9 +181,10 @@ static void vtd_reset_context_cache(IntelIOMMUState *s)
GHashTableIter bus_it;
uint32_t devfn_it;
+ trace_vtd_context_cache_reset();
+
g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);
- VTD_DPRINTF(CACHE, "global context_cache_gen=1");
while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) {
vtd_as = vtd_bus->dev_as[devfn_it];
@@ -291,8 +274,8 @@ static void vtd_generate_interrupt(IntelIOMMUState *s, hwaddr mesg_addr_reg,
msi.address = vtd_get_long_raw(s, mesg_addr_reg);
msi.data = vtd_get_long_raw(s, mesg_data_reg);
- VTD_DPRINTF(FLOG, "msi: addr 0x%"PRIx64 " data 0x%"PRIx32,
- msi.address, msi.data);
+ trace_vtd_irq_generate(msi.address, msi.data);
+
apic_get_class()->send_msi(&msi);
}
@@ -304,14 +287,14 @@ static void vtd_generate_fault_event(IntelIOMMUState *s, uint32_t pre_fsts)
{
if (pre_fsts & VTD_FSTS_PPF || pre_fsts & VTD_FSTS_PFO ||
pre_fsts & VTD_FSTS_IQE) {
- VTD_DPRINTF(FLOG, "there are previous interrupt conditions "
- "to be serviced by software, fault event is not generated "
- "(FSTS_REG 0x%"PRIx32 ")", pre_fsts);
+ trace_vtd_err("There are previous interrupt conditions "
+ "to be serviced by software, fault event "
+ "is not generated.");
return;
}
vtd_set_clear_mask_long(s, DMAR_FECTL_REG, 0, VTD_FECTL_IP);
if (vtd_get_long_raw(s, DMAR_FECTL_REG) & VTD_FECTL_IM) {
- VTD_DPRINTF(FLOG, "Interrupt Mask set, fault event is not generated");
+ trace_vtd_err("Interrupt Mask set, irq is not generated.");
} else {
vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG);
vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
@@ -348,7 +331,7 @@ static void vtd_update_fsts_ppf(IntelIOMMUState *s)
}
}
vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_PPF, ppf_mask);
- VTD_DPRINTF(FLOG, "set PPF of FSTS_REG to %d", ppf_mask ? 1 : 0);
+ trace_vtd_fsts_ppf(!!ppf_mask);
}
static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index)
@@ -380,8 +363,8 @@ static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
}
vtd_set_quad_raw(s, frcd_reg_addr, lo);
vtd_set_quad_raw(s, frcd_reg_addr + 8, hi);
- VTD_DPRINTF(FLOG, "record to FRCD_REG #%"PRIu16 ": hi 0x%"PRIx64
- ", lo 0x%"PRIx64, index, hi, lo);
+
+ trace_vtd_frr_new(index, hi, lo);
}
/* Try to collapse multiple pending faults from the same requester */
@@ -393,7 +376,6 @@ static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id)
for (i = 0; i < DMAR_FRCD_REG_NR; i++) {
frcd_reg = vtd_get_quad_raw(s, addr);
- VTD_DPRINTF(FLOG, "frcd_reg #%d 0x%"PRIx64, i, frcd_reg);
if ((frcd_reg & VTD_FRCD_F) &&
((frcd_reg & VTD_FRCD_SID_MASK) == source_id)) {
return true;
@@ -416,21 +398,24 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
/* This is not a normal fault reason case. Drop it. */
return;
}
- VTD_DPRINTF(FLOG, "sid 0x%"PRIx16 ", fault %d, addr 0x%"PRIx64
- ", is_write %d", source_id, fault, addr, is_write);
+
+ trace_vtd_dmar_fault(source_id, fault, addr, is_write);
+
if (fsts_reg & VTD_FSTS_PFO) {
- VTD_DPRINTF(FLOG, "new fault is not recorded due to "
- "Primary Fault Overflow");
+ trace_vtd_err("New fault is not recorded due to "
+ "Primary Fault Overflow.");
return;
}
+
if (vtd_try_collapse_fault(s, source_id)) {
- VTD_DPRINTF(FLOG, "new fault is not recorded due to "
- "compression of faults");
+ trace_vtd_err("New fault is not recorded due to "
+ "compression of faults.");
return;
}
+
if (vtd_is_frcd_set(s, s->next_frcd_reg)) {
- VTD_DPRINTF(FLOG, "Primary Fault Overflow and "
- "new fault is not recorded, set PFO field");
+ trace_vtd_err("Next Fault Recording Reg is used, "
+ "new fault is not recorded, set PFO field.");
vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_PFO);
return;
}
@@ -438,8 +423,8 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write);
if (fsts_reg & VTD_FSTS_PPF) {
- VTD_DPRINTF(FLOG, "there are pending faults already, "
- "fault event is not generated");
+ trace_vtd_err("There are pending faults already, "
+ "fault event is not generated.");
vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg);
s->next_frcd_reg++;
if (s->next_frcd_reg == DMAR_FRCD_REG_NR) {
@@ -702,7 +687,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
uint64_t access_right_check;
if (!vtd_iova_range_check(iova, ce)) {
- VTD_DPRINTF(GENERAL, "error: iova 0x%"PRIx64 " exceeds limits", iova);
+ trace_vtd_err_dmar_iova_overflow(iova);
return -VTD_FR_ADDR_BEYOND_MGAW;
}
@@ -714,9 +699,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
slpte = vtd_get_slpte(addr, offset);
if (slpte == (uint64_t)-1) {
- VTD_DPRINTF(GENERAL, "error: fail to access second-level paging "
- "entry at level %"PRIu32 " for iova 0x%"PRIx64,
- level, iova);
+ trace_vtd_err_dmar_slpte_read_error(iova, level);
if (level == vtd_ce_get_level(ce)) {
/* Invalid programming of context-entry */
return -VTD_FR_CONTEXT_ENTRY_INV;
@@ -727,15 +710,11 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
*reads = (*reads) && (slpte & VTD_SL_R);
*writes = (*writes) && (slpte & VTD_SL_W);
if (!(slpte & access_right_check)) {
- VTD_DPRINTF(GENERAL, "error: lack of %s permission for "
- "iova 0x%"PRIx64 " slpte 0x%"PRIx64,
- (is_write ? "write" : "read"), iova, slpte);
+ trace_vtd_err_dmar_slpte_perm_error(iova, level, slpte, is_write);
return is_write ? -VTD_FR_WRITE : -VTD_FR_READ;
}
if (vtd_slpte_nonzero_rsvd(slpte, level)) {
- VTD_DPRINTF(GENERAL, "error: non-zero reserved field in second "
- "level paging entry level %"PRIu32 " slpte 0x%"PRIx64,
- level, slpte);
+ trace_vtd_err_dmar_slpte_resv_error(iova, level, slpte);
return -VTD_FR_PAGING_ENTRY_RSVD;
}
@@ -1090,8 +1069,10 @@ out:
* @devfn: The devfn, which is the combined of device and function number
* @is_write: The access is a write operation
* @entry: IOMMUTLBEntry that contain the addr to be translated and result
+ *
+ * Returns true if translation is successful, otherwise false.
*/
-static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
+static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
uint8_t devfn, hwaddr addr, bool is_write,
IOMMUTLBEntry *entry)
{
@@ -1125,6 +1106,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
page_mask = iotlb_entry->mask;
goto out;
}
+
/* Try to fetch context-entry from cache first */
if (cc_entry->context_cache_gen == s->context_cache_gen) {
trace_vtd_iotlb_cc_hit(bus_num, devfn, cc_entry->context_entry.hi,
@@ -1142,7 +1124,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
} else {
vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
}
- return;
+ goto error;
}
/* Update context-cache */
trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo,
@@ -1157,8 +1139,9 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
* Also, let's ignore IOTLB caching as well for PT devices.
*/
if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) {
+ entry->iova = addr & VTD_PAGE_MASK;
entry->translated_addr = entry->iova;
- entry->addr_mask = VTD_PAGE_SIZE - 1;
+ entry->addr_mask = ~VTD_PAGE_MASK;
entry->perm = IOMMU_RW;
trace_vtd_translate_pt(source_id, entry->iova);
@@ -1173,7 +1156,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
*/
vtd_pt_enable_fast_path(s, source_id);
- return;
+ return true;
}
ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level,
@@ -1185,7 +1168,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
} else {
vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
}
- return;
+ goto error;
}
page_mask = vtd_slpt_level_page_mask(level);
@@ -1196,6 +1179,14 @@ out:
entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask;
entry->addr_mask = ~page_mask;
entry->perm = IOMMU_ACCESS_FLAG(reads, writes);
+ return true;
+
+error:
+ entry->iova = 0;
+ entry->translated_addr = 0;
+ entry->addr_mask = 0;
+ entry->perm = IOMMU_NONE;
+ return false;
}
static void vtd_root_table_setup(IntelIOMMUState *s)
@@ -1204,8 +1195,7 @@ static void vtd_root_table_setup(IntelIOMMUState *s)
s->root_extended = s->root & VTD_RTADDR_RTT;
s->root &= VTD_RTADDR_ADDR_MASK;
- VTD_DPRINTF(CSR, "root_table addr 0x%"PRIx64 " %s", s->root,
- (s->root_extended ? "(extended)" : ""));
+ trace_vtd_reg_dmar_root(s->root, s->root_extended);
}
static void vtd_iec_notify_all(IntelIOMMUState *s, bool global,
@@ -1225,8 +1215,7 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
/* Notify global invalidation */
vtd_iec_notify_all(s, true, 0, 0);
- VTD_DPRINTF(CSR, "int remap table addr 0x%"PRIx64 " size %"PRIu32,
- s->intr_root, s->intr_size);
+ trace_vtd_reg_ir_root(s->intr_root, s->intr_size);
}
static void vtd_iommu_replay_all(IntelIOMMUState *s)
@@ -1328,11 +1317,8 @@ static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val)
switch (type) {
case VTD_CCMD_DOMAIN_INVL:
- VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
- (uint16_t)VTD_CCMD_DID(val));
/* Fall through */
case VTD_CCMD_GLOBAL_INVL:
- VTD_DPRINTF(INV, "global invalidation");
caig = VTD_CCMD_GLOBAL_INVL_A;
vtd_context_global_invalidate(s);
break;
@@ -1343,7 +1329,7 @@ static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val)
break;
default:
- VTD_DPRINTF(GENERAL, "error: invalid granularity");
+ trace_vtd_err("Context cache invalidate type error.");
caig = 0;
}
return caig;
@@ -1351,7 +1337,7 @@ static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val)
static void vtd_iotlb_global_invalidate(IntelIOMMUState *s)
{
- trace_vtd_iotlb_reset("global invalidation recved");
+ trace_vtd_inv_desc_iotlb_global();
vtd_reset_iotlb(s);
vtd_iommu_replay_all(s);
}
@@ -1362,6 +1348,8 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
VTDContextEntry ce;
VTDAddressSpace *vtd_as;
+ trace_vtd_inv_desc_iotlb_domain(domain_id);
+
g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
&domain_id);
@@ -1407,6 +1395,8 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
{
VTDIOTLBPageInvInfo info;
+ trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am);
+
assert(am <= VTD_MAMV);
info.domain_id = domain_id;
info.addr = addr;
@@ -1429,15 +1419,12 @@ static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val)
switch (type) {
case VTD_TLB_GLOBAL_FLUSH:
- VTD_DPRINTF(INV, "global invalidation");
iaig = VTD_TLB_GLOBAL_FLUSH_A;
vtd_iotlb_global_invalidate(s);
break;
case VTD_TLB_DSI_FLUSH:
domain_id = VTD_TLB_DID(val);
- VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
- domain_id);
iaig = VTD_TLB_DSI_FLUSH_A;
vtd_iotlb_domain_invalidate(s, domain_id);
break;
@@ -1447,11 +1434,8 @@ static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val)
addr = vtd_get_quad_raw(s, DMAR_IVA_REG);
am = VTD_IVA_AM(addr);
addr = VTD_IVA_ADDR(addr);
- VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16
- " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am);
if (am > VTD_MAMV) {
- VTD_DPRINTF(GENERAL, "error: supported max address mask value is "
- "%"PRIu8, (uint8_t)VTD_MAMV);
+ trace_vtd_err("IOTLB PSI flush: address mask overflow.");
iaig = 0;
break;
}
@@ -1460,7 +1444,7 @@ static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val)
break;
default:
- VTD_DPRINTF(GENERAL, "error: invalid granularity");
+ trace_vtd_err("IOTLB flush: invalid granularity.");
iaig = 0;
}
return iaig;
@@ -1481,21 +1465,19 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
{
uint64_t iqa_val = vtd_get_quad_raw(s, DMAR_IQA_REG);
- VTD_DPRINTF(INV, "Queued Invalidation Enable %s", (en ? "on" : "off"));
+ trace_vtd_inv_qi_enable(en);
+
if (en) {
if (vtd_queued_inv_enable_check(s)) {
s->iq = iqa_val & VTD_IQA_IQA_MASK;
/* 2^(x+8) entries */
s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8);
s->qi_enabled = true;
- VTD_DPRINTF(INV, "DMAR_IQA_REG 0x%"PRIx64, iqa_val);
- VTD_DPRINTF(INV, "Invalidation Queue addr 0x%"PRIx64 " size %d",
- s->iq, s->iq_size);
+ trace_vtd_inv_qi_setup(s->iq, s->iq_size);
/* Ok - report back to driver */
vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES);
} else {
- VTD_DPRINTF(GENERAL, "error: can't enable Queued Invalidation: "
- "tail %"PRIu16, s->iq_tail);
+ trace_vtd_err_qi_enable(s->iq_tail);
}
} else {
if (vtd_queued_inv_disable_check(s)) {
@@ -1506,10 +1488,7 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
/* Ok - report back to driver */
vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_QIES, 0);
} else {
- VTD_DPRINTF(GENERAL, "error: can't disable Queued Invalidation: "
- "head %"PRIu16 ", tail %"PRIu16
- ", last_descriptor %"PRIu8,
- s->iq_head, s->iq_tail, s->iq_last_desc_type);
+ trace_vtd_err_qi_disable(s->iq_head, s->iq_tail, s->iq_last_desc_type);
}
}
}
@@ -1517,8 +1496,6 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
/* Set Root Table Pointer */
static void vtd_handle_gcmd_srtp(IntelIOMMUState *s)
{
- VTD_DPRINTF(CSR, "set Root Table Pointer");
-
vtd_root_table_setup(s);
/* Ok - report back to driver */
vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS);
@@ -1527,8 +1504,6 @@ static void vtd_handle_gcmd_srtp(IntelIOMMUState *s)
/* Set Interrupt Remap Table Pointer */
static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s)
{
- VTD_DPRINTF(CSR, "set Interrupt Remap Table Pointer");
-
vtd_interrupt_remap_table_setup(s);
/* Ok - report back to driver */
vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS);
@@ -1541,7 +1516,7 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
return;
}
- VTD_DPRINTF(CSR, "Translation Enable %s", (en ? "on" : "off"));
+ trace_vtd_dmar_enable(en);
if (en) {
s->dmar_enabled = true;
@@ -1562,7 +1537,7 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
/* Handle Interrupt Remap Enable/Disable */
static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en)
{
- VTD_DPRINTF(CSR, "Interrupt Remap Enable %s", (en ? "on" : "off"));
+ trace_vtd_ir_enable(en);
if (en) {
s->intr_enabled = true;
@@ -1582,7 +1557,7 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG);
uint32_t changed = status ^ val;
- VTD_DPRINTF(CSR, "value 0x%"PRIx32 " status 0x%"PRIx32, val, status);
+ trace_vtd_reg_write_gcmd(status, val);
if (changed & VTD_GCMD_TE) {
/* Translation enable/disable */
vtd_handle_gcmd_te(s, val & VTD_GCMD_TE);
@@ -1614,8 +1589,8 @@ static void vtd_handle_ccmd_write(IntelIOMMUState *s)
/* Context-cache invalidation request */
if (val & VTD_CCMD_ICC) {
if (s->qi_enabled) {
- VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, "
- "should not use register-based invalidation");
+ trace_vtd_err("Queued Invalidation enabled, "
+ "should not use register-based invalidation");
return;
}
ret = vtd_context_cache_invalidate(s, val);
@@ -1623,7 +1598,6 @@ static void vtd_handle_ccmd_write(IntelIOMMUState *s)
vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_ICC, 0ULL);
ret = vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_CAIG_MASK,
ret);
- VTD_DPRINTF(INV, "CCMD_REG write-back val: 0x%"PRIx64, ret);
}
}
@@ -1636,8 +1610,8 @@ static void vtd_handle_iotlb_write(IntelIOMMUState *s)
/* IOTLB invalidation request */
if (val & VTD_TLB_IVT) {
if (s->qi_enabled) {
- VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, "
- "should not use register-based invalidation");
+ trace_vtd_err("Queued Invalidation enabled, "
+ "should not use register-based invalidation.");
return;
}
ret = vtd_iotlb_flush(s, val);
@@ -1645,7 +1619,6 @@ static void vtd_handle_iotlb_write(IntelIOMMUState *s)
vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG, VTD_TLB_IVT, 0ULL);
ret = vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG,
VTD_TLB_FLUSH_GRANU_MASK_A, ret);
- VTD_DPRINTF(INV, "IOTLB_REG write-back val: 0x%"PRIx64, ret);
}
}
@@ -1656,11 +1629,9 @@ static bool vtd_get_inv_desc(dma_addr_t base_addr, uint32_t offset,
dma_addr_t addr = base_addr + offset * sizeof(*inv_desc);
if (dma_memory_read(&address_space_memory, addr, inv_desc,
sizeof(*inv_desc))) {
- VTD_DPRINTF(GENERAL, "error: fail to fetch Invalidation Descriptor "
- "base_addr 0x%"PRIx64 " offset %"PRIu32, base_addr, offset);
+ trace_vtd_err("Read INV DESC failed.");
inv_desc->lo = 0;
inv_desc->hi = 0;
-
return false;
}
inv_desc->lo = le64_to_cpu(inv_desc->lo);
@@ -1746,13 +1717,11 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
switch (inv_desc->lo & VTD_INV_DESC_IOTLB_G) {
case VTD_INV_DESC_IOTLB_GLOBAL:
- trace_vtd_inv_desc_iotlb_global();
vtd_iotlb_global_invalidate(s);
break;
case VTD_INV_DESC_IOTLB_DOMAIN:
domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
- trace_vtd_inv_desc_iotlb_domain(domain_id);
vtd_iotlb_domain_invalidate(s, domain_id);
break;
@@ -1760,7 +1729,6 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi);
am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi);
- trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am);
if (am > VTD_MAMV) {
trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
return false;
@@ -1778,10 +1746,9 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
VTDInvDesc *inv_desc)
{
- VTD_DPRINTF(INV, "inv ir glob %d index %d mask %d",
- inv_desc->iec.granularity,
- inv_desc->iec.index,
- inv_desc->iec.index_mask);
+ trace_vtd_inv_desc_iec(inv_desc->iec.granularity,
+ inv_desc->iec.index,
+ inv_desc->iec.index_mask);
vtd_iec_notify_all(s, !inv_desc->iec.granularity,
inv_desc->iec.index,
@@ -1810,9 +1777,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
(inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
- VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Device "
- "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
- inv_desc->hi, inv_desc->lo);
+ trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
return false;
}
@@ -1857,7 +1822,7 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
VTDInvDesc inv_desc;
uint8_t desc_type;
- VTD_DPRINTF(INV, "iq head %"PRIu16, s->iq_head);
+ trace_vtd_inv_qi_head(s->iq_head);
if (!vtd_get_inv_desc(s->iq, s->iq_head, &inv_desc)) {
s->iq_last_desc_type = VTD_INV_DESC_NONE;
return false;
@@ -1896,8 +1861,7 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
break;
case VTD_INV_DESC_DEVICE:
- VTD_DPRINTF(INV, "Device IOTLB Invalidation Descriptor hi 0x%"PRIx64
- " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
+ trace_vtd_inv_desc("device", inv_desc.hi, inv_desc.lo);
if (!vtd_process_device_iotlb_desc(s, &inv_desc)) {
return false;
}
@@ -1917,11 +1881,11 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
/* Try to fetch and process more Invalidation Descriptors */
static void vtd_fetch_inv_desc(IntelIOMMUState *s)
{
- VTD_DPRINTF(INV, "fetch Invalidation Descriptors");
+ trace_vtd_inv_qi_fetch();
+
if (s->iq_tail >= s->iq_size) {
/* Detects an invalid Tail pointer */
- VTD_DPRINTF(GENERAL, "error: iq_tail is %"PRIu16
- " while iq_size is %"PRIu16, s->iq_tail, s->iq_size);
+ trace_vtd_err_qi_tail(s->iq_tail, s->iq_size);
vtd_handle_inv_queue_error(s);
return;
}
@@ -1944,7 +1908,8 @@ static void vtd_handle_iqt_write(IntelIOMMUState *s)
uint64_t val = vtd_get_quad_raw(s, DMAR_IQT_REG);
s->iq_tail = VTD_IQT_QT(val);
- VTD_DPRINTF(INV, "set iq tail %"PRIu16, s->iq_tail);
+ trace_vtd_inv_qi_tail(s->iq_tail);
+
if (s->qi_enabled && !(vtd_get_long_raw(s, DMAR_FSTS_REG) & VTD_FSTS_IQE)) {
/* Process Invalidation Queue here */
vtd_fetch_inv_desc(s);
@@ -1959,8 +1924,7 @@ static void vtd_handle_fsts_write(IntelIOMMUState *s)
if ((fectl_reg & VTD_FECTL_IP) && !(fsts_reg & status_fields)) {
vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
- VTD_DPRINTF(FLOG, "all pending interrupt conditions serviced, clear "
- "IP field of FECTL_REG");
+ trace_vtd_fsts_clear_ip();
}
/* FIXME: when IQE is Clear, should we try to fetch some Invalidation
* Descriptors if there are any when Queued Invalidation is enabled?
@@ -1975,11 +1939,12 @@ static void vtd_handle_fectl_write(IntelIOMMUState *s)
* software clears the IM field? Or just check if the IM field is zero?
*/
fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG);
+
+ trace_vtd_reg_write_fectl(fectl_reg);
+
if ((fectl_reg & VTD_FECTL_IP) && !(fectl_reg & VTD_FECTL_IM)) {
vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG);
vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
- VTD_DPRINTF(FLOG, "IM field is cleared, generate "
- "fault event interrupt");
}
}
@@ -1989,9 +1954,8 @@ static void vtd_handle_ics_write(IntelIOMMUState *s)
uint32_t iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG);
if ((iectl_reg & VTD_IECTL_IP) && !(ics_reg & VTD_ICS_IWC)) {
+ trace_vtd_reg_ics_clear_ip();
vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
- VTD_DPRINTF(INV, "pending completion interrupt condition serviced, "
- "clear IP field of IECTL_REG");
}
}
@@ -2003,11 +1967,12 @@ static void vtd_handle_iectl_write(IntelIOMMUState *s)
* software clears the IM field? Or just check if the IM field is zero?
*/
iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG);
+
+ trace_vtd_reg_write_iectl(iectl_reg);
+
if ((iectl_reg & VTD_IECTL_IP) && !(iectl_reg & VTD_IECTL_IM)) {
vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
- VTD_DPRINTF(INV, "IM field is cleared, generate "
- "invalidation event interrupt");
}
}
@@ -2016,10 +1981,10 @@ static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
IntelIOMMUState *s = opaque;
uint64_t val;
+ trace_vtd_reg_read(addr, size);
+
if (addr + size > DMAR_REG_SIZE) {
- VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64
- ", got 0x%"PRIx64 " %d",
- (uint64_t)DMAR_REG_SIZE, addr, size);
+ trace_vtd_err("Read MMIO over range.");
return (uint64_t)-1;
}
@@ -2058,8 +2023,7 @@ static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
val = vtd_get_quad(s, addr);
}
}
- VTD_DPRINTF(CSR, "addr 0x%"PRIx64 " size %d val 0x%"PRIx64,
- addr, size, val);
+
return val;
}
@@ -2068,26 +2032,22 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
{
IntelIOMMUState *s = opaque;
+ trace_vtd_reg_write(addr, size, val);
+
if (addr + size > DMAR_REG_SIZE) {
- VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64
- ", got 0x%"PRIx64 " %d",
- (uint64_t)DMAR_REG_SIZE, addr, size);
+ trace_vtd_err("Write MMIO over range.");
return;
}
switch (addr) {
/* Global Command Register, 32-bit */
case DMAR_GCMD_REG:
- VTD_DPRINTF(CSR, "DMAR_GCMD_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
vtd_set_long(s, addr, val);
vtd_handle_gcmd_write(s);
break;
/* Context Command Register, 64-bit */
case DMAR_CCMD_REG:
- VTD_DPRINTF(CSR, "DMAR_CCMD_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
if (size == 4) {
vtd_set_long(s, addr, val);
} else {
@@ -2097,8 +2057,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
break;
case DMAR_CCMD_REG_HI:
- VTD_DPRINTF(CSR, "DMAR_CCMD_REG_HI write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
vtd_handle_ccmd_write(s);
@@ -2106,8 +2064,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
/* IOTLB Invalidation Register, 64-bit */
case DMAR_IOTLB_REG:
- VTD_DPRINTF(INV, "DMAR_IOTLB_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
if (size == 4) {
vtd_set_long(s, addr, val);
} else {
@@ -2117,8 +2073,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
break;
case DMAR_IOTLB_REG_HI:
- VTD_DPRINTF(INV, "DMAR_IOTLB_REG_HI write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
vtd_handle_iotlb_write(s);
@@ -2126,8 +2080,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
/* Invalidate Address Register, 64-bit */
case DMAR_IVA_REG:
- VTD_DPRINTF(INV, "DMAR_IVA_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
if (size == 4) {
vtd_set_long(s, addr, val);
} else {
@@ -2136,16 +2088,12 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
break;
case DMAR_IVA_REG_HI:
- VTD_DPRINTF(INV, "DMAR_IVA_REG_HI write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
break;
/* Fault Status Register, 32-bit */
case DMAR_FSTS_REG:
- VTD_DPRINTF(FLOG, "DMAR_FSTS_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
vtd_handle_fsts_write(s);
@@ -2153,8 +2101,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
/* Fault Event Control Register, 32-bit */
case DMAR_FECTL_REG:
- VTD_DPRINTF(FLOG, "DMAR_FECTL_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
vtd_handle_fectl_write(s);
@@ -2162,40 +2108,30 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
/* Fault Event Data Register, 32-bit */
case DMAR_FEDATA_REG:
- VTD_DPRINTF(FLOG, "DMAR_FEDATA_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
break;
/* Fault Event Address Register, 32-bit */
case DMAR_FEADDR_REG:
- VTD_DPRINTF(FLOG, "DMAR_FEADDR_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
break;
/* Fault Event Upper Address Register, 32-bit */
case DMAR_FEUADDR_REG:
- VTD_DPRINTF(FLOG, "DMAR_FEUADDR_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
break;
/* Protected Memory Enable Register, 32-bit */
case DMAR_PMEN_REG:
- VTD_DPRINTF(CSR, "DMAR_PMEN_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
break;
/* Root Table Address Register, 64-bit */
case DMAR_RTADDR_REG:
- VTD_DPRINTF(CSR, "DMAR_RTADDR_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
if (size == 4) {
vtd_set_long(s, addr, val);
} else {
@@ -2204,16 +2140,12 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
break;
case DMAR_RTADDR_REG_HI:
- VTD_DPRINTF(CSR, "DMAR_RTADDR_REG_HI write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
break;
/* Invalidation Queue Tail Register, 64-bit */
case DMAR_IQT_REG:
- VTD_DPRINTF(INV, "DMAR_IQT_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
if (size == 4) {
vtd_set_long(s, addr, val);
} else {
@@ -2223,8 +2155,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
break;
case DMAR_IQT_REG_HI:
- VTD_DPRINTF(INV, "DMAR_IQT_REG_HI write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
/* 19:63 of IQT_REG is RsvdZ, do nothing here */
@@ -2232,8 +2162,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
/* Invalidation Queue Address Register, 64-bit */
case DMAR_IQA_REG:
- VTD_DPRINTF(INV, "DMAR_IQA_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
if (size == 4) {
vtd_set_long(s, addr, val);
} else {
@@ -2242,16 +2170,12 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
break;
case DMAR_IQA_REG_HI:
- VTD_DPRINTF(INV, "DMAR_IQA_REG_HI write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
break;
/* Invalidation Completion Status Register, 32-bit */
case DMAR_ICS_REG:
- VTD_DPRINTF(INV, "DMAR_ICS_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
vtd_handle_ics_write(s);
@@ -2259,8 +2183,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
/* Invalidation Event Control Register, 32-bit */
case DMAR_IECTL_REG:
- VTD_DPRINTF(INV, "DMAR_IECTL_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
vtd_handle_iectl_write(s);
@@ -2268,32 +2190,24 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
/* Invalidation Event Data Register, 32-bit */
case DMAR_IEDATA_REG:
- VTD_DPRINTF(INV, "DMAR_IEDATA_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
break;
/* Invalidation Event Address Register, 32-bit */
case DMAR_IEADDR_REG:
- VTD_DPRINTF(INV, "DMAR_IEADDR_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
break;
/* Invalidation Event Upper Address Register, 32-bit */
case DMAR_IEUADDR_REG:
- VTD_DPRINTF(INV, "DMAR_IEUADDR_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
break;
/* Fault Recording Registers, 128-bit */
case DMAR_FRCD_REG_0_0:
- VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_0 write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
if (size == 4) {
vtd_set_long(s, addr, val);
} else {
@@ -2302,15 +2216,11 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
break;
case DMAR_FRCD_REG_0_1:
- VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_1 write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
break;
case DMAR_FRCD_REG_0_2:
- VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_2 write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
if (size == 4) {
vtd_set_long(s, addr, val);
} else {
@@ -2321,8 +2231,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
break;
case DMAR_FRCD_REG_0_3:
- VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_3 write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
/* May clear bit 127 (Fault), update PPF */
@@ -2330,8 +2238,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
break;
case DMAR_IRTA_REG:
- VTD_DPRINTF(IR, "DMAR_IRTA_REG write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
if (size == 4) {
vtd_set_long(s, addr, val);
} else {
@@ -2340,15 +2246,11 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
break;
case DMAR_IRTA_REG_HI:
- VTD_DPRINTF(IR, "DMAR_IRTA_REG_HI write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
assert(size == 4);
vtd_set_long(s, addr, val);
break;
default:
- VTD_DPRINTF(GENERAL, "error: unhandled reg write addr 0x%"PRIx64
- ", size %d, val 0x%"PRIx64, addr, size, val);
if (size == 4) {
vtd_set_long(s, addr, val);
} else {
@@ -2362,31 +2264,38 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
{
VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
IntelIOMMUState *s = vtd_as->iommu_state;
- IOMMUTLBEntry ret = {
+ IOMMUTLBEntry iotlb = {
+ /* We'll fill in the rest later. */
.target_as = &address_space_memory,
- .iova = addr,
- .translated_addr = 0,
- .addr_mask = ~(hwaddr)0,
- .perm = IOMMU_NONE,
};
+ bool success;
- if (!s->dmar_enabled) {
+ if (likely(s->dmar_enabled)) {
+ success = vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn,
+ addr, flag & IOMMU_WO, &iotlb);
+ } else {
/* DMAR disabled, passthrough, use 4k-page*/
- ret.iova = addr & VTD_PAGE_MASK_4K;
- ret.translated_addr = addr & VTD_PAGE_MASK_4K;
- ret.addr_mask = ~VTD_PAGE_MASK_4K;
- ret.perm = IOMMU_RW;
- return ret;
+ iotlb.iova = addr & VTD_PAGE_MASK_4K;
+ iotlb.translated_addr = addr & VTD_PAGE_MASK_4K;
+ iotlb.addr_mask = ~VTD_PAGE_MASK_4K;
+ iotlb.perm = IOMMU_RW;
+ success = true;
}
- vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr,
- flag & IOMMU_WO, &ret);
- VTD_DPRINTF(MMU,
- "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8
- " iova 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus),
- VTD_PCI_SLOT(vtd_as->devfn), VTD_PCI_FUNC(vtd_as->devfn),
- vtd_as->devfn, addr, ret.translated_addr);
- return ret;
+ if (likely(success)) {
+ trace_vtd_dmar_translate(pci_bus_num(vtd_as->bus),
+ VTD_PCI_SLOT(vtd_as->devfn),
+ VTD_PCI_FUNC(vtd_as->devfn),
+ iotlb.iova, iotlb.translated_addr,
+ iotlb.addr_mask);
+ } else {
+ trace_vtd_err_dmar_translate(pci_bus_num(vtd_as->bus),
+ VTD_PCI_SLOT(vtd_as->devfn),
+ VTD_PCI_FUNC(vtd_as->devfn),
+ iotlb.iova);
+ }
+
+ return iotlb;
}
static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu,
@@ -2484,25 +2393,23 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
addr = iommu->intr_root + index * sizeof(*entry);
if (dma_memory_read(&address_space_memory, addr, entry,
sizeof(*entry))) {
- VTD_DPRINTF(GENERAL, "error: fail to access IR root at 0x%"PRIx64
- " + %"PRIu16, iommu->intr_root, index);
+ trace_vtd_err("Memory read failed for IRTE.");
return -VTD_FR_IR_ROOT_INVAL;
}
+ trace_vtd_ir_irte_get(index, le64_to_cpu(entry->data[1]),
+ le64_to_cpu(entry->data[0]));
+
if (!entry->irte.present) {
- VTD_DPRINTF(GENERAL, "error: present flag not set in IRTE"
- " entry index %u value 0x%"PRIx64 " 0x%"PRIx64,
- index, le64_to_cpu(entry->data[1]),
- le64_to_cpu(entry->data[0]));
+ trace_vtd_err_irte(index, le64_to_cpu(entry->data[1]),
+ le64_to_cpu(entry->data[0]));
return -VTD_FR_IR_ENTRY_P;
}
if (entry->irte.__reserved_0 || entry->irte.__reserved_1 ||
entry->irte.__reserved_2) {
- VTD_DPRINTF(GENERAL, "error: IRTE entry index %"PRIu16
- " reserved fields non-zero: 0x%"PRIx64 " 0x%"PRIx64,
- index, le64_to_cpu(entry->data[1]),
- le64_to_cpu(entry->data[0]));
+ trace_vtd_err_irte(index, le64_to_cpu(entry->data[1]),
+ le64_to_cpu(entry->data[0]));
return -VTD_FR_IR_IRTE_RSVD;
}
@@ -2511,15 +2418,12 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
source_id = le32_to_cpu(entry->irte.source_id);
switch (entry->irte.sid_vtype) {
case VTD_SVT_NONE:
- VTD_DPRINTF(IR, "No SID validation for IRTE index %d", index);
break;
case VTD_SVT_ALL:
mask = vtd_svt_mask[entry->irte.sid_q];
if ((source_id & mask) != (sid & mask)) {
- VTD_DPRINTF(GENERAL, "SID validation for IRTE index "
- "%d failed (reqid 0x%04x sid 0x%04x)", index,
- sid, source_id);
+ trace_vtd_err_irte_sid(index, sid, source_id);
return -VTD_FR_IR_SID_ERR;
}
break;
@@ -2529,16 +2433,13 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
bus_min = source_id & 0xff;
bus = sid >> 8;
if (bus > bus_max || bus < bus_min) {
- VTD_DPRINTF(GENERAL, "SID validation for IRTE index %d "
- "failed (bus %d outside %d-%d)", index, bus,
- bus_min, bus_max);
+ trace_vtd_err_irte_sid_bus(index, bus, bus_min, bus_max);
return -VTD_FR_IR_SID_ERR;
}
break;
default:
- VTD_DPRINTF(GENERAL, "Invalid SVT bits (0x%x) in IRTE index "
- "%d", entry->irte.sid_vtype, index);
+ trace_vtd_err_irte_svt(index, entry->irte.sid_vtype);
/* Take this as verification failure. */
return -VTD_FR_IR_SID_ERR;
break;
@@ -2573,10 +2474,8 @@ static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index,
irq->dest_mode = irte.irte.dest_mode;
irq->redir_hint = irte.irte.redir_hint;
- VTD_DPRINTF(IR, "remapping interrupt index %d: trig:%u,vec:%u,"
- "deliver:%u,dest:%u,dest_mode:%u", index,
- irq->trigger_mode, irq->vector, irq->delivery_mode,
- irq->dest, irq->dest_mode);
+ trace_vtd_ir_remap(index, irq->trigger_mode, irq->vector,
+ irq->delivery_mode, irq->dest, irq->dest_mode);
return 0;
}
@@ -2618,28 +2517,29 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
assert(origin && translated);
+ trace_vtd_ir_remap_msi_req(origin->address, origin->data);
+
if (!iommu || !iommu->intr_enabled) {
- goto do_not_translate;
+ memcpy(translated, origin, sizeof(*origin));
+ goto out;
}
if (origin->address & VTD_MSI_ADDR_HI_MASK) {
- VTD_DPRINTF(GENERAL, "error: MSI addr high 32 bits nonzero"
- " during interrupt remapping: 0x%"PRIx32,
- (uint32_t)((origin->address & VTD_MSI_ADDR_HI_MASK) >> \
- VTD_MSI_ADDR_HI_SHIFT));
+ trace_vtd_err("MSI address high 32 bits non-zero when "
+ "Interrupt Remapping enabled.");
return -VTD_FR_IR_REQ_RSVD;
}
addr.data = origin->address & VTD_MSI_ADDR_LO_MASK;
if (addr.addr.__head != 0xfee) {
- VTD_DPRINTF(GENERAL, "error: MSI addr low 32 bits invalid: "
- "0x%"PRIx32, addr.data);
+ trace_vtd_err("MSI addr low 32 bit invalid.");
return -VTD_FR_IR_REQ_RSVD;
}
/* This is compatible mode. */
if (addr.addr.int_mode != VTD_IR_INT_FORMAT_REMAP) {
- goto do_not_translate;
+ memcpy(translated, origin, sizeof(*origin));
+ goto out;
}
index = addr.addr.index_h << 15 | le16_to_cpu(addr.addr.index_l);
@@ -2658,34 +2558,28 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
}
if (addr.addr.sub_valid) {
- VTD_DPRINTF(IR, "received MSI interrupt");
+ trace_vtd_ir_remap_type("MSI");
if (origin->data & VTD_IR_MSI_DATA_RESERVED) {
- VTD_DPRINTF(GENERAL, "error: MSI data bits non-zero for "
- "interrupt remappable entry: 0x%"PRIx32,
- origin->data);
+ trace_vtd_err_ir_msi_invalid(sid, origin->address, origin->data);
return -VTD_FR_IR_REQ_RSVD;
}
} else {
uint8_t vector = origin->data & 0xff;
uint8_t trigger_mode = (origin->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
- VTD_DPRINTF(IR, "received IOAPIC interrupt");
+ trace_vtd_ir_remap_type("IOAPIC");
/* IOAPIC entry vector should be aligned with IRTE vector
* (see vt-d spec 5.1.5.1). */
if (vector != irq.vector) {
- VTD_DPRINTF(GENERAL, "IOAPIC vector inconsistent: "
- "entry: %d, IRTE: %d, index: %d",
- vector, irq.vector, index);
+ trace_vtd_warn_ir_vector(sid, index, vector, irq.vector);
}
/* The Trigger Mode field must match the Trigger Mode in the IRTE.
* (see vt-d spec 5.1.5.1). */
if (trigger_mode != irq.trigger_mode) {
- VTD_DPRINTF(GENERAL, "IOAPIC trigger mode inconsistent: "
- "entry: %u, IRTE: %u, index: %d",
- trigger_mode, irq.trigger_mode, index);
+ trace_vtd_warn_ir_trigger(sid, index, trigger_mode,
+ irq.trigger_mode);
}
-
}
/*
@@ -2697,13 +2591,9 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
/* Translate VTDIrq to MSI message */
vtd_generate_msi_message(&irq, translated);
- VTD_DPRINTF(IR, "mapping MSI 0x%"PRIx64":0x%"PRIx32 " -> "
- "0x%"PRIx64":0x%"PRIx32, origin->address, origin->data,
- translated->address, translated->data);
- return 0;
-
-do_not_translate:
- memcpy(translated, origin, sizeof(*origin));
+out:
+ trace_vtd_ir_remap_msi(origin->address, origin->data,
+ translated->address, translated->data);
return 0;
}
@@ -2740,16 +2630,10 @@ static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid);
if (ret) {
/* TODO: report error */
- VTD_DPRINTF(GENERAL, "int remap fail for addr 0x%"PRIx64
- " data 0x%"PRIx32, from.address, from.data);
/* Drop this interrupt */
return MEMTX_ERROR;
}
- VTD_DPRINTF(IR, "delivering MSI 0x%"PRIx64":0x%"PRIx32
- " for device sid 0x%04x",
- to.address, to.data, sid);
-
apic_get_class()->send_msi(&to);
return MEMTX_OK;
@@ -3052,7 +2936,6 @@ static void vtd_reset(DeviceState *dev)
{
IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
- VTD_DPRINTF(GENERAL, "");
vtd_init(s);
/*
@@ -3125,7 +3008,6 @@ static void vtd_realize(DeviceState *dev, Error **errp)
}
bus = pcms->bus;
- VTD_DPRINTF(GENERAL, "");
x86_iommu->type = TYPE_INTEL;
if (!vtd_decide_config(s, errp)) {
@@ -3173,7 +3055,6 @@ static const TypeInfo vtd_info = {
static void vtd_register_types(void)
{
- VTD_DPRINTF(GENERAL, "");
type_register_static(&vtd_info);
}
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 0e73a65bf2..f50ecd8b73 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -384,6 +384,7 @@ typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo;
/* Pagesize of VTD paging structures, including root and context tables */
#define VTD_PAGE_SHIFT 12
#define VTD_PAGE_SIZE (1ULL << VTD_PAGE_SHIFT)
+#define VTD_PAGE_MASK (VTD_PAGE_SIZE - 1)
#define VTD_PAGE_SHIFT_4K 12
#define VTD_PAGE_MASK_4K (~((1ULL << VTD_PAGE_SHIFT_4K) - 1))
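A quick sanity sketch of the page-size macros above (an illustrative worked example, not part of the patch): the values follow directly from the definitions, and note that the new VTD_PAGE_MASK keeps the in-page offset bits while the pre-existing VTD_PAGE_MASK_4K clears them.

/* Sketch only -- worked values for the macros above.
 * QEMU_BUILD_BUG_ON() comes from "qemu/compiler.h". */
QEMU_BUILD_BUG_ON(VTD_PAGE_SIZE != 0x1000);       /* 1ULL << 12             */
QEMU_BUILD_BUG_ON(VTD_PAGE_MASK != 0xfff);        /* offset-within-page bits */
QEMU_BUILD_BUG_ON(VTD_PAGE_MASK_4K != ~0xfffULL); /* keeps page-frame bits   */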
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 5b8c6fbbea..db41cca063 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1692,6 +1692,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
MemoryRegion *mr = ddc->get_memory_region(dimm);
uint64_t align = TARGET_PAGE_SIZE;
+ bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
if (memory_region_get_alignment(mr) && pcmc->enforce_aligned_dimm) {
align = memory_region_get_alignment(mr);
@@ -1703,17 +1704,18 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
goto out;
}
+ if (is_nvdimm && !pcms->acpi_nvdimm_state.is_enabled) {
+ error_setg(&local_err,
+ "nvdimm is not enabled: missing 'nvdimm' in '-M'");
+ goto out;
+ }
+
pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err);
if (local_err) {
goto out;
}
- if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
- if (!pcms->acpi_nvdimm_state.is_enabled) {
- error_setg(&local_err,
- "nvdimm is not enabled: missing 'nvdimm' in '-M'");
- goto out;
- }
+ if (is_nvdimm) {
nvdimm_plug(&pcms->acpi_nvdimm_state);
}
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 72556dad48..5f111d6dde 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -19,6 +19,13 @@ vtd_inv_desc_wait_sw(uint64_t addr, uint32_t data) "wait invalidate status write
vtd_inv_desc_wait_irq(const char *msg) "%s"
vtd_inv_desc_wait_invalid(uint64_t hi, uint64_t lo) "invalid wait desc hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_inv_desc_wait_write_fail(uint64_t hi, uint64_t lo) "write fail for wait desc hi 0x%"PRIx64" lo 0x%"PRIx64
+vtd_inv_desc_iec(uint32_t granularity, uint32_t index, uint32_t mask) "granularity 0x%"PRIx32" index 0x%"PRIx32" mask 0x%"PRIx32
+vtd_inv_qi_enable(bool enable) "enabled %d"
+vtd_inv_qi_setup(uint64_t addr, int size) "addr 0x%"PRIx64" size %d"
+vtd_inv_qi_head(uint16_t head) "read head %d"
+vtd_inv_qi_tail(uint16_t tail) "write tail %d"
+vtd_inv_qi_fetch(void) ""
+vtd_context_cache_reset(void) ""
vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
vtd_re_invalid(uint64_t hi, uint64_t lo) "invalid root entry hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present"
@@ -40,6 +47,43 @@ vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device
vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64
vtd_translate_pt(uint16_t sid, uint64_t addr) "source id 0x%"PRIu16", iova 0x%"PRIx64
vtd_pt_enable_fast_path(uint16_t sid, bool success) "sid 0x%"PRIu16" %d"
+vtd_irq_generate(uint64_t addr, uint64_t data) "addr 0x%"PRIx64" data 0x%"PRIx64
+vtd_reg_read(uint64_t addr, uint64_t size) "addr 0x%"PRIx64" size 0x%"PRIx64
+vtd_reg_write(uint64_t addr, uint64_t size, uint64_t val) "addr 0x%"PRIx64" size 0x%"PRIx64" value 0x%"PRIx64
+vtd_reg_dmar_root(uint64_t addr, bool extended) "addr 0x%"PRIx64" extended %d"
+vtd_reg_ir_root(uint64_t addr, uint32_t size) "addr 0x%"PRIx64" size 0x%"PRIx32
+vtd_reg_write_gcmd(uint32_t status, uint32_t val) "status 0x%"PRIx32" value 0x%"PRIx32
+vtd_reg_write_fectl(uint32_t value) "value 0x%"PRIx32
+vtd_reg_write_iectl(uint32_t value) "value 0x%"PRIx32
+vtd_reg_ics_clear_ip(void) ""
+vtd_dmar_translate(uint8_t bus, uint8_t slot, uint8_t func, uint64_t iova, uint64_t gpa, uint64_t mask) "dev %02x:%02x.%02x iova 0x%"PRIx64" -> gpa 0x%"PRIx64" mask 0x%"PRIx64
+vtd_dmar_enable(bool en) "enable %d"
+vtd_dmar_fault(uint16_t sid, int fault, uint64_t addr, bool is_write) "sid 0x%"PRIx16" fault %d addr 0x%"PRIx64" write %d"
+vtd_ir_enable(bool en) "enable %d"
+vtd_ir_irte_get(int index, uint64_t hi, uint64_t lo) "index %d high 0x%"PRIx64" low 0x%"PRIx64
+vtd_ir_remap(int index, int tri, int vec, int deliver, uint32_t dest, int dest_mode) "index %d trigger %d vector %d deliver %d dest 0x%"PRIx32" mode %d"
+vtd_ir_remap_type(const char *type) "%s"
+vtd_ir_remap_msi(uint64_t addr, uint64_t data, uint64_t addr2, uint64_t data2) "(addr 0x%"PRIx64", data 0x%"PRIx64") -> (addr 0x%"PRIx64", data 0x%"PRIx64")"
+vtd_ir_remap_msi_req(uint64_t addr, uint64_t data) "addr 0x%"PRIx64" data 0x%"PRIx64
+vtd_fsts_ppf(bool set) "FSTS PPF bit set to %d"
+vtd_fsts_clear_ip(void) ""
+vtd_frr_new(int index, uint64_t hi, uint64_t lo) "index %d high 0x%"PRIx64" low 0x%"PRIx64
+vtd_err(const char *str) "%s"
+vtd_err_dmar_iova_overflow(uint64_t iova) "iova 0x%"PRIx64
+vtd_err_dmar_slpte_read_error(uint64_t iova, int level) "iova 0x%"PRIx64" level %d"
+vtd_err_dmar_slpte_perm_error(uint64_t iova, int level, uint64_t slpte, bool is_write) "iova 0x%"PRIx64" level %d slpte 0x%"PRIx64" write %d"
+vtd_err_dmar_slpte_resv_error(uint64_t iova, int level, uint64_t slpte) "iova 0x%"PRIx64" level %d slpte 0x%"PRIx64
+vtd_err_dmar_translate(uint8_t bus, uint8_t slot, uint8_t func, uint64_t iova) "dev %02x:%02x.%02x iova 0x%"PRIx64
+vtd_err_qi_enable(uint16_t tail) "tail 0x%"PRIx16
+vtd_err_qi_disable(uint16_t head, uint16_t tail, int type) "head 0x%"PRIx16" tail 0x%"PRIx16" last_desc_type %d"
+vtd_err_qi_tail(uint16_t tail, uint16_t size) "tail 0x%"PRIx16" size 0x%"PRIx16
+vtd_err_irte(int index, uint64_t hi, uint64_t lo) "index %d high 0x%"PRIx64" low 0x%"PRIx64
+vtd_err_irte_sid(int index, uint16_t req, uint16_t target) "index %d SVT_ALL sid 0x%"PRIx16" (should be: 0x%"PRIx16")"
+vtd_err_irte_sid_bus(int index, uint8_t bus, uint8_t min, uint8_t max) "index %d SVT_BUS bus 0x%"PRIx8" (should be: 0x%"PRIx8"-0x%"PRIx8")"
+vtd_err_irte_svt(int index, int type) "index %d SVT type %d"
+vtd_err_ir_msi_invalid(uint16_t sid, uint64_t addr, uint64_t data) "sid 0x%"PRIx16" addr 0x%"PRIx64" data 0x%"PRIx64
+vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target) "sid 0x%"PRIx16" index %d vec %d (should be: %d)"
+vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target) "sid 0x%"PRIx16" index %d trigger %d (should be: %d)"
# hw/i386/amd_iommu.c
amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32
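For readers new to the tracing framework: each trace-events line added above is compiled into a trace_<name>() helper with matching arguments, which is what the intel_iommu.c hunks call in place of the removed VTD_DPRINTF statements. A minimal sketch of the call side, assuming the usual generated per-directory "trace.h" header:

/* Illustrative sketch only -- the real helpers are generated from trace-events. */
#include "qemu/osdep.h"
#include "trace.h"

static void log_reg_write(uint64_t addr, uint64_t size, uint64_t val)
{
    /* Matches the vtd_reg_write declaration above and the call in vtd_mem_write(). */
    trace_vtd_reg_write(addr, size, val);
}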
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 6367d041f0..2f0819d977 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -491,9 +491,9 @@ static void setup_interrupt(IVShmemState *s, int vector, Error **errp)
static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
{
+ Error *local_err = NULL;
struct stat buf;
size_t size;
- void *ptr;
if (s->ivshmem_bar2) {
error_setg(errp, "server sent unexpected shared memory message");
@@ -522,15 +522,13 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
}
/* mmap the region and map into the BAR2 */
- ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- if (ptr == MAP_FAILED) {
- error_setg_errno(errp, errno, "Failed to mmap shared memory");
- close(fd);
+ memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s),
+ "ivshmem.bar2", size, true, fd, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
return;
}
- memory_region_init_ram_ptr(&s->server_bar2, OBJECT(s),
- "ivshmem.bar2", size, ptr);
- memory_region_set_fd(&s->server_bar2, fd);
+
s->ivshmem_bar2 = &s->server_bar2;
}
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index cd5c49616e..28cb97b60f 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -134,7 +134,7 @@ static void q35_host_get_mmcfg_size(Object *obj, Visitor *v, const char *name,
visit_type_uint32(v, name, &value, errp);
}
-static Property mch_props[] = {
+static Property q35_host_props[] = {
DEFINE_PROP_UINT64(PCIE_HOST_MCFG_BASE, Q35PCIHost, parent_obj.base_addr,
MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT),
DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, Q35PCIHost,
@@ -154,7 +154,7 @@ static void q35_host_class_init(ObjectClass *klass, void *data)
hc->root_bus_path = q35_host_root_bus_path;
dc->realize = q35_host_realize;
- dc->props = mch_props;
+ dc->props = q35_host_props;
/* Reason: needs to be wired up by pc_q35_init */
dc->user_creatable = false;
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
@@ -369,7 +369,7 @@ static void mch_update_smram(MCHPCIState *mch)
tseg_size = 1024 * 1024 * 8;
break;
default:
- tseg_size = 0;
+ tseg_size = 1024 * 1024 * (uint32_t)mch->ext_tseg_mbytes;
break;
}
} else {
@@ -392,6 +392,17 @@ static void mch_update_smram(MCHPCIState *mch)
memory_region_transaction_commit();
}
+static void mch_update_ext_tseg_mbytes(MCHPCIState *mch)
+{
+ PCIDevice *pd = PCI_DEVICE(mch);
+ uint8_t *reg = pd->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES;
+
+ if (mch->ext_tseg_mbytes > 0 &&
+ pci_get_word(reg) == MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY) {
+ pci_set_word(reg, mch->ext_tseg_mbytes);
+ }
+}
+
static void mch_write_config(PCIDevice *d,
uint32_t address, uint32_t val, int len)
{
@@ -413,6 +424,11 @@ static void mch_write_config(PCIDevice *d,
MCH_HOST_BRIDGE_SMRAM_SIZE)) {
mch_update_smram(mch);
}
+
+ if (ranges_overlap(address, len, MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
+ MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_SIZE)) {
+ mch_update_ext_tseg_mbytes(mch);
+ }
}
static void mch_update(MCHPCIState *mch)
@@ -420,6 +436,7 @@ static void mch_update(MCHPCIState *mch)
mch_update_pciexbar(mch);
mch_update_pam(mch);
mch_update_smram(mch);
+ mch_update_ext_tseg_mbytes(mch);
}
static int mch_post_load(void *opaque, int version_id)
@@ -457,6 +474,11 @@ static void mch_reset(DeviceState *qdev)
d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK;
d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK;
+ if (mch->ext_tseg_mbytes > 0) {
+ pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
+ MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY);
+ }
+
mch_update(mch);
}
@@ -465,6 +487,12 @@ static void mch_realize(PCIDevice *d, Error **errp)
int i;
MCHPCIState *mch = MCH_PCI_DEVICE(d);
+ if (mch->ext_tseg_mbytes > MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_MAX) {
+ error_setg(errp, "invalid extended-tseg-mbytes value: %" PRIu16,
+ mch->ext_tseg_mbytes);
+ return;
+ }
+
/* setup pci memory mapping */
pc_pci_as_mapping_init(OBJECT(mch), mch->system_memory,
mch->pci_address_space);
@@ -530,6 +558,12 @@ uint64_t mch_mcfg_base(void)
return MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT;
}
+static Property mch_props[] = {
+ DEFINE_PROP_UINT16("extended-tseg-mbytes", MCHPCIState, ext_tseg_mbytes,
+ 16),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
static void mch_class_init(ObjectClass *klass, void *data)
{
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
@@ -538,6 +572,7 @@ static void mch_class_init(ObjectClass *klass, void *data)
k->realize = mch_realize;
k->config_write = mch_write_config;
dc->reset = mch_reset;
+ dc->props = mch_props;
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
dc->desc = "Host bridge";
dc->vmsd = &vmstate_mch;
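The extended-TSEG changes above implement a small query handshake: mch_reset() preloads the EXT_TSEG_MBYTES register with MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY, and whenever the register holds that sentinel (for example after guest firmware rewrites it), mch_update_ext_tseg_mbytes() replaces it with the configured "extended-tseg-mbytes" value (default 16, capped at MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_MAX). A firmware-side sketch of how this would presumably be consumed; pci_config_writew()/pci_config_readw() and Q35_HOST_BRIDGE_BDF are hypothetical stand-ins for the firmware's config-space accessors:

/* Hypothetical firmware-side sketch of the query handshake implemented above. */
static uint16_t query_ext_tseg_mbytes(void)
{
    pci_config_writew(Q35_HOST_BRIDGE_BDF, MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
                      MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY);
    /* Reads back the extended TSEG limit in MiB, or the query value itself
     * if the host bridge does not implement the feature. */
    return pci_config_readw(Q35_HOST_BRIDGE_BDF, MCH_HOST_BRIDGE_EXT_TSEG_MBYTES);
}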
diff --git a/hw/scsi/Makefile.objs b/hw/scsi/Makefile.objs
index 54d8754e9a..b188f7242b 100644
--- a/hw/scsi/Makefile.objs
+++ b/hw/scsi/Makefile.objs
@@ -11,4 +11,5 @@ obj-$(CONFIG_PSERIES) += spapr_vscsi.o
ifeq ($(CONFIG_VIRTIO),y)
obj-y += virtio-scsi.o virtio-scsi-dataplane.o
obj-$(CONFIG_VHOST_SCSI) += vhost-scsi-common.o vhost-scsi.o
+obj-$(CONFIG_VHOST_USER_SCSI) += vhost-scsi-common.o vhost-user-scsi.o
endif
diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index 804122ab05..734fdaef90 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -63,6 +63,7 @@ typedef struct MegasasCmd {
hwaddr pa;
hwaddr pa_size;
+ uint32_t dcmd_opcode;
union mfi_frame *frame;
SCSIRequest *req;
QEMUSGList qsg;
@@ -309,9 +310,11 @@ static int megasas_build_sense(MegasasCmd *cmd, uint8_t *sense_ptr,
PCIDevice *pcid = PCI_DEVICE(cmd->state);
uint32_t pa_hi = 0, pa_lo;
hwaddr pa;
+ int frame_sense_len;
- if (sense_len > cmd->frame->header.sense_len) {
- sense_len = cmd->frame->header.sense_len;
+ frame_sense_len = cmd->frame->header.sense_len;
+ if (sense_len > frame_sense_len) {
+ sense_len = frame_sense_len;
}
if (sense_len) {
pa_lo = le32_to_cpu(cmd->frame->pass.sense_addr_lo);
@@ -511,6 +514,7 @@ static MegasasCmd *megasas_enqueue_frame(MegasasState *s,
cmd->context &= (uint64_t)0xFFFFFFFF;
}
cmd->count = count;
+ cmd->dcmd_opcode = -1;
s->busy++;
if (s->consumer_pa) {
@@ -605,6 +609,9 @@ static void megasas_reset_frames(MegasasState *s)
static void megasas_abort_command(MegasasCmd *cmd)
{
/* Never abort internal commands. */
+ if (cmd->dcmd_opcode != -1) {
+ return;
+ }
if (cmd->req != NULL) {
scsi_req_cancel(cmd->req);
}
@@ -673,15 +680,16 @@ out:
static int megasas_map_dcmd(MegasasState *s, MegasasCmd *cmd)
{
dma_addr_t iov_pa, iov_size;
+ int iov_count;
cmd->flags = le16_to_cpu(cmd->frame->header.flags);
- if (!cmd->frame->header.sge_count) {
+ iov_count = cmd->frame->header.sge_count;
+ if (!iov_count) {
trace_megasas_dcmd_zero_sge(cmd->index);
cmd->iov_size = 0;
return 0;
- } else if (cmd->frame->header.sge_count > 1) {
- trace_megasas_dcmd_invalid_sge(cmd->index,
- cmd->frame->header.sge_count);
+ } else if (iov_count > 1) {
+ trace_megasas_dcmd_invalid_sge(cmd->index, iov_count);
cmd->iov_size = 0;
return -EINVAL;
}
@@ -1012,7 +1020,6 @@ static int megasas_pd_get_info_submit(SCSIDevice *sdev, int lun,
uint64_t pd_size;
uint16_t pd_id = ((sdev->id & 0xFF) << 8) | (lun & 0xFF);
uint8_t cmdbuf[6];
- SCSIRequest *req;
size_t len, resid;
if (!cmd->iov_buf) {
@@ -1021,8 +1028,8 @@ static int megasas_pd_get_info_submit(SCSIDevice *sdev, int lun,
info->inquiry_data[0] = 0x7f; /* Force PQual 0x3, PType 0x1f */
info->vpd_page83[0] = 0x7f;
megasas_setup_inquiry(cmdbuf, 0, sizeof(info->inquiry_data));
- req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd);
- if (!req) {
+ cmd->req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd);
+ if (!cmd->req) {
trace_megasas_dcmd_req_alloc_failed(cmd->index,
"PD get info std inquiry");
g_free(cmd->iov_buf);
@@ -1031,26 +1038,26 @@ static int megasas_pd_get_info_submit(SCSIDevice *sdev, int lun,
}
trace_megasas_dcmd_internal_submit(cmd->index,
"PD get info std inquiry", lun);
- len = scsi_req_enqueue(req);
+ len = scsi_req_enqueue(cmd->req);
if (len > 0) {
cmd->iov_size = len;
- scsi_req_continue(req);
+ scsi_req_continue(cmd->req);
}
return MFI_STAT_INVALID_STATUS;
} else if (info->inquiry_data[0] != 0x7f && info->vpd_page83[0] == 0x7f) {
megasas_setup_inquiry(cmdbuf, 0x83, sizeof(info->vpd_page83));
- req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd);
- if (!req) {
+ cmd->req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd);
+ if (!cmd->req) {
trace_megasas_dcmd_req_alloc_failed(cmd->index,
"PD get info vpd inquiry");
return MFI_STAT_FLASH_ALLOC_FAIL;
}
trace_megasas_dcmd_internal_submit(cmd->index,
"PD get info vpd inquiry", lun);
- len = scsi_req_enqueue(req);
+ len = scsi_req_enqueue(cmd->req);
if (len > 0) {
cmd->iov_size = len;
- scsi_req_continue(req);
+ scsi_req_continue(cmd->req);
}
return MFI_STAT_INVALID_STATUS;
}
@@ -1212,7 +1219,6 @@ static int megasas_ld_get_info_submit(SCSIDevice *sdev, int lun,
struct mfi_ld_info *info = cmd->iov_buf;
size_t dcmd_size = sizeof(struct mfi_ld_info);
uint8_t cdb[6];
- SCSIRequest *req;
ssize_t len, resid;
uint16_t sdev_id = ((sdev->id & 0xFF) << 8) | (lun & 0xFF);
uint64_t ld_size;
@@ -1221,8 +1227,8 @@ static int megasas_ld_get_info_submit(SCSIDevice *sdev, int lun,
cmd->iov_buf = g_malloc0(dcmd_size);
info = cmd->iov_buf;
megasas_setup_inquiry(cdb, 0x83, sizeof(info->vpd_page83));
- req = scsi_req_new(sdev, cmd->index, lun, cdb, cmd);
- if (!req) {
+ cmd->req = scsi_req_new(sdev, cmd->index, lun, cdb, cmd);
+ if (!cmd->req) {
trace_megasas_dcmd_req_alloc_failed(cmd->index,
"LD get info vpd inquiry");
g_free(cmd->iov_buf);
@@ -1231,10 +1237,10 @@ static int megasas_ld_get_info_submit(SCSIDevice *sdev, int lun,
}
trace_megasas_dcmd_internal_submit(cmd->index,
"LD get info vpd inquiry", lun);
- len = scsi_req_enqueue(req);
+ len = scsi_req_enqueue(cmd->req);
if (len > 0) {
cmd->iov_size = len;
- scsi_req_continue(req);
+ scsi_req_continue(cmd->req);
}
return MFI_STAT_INVALID_STATUS;
}
@@ -1559,22 +1565,21 @@ static const struct dcmd_cmd_tbl_t {
static int megasas_handle_dcmd(MegasasState *s, MegasasCmd *cmd)
{
- int opcode;
int retval = 0;
size_t len;
const struct dcmd_cmd_tbl_t *cmdptr = dcmd_cmd_tbl;
- opcode = le32_to_cpu(cmd->frame->dcmd.opcode);
- trace_megasas_handle_dcmd(cmd->index, opcode);
+ cmd->dcmd_opcode = le32_to_cpu(cmd->frame->dcmd.opcode);
+ trace_megasas_handle_dcmd(cmd->index, cmd->dcmd_opcode);
if (megasas_map_dcmd(s, cmd) < 0) {
return MFI_STAT_MEMORY_NOT_AVAILABLE;
}
- while (cmdptr->opcode != -1 && cmdptr->opcode != opcode) {
+ while (cmdptr->opcode != -1 && cmdptr->opcode != cmd->dcmd_opcode) {
cmdptr++;
}
len = cmd->iov_size;
if (cmdptr->opcode == -1) {
- trace_megasas_dcmd_unhandled(cmd->index, opcode, len);
+ trace_megasas_dcmd_unhandled(cmd->index, cmd->dcmd_opcode, len);
retval = megasas_dcmd_dummy(s, cmd);
} else {
trace_megasas_dcmd_enter(cmd->index, cmdptr->desc, len);
@@ -1587,15 +1592,14 @@ static int megasas_handle_dcmd(MegasasState *s, MegasasCmd *cmd)
}
static int megasas_finish_internal_dcmd(MegasasCmd *cmd,
- SCSIRequest *req)
+ SCSIRequest *req, size_t resid)
{
- int opcode;
int retval = MFI_STAT_OK;
int lun = req->lun;
- opcode = le32_to_cpu(cmd->frame->dcmd.opcode);
- trace_megasas_dcmd_internal_finish(cmd->index, opcode, lun);
- switch (opcode) {
+ trace_megasas_dcmd_internal_finish(cmd->index, cmd->dcmd_opcode, lun);
+ cmd->iov_size -= resid;
+ switch (cmd->dcmd_opcode) {
case MFI_DCMD_PD_GET_INFO:
retval = megasas_pd_get_info_submit(req->dev, lun, cmd);
break;
@@ -1603,7 +1607,7 @@ static int megasas_finish_internal_dcmd(MegasasCmd *cmd,
retval = megasas_ld_get_info_submit(req->dev, lun, cmd);
break;
default:
- trace_megasas_dcmd_internal_invalid(cmd->index, opcode);
+ trace_megasas_dcmd_internal_invalid(cmd->index, cmd->dcmd_opcode);
retval = MFI_STAT_INVALID_DCMD;
break;
}
@@ -1647,43 +1651,42 @@ static int megasas_enqueue_req(MegasasCmd *cmd, bool is_write)
}
static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd,
- bool is_logical)
+ int frame_cmd)
{
uint8_t *cdb;
+ int target_id, lun_id, cdb_len;
bool is_write;
struct SCSIDevice *sdev = NULL;
+ bool is_logical = (frame_cmd == MFI_CMD_LD_SCSI_IO);
cdb = cmd->frame->pass.cdb;
+ target_id = cmd->frame->header.target_id;
+ lun_id = cmd->frame->header.lun_id;
+ cdb_len = cmd->frame->header.cdb_len;
if (is_logical) {
- if (cmd->frame->header.target_id >= MFI_MAX_LD ||
- cmd->frame->header.lun_id != 0) {
+ if (target_id >= MFI_MAX_LD || lun_id != 0) {
trace_megasas_scsi_target_not_present(
- mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical,
- cmd->frame->header.target_id, cmd->frame->header.lun_id);
+ mfi_frame_desc[frame_cmd], is_logical, target_id, lun_id);
return MFI_STAT_DEVICE_NOT_FOUND;
}
}
- sdev = scsi_device_find(&s->bus, 0, cmd->frame->header.target_id,
- cmd->frame->header.lun_id);
+ sdev = scsi_device_find(&s->bus, 0, target_id, lun_id);
cmd->iov_size = le32_to_cpu(cmd->frame->header.data_len);
- trace_megasas_handle_scsi(mfi_frame_desc[cmd->frame->header.frame_cmd],
- is_logical, cmd->frame->header.target_id,
- cmd->frame->header.lun_id, sdev, cmd->iov_size);
+ trace_megasas_handle_scsi(mfi_frame_desc[frame_cmd], is_logical,
+ target_id, lun_id, sdev, cmd->iov_size);
if (!sdev || (megasas_is_jbod(s) && is_logical)) {
trace_megasas_scsi_target_not_present(
- mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical,
- cmd->frame->header.target_id, cmd->frame->header.lun_id);
+ mfi_frame_desc[frame_cmd], is_logical, target_id, lun_id);
return MFI_STAT_DEVICE_NOT_FOUND;
}
- if (cmd->frame->header.cdb_len > 16) {
+ if (cdb_len > 16) {
trace_megasas_scsi_invalid_cdb_len(
- mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical,
- cmd->frame->header.target_id, cmd->frame->header.lun_id,
- cmd->frame->header.cdb_len);
+ mfi_frame_desc[frame_cmd], is_logical,
+ target_id, lun_id, cdb_len);
megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE));
cmd->frame->header.scsi_status = CHECK_CONDITION;
s->event_count++;
@@ -1697,12 +1700,10 @@ static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd,
return MFI_STAT_SCSI_DONE_WITH_ERROR;
}
- cmd->req = scsi_req_new(sdev, cmd->index,
- cmd->frame->header.lun_id, cdb, cmd);
+ cmd->req = scsi_req_new(sdev, cmd->index, lun_id, cdb, cmd);
if (!cmd->req) {
trace_megasas_scsi_req_alloc_failed(
- mfi_frame_desc[cmd->frame->header.frame_cmd],
- cmd->frame->header.target_id, cmd->frame->header.lun_id);
+ mfi_frame_desc[frame_cmd], target_id, lun_id);
megasas_write_sense(cmd, SENSE_CODE(NO_SENSE));
cmd->frame->header.scsi_status = BUSY;
s->event_count++;
@@ -1723,43 +1724,41 @@ static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd,
return MFI_STAT_INVALID_STATUS;
}
-static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd)
+static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd)
{
uint32_t lba_count, lba_start_hi, lba_start_lo;
uint64_t lba_start;
- bool is_write = (cmd->frame->header.frame_cmd == MFI_CMD_LD_WRITE);
+ bool is_write = (frame_cmd == MFI_CMD_LD_WRITE);
uint8_t cdb[16];
int len;
struct SCSIDevice *sdev = NULL;
+ int target_id, lun_id, cdb_len;
lba_count = le32_to_cpu(cmd->frame->io.header.data_len);
lba_start_lo = le32_to_cpu(cmd->frame->io.lba_lo);
lba_start_hi = le32_to_cpu(cmd->frame->io.lba_hi);
lba_start = ((uint64_t)lba_start_hi << 32) | lba_start_lo;
- if (cmd->frame->header.target_id < MFI_MAX_LD &&
- cmd->frame->header.lun_id == 0) {
- sdev = scsi_device_find(&s->bus, 0, cmd->frame->header.target_id,
- cmd->frame->header.lun_id);
+ target_id = cmd->frame->header.target_id;
+ lun_id = cmd->frame->header.lun_id;
+ cdb_len = cmd->frame->header.cdb_len;
+
+ if (target_id < MFI_MAX_LD && lun_id == 0) {
+ sdev = scsi_device_find(&s->bus, 0, target_id, lun_id);
}
trace_megasas_handle_io(cmd->index,
- mfi_frame_desc[cmd->frame->header.frame_cmd],
- cmd->frame->header.target_id,
- cmd->frame->header.lun_id,
+ mfi_frame_desc[frame_cmd], target_id, lun_id,
(unsigned long)lba_start, (unsigned long)lba_count);
if (!sdev) {
trace_megasas_io_target_not_present(cmd->index,
- mfi_frame_desc[cmd->frame->header.frame_cmd],
- cmd->frame->header.target_id, cmd->frame->header.lun_id);
+ mfi_frame_desc[frame_cmd], target_id, lun_id);
return MFI_STAT_DEVICE_NOT_FOUND;
}
- if (cmd->frame->header.cdb_len > 16) {
+ if (cdb_len > 16) {
trace_megasas_scsi_invalid_cdb_len(
- mfi_frame_desc[cmd->frame->header.frame_cmd], 1,
- cmd->frame->header.target_id, cmd->frame->header.lun_id,
- cmd->frame->header.cdb_len);
+ mfi_frame_desc[frame_cmd], 1, target_id, lun_id, cdb_len);
megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE));
cmd->frame->header.scsi_status = CHECK_CONDITION;
s->event_count++;
@@ -1776,11 +1775,10 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd)
megasas_encode_lba(cdb, lba_start, lba_count, is_write);
cmd->req = scsi_req_new(sdev, cmd->index,
- cmd->frame->header.lun_id, cdb, cmd);
+ lun_id, cdb, cmd);
if (!cmd->req) {
trace_megasas_scsi_req_alloc_failed(
- mfi_frame_desc[cmd->frame->header.frame_cmd],
- cmd->frame->header.target_id, cmd->frame->header.lun_id);
+ mfi_frame_desc[frame_cmd], target_id, lun_id);
megasas_write_sense(cmd, SENSE_CODE(NO_SENSE));
cmd->frame->header.scsi_status = BUSY;
s->event_count++;
@@ -1797,23 +1795,11 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd)
return MFI_STAT_INVALID_STATUS;
}
-static int megasas_finish_internal_command(MegasasCmd *cmd,
- SCSIRequest *req, size_t resid)
-{
- int retval = MFI_STAT_INVALID_CMD;
-
- if (cmd->frame->header.frame_cmd == MFI_CMD_DCMD) {
- cmd->iov_size -= resid;
- retval = megasas_finish_internal_dcmd(cmd, req);
- }
- return retval;
-}
-
static QEMUSGList *megasas_get_sg_list(SCSIRequest *req)
{
MegasasCmd *cmd = req->hba_private;
- if (cmd->frame->header.frame_cmd == MFI_CMD_DCMD) {
+ if (cmd->dcmd_opcode != -1) {
return NULL;
} else {
return &cmd->qsg;
@@ -1824,18 +1810,16 @@ static void megasas_xfer_complete(SCSIRequest *req, uint32_t len)
{
MegasasCmd *cmd = req->hba_private;
uint8_t *buf;
- uint32_t opcode;
trace_megasas_io_complete(cmd->index, len);
- if (cmd->frame->header.frame_cmd != MFI_CMD_DCMD) {
+ if (cmd->dcmd_opcode != -1) {
scsi_req_continue(req);
return;
}
buf = scsi_req_get_buf(req);
- opcode = le32_to_cpu(cmd->frame->dcmd.opcode);
- if (opcode == MFI_DCMD_PD_GET_INFO && cmd->iov_buf) {
+ if (cmd->dcmd_opcode == MFI_DCMD_PD_GET_INFO && cmd->iov_buf) {
struct mfi_pd_info *info = cmd->iov_buf;
if (info->inquiry_data[0] == 0x7f) {
@@ -1846,7 +1830,7 @@ static void megasas_xfer_complete(SCSIRequest *req, uint32_t len)
memcpy(info->vpd_page83, buf, len);
}
scsi_req_continue(req);
- } else if (opcode == MFI_DCMD_LD_GET_INFO) {
+ } else if (cmd->dcmd_opcode == MFI_DCMD_LD_GET_INFO) {
struct mfi_ld_info *info = cmd->iov_buf;
if (cmd->iov_buf) {
@@ -1868,11 +1852,11 @@ static void megasas_command_complete(SCSIRequest *req, uint32_t status,
return;
}
- if (cmd->req == NULL) {
+ if (cmd->dcmd_opcode != -1) {
/*
* Internal command complete
*/
- cmd_status = megasas_finish_internal_command(cmd, req, resid);
+ cmd_status = megasas_finish_internal_dcmd(cmd, req, resid);
if (cmd_status == MFI_STAT_INVALID_STATUS) {
return;
}
@@ -1943,6 +1927,7 @@ static void megasas_handle_frame(MegasasState *s, uint64_t frame_addr,
{
uint8_t frame_status = MFI_STAT_INVALID_CMD;
uint64_t frame_context;
+ int frame_cmd;
MegasasCmd *cmd;
/*
@@ -1961,7 +1946,8 @@ static void megasas_handle_frame(MegasasState *s, uint64_t frame_addr,
s->event_count++;
return;
}
- switch (cmd->frame->header.frame_cmd) {
+ frame_cmd = cmd->frame->header.frame_cmd;
+ switch (frame_cmd) {
case MFI_CMD_INIT:
frame_status = megasas_init_firmware(s, cmd);
break;
@@ -1972,18 +1958,15 @@ static void megasas_handle_frame(MegasasState *s, uint64_t frame_addr,
frame_status = megasas_handle_abort(s, cmd);
break;
case MFI_CMD_PD_SCSI_IO:
- frame_status = megasas_handle_scsi(s, cmd, 0);
- break;
case MFI_CMD_LD_SCSI_IO:
- frame_status = megasas_handle_scsi(s, cmd, 1);
+ frame_status = megasas_handle_scsi(s, cmd, frame_cmd);
break;
case MFI_CMD_LD_READ:
case MFI_CMD_LD_WRITE:
- frame_status = megasas_handle_io(s, cmd);
+ frame_status = megasas_handle_io(s, cmd, frame_cmd);
break;
default:
- trace_megasas_unhandled_frame_cmd(cmd->index,
- cmd->frame->header.frame_cmd);
+ trace_megasas_unhandled_frame_cmd(cmd->index, frame_cmd);
s->event_count++;
break;
}
diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c
index e41c0314db..d434b3e99a 100644
--- a/hw/scsi/vhost-scsi-common.c
+++ b/hw/scsi/vhost-scsi-common.c
@@ -16,7 +16,6 @@
*/
#include "qemu/osdep.h"
-#include <linux/vhost.h>
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "migration/migration.h"
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
new file mode 100644
index 0000000000..500fa6a067
--- /dev/null
+++ b/hw/scsi/vhost-user-scsi.c
@@ -0,0 +1,205 @@
+/*
+ * vhost-user-scsi host device
+ *
+ * Copyright (c) 2016 Nutanix Inc. All rights reserved.
+ *
+ * Author:
+ * Felipe Franciosi <felipe@nutanix.com>
+ *
+ * This work is largely based on the "vhost-scsi" implementation by:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Nicholas Bellinger <nab@risingtidesystems.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/typedefs.h"
+#include "qom/object.h"
+#include "hw/fw-path-provider.h"
+#include "hw/qdev-core.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-backend.h"
+#include "hw/virtio/vhost-user-scsi.h"
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-access.h"
+#include "chardev/char-fe.h"
+
+/* Features supported by the host application */
+static const int user_feature_bits[] = {
+ VIRTIO_F_NOTIFY_ON_EMPTY,
+ VIRTIO_RING_F_INDIRECT_DESC,
+ VIRTIO_RING_F_EVENT_IDX,
+ VIRTIO_SCSI_F_HOTPLUG,
+ VHOST_INVALID_FEATURE_BIT
+};
+
+static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
+{
+ VHostUserSCSI *s = (VHostUserSCSI *)vdev;
+ VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
+ bool start = (status & VIRTIO_CONFIG_S_DRIVER_OK) && vdev->vm_running;
+
+ if (vsc->dev.started == start) {
+ return;
+ }
+
+ if (start) {
+ int ret;
+
+ ret = vhost_scsi_common_start(vsc);
+ if (ret < 0) {
+ error_report("unable to start vhost-user-scsi: %s", strerror(-ret));
+ exit(1);
+ }
+ } else {
+ vhost_scsi_common_stop(vsc);
+ }
+}
+
+static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+}
+
+static void vhost_user_scsi_realize(DeviceState *dev, Error **errp)
+{
+ VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
+ VHostUserSCSI *s = VHOST_USER_SCSI(dev);
+ VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
+ Error *err = NULL;
+ int ret;
+
+ if (!vs->conf.chardev.chr) {
+ error_setg(errp, "vhost-user-scsi: missing chardev");
+ return;
+ }
+
+ virtio_scsi_common_realize(dev, vhost_dummy_handle_output,
+ vhost_dummy_handle_output,
+ vhost_dummy_handle_output, &err);
+ if (err != NULL) {
+ error_propagate(errp, err);
+ return;
+ }
+
+ vsc->dev.nvqs = 2 + vs->conf.num_queues;
+ vsc->dev.vqs = g_new(struct vhost_virtqueue, vsc->dev.nvqs);
+ vsc->dev.vq_index = 0;
+ vsc->dev.backend_features = 0;
+
+ ret = vhost_dev_init(&vsc->dev, (void *)&vs->conf.chardev,
+ VHOST_BACKEND_TYPE_USER, 0);
+ if (ret < 0) {
+ error_setg(errp, "vhost-user-scsi: vhost initialization failed: %s",
+ strerror(-ret));
+ return;
+ }
+
+ /* Channel and LUN are both 0 for a bootable vhost-user-scsi disk */
+ vsc->channel = 0;
+ vsc->lun = 0;
+ vsc->target = vs->conf.boot_tpgt;
+}
+
+static void vhost_user_scsi_unrealize(DeviceState *dev, Error **errp)
+{
+ VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+ VHostUserSCSI *s = VHOST_USER_SCSI(dev);
+ VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
+
+ /* This will stop the vhost backend. */
+ vhost_user_scsi_set_status(vdev, 0);
+
+ vhost_dev_cleanup(&vsc->dev);
+ g_free(vsc->dev.vqs);
+
+ virtio_scsi_common_unrealize(dev, errp);
+}
+
+static uint64_t vhost_user_scsi_get_features(VirtIODevice *vdev,
+ uint64_t features, Error **errp)
+{
+ VHostUserSCSI *s = VHOST_USER_SCSI(vdev);
+
+ /* Turn on predefined features supported by this device */
+ features |= s->host_features;
+
+ return vhost_scsi_common_get_features(vdev, features, errp);
+}
+
+static Property vhost_user_scsi_properties[] = {
+ DEFINE_PROP_CHR("chardev", VirtIOSCSICommon, conf.chardev),
+ DEFINE_PROP_UINT32("boot_tpgt", VirtIOSCSICommon, conf.boot_tpgt, 0),
+ DEFINE_PROP_UINT32("num_queues", VirtIOSCSICommon, conf.num_queues, 1),
+ DEFINE_PROP_UINT32("max_sectors", VirtIOSCSICommon, conf.max_sectors,
+ 0xFFFF),
+ DEFINE_PROP_UINT32("cmd_per_lun", VirtIOSCSICommon, conf.cmd_per_lun, 128),
+ DEFINE_PROP_BIT64("hotplug", VHostUserSCSI, host_features,
+ VIRTIO_SCSI_F_HOTPLUG,
+ true),
+ DEFINE_PROP_BIT64("param_change", VHostUserSCSI, host_features,
+ VIRTIO_SCSI_F_CHANGE,
+ true),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static const VMStateDescription vmstate_vhost_scsi = {
+ .name = "virtio-scsi",
+ .minimum_version_id = 1,
+ .version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_VIRTIO_DEVICE,
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static void vhost_user_scsi_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+ FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(klass);
+
+ dc->props = vhost_user_scsi_properties;
+ dc->vmsd = &vmstate_vhost_scsi;
+ set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+ vdc->realize = vhost_user_scsi_realize;
+ vdc->unrealize = vhost_user_scsi_unrealize;
+ vdc->get_features = vhost_user_scsi_get_features;
+ vdc->set_config = vhost_scsi_common_set_config;
+ vdc->set_status = vhost_user_scsi_set_status;
+ fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path;
+}
+
+static void vhost_user_scsi_instance_init(Object *obj)
+{
+ VHostSCSICommon *vsc = VHOST_SCSI_COMMON(obj);
+
+ vsc->feature_bits = user_feature_bits;
+
+ /* Add the bootindex property for this object */
+ device_add_bootindex_property(obj, &vsc->bootindex, "bootindex", NULL,
+ DEVICE(vsc), NULL);
+}
+
+static const TypeInfo vhost_user_scsi_info = {
+ .name = TYPE_VHOST_USER_SCSI,
+ .parent = TYPE_VHOST_SCSI_COMMON,
+ .instance_size = sizeof(VHostUserSCSI),
+ .class_init = vhost_user_scsi_class_init,
+ .instance_init = vhost_user_scsi_instance_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_FW_PATH_PROVIDER },
+ { }
+ },
+};
+
+static void virtio_register_types(void)
+{
+ type_register_static(&vhost_user_scsi_info);
+}
+
+type_init(virtio_register_types)
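
The realize path above wires all three virtqueue handlers to vhost_dummy_handle_output. A minimal sketch of that helper (assumed to be defined earlier in this file, outside the quoted context): the handler is deliberately empty, because the external vhost-user slave processes the rings and QEMU never services these virtqueues itself.

    static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq)
    {
        /* Intentionally a no-op: the vhost-user backend owns the
         * virtqueues, so there is nothing for QEMU to process here. */
    }
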
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index f9b7244808..20d6a08616 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -2135,6 +2135,61 @@ static const TypeInfo vhost_scsi_pci_info = {
};
#endif
+#ifdef CONFIG_LINUX
+/* vhost-user-scsi-pci */
+static Property vhost_user_scsi_pci_properties[] = {
+ DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
+ DEV_NVECTORS_UNSPECIFIED),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vhost_user_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+ VHostUserSCSIPCI *dev = VHOST_USER_SCSI_PCI(vpci_dev);
+ DeviceState *vdev = DEVICE(&dev->vdev);
+ VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev);
+
+ if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
+ vpci_dev->nvectors = vs->conf.num_queues + 3;
+ }
+
+ qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));
+ object_property_set_bool(OBJECT(vdev), true, "realized", errp);
+}
+
+static void vhost_user_scsi_pci_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
+ PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
+ k->realize = vhost_user_scsi_pci_realize;
+ set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+ dc->props = vhost_user_scsi_pci_properties;
+ pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+ pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_SCSI;
+ pcidev_k->revision = 0x00;
+ pcidev_k->class_id = PCI_CLASS_STORAGE_SCSI;
+}
+
+static void vhost_user_scsi_pci_instance_init(Object *obj)
+{
+ VHostUserSCSIPCI *dev = VHOST_USER_SCSI_PCI(obj);
+
+ virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+ TYPE_VHOST_USER_SCSI);
+ object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev),
+ "bootindex", &error_abort);
+}
+
+static const TypeInfo vhost_user_scsi_pci_info = {
+ .name = TYPE_VHOST_USER_SCSI_PCI,
+ .parent = TYPE_VIRTIO_PCI,
+ .instance_size = sizeof(VHostUserSCSIPCI),
+ .instance_init = vhost_user_scsi_pci_instance_init,
+ .class_init = vhost_user_scsi_pci_class_init,
+};
+#endif
+
/* vhost-vsock-pci */
#ifdef CONFIG_VHOST_VSOCK
@@ -2612,6 +2667,9 @@ static void virtio_pci_register_types(void)
#ifdef CONFIG_VHOST_SCSI
type_register_static(&vhost_scsi_pci_info);
#endif
+#ifdef CONFIG_LINUX
+ type_register_static(&vhost_user_scsi_pci_info);
+#endif
#ifdef CONFIG_VHOST_VSOCK
type_register_static(&vhost_vsock_pci_info);
#endif
diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index b095dfc6d9..69f5959623 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -26,6 +26,7 @@
#include "hw/virtio/virtio-input.h"
#include "hw/virtio/virtio-gpu.h"
#include "hw/virtio/virtio-crypto.h"
+#include "hw/virtio/vhost-user-scsi.h"
#ifdef CONFIG_VIRTFS
#include "hw/9pfs/virtio-9p.h"
@@ -44,6 +45,7 @@ typedef struct VirtIOBalloonPCI VirtIOBalloonPCI;
typedef struct VirtIOSerialPCI VirtIOSerialPCI;
typedef struct VirtIONetPCI VirtIONetPCI;
typedef struct VHostSCSIPCI VHostSCSIPCI;
+typedef struct VHostUserSCSIPCI VHostUserSCSIPCI;
typedef struct VirtIORngPCI VirtIORngPCI;
typedef struct VirtIOInputPCI VirtIOInputPCI;
typedef struct VirtIOInputHIDPCI VirtIOInputHIDPCI;
@@ -230,6 +232,15 @@ struct VHostSCSIPCI {
};
#endif
+#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci"
+#define VHOST_USER_SCSI_PCI(obj) \
+ OBJECT_CHECK(VHostUserSCSIPCI, (obj), TYPE_VHOST_USER_SCSI_PCI)
+
+struct VHostUserSCSIPCI {
+ VirtIOPCIProxy parent_obj;
+ VHostUserSCSI vdev;
+};
+
/*
* virtio-blk-pci: This extends VirtioPCIProxy.
*/
diff --git a/include/block/accounting.h b/include/block/accounting.h
index 20891639d5..b833d26d6c 100644
--- a/include/block/accounting.h
+++ b/include/block/accounting.h
@@ -26,8 +26,10 @@
#define BLOCK_ACCOUNTING_H
#include "qemu/timed-average.h"
+#include "qemu/thread.h"
typedef struct BlockAcctTimedStats BlockAcctTimedStats;
+typedef struct BlockAcctStats BlockAcctStats;
enum BlockAcctType {
BLOCK_ACCT_READ,
@@ -37,12 +39,14 @@ enum BlockAcctType {
};
struct BlockAcctTimedStats {
+ BlockAcctStats *stats;
TimedAverage latency[BLOCK_MAX_IOTYPE];
unsigned interval_length; /* in seconds */
QSLIST_ENTRY(BlockAcctTimedStats) entries;
};
-typedef struct BlockAcctStats {
+struct BlockAcctStats {
+ QemuMutex lock;
uint64_t nr_bytes[BLOCK_MAX_IOTYPE];
uint64_t nr_ops[BLOCK_MAX_IOTYPE];
uint64_t invalid_ops[BLOCK_MAX_IOTYPE];
@@ -53,7 +57,7 @@ typedef struct BlockAcctStats {
QSLIST_HEAD(, BlockAcctTimedStats) intervals;
bool account_invalid;
bool account_failed;
-} BlockAcctStats;
+};
typedef struct BlockAcctCookie {
int64_t bytes;
@@ -61,7 +65,8 @@ typedef struct BlockAcctCookie {
enum BlockAcctType type;
} BlockAcctCookie;
-void block_acct_init(BlockAcctStats *stats, bool account_invalid,
+void block_acct_init(BlockAcctStats *stats);
+void block_acct_setup(BlockAcctStats *stats, bool account_invalid,
bool account_failed);
void block_acct_cleanup(BlockAcctStats *stats);
void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length);
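
For orientation, a minimal sketch of the split API declared above (the converted callers live in block/accounting.c and block/block-backend.c within this series; "account_invalid" and "account_failed" stand for the caller's configuration flags): block_acct_init() now only initializes the structure, including the new lock, while the accounting policy is applied later through block_acct_setup().

    BlockAcctStats stats;

    block_acct_init(&stats);          /* initializes the new stats.lock */

    /* once the device options are known: */
    block_acct_setup(&stats, account_invalid, account_failed);
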
diff --git a/include/block/block.h b/include/block/block.h
index 9b355e92d8..a4f09df95a 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -402,7 +402,8 @@ void bdrv_drain_all(void);
* block_job_defer_to_main_loop for how to do it). \
*/ \
assert(!bs_->wakeup); \
- bs_->wakeup = true; \
+ /* Set bs->wakeup before evaluating cond. */ \
+ atomic_mb_set(&bs_->wakeup, true); \
while (busy_) { \
if ((cond)) { \
waited_ = busy_ = true; \
@@ -414,7 +415,7 @@ void bdrv_drain_all(void);
waited_ |= busy_; \
} \
} \
- bs_->wakeup = false; \
+ atomic_set(&bs_->wakeup, false); \
} \
waited_; })
diff --git a/include/block/block_int.h b/include/block/block_int.h
index cb78c4fa82..748970055e 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -29,6 +29,7 @@
#include "qemu/option.h"
#include "qemu/queue.h"
#include "qemu/coroutine.h"
+#include "qemu/stats64.h"
#include "qemu/timer.h"
#include "qapi-types.h"
#include "qemu/hbitmap.h"
@@ -595,11 +596,6 @@ struct BlockDriverState {
/* Protected by AioContext lock */
- /* If true, copy read backing sectors into image. Can be >1 if more
- * than one client has requested copy-on-read.
- */
- int copy_on_read;
-
/* If we are reading a disk image, give its size in sectors.
* Generally read-only; it is written to by load_snapshot and
* save_snapshot, but the block layer is quiescent during those.
@@ -609,34 +605,57 @@ struct BlockDriverState {
/* Callback before write request is processed */
NotifierWithReturnList before_write_notifiers;
- /* number of in-flight requests; overall and serialising */
- unsigned int in_flight;
- unsigned int serialising_in_flight;
+ /* threshold limit for writes, in bytes. "High water mark". */
+ uint64_t write_threshold_offset;
+ NotifierWithReturn write_threshold_notifier;
- bool wakeup;
+ /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
+ * Reading from the list can be done with either the BQL or the
+ * dirty_bitmap_mutex. Modifying a bitmap only requires
+ * dirty_bitmap_mutex. */
+ QemuMutex dirty_bitmap_mutex;
+ QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
/* Offset after the highest byte written to */
- uint64_t wr_highest_offset;
+ Stat64 wr_highest_offset;
- /* threshold limit for writes, in bytes. "High water mark". */
- uint64_t write_threshold_offset;
- NotifierWithReturn write_threshold_notifier;
+ /* If true, copy read backing sectors into image. Can be >1 if more
+ * than one client has requested copy-on-read. Accessed with atomic
+ * ops.
+ */
+ int copy_on_read;
- /* counter for nested bdrv_io_plug */
- unsigned io_plugged;
+ /* number of in-flight requests; overall and serialising.
+ * Accessed with atomic ops.
+ */
+ unsigned int in_flight;
+ unsigned int serialising_in_flight;
- QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
- CoQueue flush_queue; /* Serializing flush queue */
- bool active_flush_req; /* Flush request in flight? */
- unsigned int write_gen; /* Current data generation */
- unsigned int flushed_gen; /* Flushed write generation */
+ /* Internal to BDRV_POLL_WHILE and bdrv_wakeup. Accessed with atomic
+ * ops.
+ */
+ bool wakeup;
- QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
+ /* counter for nested bdrv_io_plug.
+ * Accessed with atomic ops.
+ */
+ unsigned io_plugged;
/* do we need to tell the guest if we have a volatile write cache? */
int enable_write_cache;
+ /* Accessed with atomic ops. */
int quiesce_counter;
+ unsigned int write_gen; /* Current data generation */
+
+ /* Protected by reqs_lock. */
+ CoMutex reqs_lock;
+ QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
+ CoQueue flush_queue; /* Serializing flush queue */
+ bool active_flush_req; /* Flush request in flight? */
+
+ /* Only read/written by whoever has set active_flush_req to true. */
+ unsigned int flushed_gen; /* Flushed write generation */
};
struct BlockBackendRootState {
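
The regrouped fields above fall into three access regimes: plain atomics, the new dirty_bitmap_mutex, and the new reqs_lock CoMutex. A hedged sketch of the atomic regime (illustrative only; the real accessors are in block/io.c, which this series converts):

    atomic_inc(&bs->in_flight);          /* no AioContext lock needed */
    /* issue and complete the request here */
    atomic_dec(&bs->in_flight);
    if (atomic_read(&bs->wakeup)) {
        /* a BDRV_POLL_WHILE() caller is waiting; notify it */
    }
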
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 9dea14ba03..ad6558af56 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -36,8 +36,6 @@ bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap);
int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap);
DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
-int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
- int64_t sector);
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t cur_sector, int64_t nr_sectors);
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
@@ -45,6 +43,9 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap, int64_t sector,
int nb_sectors);
+int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap, int64_t sector,
+ int nb_sectors);
void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap, int64_t sector,
int nb_sectors);
@@ -52,11 +53,6 @@ BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap);
BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap,
uint64_t first_sector);
void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter);
-int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter);
-void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num);
-int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
-int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap);
-void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap,
uint64_t start, uint64_t count);
@@ -72,4 +68,19 @@ void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
bool finish);
void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
+/* Functions that require manual locking. */
+void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap);
+void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap);
+int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+ int64_t sector);
+void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int64_t nr_sectors);
+void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int64_t nr_sectors);
+int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter);
+void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num);
+int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
+int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap);
+void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
+
#endif
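
A minimal sketch of the manual-locking pattern these declarations enable (the migration/block.c hunk later in this patch follows the same shape; "bs", "bitmap", "sector" and "nr_sectors" are assumed to be set up by the caller): test and clear a dirty range under one critical section, so a concurrent writer cannot re-dirty it in between.

    bdrv_dirty_bitmap_lock(bitmap);
    if (bdrv_get_dirty_locked(bs, bitmap, sector)) {
        /* consume the range while still holding dirty_bitmap_mutex */
        bdrv_reset_dirty_bitmap_locked(bitmap, sector, nr_sectors);
    }
    bdrv_dirty_bitmap_unlock(bitmap);
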
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 416257abca..6d75d5a670 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -123,12 +123,8 @@ enum {
* aren't overflowing some other buffer. */
#define NBD_MAX_NAME_SIZE 256
-ssize_t nbd_wr_syncv(QIOChannel *ioc,
- struct iovec *iov,
- size_t niov,
- size_t length,
- bool do_read,
- Error **errp);
+ssize_t nbd_rwv(QIOChannel *ioc, struct iovec *iov, size_t niov, size_t length,
+ bool do_read, Error **errp);
int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
QCryptoTLSCreds *tlscreds, const char *hostname,
QIOChannel **outioc,
@@ -162,7 +158,7 @@ void nbd_client_new(NBDExport *exp,
QIOChannelSocket *sioc,
QCryptoTLSCreds *tlscreds,
const char *tlsaclname,
- void (*close)(NBDClient *));
+ void (*close_fn)(NBDClient *, bool));
void nbd_client_get(NBDClient *client);
void nbd_client_put(NBDClient *client);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 80e605a96a..37f8e78e71 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -456,6 +456,26 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
bool share,
const char *path,
Error **errp);
+
+/**
+ * memory_region_init_ram_from_fd: Initialize RAM memory region with a
+ * mmap-ed backend.
+ *
+ * @mr: the #MemoryRegion to be initialized.
+ * @owner: the object that tracks the region's reference count
+ * @name: the name of the region.
+ * @size: size of the region.
+ * @share: %true if memory must be mmaped with the MAP_SHARED flag
+ * @fd: the fd to mmap.
+ * @errp: pointer to Error*, to store an error if it happens.
+ */
+void memory_region_init_ram_from_fd(MemoryRegion *mr,
+ struct Object *owner,
+ const char *name,
+ uint64_t size,
+ bool share,
+ int fd,
+ Error **errp);
#endif
/**
@@ -805,17 +825,6 @@ static inline bool memory_region_is_rom(MemoryRegion *mr)
int memory_region_get_fd(MemoryRegion *mr);
/**
- * memory_region_set_fd: Mark a RAM memory region as backed by a
- * file descriptor.
- *
- * This function is typically used after memory_region_init_ram_ptr().
- *
- * @mr: the memory region being queried.
- * @fd: the file descriptor that backs @mr.
- */
-void memory_region_set_fd(MemoryRegion *mr, int fd);
-
-/**
* memory_region_from_host: Convert a pointer into a RAM memory region
* and an offset within it.
*
diff --git a/include/exec/poison.h b/include/exec/poison.h
index 3ca7929cce..5ffed4d56e 100644
--- a/include/exec/poison.h
+++ b/include/exec/poison.h
@@ -12,17 +12,28 @@
#pragma GCC poison TARGET_CRIS
#pragma GCC poison TARGET_LM32
#pragma GCC poison TARGET_M68K
+#pragma GCC poison TARGET_MICROBLAZE
#pragma GCC poison TARGET_MIPS
+#pragma GCC poison TARGET_ABI_MIPSO32
#pragma GCC poison TARGET_MIPS64
+#pragma GCC poison TARGET_ABI_MIPSN64
+#pragma GCC poison TARGET_MOXIE
+#pragma GCC poison TARGET_NIOS2
#pragma GCC poison TARGET_OPENRISC
#pragma GCC poison TARGET_PPC
#pragma GCC poison TARGET_PPCEMB
#pragma GCC poison TARGET_PPC64
#pragma GCC poison TARGET_ABI32
+#pragma GCC poison TARGET_S390X
#pragma GCC poison TARGET_SH4
#pragma GCC poison TARGET_SPARC
#pragma GCC poison TARGET_SPARC64
+#pragma GCC poison TARGET_TRICORE
+#pragma GCC poison TARGET_UNICORE32
+#pragma GCC poison TARGET_XTENSA
+#pragma GCC poison TARGET_NAME
+#pragma GCC poison TARGET_SUPPORTS_MTTCG
#pragma GCC poison TARGET_WORDS_BIGENDIAN
#pragma GCC poison BSWAP_NEEDED
@@ -50,5 +61,25 @@
#pragma GCC poison CPU_INTERRUPT_TGT_INT_1
#pragma GCC poison CPU_INTERRUPT_TGT_INT_2
+#pragma GCC poison CONFIG_ALPHA_DIS
+#pragma GCC poison CONFIG_ARM_A64_DIS
+#pragma GCC poison CONFIG_ARM_DIS
+#pragma GCC poison CONFIG_CRIS_DIS
+#pragma GCC poison CONFIG_I386_DIS
+#pragma GCC poison CONFIG_LM32_DIS
+#pragma GCC poison CONFIG_M68K_DIS
+#pragma GCC poison CONFIG_MICROBLAZE_DIS
+#pragma GCC poison CONFIG_MIPS_DIS
+#pragma GCC poison CONFIG_MOXIE_DIS
+#pragma GCC poison CONFIG_NIOS2_DIS
+#pragma GCC poison CONFIG_PPC_DIS
+#pragma GCC poison CONFIG_S390_DIS
+#pragma GCC poison CONFIG_SH4_DIS
+#pragma GCC poison CONFIG_SPARC_DIS
+#pragma GCC poison CONFIG_XTENSA_DIS
+
+#pragma GCC poison CONFIG_LINUX_USER
+#pragma GCC poison CONFIG_VHOST_NET
+
#endif
#endif
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 140efa840c..73d1bea8b6 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -65,6 +65,9 @@ unsigned long last_ram_page(void);
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
bool share, const char *mem_path,
Error **errp);
+RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
+ bool share, int fd,
+ Error **errp);
RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp);
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index d071c9c0e9..233216abdc 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -384,6 +384,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
#define PC_COMPAT_2_9 \
HW_COMPAT_2_9 \
+ {\
+ .driver = "mch",\
+ .property = "extended-tseg-mbytes",\
+ .value = stringify(0),\
+ },\
#define PC_COMPAT_2_8 \
HW_COMPAT_2_8 \
diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h
index 53b6760c16..58983c00b3 100644
--- a/include/hw/pci-host/q35.h
+++ b/include/hw/pci-host/q35.h
@@ -60,6 +60,7 @@ typedef struct MCHPCIState {
uint64_t above_4g_mem_size;
uint64_t pci_hole64_size;
uint32_t short_root_bus;
+ uint16_t ext_tseg_mbytes;
} MCHPCIState;
typedef struct Q35PCIHost {
@@ -91,6 +92,11 @@ typedef struct Q35PCIHost {
/* D0:F0 configuration space */
#define MCH_HOST_BRIDGE_REVISION_DEFAULT 0x0
+#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES 0x50
+#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_SIZE 2
+#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY 0xffff
+#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_MAX 0xfff
+
#define MCH_HOST_BRIDGE_PCIEXBAR 0x60 /* 64bit register */
#define MCH_HOST_BRIDGE_PCIEXBAR_SIZE 8 /* 64bit register */
#define MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT 0xb0000000
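
The three EXT_TSEG_MBYTES constants above describe a small query protocol for guest firmware rather than anything QEMU calls itself. A hedged sketch of the firmware side (not part of this patch; pci_config_writew()/pci_config_readw() stand in for whatever config-space accessors the firmware provides): write the query value, read the register back, and treat any value other than the query pattern as the extended TSEG size in MiB.

    uint16_t mbytes;

    pci_config_writew(mch_bdf, MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
                      MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY);
    mbytes = pci_config_readw(mch_bdf, MCH_HOST_BRIDGE_EXT_TSEG_MBYTES);
    if (mbytes != MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY) {
        tseg_size = (uint64_t)mbytes << 20;   /* register reports MiB */
    }
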
diff --git a/include/hw/virtio/vhost-user-scsi.h b/include/hw/virtio/vhost-user-scsi.h
new file mode 100644
index 0000000000..01861f78d0
--- /dev/null
+++ b/include/hw/virtio/vhost-user-scsi.h
@@ -0,0 +1,35 @@
+/*
+ * vhost-user-scsi host device
+ *
+ * Copyright (c) 2016 Nutanix Inc. All rights reserved.
+ *
+ * Author:
+ * Felipe Franciosi <felipe@nutanix.com>
+ *
+ * This file is largely based on "vhost-scsi.h" by:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef VHOST_USER_SCSI_H
+#define VHOST_USER_SCSI_H
+
+#include "qemu-common.h"
+#include "hw/qdev.h"
+#include "hw/virtio/virtio-scsi.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-scsi-common.h"
+
+#define TYPE_VHOST_USER_SCSI "vhost-user-scsi"
+#define VHOST_USER_SCSI(obj) \
+ OBJECT_CHECK(VHostUserSCSI, (obj), TYPE_VHOST_USER_SCSI)
+
+typedef struct VHostUserSCSI {
+ VHostSCSICommon parent_obj;
+ uint64_t host_features;
+} VHostUserSCSI;
+
+#endif /* VHOST_USER_SCSI_H */
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index eac2013ddd..de6ae5a9f6 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -21,6 +21,7 @@
#include "hw/virtio/virtio.h"
#include "hw/pci/pci.h"
#include "hw/scsi/scsi.h"
+#include "chardev/char-fe.h"
#include "sysemu/iothread.h"
#define TYPE_VIRTIO_SCSI_COMMON "virtio-scsi-common"
@@ -53,6 +54,7 @@ struct VirtIOSCSIConf {
char *vhostfd;
char *wwpn;
#endif
+ CharBackend chardev;
uint32_t boot_tpgt;
IOThread *iothread;
};
diff --git a/include/qemu/stats64.h b/include/qemu/stats64.h
new file mode 100644
index 0000000000..4a357b3e9d
--- /dev/null
+++ b/include/qemu/stats64.h
@@ -0,0 +1,193 @@
+/*
+ * Atomic operations on 64-bit quantities.
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_STATS64_H
+#define QEMU_STATS64_H 1
+
+#include "qemu/atomic.h"
+
+/* This provides atomic operations on a 64-bit type, using a reader-writer
+ * spinlock on architectures that do not have 64-bit accesses. Even on
+ * those architectures, it tries hard not to take the lock.
+ */
+
+typedef struct Stat64 {
+#ifdef CONFIG_ATOMIC64
+ uint64_t value;
+#else
+ uint32_t low, high;
+ uint32_t lock;
+#endif
+} Stat64;
+
+#ifdef CONFIG_ATOMIC64
+static inline void stat64_init(Stat64 *s, uint64_t value)
+{
+ /* This is not guaranteed to be atomic! */
+ *s = (Stat64) { value };
+}
+
+static inline uint64_t stat64_get(const Stat64 *s)
+{
+ return atomic_read__nocheck(&s->value);
+}
+
+static inline void stat64_add(Stat64 *s, uint64_t value)
+{
+ atomic_add(&s->value, value);
+}
+
+static inline void stat64_min(Stat64 *s, uint64_t value)
+{
+ uint64_t orig = atomic_read__nocheck(&s->value);
+ while (orig > value) {
+ orig = atomic_cmpxchg__nocheck(&s->value, orig, value);
+ }
+}
+
+static inline void stat64_max(Stat64 *s, uint64_t value)
+{
+ uint64_t orig = atomic_read__nocheck(&s->value);
+ while (orig < value) {
+ orig = atomic_cmpxchg__nocheck(&s->value, orig, value);
+ }
+}
+#else
+uint64_t stat64_get(const Stat64 *s);
+bool stat64_min_slow(Stat64 *s, uint64_t value);
+bool stat64_max_slow(Stat64 *s, uint64_t value);
+bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high);
+
+static inline void stat64_init(Stat64 *s, uint64_t value)
+{
+ /* This is not guaranteed to be atomic! */
+ *s = (Stat64) { .low = value, .high = value >> 32, .lock = 0 };
+}
+
+static inline void stat64_add(Stat64 *s, uint64_t value)
+{
+ uint32_t low, high;
+ high = value >> 32;
+ low = (uint32_t) value;
+ if (!low) {
+ if (high) {
+ atomic_add(&s->high, high);
+ }
+ return;
+ }
+
+ for (;;) {
+ uint32_t orig = s->low;
+ uint32_t result = orig + low;
+ uint32_t old;
+
+ if (result < low || high) {
+ /* If the high part is affected, take the lock. */
+ if (stat64_add32_carry(s, low, high)) {
+ return;
+ }
+ continue;
+ }
+
+ /* No carry, try with a 32-bit cmpxchg. The result is independent of
+ * the high 32 bits, so it can race just fine with stat64_add32_carry
+ * and even stat64_get!
+ */
+ old = atomic_cmpxchg(&s->low, orig, result);
+ if (orig == old) {
+ return;
+ }
+ }
+}
+
+static inline void stat64_min(Stat64 *s, uint64_t value)
+{
+ uint32_t low, high;
+ uint32_t orig_low, orig_high;
+
+ high = value >> 32;
+ low = (uint32_t) value;
+ do {
+ orig_high = atomic_read(&s->high);
+ if (orig_high < high) {
+ return;
+ }
+
+ if (orig_high == high) {
+ /* High 32 bits are equal. Read low after high, otherwise we
+ * can get a false positive (e.g. 0x1235,0x0000 changes to
+ * 0x1234,0x8000 and we read it as 0x1234,0x0000). Pairs with
+ * the write barrier in stat64_min_slow.
+ */
+ smp_rmb();
+ orig_low = atomic_read(&s->low);
+ if (orig_low <= low) {
+ return;
+ }
+
+ /* See if we were lucky and a writer raced against us. The
+ * barrier is theoretically unnecessary, but if we remove it
+ * we may miss being lucky.
+ */
+ smp_rmb();
+ orig_high = atomic_read(&s->high);
+ if (orig_high < high) {
+ return;
+ }
+ }
+
+ /* If the value changes in any way, we have to take the lock. */
+ } while (!stat64_min_slow(s, value));
+}
+
+static inline void stat64_max(Stat64 *s, uint64_t value)
+{
+ uint32_t low, high;
+ uint32_t orig_low, orig_high;
+
+ high = value >> 32;
+ low = (uint32_t) value;
+ do {
+ orig_high = atomic_read(&s->high);
+ if (orig_high > high) {
+ return;
+ }
+
+ if (orig_high == high) {
+ /* High 32 bits are equal. Read low after high, otherwise we
+ * can get a false positive (e.g. 0x1234,0x8000 changes to
+ * 0x1235,0x0000 and we read it as 0x1235,0x8000). Pairs with
+ * the write barrier in stat64_max_slow.
+ */
+ smp_rmb();
+ orig_low = atomic_read(&s->low);
+ if (orig_low >= low) {
+ return;
+ }
+
+ /* See if we were lucky and a writer raced against us. The
+ * barrier is theoretically unnecessary, but if we remove it
+ * we may miss being lucky.
+ */
+ smp_rmb();
+ orig_high = atomic_read(&s->high);
+ if (orig_high > high) {
+ return;
+ }
+ }
+
+ /* If the value changes in any way, we have to take the lock. */
+ } while (!stat64_max_slow(s, value));
+}
+
+#endif
+
+#endif
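
A minimal usage sketch of the Stat64 API above (the per-device counter is illustrative; the real users in this series are the block-layer statistics): updates are lock-free on CONFIG_ATOMIC64 hosts and fall back to the spinlock-protected slow paths elsewhere.

    #include "qemu/stats64.h"

    static Stat64 bytes_written;     /* updated concurrently from any thread */
    static Stat64 highest_offset;    /* running maximum */

    static void account_write(uint64_t offset, uint64_t len)
    {
        stat64_add(&bytes_written, len);
        stat64_max(&highest_offset, offset + len);
    }

    static uint64_t query_total(void)
    {
        return stat64_get(&bytes_written);   /* torn-free, possibly stale */
    }
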
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 840ad6134c..999eb2333a 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -72,15 +72,13 @@ typedef struct BlockDevOps {
* fields that must be public. This is in particular for QLIST_ENTRY() and
* friends so that BlockBackends can be kept in lists outside block-backend.c */
typedef struct BlockBackendPublic {
- /* I/O throttling has its own locking, but also some fields are
- * protected by the AioContext lock.
- */
-
- /* Protected by AioContext lock. */
+ /* throttled_reqs_lock protects the CoQueues for throttled requests. */
+ CoMutex throttled_reqs_lock;
CoQueue throttled_reqs[2];
/* Nonzero if the I/O limits are currently being ignored; generally
- * it is zero. */
+ * it is zero. Accessed with atomic operations.
+ */
unsigned int io_limits_disabled;
/* The following fields are protected by the ThrottleGroup lock.
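
A hedged sketch of the pattern the new throttled_reqs_lock implies (the actual conversion is in block/throttle-groups.c, listed in this series; "blkp", "is_write" and "must_wait" are placeholders): qemu_co_queue_wait() drops the CoMutex while the coroutine sleeps and re-takes it on wakeup, so the queues are only ever touched under the lock.

    qemu_co_mutex_lock(&blkp->throttled_reqs_lock);
    while (must_wait) {
        qemu_co_queue_wait(&blkp->throttled_reqs[is_write],
                           &blkp->throttled_reqs_lock);
    }
    qemu_co_mutex_unlock(&blkp->throttled_reqs_lock);
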
diff --git a/include/ui/spice-display.h b/include/ui/spice-display.h
index 184d4c373a..4ba9444dba 100644
--- a/include/ui/spice-display.h
+++ b/include/ui/spice-display.h
@@ -140,6 +140,8 @@ struct SimpleSpiceCursor {
QXLCursor cursor;
};
+extern bool spice_opengl;
+
int qemu_spice_rect_is_empty(const QXLRect* r);
void qemu_spice_rect_union(QXLRect *dest, const QXLRect *r);
diff --git a/memory.c b/memory.c
index 0ddc4cc28d..e08fa0ae6c 100644
--- a/memory.c
+++ b/memory.c
@@ -1397,6 +1397,22 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, errp);
mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
}
+
+void memory_region_init_ram_from_fd(MemoryRegion *mr,
+ struct Object *owner,
+ const char *name,
+ uint64_t size,
+ bool share,
+ int fd,
+ Error **errp)
+{
+ memory_region_init(mr, owner, name, size);
+ mr->ram = true;
+ mr->terminates = true;
+ mr->destructor = memory_region_destructor_ram;
+ mr->ram_block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp);
+ mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
+}
#endif
void memory_region_init_ram_ptr(MemoryRegion *mr,
@@ -1835,16 +1851,6 @@ int memory_region_get_fd(MemoryRegion *mr)
return fd;
}
-void memory_region_set_fd(MemoryRegion *mr, int fd)
-{
- rcu_read_lock();
- while (mr->alias) {
- mr = mr->alias;
- }
- mr->ram_block->fd = fd;
- rcu_read_unlock();
-}
-
void *memory_region_get_ram_ptr(MemoryRegion *mr)
{
void *ptr;
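
A usage sketch of the new memory_region_init_ram_from_fd() added above (assumptions: "owner" is the object that keeps the region alive and "fd" refers to an already-sized, mmap-able file such as a memfd; the intended callers are memory backends and vhost-user code elsewhere in the tree):

    MemoryRegion *mr = g_new0(MemoryRegion, 1);
    Error *local_err = NULL;

    memory_region_init_ram_from_fd(mr, owner, "example-ram", region_size,
                                   true /* MAP_SHARED */, fd, &local_err);
    if (local_err) {
        error_report_err(local_err);
    }
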
diff --git a/migration/block.c b/migration/block.c
index 3aae5a375e..7674ae1078 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -341,10 +341,8 @@ static int set_dirty_tracking(void)
int ret;
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
- aio_context_acquire(blk_get_aio_context(bmds->blk));
bmds->dirty_bitmap = bdrv_create_dirty_bitmap(blk_bs(bmds->blk),
BLOCK_SIZE, NULL, NULL);
- aio_context_release(blk_get_aio_context(bmds->blk));
if (!bmds->dirty_bitmap) {
ret = -errno;
goto fail;
@@ -355,9 +353,7 @@ static int set_dirty_tracking(void)
fail:
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
if (bmds->dirty_bitmap) {
- aio_context_acquire(blk_get_aio_context(bmds->blk));
bdrv_release_dirty_bitmap(blk_bs(bmds->blk), bmds->dirty_bitmap);
- aio_context_release(blk_get_aio_context(bmds->blk));
}
}
return ret;
@@ -370,9 +366,7 @@ static void unset_dirty_tracking(void)
BlkMigDevState *bmds;
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
- aio_context_acquire(blk_get_aio_context(bmds->blk));
bdrv_release_dirty_bitmap(blk_bs(bmds->blk), bmds->dirty_bitmap);
- aio_context_release(blk_get_aio_context(bmds->blk));
}
}
@@ -531,13 +525,16 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
} else {
blk_mig_unlock();
}
- if (bdrv_get_dirty(bs, bmds->dirty_bitmap, sector)) {
-
+ bdrv_dirty_bitmap_lock(bmds->dirty_bitmap);
+ if (bdrv_get_dirty_locked(bs, bmds->dirty_bitmap, sector)) {
if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
nr_sectors = total_sectors - sector;
} else {
nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
}
+ bdrv_reset_dirty_bitmap_locked(bmds->dirty_bitmap, sector, nr_sectors);
+ bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap);
+
blk = g_new(BlkMigBlock, 1);
blk->buf = g_malloc(BLOCK_SIZE);
blk->bmds = bmds;
@@ -570,12 +567,12 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
g_free(blk);
}
- bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, sector, nr_sectors);
sector += nr_sectors;
bmds->cur_dirty = sector;
-
break;
}
+
+ bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap);
sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
bmds->cur_dirty = sector;
}
diff --git a/nbd/client.c b/nbd/client.c
index 595d99ed30..b97143fa60 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -86,32 +86,6 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
*/
-/* Discard length bytes from channel. Return -errno on failure and 0 on
- * success*/
-static int drop_sync(QIOChannel *ioc, size_t size, Error **errp)
-{
- ssize_t ret = 0;
- char small[1024];
- char *buffer;
-
- buffer = sizeof(small) >= size ? small : g_malloc(MIN(65536, size));
- while (size > 0) {
- ssize_t count = MIN(65536, size);
- ret = read_sync(ioc, buffer, MIN(65536, size), errp);
-
- if (ret < 0) {
- goto cleanup;
- }
- size -= count;
- }
-
- cleanup:
- if (buffer != small) {
- g_free(buffer);
- }
- return ret;
-}
-
/* Send an option request.
*
* The request is for option @opt, with @data containing @len bytes of
@@ -135,12 +109,12 @@ static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt,
stl_be_p(&req.option, opt);
stl_be_p(&req.length, len);
- if (write_sync(ioc, &req, sizeof(req), errp) < 0) {
+ if (nbd_write(ioc, &req, sizeof(req), errp) < 0) {
error_prepend(errp, "Failed to send option request header");
return -1;
}
- if (len && write_sync(ioc, (char *) data, len, errp) < 0) {
+ if (len && nbd_write(ioc, (char *) data, len, errp) < 0) {
error_prepend(errp, "Failed to send option request data");
return -1;
}
@@ -169,7 +143,7 @@ static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt,
nbd_opt_reply *reply, Error **errp)
{
QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
- if (read_sync(ioc, reply, sizeof(*reply), errp) < 0) {
+ if (nbd_read(ioc, reply, sizeof(*reply), errp) < 0) {
error_prepend(errp, "failed to read option reply");
nbd_send_opt_abort(ioc);
return -1;
@@ -218,7 +192,7 @@ static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply,
goto cleanup;
}
msg = g_malloc(reply->length + 1);
- if (read_sync(ioc, msg, reply->length, errp) < 0) {
+ if (nbd_read(ioc, msg, reply->length, errp) < 0) {
error_prepend(errp, "failed to read option error message");
goto cleanup;
}
@@ -320,7 +294,7 @@ static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match,
nbd_send_opt_abort(ioc);
return -1;
}
- if (read_sync(ioc, &namelen, sizeof(namelen), errp) < 0) {
+ if (nbd_read(ioc, &namelen, sizeof(namelen), errp) < 0) {
error_prepend(errp, "failed to read option name length");
nbd_send_opt_abort(ioc);
return -1;
@@ -333,7 +307,7 @@ static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match,
return -1;
}
if (namelen != strlen(want)) {
- if (drop_sync(ioc, len, errp) < 0) {
+ if (nbd_drop(ioc, len, errp) < 0) {
error_prepend(errp, "failed to skip export name with wrong length");
nbd_send_opt_abort(ioc);
return -1;
@@ -342,14 +316,14 @@ static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match,
}
assert(namelen < sizeof(name));
- if (read_sync(ioc, name, namelen, errp) < 0) {
+ if (nbd_read(ioc, name, namelen, errp) < 0) {
error_prepend(errp, "failed to read export name");
nbd_send_opt_abort(ioc);
return -1;
}
name[namelen] = '\0';
len -= namelen;
- if (drop_sync(ioc, len, errp) < 0) {
+ if (nbd_drop(ioc, len, errp) < 0) {
error_prepend(errp, "failed to read export description");
nbd_send_opt_abort(ioc);
return -1;
@@ -476,7 +450,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
goto fail;
}
- if (read_sync(ioc, buf, 8, errp) < 0) {
+ if (nbd_read(ioc, buf, 8, errp) < 0) {
error_prepend(errp, "Failed to read data");
goto fail;
}
@@ -502,7 +476,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
goto fail;
}
- if (read_sync(ioc, &magic, sizeof(magic), errp) < 0) {
+ if (nbd_read(ioc, &magic, sizeof(magic), errp) < 0) {
error_prepend(errp, "Failed to read magic");
goto fail;
}
@@ -514,7 +488,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
uint16_t globalflags;
bool fixedNewStyle = false;
- if (read_sync(ioc, &globalflags, sizeof(globalflags), errp) < 0) {
+ if (nbd_read(ioc, &globalflags, sizeof(globalflags), errp) < 0) {
error_prepend(errp, "Failed to read server flags");
goto fail;
}
@@ -532,7 +506,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
}
/* client requested flags */
clientflags = cpu_to_be32(clientflags);
- if (write_sync(ioc, &clientflags, sizeof(clientflags), errp) < 0) {
+ if (nbd_write(ioc, &clientflags, sizeof(clientflags), errp) < 0) {
error_prepend(errp, "Failed to send clientflags field");
goto fail;
}
@@ -570,13 +544,13 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
}
/* Read the response */
- if (read_sync(ioc, &s, sizeof(s), errp) < 0) {
+ if (nbd_read(ioc, &s, sizeof(s), errp) < 0) {
error_prepend(errp, "Failed to read export length");
goto fail;
}
*size = be64_to_cpu(s);
- if (read_sync(ioc, flags, sizeof(*flags), errp) < 0) {
+ if (nbd_read(ioc, flags, sizeof(*flags), errp) < 0) {
error_prepend(errp, "Failed to read export flags");
goto fail;
}
@@ -593,14 +567,14 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
goto fail;
}
- if (read_sync(ioc, &s, sizeof(s), errp) < 0) {
+ if (nbd_read(ioc, &s, sizeof(s), errp) < 0) {
error_prepend(errp, "Failed to read export length");
goto fail;
}
*size = be64_to_cpu(s);
TRACE("Size is %" PRIu64, *size);
- if (read_sync(ioc, &oldflags, sizeof(oldflags), errp) < 0) {
+ if (nbd_read(ioc, &oldflags, sizeof(oldflags), errp) < 0) {
error_prepend(errp, "Failed to read export flags");
goto fail;
}
@@ -616,7 +590,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
}
TRACE("Size is %" PRIu64 ", export flags %" PRIx16, *size, *flags);
- if (zeroes && drop_sync(ioc, 124, errp) < 0) {
+ if (zeroes && nbd_drop(ioc, 124, errp) < 0) {
error_prepend(errp, "Failed to read reserved block");
goto fail;
}
@@ -759,7 +733,7 @@ ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request)
stq_be_p(buf + 16, request->from);
stl_be_p(buf + 24, request->len);
- return write_sync(ioc, buf, sizeof(buf), NULL);
+ return nbd_write(ioc, buf, sizeof(buf), NULL);
}
ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
@@ -768,7 +742,7 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
uint32_t magic;
ssize_t ret;
- ret = read_sync_eof(ioc, buf, sizeof(buf), errp);
+ ret = nbd_read_eof(ioc, buf, sizeof(buf), errp);
if (ret <= 0) {
return ret;
}
diff --git a/nbd/common.c b/nbd/common.c
index bd81637ab9..6b5c1b7b02 100644
--- a/nbd/common.c
+++ b/nbd/common.c
@@ -24,12 +24,8 @@
* The function may be called from coroutine or from non-coroutine context.
* When called from non-coroutine context @ioc must be in blocking mode.
*/
-ssize_t nbd_wr_syncv(QIOChannel *ioc,
- struct iovec *iov,
- size_t niov,
- size_t length,
- bool do_read,
- Error **errp)
+ssize_t nbd_rwv(QIOChannel *ioc, struct iovec *iov, size_t niov, size_t length,
+ bool do_read, Error **errp)
{
ssize_t done = 0;
struct iovec *local_iov = g_new(struct iovec, niov);
@@ -69,6 +65,32 @@ ssize_t nbd_wr_syncv(QIOChannel *ioc,
return done;
}
+/* Discard length bytes from channel. Return -errno on failure and 0 on
+ * success */
+int nbd_drop(QIOChannel *ioc, size_t size, Error **errp)
+{
+ ssize_t ret = 0;
+ char small[1024];
+ char *buffer;
+
+ buffer = sizeof(small) >= size ? small : g_malloc(MIN(65536, size));
+ while (size > 0) {
+ ssize_t count = MIN(65536, size);
+ ret = nbd_read(ioc, buffer, MIN(65536, size), errp);
+
+ if (ret < 0) {
+ goto cleanup;
+ }
+ size -= count;
+ }
+
+ cleanup:
+ if (buffer != small) {
+ g_free(buffer);
+ }
+ return ret;
+}
+
void nbd_tls_handshake(QIOTask *task,
void *opaque)
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index d6071640a0..39bfed177c 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -94,14 +94,14 @@
#define NBD_ENOSPC 28
#define NBD_ESHUTDOWN 108
-/* read_sync_eof
+/* nbd_read_eof
* Tries to read @size bytes from @ioc. Returns number of bytes actually read.
* May return a value >= 0 and < size only on EOF, i.e. when iteratively called
- * qio_channel_readv() returns 0. So, there are no needs to call read_sync_eof
+ * qio_channel_readv() returns 0. So, there is no need to call nbd_read_eof
* iteratively.
*/
-static inline ssize_t read_sync_eof(QIOChannel *ioc, void *buffer, size_t size,
- Error **errp)
+static inline ssize_t nbd_read_eof(QIOChannel *ioc, void *buffer, size_t size,
+ Error **errp)
{
struct iovec iov = { .iov_base = buffer, .iov_len = size };
/* Sockets are kept in blocking mode in the negotiation phase. After
@@ -109,16 +109,16 @@ static inline ssize_t read_sync_eof(QIOChannel *ioc, void *buffer, size_t size,
* our request/reply. Synchronization is done with recv_coroutine, so
* that this is coroutine-safe.
*/
- return nbd_wr_syncv(ioc, &iov, 1, size, true, errp);
+ return nbd_rwv(ioc, &iov, 1, size, true, errp);
}
-/* read_sync
+/* nbd_read
* Reads @size bytes from @ioc. Returns 0 on success.
*/
-static inline int read_sync(QIOChannel *ioc, void *buffer, size_t size,
- Error **errp)
+static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size,
+ Error **errp)
{
- ssize_t ret = read_sync_eof(ioc, buffer, size, errp);
+ ssize_t ret = nbd_read_eof(ioc, buffer, size, errp);
if (ret >= 0 && ret != size) {
ret = -EINVAL;
@@ -128,15 +128,15 @@ static inline int read_sync(QIOChannel *ioc, void *buffer, size_t size,
return ret < 0 ? ret : 0;
}
-/* write_sync
+/* nbd_write
* Writes @size bytes to @ioc. Returns 0 on success.
*/
-static inline int write_sync(QIOChannel *ioc, const void *buffer, size_t size,
- Error **errp)
+static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size,
+ Error **errp)
{
struct iovec iov = { .iov_base = (void *) buffer, .iov_len = size };
- ssize_t ret = nbd_wr_syncv(ioc, &iov, 1, size, false, errp);
+ ssize_t ret = nbd_rwv(ioc, &iov, 1, size, false, errp);
assert(ret < 0 || ret == size);
@@ -153,4 +153,6 @@ struct NBDTLSHandshakeData {
void nbd_tls_handshake(QIOTask *task,
void *opaque);
+int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);
+
#endif
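
The renamed helpers compose as in the sketch below (illustrative only; "ioc" is a connected QIOChannel, "hdr" and "reply" stand for fixed-size wire structures, and "excess" for a payload length the caller wants to discard): nbd_read() and nbd_write() transfer exact byte counts, while nbd_drop() keeps the stream in sync when a payload must be skipped.

    if (nbd_read(ioc, &hdr, sizeof(hdr), errp) < 0) {
        return -EIO;                    /* short read or I/O error */
    }
    if (excess && nbd_drop(ioc, excess, errp) < 0) {
        return -EIO;                    /* could not skip the payload */
    }
    if (nbd_write(ioc, &reply, sizeof(reply), errp) < 0) {
        return -EIO;
    }
    return 0;
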
diff --git a/nbd/server.c b/nbd/server.c
index 49b55f6ede..8a70c054a6 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -81,7 +81,7 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
struct NBDClient {
int refcount;
- void (*close)(NBDClient *client);
+ void (*close_fn)(NBDClient *client, bool negotiated);
bool no_zeroes;
NBDExport *exp;
@@ -104,69 +104,6 @@ struct NBDClient {
static void nbd_client_receive_next_request(NBDClient *client);
-static gboolean nbd_negotiate_continue(QIOChannel *ioc,
- GIOCondition condition,
- void *opaque)
-{
- qemu_coroutine_enter(opaque);
- return TRUE;
-}
-
-static int nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size)
-{
- ssize_t ret;
- guint watch;
-
- assert(qemu_in_coroutine());
- /* Negotiation are always in main loop. */
- watch = qio_channel_add_watch(ioc,
- G_IO_IN,
- nbd_negotiate_continue,
- qemu_coroutine_self(),
- NULL);
- ret = read_sync(ioc, buffer, size, NULL);
- g_source_remove(watch);
- return ret;
-
-}
-
-static int nbd_negotiate_write(QIOChannel *ioc, const void *buffer, size_t size)
-{
- ssize_t ret;
- guint watch;
-
- assert(qemu_in_coroutine());
- /* Negotiation are always in main loop. */
- watch = qio_channel_add_watch(ioc,
- G_IO_OUT,
- nbd_negotiate_continue,
- qemu_coroutine_self(),
- NULL);
- ret = write_sync(ioc, buffer, size, NULL);
- g_source_remove(watch);
- return ret;
-}
-
-static int nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size)
-{
- ssize_t ret;
- uint8_t *buffer = g_malloc(MIN(65536, size));
-
- while (size > 0) {
- size_t count = MIN(65536, size);
- ret = nbd_negotiate_read(ioc, buffer, count);
- if (ret < 0) {
- g_free(buffer);
- return ret;
- }
-
- size -= count;
- }
-
- g_free(buffer);
- return 0;
-}
-
/* Basic flow for negotiation
Server Client
@@ -205,22 +142,22 @@ static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type,
type, opt, len);
magic = cpu_to_be64(NBD_REP_MAGIC);
- if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) < 0) {
+ if (nbd_write(ioc, &magic, sizeof(magic), NULL) < 0) {
LOG("write failed (rep magic)");
return -EINVAL;
}
opt = cpu_to_be32(opt);
- if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) < 0) {
+ if (nbd_write(ioc, &opt, sizeof(opt), NULL) < 0) {
LOG("write failed (rep opt)");
return -EINVAL;
}
type = cpu_to_be32(type);
- if (nbd_negotiate_write(ioc, &type, sizeof(type)) < 0) {
+ if (nbd_write(ioc, &type, sizeof(type), NULL) < 0) {
LOG("write failed (rep type)");
return -EINVAL;
}
len = cpu_to_be32(len);
- if (nbd_negotiate_write(ioc, &len, sizeof(len)) < 0) {
+ if (nbd_write(ioc, &len, sizeof(len), NULL) < 0) {
LOG("write failed (rep data length)");
return -EINVAL;
}
@@ -255,7 +192,7 @@ nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type,
if (ret < 0) {
goto out;
}
- if (nbd_negotiate_write(ioc, msg, len) < 0) {
+ if (nbd_write(ioc, msg, len, NULL) < 0) {
LOG("write failed (error message)");
ret = -EIO;
} else {
@@ -274,27 +211,27 @@ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp)
uint32_t len;
const char *name = exp->name ? exp->name : "";
const char *desc = exp->description ? exp->description : "";
- int rc;
+ int ret;
TRACE("Advertising export name '%s' description '%s'", name, desc);
name_len = strlen(name);
desc_len = strlen(desc);
len = name_len + desc_len + sizeof(len);
- rc = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len);
- if (rc < 0) {
- return rc;
+ ret = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len);
+ if (ret < 0) {
+ return ret;
}
len = cpu_to_be32(name_len);
- if (nbd_negotiate_write(ioc, &len, sizeof(len)) < 0) {
+ if (nbd_write(ioc, &len, sizeof(len), NULL) < 0) {
LOG("write failed (name length)");
return -EINVAL;
}
- if (nbd_negotiate_write(ioc, name, name_len) < 0) {
+ if (nbd_write(ioc, name, name_len, NULL) < 0) {
LOG("write failed (name buffer)");
return -EINVAL;
}
- if (nbd_negotiate_write(ioc, desc, desc_len) < 0) {
+ if (nbd_write(ioc, desc, desc_len, NULL) < 0) {
LOG("write failed (description buffer)");
return -EINVAL;
}
@@ -308,7 +245,7 @@ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length)
NBDExport *exp;
if (length) {
- if (nbd_negotiate_drop_sync(client->ioc, length) < 0) {
+ if (nbd_drop(client->ioc, length, NULL) < 0) {
return -EIO;
}
return nbd_negotiate_send_rep_err(client->ioc,
@@ -328,7 +265,6 @@ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length)
static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length)
{
- int rc = -EINVAL;
char name[NBD_MAX_NAME_SIZE + 1];
/* Client sends:
@@ -337,11 +273,11 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length)
TRACE("Checking length");
if (length >= sizeof(name)) {
LOG("Bad length received");
- goto fail;
+ return -EINVAL;
}
- if (nbd_negotiate_read(client->ioc, name, length) < 0) {
+ if (nbd_read(client->ioc, name, length, NULL) < 0) {
LOG("read failed");
- goto fail;
+ return -EINVAL;
}
name[length] = '\0';
@@ -350,14 +286,13 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length)
client->exp = nbd_export_find(name);
if (!client->exp) {
LOG("export not found");
- goto fail;
+ return -EINVAL;
}
QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
nbd_export_get(client->exp);
- rc = 0;
-fail:
- return rc;
+
+ return 0;
}
/* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
@@ -372,7 +307,7 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
TRACE("Setting up TLS");
ioc = client->ioc;
if (length) {
- if (nbd_negotiate_drop_sync(ioc, length) < 0) {
+ if (nbd_drop(ioc, length, NULL) < 0) {
return NULL;
}
nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
@@ -436,7 +371,7 @@ static int nbd_negotiate_options(NBDClient *client)
... Rest of request
*/
- if (nbd_negotiate_read(client->ioc, &flags, sizeof(flags)) < 0) {
+ if (nbd_read(client->ioc, &flags, sizeof(flags), NULL) < 0) {
LOG("read failed");
return -EIO;
}
@@ -462,7 +397,7 @@ static int nbd_negotiate_options(NBDClient *client)
uint32_t clientflags, length;
uint64_t magic;
- if (nbd_negotiate_read(client->ioc, &magic, sizeof(magic)) < 0) {
+ if (nbd_read(client->ioc, &magic, sizeof(magic), NULL) < 0) {
LOG("read failed");
return -EINVAL;
}
@@ -472,15 +407,15 @@ static int nbd_negotiate_options(NBDClient *client)
return -EINVAL;
}
- if (nbd_negotiate_read(client->ioc, &clientflags,
- sizeof(clientflags)) < 0)
+ if (nbd_read(client->ioc, &clientflags,
+ sizeof(clientflags), NULL) < 0)
{
LOG("read failed");
return -EINVAL;
}
clientflags = be32_to_cpu(clientflags);
- if (nbd_negotiate_read(client->ioc, &length, sizeof(length)) < 0) {
+ if (nbd_read(client->ioc, &length, sizeof(length), NULL) < 0) {
LOG("read failed");
return -EINVAL;
}
@@ -510,7 +445,7 @@ static int nbd_negotiate_options(NBDClient *client)
return -EINVAL;
default:
- if (nbd_negotiate_drop_sync(client->ioc, length) < 0) {
+ if (nbd_drop(client->ioc, length, NULL) < 0) {
return -EIO;
}
ret = nbd_negotiate_send_rep_err(client->ioc,
@@ -548,7 +483,7 @@ static int nbd_negotiate_options(NBDClient *client)
return nbd_negotiate_handle_export_name(client, length);
case NBD_OPT_STARTTLS:
- if (nbd_negotiate_drop_sync(client->ioc, length) < 0) {
+ if (nbd_drop(client->ioc, length, NULL) < 0) {
return -EIO;
}
if (client->tlscreds) {
@@ -567,7 +502,7 @@ static int nbd_negotiate_options(NBDClient *client)
}
break;
default:
- if (nbd_negotiate_drop_sync(client->ioc, length) < 0) {
+ if (nbd_drop(client->ioc, length, NULL) < 0) {
return -EIO;
}
ret = nbd_negotiate_send_rep_err(client->ioc,
@@ -598,16 +533,10 @@ static int nbd_negotiate_options(NBDClient *client)
}
}
-typedef struct {
- NBDClient *client;
- Coroutine *co;
-} NBDClientNewData;
-
-static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
+static coroutine_fn int nbd_negotiate(NBDClient *client)
{
- NBDClient *client = data->client;
char buf[8 + 8 + 8 + 128];
- int rc;
+ int ret;
const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
NBD_FLAG_SEND_WRITE_ZEROES);
@@ -633,7 +562,6 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
*/
qio_channel_set_blocking(client->ioc, false, NULL);
- rc = -EINVAL;
TRACE("Beginning negotiation.");
memset(buf, 0, sizeof(buf));
@@ -654,21 +582,21 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
if (oldStyle) {
if (client->tlscreds) {
TRACE("TLS cannot be enabled with oldstyle protocol");
- goto fail;
+ return -EINVAL;
}
- if (nbd_negotiate_write(client->ioc, buf, sizeof(buf)) < 0) {
+ if (nbd_write(client->ioc, buf, sizeof(buf), NULL) < 0) {
LOG("write failed");
- goto fail;
+ return -EINVAL;
}
} else {
- if (nbd_negotiate_write(client->ioc, buf, 18) < 0) {
+ if (nbd_write(client->ioc, buf, 18, NULL) < 0) {
LOG("write failed");
- goto fail;
+ return -EINVAL;
}
- rc = nbd_negotiate_options(client);
- if (rc != 0) {
+ ret = nbd_negotiate_options(client);
+ if (ret != 0) {
LOG("option negotiation failed");
- goto fail;
+ return ret;
}
TRACE("advertising size %" PRIu64 " and flags %x",
@@ -676,25 +604,25 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
stq_be_p(buf + 18, client->exp->size);
stw_be_p(buf + 26, client->exp->nbdflags | myflags);
len = client->no_zeroes ? 10 : sizeof(buf) - 18;
- if (nbd_negotiate_write(client->ioc, buf + 18, len) < 0) {
+ ret = nbd_write(client->ioc, buf + 18, len, NULL);
+ if (ret < 0) {
LOG("write failed");
- goto fail;
+ return ret;
}
}
TRACE("Negotiation succeeded.");
- rc = 0;
-fail:
- return rc;
+
+ return 0;
}
-static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request)
+static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request)
{
uint8_t buf[NBD_REQUEST_SIZE];
uint32_t magic;
- ssize_t ret;
+ int ret;
- ret = read_sync(ioc, buf, sizeof(buf), NULL);
+ ret = nbd_read(ioc, buf, sizeof(buf), NULL);
if (ret < 0) {
return ret;
}
@@ -726,7 +654,7 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request)
return 0;
}
-static ssize_t nbd_send_reply(QIOChannel *ioc, NBDReply *reply)
+static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply)
{
uint8_t buf[NBD_REPLY_SIZE];
@@ -745,7 +673,7 @@ static ssize_t nbd_send_reply(QIOChannel *ioc, NBDReply *reply)
stl_be_p(buf + 4, reply->error);
stq_be_p(buf + 8, reply->handle);
- return write_sync(ioc, buf, sizeof(buf), NULL);
+ return nbd_write(ioc, buf, sizeof(buf), NULL);
}
#define MAX_NBD_REQUESTS 16
@@ -778,7 +706,7 @@ void nbd_client_put(NBDClient *client)
}
}
-static void client_close(NBDClient *client)
+static void client_close(NBDClient *client, bool negotiated)
{
if (client->closing) {
return;
@@ -793,8 +721,8 @@ static void client_close(NBDClient *client)
NULL);
/* Also tell the client, so that they release their reference. */
- if (client->close) {
- client->close(client);
+ if (client->close_fn) {
+ client->close_fn(client, negotiated);
}
}
@@ -975,7 +903,7 @@ void nbd_export_close(NBDExport *exp)
nbd_export_get(exp);
QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
- client_close(client);
+ client_close(client, true);
}
nbd_export_set_name(exp, NULL);
nbd_export_set_description(exp, NULL);
@@ -1032,25 +960,24 @@ void nbd_export_close_all(void)
}
}
-static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply,
- int len)
+static int nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len)
{
NBDClient *client = req->client;
- ssize_t rc, ret;
+ int ret;
g_assert(qemu_in_coroutine());
qemu_co_mutex_lock(&client->send_lock);
client->send_coroutine = qemu_coroutine_self();
if (!len) {
- rc = nbd_send_reply(client->ioc, reply);
+ ret = nbd_send_reply(client->ioc, reply);
} else {
qio_channel_set_cork(client->ioc, true);
- rc = nbd_send_reply(client->ioc, reply);
- if (rc >= 0) {
- ret = write_sync(client->ioc, req->data, len, NULL);
+ ret = nbd_send_reply(client->ioc, reply);
+ if (ret == 0) {
+ ret = nbd_write(client->ioc, req->data, len, NULL);
if (ret < 0) {
- rc = -EIO;
+ ret = -EIO;
}
}
qio_channel_set_cork(client->ioc, false);
@@ -1058,28 +985,23 @@ static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply,
client->send_coroutine = NULL;
qemu_co_mutex_unlock(&client->send_lock);
- return rc;
+ return ret;
}
-/* Collect a client request. Return 0 if request looks valid, -EAGAIN
- * to keep trying the collection, -EIO to drop connection right away,
- * and any other negative value to report an error to the client
- * (although the caller may still need to disconnect after reporting
- * the error). */
-static ssize_t nbd_co_receive_request(NBDRequestData *req,
- NBDRequest *request)
+/* nbd_co_receive_request
+ * Collect a client request. Return 0 if request looks valid, -EIO to drop
+ * connection right away, and any other negative value to report an error to
+ * the client (although the caller may still need to disconnect after reporting
+ * the error).
+ */
+static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request)
{
NBDClient *client = req->client;
- ssize_t rc;
g_assert(qemu_in_coroutine());
assert(client->recv_coroutine == qemu_coroutine_self());
- rc = nbd_receive_request(client->ioc, request);
- if (rc < 0) {
- if (rc != -EAGAIN) {
- rc = -EIO;
- }
- goto out;
+ if (nbd_receive_request(client->ioc, request) < 0) {
+ return -EIO;
}
TRACE("Decoding type");
@@ -1093,8 +1015,7 @@ static ssize_t nbd_co_receive_request(NBDRequestData *req,
/* Special case: we're going to disconnect without a reply,
* whether or not flags, from, or len are bogus */
TRACE("Request type is DISCONNECT");
- rc = -EIO;
- goto out;
+ return -EIO;
}
/* Check for sanity in the parameters, part 1. Defer as many
@@ -1102,31 +1023,27 @@ static ssize_t nbd_co_receive_request(NBDRequestData *req,
* payload, so we can try and keep the connection alive. */
if ((request->from + request->len) < request->from) {
LOG("integer overflow detected, you're probably being attacked");
- rc = -EINVAL;
- goto out;
+ return -EINVAL;
}
if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) {
if (request->len > NBD_MAX_BUFFER_SIZE) {
LOG("len (%" PRIu32" ) is larger than max len (%u)",
request->len, NBD_MAX_BUFFER_SIZE);
- rc = -EINVAL;
- goto out;
+ return -EINVAL;
}
req->data = blk_try_blockalign(client->exp->blk, request->len);
if (req->data == NULL) {
- rc = -ENOMEM;
- goto out;
+ return -ENOMEM;
}
}
if (request->type == NBD_CMD_WRITE) {
TRACE("Reading %" PRIu32 " byte(s)", request->len);
- if (read_sync(client->ioc, req->data, request->len, NULL) < 0) {
+ if (nbd_read(client->ioc, req->data, request->len, NULL) < 0) {
LOG("reading from socket failed");
- rc = -EIO;
- goto out;
+ return -EIO;
}
req->complete = true;
}
@@ -1136,28 +1053,19 @@ static ssize_t nbd_co_receive_request(NBDRequestData *req,
LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
", Size: %" PRIu64, request->from, request->len,
(uint64_t)client->exp->size);
- rc = request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
- goto out;
+ return request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
}
if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
LOG("unsupported flags (got 0x%x)", request->flags);
- rc = -EINVAL;
- goto out;
+ return -EINVAL;
}
if (request->type != NBD_CMD_WRITE_ZEROES &&
(request->flags & NBD_CMD_FLAG_NO_HOLE)) {
LOG("unexpected flags (got 0x%x)", request->flags);
- rc = -EINVAL;
- goto out;
+ return -EINVAL;
}
- rc = 0;
-
-out:
- client->recv_coroutine = NULL;
- nbd_client_receive_next_request(client);
-
- return rc;
+ return 0;
}
/* Owns a reference to the NBDClient passed as opaque. */
@@ -1168,8 +1076,9 @@ static coroutine_fn void nbd_trip(void *opaque)
NBDRequestData *req;
NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */
NBDReply reply;
- ssize_t ret;
+ int ret;
int flags;
+ int reply_data_len = 0;
TRACE("Reading request.");
if (client->closing) {
@@ -1179,11 +1088,10 @@ static coroutine_fn void nbd_trip(void *opaque)
req = nbd_request_get(client);
ret = nbd_co_receive_request(req, &request);
- if (ret == -EAGAIN) {
- goto done;
- }
+ client->recv_coroutine = NULL;
+ nbd_client_receive_next_request(client);
if (ret == -EIO) {
- goto out;
+ goto disconnect;
}
reply.handle = request.handle;
@@ -1191,7 +1099,7 @@ static coroutine_fn void nbd_trip(void *opaque)
if (ret < 0) {
reply.error = -ret;
- goto error_reply;
+ goto reply;
}
if (client->closing) {
@@ -1212,7 +1120,7 @@ static coroutine_fn void nbd_trip(void *opaque)
if (ret < 0) {
LOG("flush failed");
reply.error = -ret;
- goto error_reply;
+ break;
}
}
@@ -1221,12 +1129,12 @@ static coroutine_fn void nbd_trip(void *opaque)
if (ret < 0) {
LOG("reading from file failed");
reply.error = -ret;
- goto error_reply;
+ break;
}
+ reply_data_len = request.len;
TRACE("Read %" PRIu32" byte(s)", request.len);
- if (nbd_co_send_reply(req, &reply, request.len) < 0)
- goto out;
+
break;
case NBD_CMD_WRITE:
TRACE("Request type is WRITE");
@@ -1234,7 +1142,7 @@ static coroutine_fn void nbd_trip(void *opaque)
if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
TRACE("Server is read-only, return error");
reply.error = EROFS;
- goto error_reply;
+ break;
}
TRACE("Writing to device");
@@ -1248,21 +1156,16 @@ static coroutine_fn void nbd_trip(void *opaque)
if (ret < 0) {
LOG("writing to file failed");
reply.error = -ret;
- goto error_reply;
}
- if (nbd_co_send_reply(req, &reply, 0) < 0) {
- goto out;
- }
break;
-
case NBD_CMD_WRITE_ZEROES:
TRACE("Request type is WRITE_ZEROES");
if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
TRACE("Server is read-only, return error");
reply.error = EROFS;
- goto error_reply;
+ break;
}
TRACE("Writing to device");
@@ -1279,14 +1182,9 @@ static coroutine_fn void nbd_trip(void *opaque)
if (ret < 0) {
LOG("writing to file failed");
reply.error = -ret;
- goto error_reply;
}
- if (nbd_co_send_reply(req, &reply, 0) < 0) {
- goto out;
- }
break;
-
case NBD_CMD_DISC:
/* unreachable, thanks to special case in nbd_co_receive_request() */
abort();
@@ -1299,9 +1197,7 @@ static coroutine_fn void nbd_trip(void *opaque)
LOG("flush failed");
reply.error = -ret;
}
- if (nbd_co_send_reply(req, &reply, 0) < 0) {
- goto out;
- }
+
break;
case NBD_CMD_TRIM:
TRACE("Request type is TRIM");
@@ -1311,21 +1207,19 @@ static coroutine_fn void nbd_trip(void *opaque)
LOG("discard failed");
reply.error = -ret;
}
- if (nbd_co_send_reply(req, &reply, 0) < 0) {
- goto out;
- }
+
break;
default:
LOG("invalid request type (%" PRIu32 ") received", request.type);
reply.error = EINVAL;
- error_reply:
- /* We must disconnect after NBD_CMD_WRITE if we did not
- * read the payload.
- */
- if (nbd_co_send_reply(req, &reply, 0) < 0 || !req->complete) {
- goto out;
- }
- break;
+ }
+
+reply:
+ /* We must disconnect after NBD_CMD_WRITE if we did not
+ * read the payload.
+ */
+ if (nbd_co_send_reply(req, &reply, reply_data_len) < 0 || !req->complete) {
+ goto disconnect;
}
TRACE("Request/Reply complete");
@@ -1335,9 +1229,9 @@ done:
nbd_client_put(client);
return;
-out:
+disconnect:
nbd_request_put(req);
- client_close(client);
+ client_close(client, true);
nbd_client_put(client);
}
@@ -1352,8 +1246,7 @@ static void nbd_client_receive_next_request(NBDClient *client)
static coroutine_fn void nbd_co_client_start(void *opaque)
{
- NBDClientNewData *data = opaque;
- NBDClient *client = data->client;
+ NBDClient *client = opaque;
NBDExport *exp = client->exp;
if (exp) {
@@ -1362,25 +1255,28 @@ static coroutine_fn void nbd_co_client_start(void *opaque)
}
qemu_co_mutex_init(&client->send_lock);
- if (nbd_negotiate(data)) {
- client_close(client);
- goto out;
+ if (nbd_negotiate(client)) {
+ client_close(client, false);
+ return;
}
nbd_client_receive_next_request(client);
-
-out:
- g_free(data);
}
+/*
+ * Create a new client listener on the given export @exp, using the
+ * given channel @sioc. Begin servicing it in a coroutine. When the
+ * connection closes, call @close_fn with an indication of whether the
+ * client completed negotiation.
+ */
void nbd_client_new(NBDExport *exp,
QIOChannelSocket *sioc,
QCryptoTLSCreds *tlscreds,
const char *tlsaclname,
- void (*close_fn)(NBDClient *))
+ void (*close_fn)(NBDClient *, bool))
{
NBDClient *client;
- NBDClientNewData *data = g_new(NBDClientNewData, 1);
+ Coroutine *co;
client = g_malloc0(sizeof(NBDClient));
client->refcount = 1;
@@ -1394,9 +1290,8 @@ void nbd_client_new(NBDExport *exp,
object_ref(OBJECT(client->sioc));
client->ioc = QIO_CHANNEL(sioc);
object_ref(OBJECT(client->ioc));
- client->close = close_fn;
+ client->close_fn = close_fn;
- data->client = client;
- data->co = qemu_coroutine_create(nbd_co_client_start, data);
- qemu_coroutine_enter(data->co);
+ co = qemu_coroutine_create(nbd_co_client_start, client);
+ qemu_coroutine_enter(co);
}
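
The hunks above change the nbd_client_new() contract: the client coroutine is now started directly on the NBDClient (NBDClientNewData is gone), and close_fn receives a bool telling the callback whether the client had completed negotiation; qemu-nbd, for instance, only moves to TERMINATE when a negotiated client goes away. A hedged sketch of a caller of the new interface follows; my_client_closed, my_accept_cb, active_clients and shutting_down are hypothetical, and the real callers are qemu-nbd.c and blockdev-nbd.c.

    /* Sketch only, assuming the declarations from "block/nbd.h". */
    static int active_clients;
    static bool shutting_down;

    static void my_client_closed(NBDClient *client, bool negotiated)
    {
        active_clients--;
        /* Hypothetical policy, mirroring qemu-nbd: a probe that never
         * negotiated should not shut the server down. */
        if (negotiated && active_clients == 0) {
            shutting_down = true;
        }
    }

    static void my_accept_cb(NBDExport *exp, QIOChannelSocket *sioc)
    {
        active_clients++;
        nbd_client_new(exp, sioc, NULL /* tlscreds */, NULL /* tlsaclname */,
                       my_client_closed);
    }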
diff --git a/qemu-doc.texi b/qemu-doc.texi
index 965ba5929e..21079fd675 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -1,11 +1,12 @@
\input texinfo @c -*- texinfo -*-
@c %**start of header
@setfilename qemu-doc.info
+@include version.texi
@documentlanguage en
@documentencoding UTF-8
-@settitle QEMU Emulator User Documentation
+@settitle QEMU version @value{VERSION} User Documentation
@exampleindent 0
@paragraphindent 0
@c %**end of header
@@ -19,7 +20,7 @@
@iftex
@titlepage
@sp 7
-@center @titlefont{QEMU Emulator}
+@center @titlefont{QEMU version @value{VERSION}}
@sp 1
@center @titlefont{User Documentation}
@sp 3
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 651f85ecc1..4dd3fd4732 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -336,10 +336,10 @@ static void nbd_export_closed(NBDExport *exp)
static void nbd_update_server_watch(void);
-static void nbd_client_closed(NBDClient *client)
+static void nbd_client_closed(NBDClient *client, bool negotiated)
{
nb_fds--;
- if (nb_fds == 0 && !persistent && state == RUNNING) {
+ if (negotiated && nb_fds == 0 && !persistent && state == RUNNING) {
state = TERMINATE;
}
nbd_update_server_watch();
@@ -581,6 +581,10 @@ int main(int argc, char **argv)
sa_sigterm.sa_handler = termsig_handler;
sigaction(SIGTERM, &sa_sigterm, NULL);
+#ifdef CONFIG_POSIX
+ signal(SIGPIPE, SIG_IGN);
+#endif
+
module_call_init(MODULE_INIT_TRACE);
qcrypto_init(&error_fatal);
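
Ignoring SIGPIPE here means a client that disappears mid-transfer makes the next socket write fail with EPIPE, which the NBD error paths already handle, instead of the default signal action killing qemu-nbd outright. A standalone POSIX demo of the same effect (not QEMU code):

    /* Minimal sketch, plain POSIX: with SIGPIPE ignored, writing to a pipe
     * or socket whose reading end is gone returns -1/EPIPE instead of
     * terminating the process. */
    #include <errno.h>
    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        int fds[2];
        char c = 'x';

        signal(SIGPIPE, SIG_IGN);          /* same call the patch adds */
        if (pipe(fds) < 0) {
            return 1;
        }
        close(fds[0]);                     /* reader side gone */
        if (write(fds[1], &c, 1) < 0) {
            /* without SIG_IGN this write would have killed the process */
            printf("write failed: %s\n", strerror(errno));
        }
        return 0;
    }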
diff --git a/rules.mak b/rules.mak
index 2a2fb72e85..6e943335f3 100644
--- a/rules.mak
+++ b/rules.mak
@@ -377,7 +377,7 @@ define unnest-vars
endef
TEXI2MAN = $(call quiet-command, \
- perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl -I docs $< $@.pod && \
+ perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $(TEXI2PODFLAGS) $< $@.pod && \
$(POD2MAN) --section=$(subst .,,$(suffix $@)) --center=" " --release=" " $@.pod > $@, \
"GEN","$@")
diff --git a/target/i386/hax-all.c b/target/i386/hax-all.c
index 097db5cae1..ba6117d7de 100644
--- a/target/i386/hax-all.c
+++ b/target/i386/hax-all.c
@@ -514,9 +514,10 @@ static int hax_vcpu_hax_exec(CPUArchState *env)
hax_vcpu_interrupt(env);
qemu_mutex_unlock_iothread();
+ cpu_exec_start(cpu);
hax_ret = hax_vcpu_run(vcpu);
+ cpu_exec_end(cpu);
qemu_mutex_lock_iothread();
- current_cpu = cpu;
/* Simply continue the vcpu_run if system call interrupted */
if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
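
Wrapping the HAXM entry in cpu_exec_start()/cpu_exec_end() marks the window in which the vCPU is inside the guest, so code using start_exclusive()/end_exclusive() can synchronize with it, the same way the KVM and TCG loops already do. A generic sketch of the pattern, assuming the usual QEMU headers; run_guest_once() is hypothetical:

    /* Sketch, not HAX-specific: bracket the kernel entry so exclusive
     * sections can account for this vCPU, and never hold the iothread
     * lock while the guest is running. */
    static int run_vcpu_iteration(CPUState *cpu)
    {
        int ret;

        qemu_mutex_unlock_iothread();
        cpu_exec_start(cpu);
        ret = run_guest_once(cpu);      /* hypothetical accelerator entry */
        cpu_exec_end(cpu);
        qemu_mutex_lock_iothread();
        return ret;
    }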
diff --git a/target/m68k/Makefile.objs b/target/m68k/Makefile.objs
index 02cf616a78..39141ab93d 100644
--- a/target/m68k/Makefile.objs
+++ b/target/m68k/Makefile.objs
@@ -1,3 +1,3 @@
obj-y += m68k-semi.o
-obj-y += translate.o op_helper.o helper.o cpu.o
+obj-y += translate.o op_helper.o helper.o cpu.o fpu_helper.o
obj-y += gdbstub.o
diff --git a/target/m68k/fpu_helper.c b/target/m68k/fpu_helper.c
new file mode 100644
index 0000000000..5bf2576c2b
--- /dev/null
+++ b/target/m68k/fpu_helper.c
@@ -0,0 +1,112 @@
+/*
+ * m68k FPU helpers
+ *
+ * Copyright (c) 2006-2007 CodeSourcery
+ * Written by Paul Brook
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/helper-proto.h"
+
+uint32_t HELPER(f64_to_i32)(CPUM68KState *env, float64 val)
+{
+ return float64_to_int32(val, &env->fp_status);
+}
+
+float32 HELPER(f64_to_f32)(CPUM68KState *env, float64 val)
+{
+ return float64_to_float32(val, &env->fp_status);
+}
+
+float64 HELPER(i32_to_f64)(CPUM68KState *env, uint32_t val)
+{
+ return int32_to_float64(val, &env->fp_status);
+}
+
+float64 HELPER(f32_to_f64)(CPUM68KState *env, float32 val)
+{
+ return float32_to_float64(val, &env->fp_status);
+}
+
+float64 HELPER(iround_f64)(CPUM68KState *env, float64 val)
+{
+ return float64_round_to_int(val, &env->fp_status);
+}
+
+float64 HELPER(itrunc_f64)(CPUM68KState *env, float64 val)
+{
+ return float64_trunc_to_int(val, &env->fp_status);
+}
+
+float64 HELPER(sqrt_f64)(CPUM68KState *env, float64 val)
+{
+ return float64_sqrt(val, &env->fp_status);
+}
+
+float64 HELPER(abs_f64)(float64 val)
+{
+ return float64_abs(val);
+}
+
+float64 HELPER(chs_f64)(float64 val)
+{
+ return float64_chs(val);
+}
+
+float64 HELPER(add_f64)(CPUM68KState *env, float64 a, float64 b)
+{
+ return float64_add(a, b, &env->fp_status);
+}
+
+float64 HELPER(sub_f64)(CPUM68KState *env, float64 a, float64 b)
+{
+ return float64_sub(a, b, &env->fp_status);
+}
+
+float64 HELPER(mul_f64)(CPUM68KState *env, float64 a, float64 b)
+{
+ return float64_mul(a, b, &env->fp_status);
+}
+
+float64 HELPER(div_f64)(CPUM68KState *env, float64 a, float64 b)
+{
+ return float64_div(a, b, &env->fp_status);
+}
+
+float64 HELPER(sub_cmp_f64)(CPUM68KState *env, float64 a, float64 b)
+{
+ /* ??? This may incorrectly raise exceptions. */
+ /* ??? Should flush denormals to zero. */
+ float64 res;
+ res = float64_sub(a, b, &env->fp_status);
+ if (float64_is_quiet_nan(res, &env->fp_status)) {
+ /* +/-inf compares equal against itself, but sub returns nan. */
+ if (!float64_is_quiet_nan(a, &env->fp_status)
+ && !float64_is_quiet_nan(b, &env->fp_status)) {
+ res = float64_zero;
+ if (float64_lt_quiet(a, res, &env->fp_status)) {
+ res = float64_chs(res);
+ }
+ }
+ }
+ return res;
+}
+
+uint32_t HELPER(compare_f64)(CPUM68KState *env, float64 val)
+{
+ return float64_compare_quiet(val, float64_zero, &env->fp_status);
+}
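
The special case in sub_cmp_f64() exists because IEEE 754 subtraction of two equal infinities yields NaN, while a comparison built on subtraction should report them as equal. A tiny host-side illustration with plain doubles rather than softfloat:

    /* Illustration only: inf - inf is NaN, so a subtraction-based compare
     * must special-case NaN results whose operands were not NaN and treat
     * them as "equal" (zero). */
    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        double a = INFINITY, b = INFINITY;
        double diff = a - b;                      /* NaN, not 0.0 */

        printf("isnan(a - b) = %d\n", isnan(diff) != 0);
        if (isnan(diff) && !isnan(a) && !isnan(b)) {
            diff = 0.0;                           /* treat as equal */
        }
        printf("compare result = %g\n", diff);
        return 0;
    }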
diff --git a/target/m68k/helper.c b/target/m68k/helper.c
index f750d3dbaa..5ca9911657 100644
--- a/target/m68k/helper.c
+++ b/target/m68k/helper.c
@@ -284,94 +284,6 @@ void HELPER(set_sr)(CPUM68KState *env, uint32_t val)
m68k_switch_sp(env);
}
-/* FPU helpers. */
-uint32_t HELPER(f64_to_i32)(CPUM68KState *env, float64 val)
-{
- return float64_to_int32(val, &env->fp_status);
-}
-
-float32 HELPER(f64_to_f32)(CPUM68KState *env, float64 val)
-{
- return float64_to_float32(val, &env->fp_status);
-}
-
-float64 HELPER(i32_to_f64)(CPUM68KState *env, uint32_t val)
-{
- return int32_to_float64(val, &env->fp_status);
-}
-
-float64 HELPER(f32_to_f64)(CPUM68KState *env, float32 val)
-{
- return float32_to_float64(val, &env->fp_status);
-}
-
-float64 HELPER(iround_f64)(CPUM68KState *env, float64 val)
-{
- return float64_round_to_int(val, &env->fp_status);
-}
-
-float64 HELPER(itrunc_f64)(CPUM68KState *env, float64 val)
-{
- return float64_trunc_to_int(val, &env->fp_status);
-}
-
-float64 HELPER(sqrt_f64)(CPUM68KState *env, float64 val)
-{
- return float64_sqrt(val, &env->fp_status);
-}
-
-float64 HELPER(abs_f64)(float64 val)
-{
- return float64_abs(val);
-}
-
-float64 HELPER(chs_f64)(float64 val)
-{
- return float64_chs(val);
-}
-
-float64 HELPER(add_f64)(CPUM68KState *env, float64 a, float64 b)
-{
- return float64_add(a, b, &env->fp_status);
-}
-
-float64 HELPER(sub_f64)(CPUM68KState *env, float64 a, float64 b)
-{
- return float64_sub(a, b, &env->fp_status);
-}
-
-float64 HELPER(mul_f64)(CPUM68KState *env, float64 a, float64 b)
-{
- return float64_mul(a, b, &env->fp_status);
-}
-
-float64 HELPER(div_f64)(CPUM68KState *env, float64 a, float64 b)
-{
- return float64_div(a, b, &env->fp_status);
-}
-
-float64 HELPER(sub_cmp_f64)(CPUM68KState *env, float64 a, float64 b)
-{
- /* ??? This may incorrectly raise exceptions. */
- /* ??? Should flush denormals to zero. */
- float64 res;
- res = float64_sub(a, b, &env->fp_status);
- if (float64_is_quiet_nan(res, &env->fp_status)) {
- /* +/-inf compares equal against itself, but sub returns nan. */
- if (!float64_is_quiet_nan(a, &env->fp_status)
- && !float64_is_quiet_nan(b, &env->fp_status)) {
- res = float64_zero;
- if (float64_lt_quiet(a, res, &env->fp_status))
- res = float64_chs(res);
- }
- }
- return res;
-}
-
-uint32_t HELPER(compare_f64)(CPUM68KState *env, float64 val)
-{
- return float64_compare_quiet(val, float64_zero, &env->fp_status);
-}
/* MAC unit. */
/* FIXME: The MAC unit implementation is a bit of a mess. Some helpers
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index ad4d4efb8d..dfecfb6e5f 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -565,7 +565,7 @@ static void gen_flush_flags(DisasContext *s)
t1 = tcg_temp_new();
tcg_gen_add_i32(t0, QREG_CC_N, QREG_CC_V);
gen_ext(t0, t0, s->cc_op - CC_OP_SUBB, 1);
- tcg_gen_xor_i32(t1, QREG_CC_N, QREG_CC_V);
+ tcg_gen_xor_i32(t1, QREG_CC_N, t0);
tcg_gen_xor_i32(QREG_CC_V, QREG_CC_V, t0);
tcg_temp_free(t0);
tcg_gen_and_i32(QREG_CC_V, QREG_CC_V, t1);
@@ -669,6 +669,21 @@ static inline int insn_opsize(int insn)
}
}
+static inline int ext_opsize(int ext, int pos)
+{
+ switch ((ext >> pos) & 7) {
+ case 0: return OS_LONG;
+ case 1: return OS_SINGLE;
+ case 2: return OS_EXTENDED;
+ case 3: return OS_PACKED;
+ case 4: return OS_WORD;
+ case 5: return OS_DOUBLE;
+ case 6: return OS_BYTE;
+ default:
+ g_assert_not_reached();
+ }
+}
+
/* Assign value to a register. If the width is less than the register width
only the low part of the register is set. */
static void gen_partset_reg(int opsize, TCGv reg, TCGv val)
@@ -4111,20 +4126,19 @@ DISAS_INSN(fpu)
tmp32 = tcg_temp_new_i32();
/* fmove */
/* ??? TODO: Proper behavior on overflow. */
- switch ((ext >> 10) & 7) {
- case 0:
- opsize = OS_LONG;
+
+ opsize = ext_opsize(ext, 10);
+ switch (opsize) {
+ case OS_LONG:
gen_helper_f64_to_i32(tmp32, cpu_env, src);
break;
- case 1:
- opsize = OS_SINGLE;
+ case OS_SINGLE:
gen_helper_f64_to_f32(tmp32, cpu_env, src);
break;
- case 4:
- opsize = OS_WORD;
+ case OS_WORD:
gen_helper_f64_to_i32(tmp32, cpu_env, src);
break;
- case 5: /* OS_DOUBLE */
+ case OS_DOUBLE:
tcg_gen_mov_i32(tmp32, AREG(insn, 0));
switch ((insn >> 3) & 7) {
case 2:
@@ -4153,8 +4167,7 @@ DISAS_INSN(fpu)
}
tcg_temp_free_i32(tmp32);
return;
- case 6:
- opsize = OS_BYTE;
+ case OS_BYTE:
gen_helper_f64_to_i32(tmp32, cpu_env, src);
break;
default:
@@ -4227,15 +4240,7 @@ DISAS_INSN(fpu)
}
if (ext & (1 << 14)) {
/* Source effective address. */
- switch ((ext >> 10) & 7) {
- case 0: opsize = OS_LONG; break;
- case 1: opsize = OS_SINGLE; break;
- case 4: opsize = OS_WORD; break;
- case 5: opsize = OS_DOUBLE; break;
- case 6: opsize = OS_BYTE; break;
- default:
- goto undef;
- }
+ opsize = ext_opsize(ext, 10);
if (opsize == OS_DOUBLE) {
tmp32 = tcg_temp_new_i32();
tcg_gen_mov_i32(tmp32, AREG(insn, 0));
diff --git a/tcg-runtime.c b/tcg/tcg-runtime.c
index ec3a34e461..ec3a34e461 100644
--- a/tcg-runtime.c
+++ b/tcg/tcg-runtime.c
diff --git a/tci.c b/tcg/tci.c
index 4bdc645f2a..4bdc645f2a 100644
--- a/tci.c
+++ b/tcg/tci.c
diff --git a/tests/Makefile.include b/tests/Makefile.include
index f42f3dfa72..77da9b7f4b 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -205,6 +205,8 @@ check-qtest-pci-y += tests/intel-hda-test$(EXESUF)
gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c
check-qtest-pci-$(CONFIG_EVENTFD) += tests/ivshmem-test$(EXESUF)
gcov-files-pci-y += hw/misc/ivshmem.c
+check-qtest-pci-y += tests/megasas-test$(EXESUF)
+gcov-files-pci-y += hw/scsi/megasas.c
check-qtest-i386-y = tests/endianness-test$(EXESUF)
check-qtest-i386-y += tests/fdc-test$(EXESUF)
@@ -755,6 +757,7 @@ tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y)
tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y)
tests/test-x86-cpuid-compat$(EXESUF): tests/test-x86-cpuid-compat.o $(qtest-obj-y)
tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y) $(libqos-spapr-obj-y)
+tests/megasas-test$(EXESUF): tests/megasas-test.o $(libqos-spapr-obj-y) $(libqos-pc-obj-y)
tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o contrib/libvhost-user/libvhost-user.o $(test-util-obj-y)
tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y)
tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o
diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include
index 03eda37bf4..0ed8c3d323 100644
--- a/tests/docker/Makefile.include
+++ b/tests/docker/Makefile.include
@@ -126,7 +126,7 @@ docker-run: docker-qemu-src
" COPYING $(EXECUTABLE) to $(IMAGE)"))
$(call quiet-command, \
$(SRC_PATH)/tests/docker/docker.py run \
- -t \
+ $(if $(NOUSER),,-u $(shell id -u)) -t \
$(if $V,,--rm) \
$(if $(DEBUG),-i,--net=none) \
-e TARGET_LIST=$(TARGET_LIST) \
diff --git a/tests/docker/dockerfiles/centos6.docker b/tests/docker/dockerfiles/centos6.docker
index 34e0d3b91e..17a4d24d54 100644
--- a/tests/docker/dockerfiles/centos6.docker
+++ b/tests/docker/dockerfiles/centos6.docker
@@ -1,7 +1,7 @@
FROM centos:6
RUN yum install -y epel-release
ENV PACKAGES libfdt-devel ccache \
- tar git make gcc g++ \
+ tar git make gcc g++ flex bison \
zlib-devel glib2-devel SDL-devel pixman-devel \
epel-release
RUN yum install -y $PACKAGES
diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker
index c4f80ad3d8..4eaa8ed2a5 100644
--- a/tests/docker/dockerfiles/fedora.docker
+++ b/tests/docker/dockerfiles/fedora.docker
@@ -1,8 +1,8 @@
FROM fedora:latest
ENV PACKAGES \
- ccache git tar PyYAML sparse flex bison python2 \
+ ccache git tar PyYAML sparse flex bison python2 bzip2 hostname \
glib2-devel pixman-devel zlib-devel SDL-devel libfdt-devel \
- gcc gcc-c++ clang make perl which bc findutils \
+ gcc gcc-c++ clang make perl which bc findutils libaio-devel \
mingw32-pixman mingw32-glib2 mingw32-gmp mingw32-SDL mingw32-pkg-config \
mingw32-gtk2 mingw32-gtk3 mingw32-gnutls mingw32-nettle mingw32-libtasn1 \
mingw32-libjpeg-turbo mingw32-libpng mingw32-curl mingw32-libssh2 \
diff --git a/tests/megasas-test.c b/tests/megasas-test.c
new file mode 100644
index 0000000000..ce960e7f81
--- /dev/null
+++ b/tests/megasas-test.c
@@ -0,0 +1,86 @@
+/*
+ * QTest testcase for LSI MegaRAID
+ *
+ * Copyright (c) 2017 Red Hat Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+#include "qemu/bswap.h"
+#include "libqos/libqos-pc.h"
+#include "libqos/libqos-spapr.h"
+
+static QOSState *qmegasas_start(const char *extra_opts)
+{
+ const char *arch = qtest_get_arch();
+ const char *cmd = "-drive id=hd0,if=none,file=null-co://,format=raw "
+ "-device megasas,id=scsi0,addr=04.0 "
+ "-device scsi-hd,bus=scsi0.0,drive=hd0 %s";
+
+ if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
+ return qtest_pc_boot(cmd, extra_opts ? : "");
+ }
+
+ g_printerr("virtio-scsi tests are only available on x86 or ppc64\n");
+ exit(EXIT_FAILURE);
+}
+
+static void qmegasas_stop(QOSState *qs)
+{
+ qtest_shutdown(qs);
+}
+
+/* Tests only initialization so far. TODO: Replace with functional tests */
+static void pci_nop(void)
+{
+ QOSState *qs;
+
+ qs = qmegasas_start(NULL);
+ qmegasas_stop(qs);
+}
+
+/* This used to cause a NULL pointer dereference. */
+static void megasas_pd_get_info_fuzz(void)
+{
+ QPCIDevice *dev;
+ QOSState *qs;
+ QPCIBar bar;
+ uint32_t context[256];
+ uint64_t context_pa;
+ int i;
+
+ qs = qmegasas_start(NULL);
+ dev = qpci_device_find(qs->pcibus, QPCI_DEVFN(4,0));
+ g_assert(dev != NULL);
+
+ qpci_device_enable(dev);
+ bar = qpci_iomap(dev, 0, NULL);
+
+ memset(context, 0, sizeof(context));
+ context[0] = cpu_to_le32(0x05050505);
+ context[1] = cpu_to_le32(0x01010101);
+ for (i = 2; i < ARRAY_SIZE(context); i++) {
+ context[i] = cpu_to_le32(0x41414141);
+ }
+ context[6] = cpu_to_le32(0x02020000);
+ context[7] = cpu_to_le32(0);
+
+ context_pa = qmalloc(qs, sizeof(context));
+ memwrite(context_pa, context, sizeof(context));
+ qpci_io_writel(dev, bar, 0x40, context_pa);
+
+ g_free(dev);
+ qmegasas_stop(qs);
+}
+
+int main(int argc, char **argv)
+{
+ g_test_init(&argc, &argv, NULL);
+ qtest_add_func("/megasas/pci/nop", pci_nop);
+ qtest_add_func("/megasas/dcmd/pd-get-info/fuzz", megasas_pd_get_info_fuzz);
+
+ return g_test_run();
+}
diff --git a/tests/q35-test.c b/tests/q35-test.c
index cc58f3ecf4..f98bed7a2d 100644
--- a/tests/q35-test.c
+++ b/tests/q35-test.c
@@ -15,6 +15,48 @@
#include "libqos/pci-pc.h"
#include "hw/pci-host/q35.h"
+#define TSEG_SIZE_TEST_GUEST_RAM_MBYTES 128
+
+/* @esmramc_tseg_sz: ESMRAMC.TSEG_SZ bitmask for selecting the requested TSEG
+ * size. Must be a subset of
+ * MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK.
+ *
+ * @extended_tseg_mbytes: Size of the extended TSEG. Only consulted if
+ * @esmramc_tseg_sz equals
+ * MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK precisely.
+ *
+ * @expected_tseg_mbytes: Expected guest-visible TSEG size in megabytes,
+ * matching @esmramc_tseg_sz and @extended_tseg_mbytes
+ * above.
+ */
+struct TsegSizeArgs {
+ uint8_t esmramc_tseg_sz;
+ uint16_t extended_tseg_mbytes;
+ uint16_t expected_tseg_mbytes;
+};
+typedef struct TsegSizeArgs TsegSizeArgs;
+
+static const TsegSizeArgs tseg_1mb = {
+ .esmramc_tseg_sz = MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_1MB,
+ .extended_tseg_mbytes = 0,
+ .expected_tseg_mbytes = 1,
+};
+static const TsegSizeArgs tseg_2mb = {
+ .esmramc_tseg_sz = MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_2MB,
+ .extended_tseg_mbytes = 0,
+ .expected_tseg_mbytes = 2,
+};
+static const TsegSizeArgs tseg_8mb = {
+ .esmramc_tseg_sz = MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_8MB,
+ .extended_tseg_mbytes = 0,
+ .expected_tseg_mbytes = 8,
+};
+static const TsegSizeArgs tseg_ext_16mb = {
+ .esmramc_tseg_sz = MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK,
+ .extended_tseg_mbytes = 16,
+ .expected_tseg_mbytes = 16,
+};
+
static void smram_set_bit(QPCIDevice *pcidev, uint8_t mask, bool enabled)
{
uint8_t smram;
@@ -42,6 +84,8 @@ static void test_smram_lock(void)
QPCIDevice *pcidev;
QDict *response;
+ qtest_start("-M q35");
+
pcibus = qpci_init_pc(NULL);
g_assert(pcibus != NULL);
@@ -74,19 +118,86 @@ static void test_smram_lock(void)
g_free(pcidev);
qpci_free_pc(pcibus);
+
+ qtest_end();
}
-int main(int argc, char **argv)
+static void test_tseg_size(const void *data)
{
- int ret;
+ const TsegSizeArgs *args = data;
+ char *cmdline;
+ QPCIBus *pcibus;
+ QPCIDevice *pcidev;
+ uint8_t smram_val;
+ uint8_t esmramc_val;
+ uint32_t ram_offs;
+
+ if (args->esmramc_tseg_sz == MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK) {
+ cmdline = g_strdup_printf("-M q35 -m %uM "
+ "-global mch.extended-tseg-mbytes=%u",
+ TSEG_SIZE_TEST_GUEST_RAM_MBYTES,
+ args->extended_tseg_mbytes);
+ } else {
+ cmdline = g_strdup_printf("-M q35 -m %uM",
+ TSEG_SIZE_TEST_GUEST_RAM_MBYTES);
+ }
+ qtest_start(cmdline);
+ g_free(cmdline);
- g_test_init(&argc, &argv, NULL);
+ /* locate the DRAM controller */
+ pcibus = qpci_init_pc(NULL);
+ g_assert(pcibus != NULL);
+ pcidev = qpci_device_find(pcibus, 0);
+ g_assert(pcidev != NULL);
- qtest_add_func("/q35/smram/lock", test_smram_lock);
+ /* Set TSEG size. Restrict TSEG visibility to SMM by setting T_EN. */
+ esmramc_val = qpci_config_readb(pcidev, MCH_HOST_BRIDGE_ESMRAMC);
+ esmramc_val &= ~MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK;
+ esmramc_val |= args->esmramc_tseg_sz;
+ esmramc_val |= MCH_HOST_BRIDGE_ESMRAMC_T_EN;
+ qpci_config_writeb(pcidev, MCH_HOST_BRIDGE_ESMRAMC, esmramc_val);
+
+ /* Enable TSEG by setting G_SMRAME. Close TSEG by setting D_CLS. */
+ smram_val = qpci_config_readb(pcidev, MCH_HOST_BRIDGE_SMRAM);
+ smram_val &= ~(MCH_HOST_BRIDGE_SMRAM_D_OPEN |
+ MCH_HOST_BRIDGE_SMRAM_D_LCK);
+ smram_val |= (MCH_HOST_BRIDGE_SMRAM_D_CLS |
+ MCH_HOST_BRIDGE_SMRAM_G_SMRAME);
+ qpci_config_writeb(pcidev, MCH_HOST_BRIDGE_SMRAM, smram_val);
+
+ /* lock TSEG */
+ smram_val |= MCH_HOST_BRIDGE_SMRAM_D_LCK;
+ qpci_config_writeb(pcidev, MCH_HOST_BRIDGE_SMRAM, smram_val);
+
+ /* Now check that the byte right before the TSEG is r/w, and that the first
+ * byte in the TSEG always reads as 0xff.
+ */
+ ram_offs = (TSEG_SIZE_TEST_GUEST_RAM_MBYTES - args->expected_tseg_mbytes) *
+ 1024 * 1024 - 1;
+ g_assert_cmpint(readb(ram_offs), ==, 0);
+ writeb(ram_offs, 1);
+ g_assert_cmpint(readb(ram_offs), ==, 1);
+
+ ram_offs++;
+ g_assert_cmpint(readb(ram_offs), ==, 0xff);
+ writeb(ram_offs, 1);
+ g_assert_cmpint(readb(ram_offs), ==, 0xff);
- qtest_start("-M q35");
- ret = g_test_run();
+ g_free(pcidev);
+ qpci_free_pc(pcibus);
qtest_end();
+}
+
+int main(int argc, char **argv)
+{
+ g_test_init(&argc, &argv, NULL);
+
+ qtest_add_func("/q35/smram/lock", test_smram_lock);
- return ret;
+ qtest_add_data_func("/q35/tseg-size/1mb", &tseg_1mb, test_tseg_size);
+ qtest_add_data_func("/q35/tseg-size/2mb", &tseg_2mb, test_tseg_size);
+ qtest_add_data_func("/q35/tseg-size/8mb", &tseg_8mb, test_tseg_size);
+ qtest_add_data_func("/q35/tseg-size/ext/16mb", &tseg_ext_16mb,
+ test_tseg_size);
+ return g_test_run();
}
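
The boundary check in test_tseg_size() relies on simple address arithmetic: with 128 MiB of guest RAM and, say, the 16 MiB extended TSEG, the last ordinary RAM byte lives at (128 - 16) MiB - 1, and the first TSEG byte right after it reads as 0xff once TSEG is enabled and locked. A small standalone sketch of that computation (macro names local to the sketch):

    /* Worked example of the offsets used above, for the 16 MiB extended
     * TSEG with 128 MiB of guest RAM. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define GUEST_RAM_MBYTES 128u
    #define TSEG_MBYTES      16u

    int main(void)
    {
        uint32_t last_ram = (GUEST_RAM_MBYTES - TSEG_MBYTES) * 1024 * 1024 - 1;
        uint32_t first_tseg = last_ram + 1;

        printf("last writable byte: 0x%08" PRIx32 "\n", last_ram);   /* 0x06ffffff */
        printf("first TSEG byte:    0x%08" PRIx32 "\n", first_tseg); /* 0x07000000 */
        return 0;
    }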
diff --git a/trace-events b/trace-events
index fd83087e39..bae63fdb1d 100644
--- a/trace-events
+++ b/trace-events
@@ -55,28 +55,6 @@ dma_complete(void *dbs, int ret, void *cb) "dbs=%p ret=%d cb=%p"
dma_blk_cb(void *dbs, int ret) "dbs=%p ret=%d"
dma_map_wait(void *dbs) "dbs=%p"
-# kvm-all.c
-kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
-kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"
-kvm_vcpu_ioctl(int cpu_index, int type, void *arg) "cpu_index %d, type 0x%x, arg %p"
-kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d"
-kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
-kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
-kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
-kvm_irqchip_commit_routes(void) ""
-kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
-kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
-kvm_irqchip_release_virq(int virq) "virq %d"
-
-# TCG related tracing (mostly disabled by default)
-# cpu-exec.c
-disable exec_tb(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
-disable exec_tb_nocache(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
-disable exec_tb_exit(void *last_tb, unsigned int flags) "tb:%p flags=%x"
-
-# translate-all.c
-translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p"
-
# memory.c
memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
diff --git a/ui/cocoa.m b/ui/cocoa.m
index 004ec2711c..1f010d3ae7 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -52,6 +52,8 @@
/* macOS 10.12 deprecated many constants, #define the new names for older SDKs */
#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_12
#define NSEventMaskAny NSAnyEventMask
+#define NSEventModifierFlagCapsLock NSAlphaShiftKeyMask
+#define NSEventModifierFlagShift NSShiftKeyMask
#define NSEventModifierFlagCommand NSCommandKeyMask
#define NSEventModifierFlagControl NSControlKeyMask
#define NSEventModifierFlagOption NSAlternateKeyMask
@@ -268,7 +270,7 @@ static void handleAnyDeviceErrors(Error * err)
NSWindow *fullScreenWindow;
float cx,cy,cw,ch,cdx,cdy;
CGDataProviderRef dataProviderRef;
- int modifiers_state[256];
+ BOOL modifiers_state[256];
BOOL isMouseGrabbed;
BOOL isFullscreen;
BOOL isAbsoluteEnabled;
@@ -536,18 +538,59 @@ QemuCocoaView *cocoaView;
}
}
+- (void) toggleModifier: (int)keycode {
+ // Toggle the stored state.
+ modifiers_state[keycode] = !modifiers_state[keycode];
+ // Send a keyup or keydown depending on the state.
+ qemu_input_event_send_key_qcode(dcl->con, keycode, modifiers_state[keycode]);
+}
+
+- (void) toggleStatefulModifier: (int)keycode {
+ // Toggle the stored state.
+ modifiers_state[keycode] = !modifiers_state[keycode];
+ // Generate keydown and keyup.
+ qemu_input_event_send_key_qcode(dcl->con, keycode, true);
+ qemu_input_event_send_key_qcode(dcl->con, keycode, false);
+}
+
- (void) handleEvent:(NSEvent *)event
{
COCOA_DEBUG("QemuCocoaView: handleEvent\n");
int buttons = 0;
- int keycode;
+ int keycode = 0;
bool mouse_event = false;
NSPoint p = [event locationInWindow];
switch ([event type]) {
case NSEventTypeFlagsChanged:
- keycode = cocoa_keycode_to_qemu([event keyCode]);
+ if ([event keyCode] == 0) {
+ // When the Cocoa keyCode is zero, that means keys should be
+ // synthesized based on the values in the eventModifiers
+ // bitmask.
+
+ if (qemu_console_is_graphic(NULL)) {
+ NSEventModifierFlags modifiers = [event modifierFlags];
+
+ if (!!(modifiers & NSEventModifierFlagCapsLock) != !!modifiers_state[Q_KEY_CODE_CAPS_LOCK]) {
+ [self toggleStatefulModifier:Q_KEY_CODE_CAPS_LOCK];
+ }
+ if (!!(modifiers & NSEventModifierFlagShift) != !!modifiers_state[Q_KEY_CODE_SHIFT]) {
+ [self toggleModifier:Q_KEY_CODE_SHIFT];
+ }
+ if (!!(modifiers & NSEventModifierFlagControl) != !!modifiers_state[Q_KEY_CODE_CTRL]) {
+ [self toggleModifier:Q_KEY_CODE_CTRL];
+ }
+ if (!!(modifiers & NSEventModifierFlagOption) != !!modifiers_state[Q_KEY_CODE_ALT]) {
+ [self toggleModifier:Q_KEY_CODE_ALT];
+ }
+ if (!!(modifiers & NSEventModifierFlagCommand) != !!modifiers_state[Q_KEY_CODE_META_L]) {
+ [self toggleModifier:Q_KEY_CODE_META_L];
+ }
+ }
+ } else {
+ keycode = cocoa_keycode_to_qemu([event keyCode]);
+ }
if ((keycode == Q_KEY_CODE_META_L || keycode == Q_KEY_CODE_META_R)
&& !isMouseGrabbed) {
@@ -559,16 +602,9 @@ QemuCocoaView *cocoaView;
// emulate caps lock and num lock keydown and keyup
if (keycode == Q_KEY_CODE_CAPS_LOCK ||
keycode == Q_KEY_CODE_NUM_LOCK) {
- qemu_input_event_send_key_qcode(dcl->con, keycode, true);
- qemu_input_event_send_key_qcode(dcl->con, keycode, false);
+ [self toggleStatefulModifier:keycode];
} else if (qemu_console_is_graphic(NULL)) {
- if (modifiers_state[keycode] == 0) { // keydown
- qemu_input_event_send_key_qcode(dcl->con, keycode, true);
- modifiers_state[keycode] = 1;
- } else { // keyup
- qemu_input_event_send_key_qcode(dcl->con, keycode, false);
- modifiers_state[keycode] = 0;
- }
+ [self toggleModifier:keycode];
}
}
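
The NSEventTypeFlagsChanged handling above reduces to one pattern: compare each modifier bit the host reports against the state last sent to the guest and emit a key event only on a mismatch, with Caps Lock and Num Lock getting a full press-and-release pair because the host exposes them as latched state rather than as key transitions. A generic sketch of that pattern in plain C; the names are made up:

    /* "Toggle on mismatch" sketch (hypothetical names, not the Cocoa code). */
    #include <stdbool.h>

    typedef void (*send_key_fn)(int qcode, bool down);

    /* Ordinary modifier: forward a keydown or keyup when host and guest
     * state disagree. */
    void sync_modifier(bool host_down, bool *sent_down,
                       int qcode, send_key_fn send)
    {
        if (host_down != *sent_down) {
            *sent_down = host_down;
            send(qcode, host_down);
        }
    }

    /* Latched modifier (Caps Lock style): emulate a full press/release so
     * the guest toggles its own latch. */
    void sync_latched_modifier(bool host_state, bool *sent_state,
                               int qcode, send_key_fn send)
    {
        if (host_state != *sent_state) {
            *sent_state = host_state;
            send(qcode, true);
            send(qcode, false);
        }
    }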
diff --git a/ui/spice-core.c b/ui/spice-core.c
index a087ad5da9..182f550f1f 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -847,6 +847,7 @@ void qemu_spice_init(void)
exit(1);
}
display_opengl = 1;
+ spice_opengl = 1;
}
#endif
}
diff --git a/ui/spice-display.c b/ui/spice-display.c
index b353445f58..042292cc90 100644
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -27,6 +27,7 @@
#include "ui/spice-display.h"
static int debug = 0;
+bool spice_opengl;
static void GCC_FMT_ATTR(2, 3) dprint(int level, const char *fmt, ...)
{
@@ -1013,7 +1014,7 @@ static void qemu_spice_display_init_one(QemuConsole *con)
ssd->dcl.ops = &display_listener_ops;
#ifdef HAVE_SPICE_GL
- if (display_opengl) {
+ if (spice_opengl) {
ssd->dcl.ops = &display_listener_gl_ops;
ssd->gl_unblock_bh = qemu_bh_new(qemu_spice_gl_unblock_bh, ssd);
ssd->gl_unblock_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
diff --git a/ui/spice-input.c b/ui/spice-input.c
index 86293dd2ce..918580239d 100644
--- a/ui/spice-input.c
+++ b/ui/spice-input.c
@@ -87,7 +87,7 @@ static void kbd_leds(void *opaque, int ledstate)
if (ledstate & QEMU_CAPS_LOCK_LED) {
kbd->ledstate |= SPICE_KEYBOARD_MODIFIER_FLAGS_CAPS_LOCK;
}
- spice_server_kbd_leds(&kbd->sin, ledstate);
+ spice_server_kbd_leds(&kbd->sin, kbd->ledstate);
}
/* mouse bits */
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 94d9477209..50a55ecc75 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -43,4 +43,5 @@ util-obj-y += log.o
util-obj-y += qdist.o
util-obj-y += qht.o
util-obj-y += range.o
+util-obj-y += stats64.o
util-obj-y += systemd.o
diff --git a/util/stats64.c b/util/stats64.c
new file mode 100644
index 0000000000..9968fcceac
--- /dev/null
+++ b/util/stats64.c
@@ -0,0 +1,137 @@
+/*
+ * Atomic operations on 64-bit quantities.
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/atomic.h"
+#include "qemu/stats64.h"
+#include "qemu/processor.h"
+
+#ifndef CONFIG_ATOMIC64
+static inline void stat64_rdlock(Stat64 *s)
+{
+ /* Keep out incoming writers to avoid them starving us. */
+ atomic_add(&s->lock, 2);
+
+ /* If there is a concurrent writer, wait for it. */
+ while (atomic_read(&s->lock) & 1) {
+ cpu_relax();
+ }
+}
+
+static inline void stat64_rdunlock(Stat64 *s)
+{
+ atomic_sub(&s->lock, 2);
+}
+
+static inline bool stat64_wrtrylock(Stat64 *s)
+{
+ return atomic_cmpxchg(&s->lock, 0, 1) == 0;
+}
+
+static inline void stat64_wrunlock(Stat64 *s)
+{
+ atomic_dec(&s->lock);
+}
+
+uint64_t stat64_get(const Stat64 *s)
+{
+ uint32_t high, low;
+
+ stat64_rdlock((Stat64 *)s);
+
+ /* 64-bit writes always take the lock, so we can read in
+ * any order.
+ */
+ high = atomic_read(&s->high);
+ low = atomic_read(&s->low);
+ stat64_rdunlock((Stat64 *)s);
+
+ return ((uint64_t)high << 32) | low;
+}
+
+bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high)
+{
+ uint32_t old;
+
+ if (!stat64_wrtrylock(s)) {
+ cpu_relax();
+ return false;
+ }
+
+ /* 64-bit reads always take the lock, so they don't care about the
+ * order of our update. By updating s->low first, we can check
+ * whether we have to carry into s->high.
+ */
+ old = atomic_fetch_add(&s->low, low);
+ high += (old + low) < old;
+ atomic_add(&s->high, high);
+ stat64_wrunlock(s);
+ return true;
+}
+
+bool stat64_min_slow(Stat64 *s, uint64_t value)
+{
+ uint32_t high, low;
+ uint64_t orig;
+
+ if (!stat64_wrtrylock(s)) {
+ cpu_relax();
+ return false;
+ }
+
+ high = atomic_read(&s->high);
+ low = atomic_read(&s->low);
+
+ orig = ((uint64_t)high << 32) | low;
+ if (orig < value) {
+ /* We have to set low before high, just like stat64_min reads
+ * high before low. The value may become higher temporarily, but
+ * stat64_get does not notice (it takes the lock) and the only ill
+ * effect on stat64_min is that the slow path may be triggered
+ * unnecessarily.
+ */
+ atomic_set(&s->low, (uint32_t)value);
+ smp_wmb();
+ atomic_set(&s->high, value >> 32);
+ }
+ stat64_wrunlock(s);
+ return true;
+}
+
+bool stat64_max_slow(Stat64 *s, uint64_t value)
+{
+ uint32_t high, low;
+ uint64_t orig;
+
+ if (!stat64_wrtrylock(s)) {
+ cpu_relax();
+ return false;
+ }
+
+ high = atomic_read(&s->high);
+ low = atomic_read(&s->low);
+
+ orig = ((uint64_t)high << 32) | low;
+ if (orig > value) {
+ /* We have to set low before high, just like stat64_max reads
+ * high before low. The value may become lower temporarily, but
+ * stat64_get does not notice (it takes the lock) and the only ill
+ * effect on stat64_max is that the slow path may be triggered
+ * unnecessarily.
+ */
+ atomic_set(&s->low, (uint32_t)value);
+ smp_wmb();
+ atomic_set(&s->high, value >> 32);
+ }
+ stat64_wrunlock(s);
+ return true;
+}
+#endif
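
On hosts without 64-bit atomics the counter is kept as separate 32-bit low/high words behind the small spinlock above (readers add 2 to the lock word, a writer claims bit 0 with cmpxchg). The interesting step in stat64_add32_carry() is carry detection: the carry into the high word happens exactly when the 32-bit addition wraps, i.e. when the new low word is smaller than the old one. A standalone model of that logic, without the lock or atomics:

    /* Carry-detection model only; no locking, no atomics. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t lo = 0xfffffff0u, hi = 0;
        uint32_t add = 0x20;
        uint32_t old = lo;

        lo = old + add;      /* wraps around 2^32 */
        hi += lo < old;      /* carry exactly when the addition wrapped */

        /* prints value = 0x0000000100000010 */
        printf("value = 0x%08" PRIx32 "%08" PRIx32 "\n", hi, lo);
        return 0;
    }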
diff --git a/vl.c b/vl.c
index 32db19e3b9..59fea15488 100644
--- a/vl.c
+++ b/vl.c
@@ -3757,21 +3757,18 @@ int main(int argc, char **argv, char **envp)
qdev_prop_register_global(&kvm_pit_lost_tick_policy);
break;
}
- case QEMU_OPTION_accel: {
- QemuOpts *accel_opts;
-
+ case QEMU_OPTION_accel:
accel_opts = qemu_opts_parse_noisily(qemu_find_opts("accel"),
optarg, true);
optarg = qemu_opt_get(accel_opts, "accel");
if (!optarg || is_help_option(optarg)) {
error_printf("Possible accelerators: kvm, xen, hax, tcg\n");
- exit(1);
+ exit(0);
}
- accel_opts = qemu_opts_create(qemu_find_opts("machine"), NULL,
- false, &error_abort);
- qemu_opt_set(accel_opts, "accel", optarg, &error_abort);
+ opts = qemu_opts_create(qemu_find_opts("machine"), NULL,
+ false, &error_abort);
+ qemu_opt_set(opts, "accel", optarg, &error_abort);
break;
- }
case QEMU_OPTION_usb:
olist = qemu_find_opts("machine");
qemu_opts_parse_noisily(olist, "usb=on", false);