diff options
109 files changed, 3242 insertions, 1212 deletions
diff --git a/.gitignore b/.gitignore index 55a001e3b8..09c2363acf 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ /qemu-version.h.tmp /module_block.h /vscclient +/vhost-user-scsi /fsdev/virtfs-proxy-helper *.[1-9] *.a @@ -99,14 +100,14 @@ /pc-bios/optionrom/kvmvapic.img /pc-bios/s390-ccw/s390-ccw.elf /pc-bios/s390-ccw/s390-ccw.img -/docs/qemu-ga-qapi.texi -/docs/qemu-ga-ref.html -/docs/qemu-ga-ref.info* -/docs/qemu-ga-ref.txt -/docs/qemu-qmp-qapi.texi -/docs/qemu-qmp-ref.html -/docs/qemu-qmp-ref.info* -/docs/qemu-qmp-ref.txt +/docs/interop/qemu-ga-qapi.texi +/docs/interop/qemu-ga-ref.html +/docs/interop/qemu-ga-ref.info* +/docs/interop/qemu-ga-ref.txt +/docs/interop/qemu-qmp-qapi.texi +/docs/interop/qemu-qmp-ref.html +/docs/interop/qemu-qmp-ref.info* +/docs/interop/qemu-qmp-ref.txt /docs/version.texi *.tps .stgit-* @@ -207,8 +207,8 @@ HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF) ifdef BUILD_DOCS DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8 -DOCS+=docs/qemu-qmp-ref.html docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.7 -DOCS+=docs/qemu-ga-ref.html docs/qemu-ga-ref.txt docs/qemu-ga-ref.7 +DOCS+=docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7 +DOCS+=docs/interop/qemu-ga-ref.html docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7 ifdef CONFIG_VIRTFS DOCS+=fsdev/virtfs-proxy-helper.1 endif @@ -269,6 +269,7 @@ dummy := $(call unnest-vars,, \ ivshmem-client-obj-y \ ivshmem-server-obj-y \ libvhost-user-obj-y \ + vhost-user-scsi-obj-y \ qga-vss-dll-obj-y \ block-obj-y \ block-obj-m \ @@ -473,6 +474,8 @@ ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) $(COMMON_LDADDS) $(call LINK, $^) ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) $(COMMON_LDADDS) $(call LINK, $^) +vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y) + $(call LINK, $^) module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak $(call quiet-command,$(PYTHON) $< $@ \ @@ -519,11 +522,12 @@ distclean: clean rm -f qemu-doc.vr qemu-doc.txt rm -f config.log rm -f linux-headers/asm - rm -f docs/qemu-ga-qapi.texi docs/qemu-qmp-qapi.texi docs/version.texi - rm -f docs/qemu-qmp-ref.7 docs/qemu-ga-ref.7 - rm -f docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt - rm -f docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf - rm -f docs/qemu-qmp-ref.html docs/qemu-ga-ref.html + rm -f docs/version.texi + rm -f docs/interop/qemu-ga-qapi.texi docs/interop/qemu-qmp-qapi.texi + rm -f docs/interop/qemu-qmp-ref.7 docs/interop/qemu-ga-ref.7 + rm -f docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt + rm -f docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf + rm -f docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html for d in $(TARGET_DIRS); do \ rm -rf $$d || exit 1 ; \ done @@ -562,13 +566,13 @@ install-doc: $(DOCS) $(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)" $(INSTALL_DATA) qemu-doc.html "$(DESTDIR)$(qemu_docdir)" $(INSTALL_DATA) qemu-doc.txt "$(DESTDIR)$(qemu_docdir)" - $(INSTALL_DATA) docs/qemu-qmp-ref.html "$(DESTDIR)$(qemu_docdir)" - $(INSTALL_DATA) docs/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)" + $(INSTALL_DATA) docs/interop/qemu-qmp-ref.html "$(DESTDIR)$(qemu_docdir)" + $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)" ifdef CONFIG_POSIX $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1" $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7" - $(INSTALL_DATA) docs/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" + $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" ifneq ($(TOOLS),) $(INSTALL_DATA) qemu-img.1 "$(DESTDIR)$(mandir)/man1" $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8" @@ -576,9 +580,9 @@ ifneq ($(TOOLS),) endif ifneq (,$(findstring qemu-ga,$(TOOLS))) $(INSTALL_DATA) qemu-ga.8 "$(DESTDIR)$(mandir)/man8" - $(INSTALL_DATA) docs/qemu-ga-ref.html "$(DESTDIR)$(qemu_docdir)" - $(INSTALL_DATA) docs/qemu-ga-ref.txt "$(DESTDIR)$(qemu_docdir)" - $(INSTALL_DATA) docs/qemu-ga-ref.7 "$(DESTDIR)$(mandir)/man7" + $(INSTALL_DATA) docs/interop/qemu-ga-ref.html "$(DESTDIR)$(qemu_docdir)" + $(INSTALL_DATA) docs/interop/qemu-ga-ref.txt "$(DESTDIR)$(qemu_docdir)" + $(INSTALL_DATA) docs/interop/qemu-ga-ref.7 "$(DESTDIR)$(mandir)/man7" endif endif ifdef CONFIG_VIRTFS @@ -666,28 +670,27 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \ # documentation MAKEINFO=makeinfo -MAKEINFOFLAGS=--no-split --number-sections -I docs -TEXIFLAG=$(if $(V),,--quiet) +MAKEINFOINCLUDES= -I docs -I $(<D) -I $(@D) +MAKEINFOFLAGS=--no-split --number-sections $(MAKEINFOINCLUDES) +TEXI2PODFLAGS=$(MAKEINFOINCLUDES) "-DVERSION=$(VERSION)" +TEXI2PDFFLAGS=$(if $(V),,--quiet) -I $(SRC_PATH) $(MAKEINFOINCLUDES) docs/version.texi: $(SRC_PATH)/VERSION $(call quiet-command,echo "@set VERSION $(VERSION)" > $@,"GEN","$@") -%.html: %.texi +%.html: %.texi docs/version.texi $(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \ --html $< -o $@,"GEN","$@") -%.info: %.texi +%.info: %.texi docs/version.texi $(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@") -%.txt: %.texi +%.txt: %.texi docs/version.texi $(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \ --plaintext $< -o $@,"GEN","$@") -%.pdf: %.texi - $(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I docs $< -o $@,"GEN","$@") - -docs/qemu-ga-ref.html docs/qemu-ga-ref.info docs/qemu-ga-ref.txt docs/qemu-ga-ref.pdf docs/qemu-ga-ref.7.pod: docs/version.texi -docs/qemu-qmp-ref.html docs/qemu-qmp-ref.info docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.pdf docs/qemu-qmp-ref.pod: docs/version.texi +%.pdf: %.texi docs/version.texi + $(call quiet-command,texi2pdf $(TEXI2PDFFLAGS) $< -o $@,"GEN","$@") qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@") @@ -701,12 +704,12 @@ qemu-monitor-info.texi: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxt qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@") -docs/qemu-qmp-qapi.texi docs/qemu-ga-qapi.texi: $(SRC_PATH)/scripts/qapi2texi.py $(qapi-py) +docs/interop/qemu-qmp-qapi.texi docs/interop/qemu-ga-qapi.texi: $(SRC_PATH)/scripts/qapi2texi.py $(qapi-py) -docs/qemu-qmp-qapi.texi: $(qapi-modules) +docs/interop/qemu-qmp-qapi.texi: $(qapi-modules) $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@") -docs/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json +docs/interop/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@") qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi @@ -716,21 +719,25 @@ fsdev/virtfs-proxy-helper.1: fsdev/virtfs-proxy-helper.texi qemu-nbd.8: qemu-nbd.texi qemu-option-trace.texi qemu-ga.8: qemu-ga.texi -html: qemu-doc.html docs/qemu-qmp-ref.html docs/qemu-ga-ref.html -info: qemu-doc.info docs/qemu-qmp-ref.info docs/qemu-ga-ref.info -pdf: qemu-doc.pdf docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf -txt: qemu-doc.txt docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt +html: qemu-doc.html docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html +info: qemu-doc.info docs/interop/qemu-qmp-ref.info docs/interop/qemu-ga-ref.info +pdf: qemu-doc.pdf docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf +txt: qemu-doc.txt docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \ qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \ qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \ qemu-monitor-info.texi -docs/qemu-ga-ref.dvi docs/qemu-ga-ref.html docs/qemu-ga-ref.info docs/qemu-ga-ref.pdf docs/qemu-ga-ref.txt docs/qemu-ga-ref.7: \ -docs/qemu-ga-ref.texi docs/qemu-ga-qapi.texi +docs/interop/qemu-ga-ref.dvi docs/interop/qemu-ga-ref.html \ + docs/interop/qemu-ga-ref.info docs/interop/qemu-ga-ref.pdf \ + docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7: \ + docs/interop/qemu-ga-ref.texi docs/interop/qemu-ga-qapi.texi -docs/qemu-qmp-ref.dvi docs/qemu-qmp-ref.html docs/qemu-qmp-ref.info docs/qemu-qmp-ref.pdf docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.7: \ -docs/qemu-qmp-ref.texi docs/qemu-qmp-qapi.texi +docs/interop/qemu-qmp-ref.dvi docs/interop/qemu-qmp-ref.html \ + docs/interop/qemu-qmp-ref.info docs/interop/qemu-qmp-ref.pdf \ + docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7: \ + docs/interop/qemu-qmp-ref.texi docs/interop/qemu-qmp-qapi.texi ifdef CONFIG_WIN32 @@ -791,9 +798,11 @@ endif # CONFIG_WIN # Add a dependency on the generated files, so that they are always # rebuilt before other object files +ifneq ($(wildcard config-host.mak),) ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail)) Makefile: $(GENERATED_FILES) endif +endif .SECONDARY: $(TRACE_HEADERS) $(TRACE_HEADERS:%=%-timestamp) \ $(TRACE_SOURCES) $(TRACE_SOURCES:%=%-timestamp) \ diff --git a/Makefile.objs b/Makefile.objs index 0575802440..b2e6322ef0 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -52,7 +52,6 @@ common-obj-y += migration/ common-obj-y += audio/ common-obj-y += hw/ -common-obj-y += accel.o common-obj-y += replay/ @@ -111,6 +110,10 @@ qga-vss-dll-obj-y = qga/ ivshmem-client-obj-y = contrib/ivshmem-client/ ivshmem-server-obj-y = contrib/ivshmem-server/ libvhost-user-obj-y = contrib/libvhost-user/ +vhost-user-scsi.o-cflags := $(LIBISCSI_CFLAGS) +vhost-user-scsi.o-libs := $(LIBISCSI_LIBS) +vhost-user-scsi-obj-y = contrib/vhost-user-scsi/ +vhost-user-scsi-obj-y += contrib/libvhost-user/libvhost-user.o ###################################################################### trace-events-subdirs = @@ -163,6 +166,8 @@ trace-events-subdirs += target/ppc trace-events-subdirs += qom trace-events-subdirs += linux-user trace-events-subdirs += qapi +trace-events-subdirs += accel/tcg +trace-events-subdirs += accel/kvm trace-events-files = $(SRC_PATH)/trace-events $(trace-events-subdirs:%=$(SRC_PATH)/%/trace-events) diff --git a/Makefile.target b/Makefile.target index ce8dfe44a8..0066579090 100644 --- a/Makefile.target +++ b/Makefile.target @@ -88,20 +88,17 @@ all: $(PROGS) stap ######################################################### # cpu emulator library -obj-y = exec.o translate-all.o cpu-exec.o -obj-y += translate-common.o -obj-y += cpu-exec-common.o +obj-y += exec.o +obj-y += accel/ obj-y += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o -obj-$(CONFIG_TCG_INTERPRETER) += tci.o -obj-y += tcg/tcg-common.o +obj-y += tcg/tcg-common.o tcg/tcg-runtime.o +obj-$(CONFIG_TCG_INTERPRETER) += tcg/tci.o obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o obj-y += fpu/softfloat.o obj-y += target/$(TARGET_BASE_ARCH)/ obj-y += disas.o -obj-y += tcg-runtime.o obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o obj-$(call lnot,$(CONFIG_HAX)) += hax-stub.o -obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decContext.o obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decNumber.o @@ -142,8 +139,7 @@ ifdef CONFIG_SOFTMMU obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o obj-y += qtest.o bootdevice.o obj-y += hw/ -obj-$(CONFIG_KVM) += kvm-all.o -obj-y += memory.o cputlb.o +obj-y += memory.o obj-y += memory_mapping.o obj-y += dump.o obj-y += migration/ram.o diff --git a/accel/Makefile.objs b/accel/Makefile.objs new file mode 100644 index 0000000000..cd5702f347 --- /dev/null +++ b/accel/Makefile.objs @@ -0,0 +1,4 @@ +obj-$(CONFIG_SOFTMMU) += accel.o +obj-y += kvm/ +obj-y += tcg/ +obj-y += stubs/ diff --git a/accel.c b/accel/accel.c index 664bb88422..7c079a5611 100644 --- a/accel.c +++ b/accel/accel.c @@ -34,15 +34,6 @@ #include "hw/xen/xen.h" #include "qom/object.h" -int tcg_tb_size; -static bool tcg_allowed = true; - -static int tcg_init(MachineState *ms) -{ - tcg_exec_init(tcg_tb_size * 1024 * 1024); - return 0; -} - static const TypeInfo accel_type = { .name = TYPE_ACCEL, .parent = TYPE_OBJECT, @@ -129,27 +120,9 @@ void configure_accelerator(MachineState *ms) } } - -static void tcg_accel_class_init(ObjectClass *oc, void *data) -{ - AccelClass *ac = ACCEL_CLASS(oc); - ac->name = "tcg"; - ac->init_machine = tcg_init; - ac->allowed = &tcg_allowed; -} - -#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg") - -static const TypeInfo tcg_accel_type = { - .name = TYPE_TCG_ACCEL, - .parent = TYPE_ACCEL, - .class_init = tcg_accel_class_init, -}; - static void register_accel_types(void) { type_register_static(&accel_type); - type_register_static(&tcg_accel_type); } type_init(register_accel_types); diff --git a/accel/kvm/Makefile.objs b/accel/kvm/Makefile.objs new file mode 100644 index 0000000000..85351e7de7 --- /dev/null +++ b/accel/kvm/Makefile.objs @@ -0,0 +1 @@ +obj-$(CONFIG_KVM) += kvm-all.o diff --git a/kvm-all.c b/accel/kvm/kvm-all.c index ab8262f672..75feffa504 100644 --- a/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -36,7 +36,7 @@ #include "exec/ram_addr.h" #include "exec/address-spaces.h" #include "qemu/event_notifier.h" -#include "trace-root.h" +#include "trace.h" #include "hw/irq.h" #include "hw/boards.h" @@ -1977,6 +1977,7 @@ int kvm_cpu_exec(CPUState *cpu) } qemu_mutex_unlock_iothread(); + cpu_exec_start(cpu); do { MemTxAttrs attrs; @@ -2106,6 +2107,7 @@ int kvm_cpu_exec(CPUState *cpu) } } while (ret == 0); + cpu_exec_end(cpu); qemu_mutex_lock_iothread(); if (ret < 0) { diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events new file mode 100644 index 0000000000..f89ba5578d --- /dev/null +++ b/accel/kvm/trace-events @@ -0,0 +1,15 @@ +# Trace events for debugging and performance instrumentation + +# kvm-all.c +kvm_ioctl(int type, void *arg) "type 0x%x, arg %p" +kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p" +kvm_vcpu_ioctl(int cpu_index, int type, void *arg) "cpu_index %d, type 0x%x, arg %p" +kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d" +kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p" +kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" +kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s" +kvm_irqchip_commit_routes(void) "" +kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d" +kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" +kvm_irqchip_release_virq(int virq) "virq %d" + diff --git a/accel/stubs/Makefile.objs b/accel/stubs/Makefile.objs new file mode 100644 index 0000000000..bd5794f222 --- /dev/null +++ b/accel/stubs/Makefile.objs @@ -0,0 +1 @@ +obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o diff --git a/kvm-stub.c b/accel/stubs/kvm-stub.c index ef0c7346af..ef0c7346af 100644 --- a/kvm-stub.c +++ b/accel/stubs/kvm-stub.c diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs new file mode 100644 index 0000000000..f173cd5397 --- /dev/null +++ b/accel/tcg/Makefile.objs @@ -0,0 +1,3 @@ +obj-$(CONFIG_SOFTMMU) += tcg-all.o +obj-$(CONFIG_SOFTMMU) += cputlb.o +obj-y += cpu-exec.o cpu-exec-common.o translate-all.o translate-common.o diff --git a/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c index e81da276bb..e81da276bb 100644 --- a/cpu-exec-common.c +++ b/accel/tcg/cpu-exec-common.c diff --git a/cpu-exec.c b/accel/tcg/cpu-exec.c index 5b181c18ed..3581618bc0 100644 --- a/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -18,7 +18,7 @@ */ #include "qemu/osdep.h" #include "cpu.h" -#include "trace-root.h" +#include "trace.h" #include "disas/disas.h" #include "exec/exec-all.h" #include "tcg.h" diff --git a/cputlb.c b/accel/tcg/cputlb.c index 743776ae19..743776ae19 100644 --- a/cputlb.c +++ b/accel/tcg/cputlb.c diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c new file mode 100644 index 0000000000..dba99315e3 --- /dev/null +++ b/accel/tcg/tcg-all.c @@ -0,0 +1,61 @@ +/* + * QEMU System Emulator, accelerator interfaces + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2014 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "sysemu/accel.h" +#include "sysemu/sysemu.h" +#include "qom/object.h" + +int tcg_tb_size; +static bool tcg_allowed = true; + +static int tcg_init(MachineState *ms) +{ + tcg_exec_init(tcg_tb_size * 1024 * 1024); + return 0; +} + +static void tcg_accel_class_init(ObjectClass *oc, void *data) +{ + AccelClass *ac = ACCEL_CLASS(oc); + ac->name = "tcg"; + ac->init_machine = tcg_init; + ac->allowed = &tcg_allowed; +} + +#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg") + +static const TypeInfo tcg_accel_type = { + .name = TYPE_TCG_ACCEL, + .parent = TYPE_ACCEL, + .class_init = tcg_accel_class_init, +}; + +static void register_accel_types(void) +{ + type_register_static(&tcg_accel_type); +} + +type_init(register_accel_types); diff --git a/accel/tcg/trace-events b/accel/tcg/trace-events new file mode 100644 index 0000000000..2de8359670 --- /dev/null +++ b/accel/tcg/trace-events @@ -0,0 +1,10 @@ +# Trace events for debugging and performance instrumentation + +# TCG related tracing (mostly disabled by default) +# cpu-exec.c +disable exec_tb(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR +disable exec_tb_nocache(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR +disable exec_tb_exit(void *last_tb, unsigned int flags) "tb:%p flags=%x" + +# translate-all.c +translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p" diff --git a/translate-all.c b/accel/tcg/translate-all.c index d4f364d6ff..f6ad46b613 100644 --- a/translate-all.c +++ b/accel/tcg/translate-all.c @@ -25,7 +25,7 @@ #include "qemu-common.h" #define NO_CPU_IO_DEFS #include "cpu.h" -#include "trace-root.h" +#include "trace.h" #include "disas/disas.h" #include "exec/exec-all.h" #include "tcg.h" diff --git a/translate-all.h b/accel/tcg/translate-all.h index ba8e4d63c4..ba8e4d63c4 100644 --- a/translate-all.h +++ b/accel/tcg/translate-all.h diff --git a/translate-common.c b/accel/tcg/translate-common.c index 40fe5a19bb..40fe5a19bb 100644 --- a/translate-common.c +++ b/accel/tcg/translate-common.c @@ -320,6 +320,8 @@ BlockDriverState *bdrv_new(void) QLIST_INIT(&bs->op_blockers[i]); } notifier_with_return_list_init(&bs->before_write_notifiers); + qemu_co_mutex_init(&bs->reqs_lock); + qemu_mutex_init(&bs->dirty_bitmap_mutex); bs->refcnt = 1; bs->aio_context = qemu_get_aio_context(); @@ -1300,7 +1302,9 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, goto fail_opts; } - assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */ + /* bdrv_new() and bdrv_close() make it so */ + assert(atomic_read(&bs->copy_on_read) == 0); + if (bs->open_flags & BDRV_O_COPY_ON_READ) { if (!bs->read_only) { bdrv_enable_copy_on_read(bs); @@ -3063,7 +3067,7 @@ static void bdrv_close(BlockDriverState *bs) g_free(bs->opaque); bs->opaque = NULL; - bs->copy_on_read = 0; + atomic_set(&bs->copy_on_read, 0); bs->backing_file[0] = '\0'; bs->backing_format[0] = '\0'; bs->total_sectors = 0; @@ -3422,7 +3426,7 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, Error **errp) ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); bdrv_dirty_bitmap_truncate(bs); bdrv_parent_cb_resize(bs); - ++bs->write_gen; + atomic_inc(&bs->write_gen); } return ret; } diff --git a/block/accounting.c b/block/accounting.c index 3f457c4e73..87ef5bbfaa 100644 --- a/block/accounting.c +++ b/block/accounting.c @@ -32,23 +32,28 @@ static QEMUClockType clock_type = QEMU_CLOCK_REALTIME; static const int qtest_latency_ns = NANOSECONDS_PER_SECOND / 1000; -void block_acct_init(BlockAcctStats *stats, bool account_invalid, - bool account_failed) +void block_acct_init(BlockAcctStats *stats) { - stats->account_invalid = account_invalid; - stats->account_failed = account_failed; - + qemu_mutex_init(&stats->lock); if (qtest_enabled()) { clock_type = QEMU_CLOCK_VIRTUAL; } } +void block_acct_setup(BlockAcctStats *stats, bool account_invalid, + bool account_failed) +{ + stats->account_invalid = account_invalid; + stats->account_failed = account_failed; +} + void block_acct_cleanup(BlockAcctStats *stats) { BlockAcctTimedStats *s, *next; QSLIST_FOREACH_SAFE(s, &stats->intervals, entries, next) { g_free(s); } + qemu_mutex_destroy(&stats->lock); } void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length) @@ -58,12 +63,15 @@ void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length) s = g_new0(BlockAcctTimedStats, 1); s->interval_length = interval_length; + s->stats = stats; + qemu_mutex_lock(&stats->lock); QSLIST_INSERT_HEAD(&stats->intervals, s, entries); for (i = 0; i < BLOCK_MAX_IOTYPE; i++) { timed_average_init(&s->latency[i], clock_type, (uint64_t) interval_length * NANOSECONDS_PER_SECOND); } + qemu_mutex_unlock(&stats->lock); } BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats, @@ -86,7 +94,8 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, cookie->type = type; } -void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie) +static void block_account_one_io(BlockAcctStats *stats, BlockAcctCookie *cookie, + bool failed) { BlockAcctTimedStats *s; int64_t time_ns = qemu_clock_get_ns(clock_type); @@ -98,31 +107,16 @@ void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie) assert(cookie->type < BLOCK_MAX_IOTYPE); - stats->nr_bytes[cookie->type] += cookie->bytes; - stats->nr_ops[cookie->type]++; - stats->total_time_ns[cookie->type] += latency_ns; - stats->last_access_time_ns = time_ns; + qemu_mutex_lock(&stats->lock); - QSLIST_FOREACH(s, &stats->intervals, entries) { - timed_average_account(&s->latency[cookie->type], latency_ns); + if (failed) { + stats->failed_ops[cookie->type]++; + } else { + stats->nr_bytes[cookie->type] += cookie->bytes; + stats->nr_ops[cookie->type]++; } -} - -void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie) -{ - assert(cookie->type < BLOCK_MAX_IOTYPE); - - stats->failed_ops[cookie->type]++; - - if (stats->account_failed) { - BlockAcctTimedStats *s; - int64_t time_ns = qemu_clock_get_ns(clock_type); - int64_t latency_ns = time_ns - cookie->start_time_ns; - - if (qtest_enabled()) { - latency_ns = qtest_latency_ns; - } + if (!failed || stats->account_failed) { stats->total_time_ns[cookie->type] += latency_ns; stats->last_access_time_ns = time_ns; @@ -130,29 +124,45 @@ void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie) timed_average_account(&s->latency[cookie->type], latency_ns); } } + + qemu_mutex_unlock(&stats->lock); +} + +void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie) +{ + block_account_one_io(stats, cookie, false); +} + +void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie) +{ + block_account_one_io(stats, cookie, true); } void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type) { assert(type < BLOCK_MAX_IOTYPE); - /* block_acct_done() and block_acct_failed() update - * total_time_ns[], but this one does not. The reason is that - * invalid requests are accounted during their submission, - * therefore there's no actual I/O involved. */ - + /* block_account_one_io() updates total_time_ns[], but this one does + * not. The reason is that invalid requests are accounted during their + * submission, therefore there's no actual I/O involved. + */ + qemu_mutex_lock(&stats->lock); stats->invalid_ops[type]++; if (stats->account_invalid) { stats->last_access_time_ns = qemu_clock_get_ns(clock_type); } + qemu_mutex_unlock(&stats->lock); } void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type, int num_requests) { assert(type < BLOCK_MAX_IOTYPE); + + qemu_mutex_lock(&stats->lock); stats->merged[type] += num_requests; + qemu_mutex_unlock(&stats->lock); } int64_t block_acct_idle_time_ns(BlockAcctStats *stats) @@ -167,7 +177,9 @@ double block_acct_queue_depth(BlockAcctTimedStats *stats, assert(type < BLOCK_MAX_IOTYPE); + qemu_mutex_lock(&stats->stats->lock); sum = timed_average_sum(&stats->latency[type], &elapsed); + qemu_mutex_unlock(&stats->stats->lock); return (double) sum / elapsed; } diff --git a/block/block-backend.c b/block/block-backend.c index 7d7f3697d1..a2bbae90b1 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -216,8 +216,10 @@ BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm) blk->shared_perm = shared_perm; blk_set_enable_write_cache(blk, true); + qemu_co_mutex_init(&blk->public.throttled_reqs_lock); qemu_co_queue_init(&blk->public.throttled_reqs[0]); qemu_co_queue_init(&blk->public.throttled_reqs[1]); + block_acct_init(&blk->stats); notifier_list_init(&blk->remove_bs_notifiers); notifier_list_init(&blk->insert_bs_notifiers); @@ -1953,7 +1955,7 @@ static void blk_root_drained_begin(BdrvChild *child) /* Note that blk->root may not be accessible here yet if we are just * attaching to a BlockDriverState that is drained. Use child instead. */ - if (blk->public.io_limits_disabled++ == 0) { + if (atomic_fetch_inc(&blk->public.io_limits_disabled) == 0) { throttle_group_restart_blk(blk); } } @@ -1964,7 +1966,7 @@ static void blk_root_drained_end(BdrvChild *child) assert(blk->quiesce_counter); assert(blk->public.io_limits_disabled); - --blk->public.io_limits_disabled; + atomic_dec(&blk->public.io_limits_disabled); if (--blk->quiesce_counter == 0) { if (blk->dev_ops && blk->dev_ops->drained_end) { diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 519737c8d3..a04c6e4154 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -37,6 +37,7 @@ * or enabled. A frozen bitmap can only abdicate() or reclaim(). */ struct BdrvDirtyBitmap { + QemuMutex *mutex; HBitmap *bitmap; /* Dirty sector bitmap implementation */ HBitmap *meta; /* Meta dirty bitmap */ BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ @@ -52,6 +53,27 @@ struct BdrvDirtyBitmapIter { BdrvDirtyBitmap *bitmap; }; +static inline void bdrv_dirty_bitmaps_lock(BlockDriverState *bs) +{ + qemu_mutex_lock(&bs->dirty_bitmap_mutex); +} + +static inline void bdrv_dirty_bitmaps_unlock(BlockDriverState *bs) +{ + qemu_mutex_unlock(&bs->dirty_bitmap_mutex); +} + +void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap) +{ + qemu_mutex_lock(bitmap->mutex); +} + +void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap) +{ + qemu_mutex_unlock(bitmap->mutex); +} + +/* Called with BQL or dirty_bitmap lock taken. */ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) { BdrvDirtyBitmap *bm; @@ -65,6 +87,7 @@ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) return NULL; } +/* Called with BQL taken. */ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap) { assert(!bdrv_dirty_bitmap_frozen(bitmap)); @@ -72,6 +95,7 @@ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap) bitmap->name = NULL; } +/* Called with BQL taken. */ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, uint32_t granularity, const char *name, @@ -96,11 +120,14 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, return NULL; } bitmap = g_new0(BdrvDirtyBitmap, 1); + bitmap->mutex = &bs->dirty_bitmap_mutex; bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); bitmap->size = bitmap_size; bitmap->name = g_strdup(name); bitmap->disabled = false; + bdrv_dirty_bitmaps_lock(bs); QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); + bdrv_dirty_bitmaps_unlock(bs); return bitmap; } @@ -119,20 +146,24 @@ void bdrv_create_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap, int chunk_size) { assert(!bitmap->meta); + qemu_mutex_lock(bitmap->mutex); bitmap->meta = hbitmap_create_meta(bitmap->bitmap, chunk_size * BITS_PER_BYTE); + qemu_mutex_unlock(bitmap->mutex); } void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap) { assert(bitmap->meta); + qemu_mutex_lock(bitmap->mutex); hbitmap_free_meta(bitmap->bitmap); bitmap->meta = NULL; + qemu_mutex_unlock(bitmap->mutex); } -int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, int64_t sector, - int nb_sectors) +int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, int64_t sector, + int nb_sectors) { uint64_t i; int sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta); @@ -147,11 +178,26 @@ int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs, return false; } +int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, int64_t sector, + int nb_sectors) +{ + bool dirty; + + qemu_mutex_lock(bitmap->mutex); + dirty = bdrv_dirty_bitmap_get_meta_locked(bs, bitmap, sector, nb_sectors); + qemu_mutex_unlock(bitmap->mutex); + + return dirty; +} + void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector, int nb_sectors) { + qemu_mutex_lock(bitmap->mutex); hbitmap_reset(bitmap->meta, sector, nb_sectors); + qemu_mutex_unlock(bitmap->mutex); } int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap) @@ -164,16 +210,19 @@ const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap) return bitmap->name; } +/* Called with BQL taken. */ bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) { return bitmap->successor; } +/* Called with BQL taken. */ bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap) { return !(bitmap->disabled || bitmap->successor); } +/* Called with BQL taken. */ DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap) { if (bdrv_dirty_bitmap_frozen(bitmap)) { @@ -188,6 +237,7 @@ DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap) /** * Create a successor bitmap destined to replace this bitmap after an operation. * Requires that the bitmap is not frozen and has no successor. + * Called with BQL taken. */ int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, Error **errp) @@ -220,6 +270,7 @@ int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, /** * For a bitmap with a successor, yield our name to the successor, * delete the old bitmap, and return a handle to the new bitmap. + * Called with BQL taken. */ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, @@ -247,6 +298,7 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, * In cases of failure where we can no longer safely delete the parent, * we may wish to re-join the parent and child/successor. * The merged parent will be un-frozen, but not explicitly re-enabled. + * Called with BQL taken. */ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *parent, @@ -271,25 +323,30 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, /** * Truncates _all_ bitmaps attached to a BDS. + * Called with BQL taken. */ void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) { BdrvDirtyBitmap *bitmap; uint64_t size = bdrv_nb_sectors(bs); + bdrv_dirty_bitmaps_lock(bs); QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { assert(!bdrv_dirty_bitmap_frozen(bitmap)); assert(!bitmap->active_iterators); hbitmap_truncate(bitmap->bitmap, size); bitmap->size = size; } + bdrv_dirty_bitmaps_unlock(bs); } +/* Called with BQL taken. */ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, bool only_named) { BdrvDirtyBitmap *bm, *next; + bdrv_dirty_bitmaps_lock(bs); QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) { assert(!bm->active_iterators); @@ -301,15 +358,19 @@ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs, g_free(bm); if (bitmap) { - return; + goto out; } } } if (bitmap) { abort(); } + +out: + bdrv_dirty_bitmaps_unlock(bs); } +/* Called with BQL taken. */ void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) { bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false); @@ -318,18 +379,21 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) /** * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()). * There must not be any frozen bitmaps attached. + * Called with BQL taken. */ void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs) { bdrv_do_release_matching_dirty_bitmap(bs, NULL, true); } +/* Called with BQL taken. */ void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap) { assert(!bdrv_dirty_bitmap_frozen(bitmap)); bitmap->disabled = true; } +/* Called with BQL taken. */ void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap) { assert(!bdrv_dirty_bitmap_frozen(bitmap)); @@ -342,6 +406,7 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) BlockDirtyInfoList *list = NULL; BlockDirtyInfoList **plist = &list; + bdrv_dirty_bitmaps_lock(bs); QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); @@ -354,12 +419,14 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) *plist = entry; plist = &entry->next; } + bdrv_dirty_bitmaps_unlock(bs); return list; } -int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, - int64_t sector) +/* Called within bdrv_dirty_bitmap_lock..unlock */ +int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t sector) { if (bitmap) { return hbitmap_get(bitmap->bitmap, sector); @@ -432,23 +499,42 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter) return hbitmap_iter_next(&iter->hbi); } +/* Called within bdrv_dirty_bitmap_lock..unlock */ +void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int64_t nr_sectors) +{ + assert(bdrv_dirty_bitmap_enabled(bitmap)); + hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); +} + void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors) { + bdrv_dirty_bitmap_lock(bitmap); + bdrv_set_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors); + bdrv_dirty_bitmap_unlock(bitmap); +} + +/* Called within bdrv_dirty_bitmap_lock..unlock */ +void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int64_t nr_sectors) +{ assert(bdrv_dirty_bitmap_enabled(bitmap)); - hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); + hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); } void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors) { - assert(bdrv_dirty_bitmap_enabled(bitmap)); - hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); + bdrv_dirty_bitmap_lock(bitmap); + bdrv_reset_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors); + bdrv_dirty_bitmap_unlock(bitmap); } void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out) { assert(bdrv_dirty_bitmap_enabled(bitmap)); + bdrv_dirty_bitmap_lock(bitmap); if (!out) { hbitmap_reset_all(bitmap->bitmap); } else { @@ -457,6 +543,7 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out) hbitmap_granularity(backup)); *out = backup; } + bdrv_dirty_bitmap_unlock(bitmap); } void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) @@ -508,12 +595,19 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int64_t nr_sectors) { BdrvDirtyBitmap *bitmap; + + if (QLIST_EMPTY(&bs->dirty_bitmaps)) { + return; + } + + bdrv_dirty_bitmaps_lock(bs); QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { if (!bdrv_dirty_bitmap_enabled(bitmap)) { continue; } hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); } + bdrv_dirty_bitmaps_unlock(bs); } /** diff --git a/block/io.c b/block/io.c index ed31810c0a..91611ffb2a 100644 --- a/block/io.c +++ b/block/io.c @@ -130,13 +130,13 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) */ void bdrv_enable_copy_on_read(BlockDriverState *bs) { - bs->copy_on_read++; + atomic_inc(&bs->copy_on_read); } void bdrv_disable_copy_on_read(BlockDriverState *bs) { - assert(bs->copy_on_read > 0); - bs->copy_on_read--; + int old = atomic_fetch_dec(&bs->copy_on_read); + assert(old >= 1); } /* Check if any requests are in-flight (including throttled requests) */ @@ -241,7 +241,7 @@ void bdrv_drained_begin(BlockDriverState *bs) return; } - if (!bs->quiesce_counter++) { + if (atomic_fetch_inc(&bs->quiesce_counter) == 0) { aio_disable_external(bdrv_get_aio_context(bs)); bdrv_parent_drained_begin(bs); } @@ -252,7 +252,7 @@ void bdrv_drained_begin(BlockDriverState *bs) void bdrv_drained_end(BlockDriverState *bs) { assert(bs->quiesce_counter > 0); - if (--bs->quiesce_counter > 0) { + if (atomic_fetch_dec(&bs->quiesce_counter) > 1) { return; } @@ -375,11 +375,13 @@ void bdrv_drain_all(void) static void tracked_request_end(BdrvTrackedRequest *req) { if (req->serialising) { - req->bs->serialising_in_flight--; + atomic_dec(&req->bs->serialising_in_flight); } + qemu_co_mutex_lock(&req->bs->reqs_lock); QLIST_REMOVE(req, list); qemu_co_queue_restart_all(&req->wait_queue); + qemu_co_mutex_unlock(&req->bs->reqs_lock); } /** @@ -404,7 +406,9 @@ static void tracked_request_begin(BdrvTrackedRequest *req, qemu_co_queue_init(&req->wait_queue); + qemu_co_mutex_lock(&bs->reqs_lock); QLIST_INSERT_HEAD(&bs->tracked_requests, req, list); + qemu_co_mutex_unlock(&bs->reqs_lock); } static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) @@ -414,7 +418,7 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) - overlap_offset; if (!req->serialising) { - req->bs->serialising_in_flight++; + atomic_inc(&req->bs->serialising_in_flight); req->serialising = true; } @@ -501,7 +505,8 @@ static void dummy_bh_cb(void *opaque) void bdrv_wakeup(BlockDriverState *bs) { - if (bs->wakeup) { + /* The barrier (or an atomic op) is in the caller. */ + if (atomic_read(&bs->wakeup)) { aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL); } } @@ -519,12 +524,13 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) bool retry; bool waited = false; - if (!bs->serialising_in_flight) { + if (!atomic_read(&bs->serialising_in_flight)) { return false; } do { retry = false; + qemu_co_mutex_lock(&bs->reqs_lock); QLIST_FOREACH(req, &bs->tracked_requests, list) { if (req == self || (!req->serialising && !self->serialising)) { continue; @@ -543,7 +549,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) * (instead of producing a deadlock in the former case). */ if (!req->waiting_for) { self->waiting_for = req; - qemu_co_queue_wait(&req->wait_queue, NULL); + qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock); self->waiting_for = NULL; retry = true; waited = true; @@ -551,6 +557,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) } } } + qemu_co_mutex_unlock(&bs->reqs_lock); } while (retry); return waited; @@ -1144,7 +1151,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, bdrv_inc_in_flight(bs); /* Don't do copy-on-read if we read data before write operation */ - if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) { + if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) { flags |= BDRV_REQ_COPY_ON_READ; } @@ -1401,12 +1408,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, } bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE); - ++bs->write_gen; + atomic_inc(&bs->write_gen); bdrv_set_dirty(bs, start_sector, end_sector - start_sector); - if (bs->wr_highest_offset < offset + bytes) { - bs->wr_highest_offset = offset + bytes; - } + stat64_max(&bs->wr_highest_offset, offset + bytes); if (ret >= 0) { bs->total_sectors = MAX(bs->total_sectors, end_sector); @@ -2292,14 +2297,17 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) goto early_exit; } - current_gen = bs->write_gen; + qemu_co_mutex_lock(&bs->reqs_lock); + current_gen = atomic_read(&bs->write_gen); /* Wait until any previous flushes are completed */ while (bs->active_flush_req) { - qemu_co_queue_wait(&bs->flush_queue, NULL); + qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock); } + /* Flushes reach this point in nondecreasing current_gen order. */ bs->active_flush_req = true; + qemu_co_mutex_unlock(&bs->reqs_lock); /* Write back all layers by calling one driver function */ if (bs->drv->bdrv_co_flush) { @@ -2371,9 +2379,12 @@ out: if (ret == 0) { bs->flushed_gen = current_gen; } + + qemu_co_mutex_lock(&bs->reqs_lock); bs->active_flush_req = false; /* Return value is ignored - it's ok if wait queue is empty */ qemu_co_queue_next(&bs->flush_queue); + qemu_co_mutex_unlock(&bs->reqs_lock); early_exit: bdrv_dec_in_flight(bs); @@ -2517,7 +2528,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, } ret = 0; out: - ++bs->write_gen; + atomic_inc(&bs->write_gen); bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS, req.bytes >> BDRV_SECTOR_BITS); tracked_request_end(&req); @@ -2644,7 +2655,7 @@ void bdrv_io_plug(BlockDriverState *bs) bdrv_io_plug(child->bs); } - if (bs->io_plugged++ == 0) { + if (atomic_fetch_inc(&bs->io_plugged) == 0) { BlockDriver *drv = bs->drv; if (drv && drv->bdrv_io_plug) { drv->bdrv_io_plug(bs); @@ -2657,7 +2668,7 @@ void bdrv_io_unplug(BlockDriverState *bs) BdrvChild *child; assert(bs->io_plugged); - if (--bs->io_plugged == 0) { + if (atomic_fetch_dec(&bs->io_plugged) == 1) { BlockDriver *drv = bs->drv; if (drv && drv->bdrv_io_unplug) { drv->bdrv_io_unplug(bs); diff --git a/block/iscsi.c b/block/iscsi.c index 5daa201181..b5f7a228b9 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -1732,6 +1732,10 @@ static QemuOptsList runtime_opts = { .name = "timeout", .type = QEMU_OPT_NUMBER, }, + { + .name = "filename", + .type = QEMU_OPT_STRING, + }, { /* end of list */ } }, }; @@ -1747,12 +1751,27 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, char *initiator_name = NULL; QemuOpts *opts; Error *local_err = NULL; - const char *transport_name, *portal, *target; + const char *transport_name, *portal, *target, *filename; #if LIBISCSI_API_VERSION >= (20160603) enum iscsi_transport_type transport; #endif int i, ret = 0, timeout = 0, lun; + /* If we are given a filename, parse the filename, with precedence given to + * filename encoded options */ + filename = qdict_get_try_str(options, "filename"); + if (filename) { + error_report("Warning: 'filename' option specified. " + "This is an unsupported option, and may be deprecated " + "in the future"); + iscsi_parse_filename(filename, options, &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto exit; + } + } + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); if (local_err) { @@ -1967,6 +1986,7 @@ out: } memset(iscsilun, 0, sizeof(IscsiLun)); } +exit: return ret; } diff --git a/block/mirror.c b/block/mirror.c index a2a970301c..19afcc6f1a 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -342,6 +342,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT, MAX_IO_SECTORS); + bdrv_dirty_bitmap_lock(s->dirty_bitmap); sector_num = bdrv_dirty_iter_next(s->dbi); if (sector_num < 0) { bdrv_set_dirty_iter(s->dbi, 0); @@ -349,6 +350,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap)); assert(sector_num >= 0); } + bdrv_dirty_bitmap_unlock(s->dirty_bitmap); first_chunk = sector_num / sectors_per_chunk; while (test_bit(first_chunk, s->in_flight_bitmap)) { @@ -360,12 +362,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) /* Find the number of consective dirty chunks following the first dirty * one, and wait for in flight requests in them. */ + bdrv_dirty_bitmap_lock(s->dirty_bitmap); while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) { int64_t next_dirty; int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk; int64_t next_chunk = next_sector / sectors_per_chunk; if (next_sector >= end || - !bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) { + !bdrv_get_dirty_locked(source, s->dirty_bitmap, next_sector)) { break; } if (test_bit(next_chunk, s->in_flight_bitmap)) { @@ -386,8 +389,10 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) * calling bdrv_get_block_status_above could yield - if some blocks are * marked dirty in this window, we need to know. */ - bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, - nb_chunks * sectors_per_chunk); + bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, sector_num, + nb_chunks * sectors_per_chunk); + bdrv_dirty_bitmap_unlock(s->dirty_bitmap); + bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks); while (nb_chunks > 0 && sector_num < end) { int64_t ret; @@ -506,6 +511,8 @@ static void mirror_exit(BlockJob *job, void *opaque) BlockDriverState *mirror_top_bs = s->mirror_top_bs; Error *local_err = NULL; + bdrv_release_dirty_bitmap(src, s->dirty_bitmap); + /* Make sure that the source BDS doesn't go away before we called * block_job_completed(). */ bdrv_ref(src); @@ -904,7 +911,6 @@ immediate_exit: g_free(s->cow_bitmap); g_free(s->in_flight_bitmap); bdrv_dirty_iter_free(s->dbi); - bdrv_release_dirty_bitmap(bs, s->dirty_bitmap); data = g_malloc(sizeof(*data)); data->ret = ret; diff --git a/block/nbd-client.c b/block/nbd-client.c index 87d19c7253..d64e775385 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -144,8 +144,8 @@ static int nbd_co_send_request(BlockDriverState *bs, qio_channel_set_cork(s->ioc, true); rc = nbd_send_request(s->ioc, request); if (rc >= 0) { - ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len, - false, NULL); + ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, false, + NULL); if (ret != request->len) { rc = -EIO; } @@ -173,8 +173,8 @@ static void nbd_co_receive_reply(NBDClientSession *s, reply->error = EIO; } else { if (qiov && reply->error == 0) { - ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len, - true, NULL); + ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, true, + NULL); if (ret != request->len) { reply->error = EIO; } diff --git a/block/nfs.c b/block/nfs.c index 848b2c0bb0..18c87d2f25 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -730,7 +730,9 @@ nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data, if (task->ret < 0) { error_report("NFS Error: %s", nfs_get_error(nfs)); } - task->complete = 1; + + /* Set task->complete before reading bs->wakeup. */ + atomic_mb_set(&task->complete, 1); bdrv_wakeup(task->bs); } diff --git a/block/qapi.c b/block/qapi.c index a40922ea26..14b60ae66c 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -441,7 +441,7 @@ static BlockStats *bdrv_query_bds_stats(const BlockDriverState *bs, s->node_name = g_strdup(bdrv_get_node_name(bs)); } - s->stats->wr_highest_offset = bs->wr_highest_offset; + s->stats->wr_highest_offset = stat64_get(&bs->wr_highest_offset); if (bs->file) { s->has_parent = true; diff --git a/block/rbd.c b/block/rbd.c index e551639e47..ff44e5f437 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -340,6 +340,10 @@ static QemuOptsList runtime_opts = { .type = QEMU_OPT_STRING, .help = "Legacy rados key/value option parameters", }, + { + .name = "filename", + .type = QEMU_OPT_STRING, + }, { /* end of list */ } }, }; @@ -541,12 +545,27 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, { BDRVRBDState *s = bs->opaque; const char *pool, *snap, *conf, *user, *image_name, *keypairs; - const char *secretid; + const char *secretid, *filename; QemuOpts *opts; Error *local_err = NULL; char *mon_host = NULL; int r; + /* If we are given a filename, parse the filename, with precedence given to + * filename encoded options */ + filename = qdict_get_try_str(options, "filename"); + if (filename) { + error_report("Warning: 'filename' option specified. " + "This is an unsupported option, and may be deprecated " + "in the future"); + qemu_rbd_parse_filename(filename, options, &local_err); + if (local_err) { + r = -EINVAL; + error_propagate(errp, local_err); + goto exit; + } + } + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); if (local_err) { @@ -665,6 +684,7 @@ failed_shutdown: failed_opts: qemu_opts_del(opts); g_free(mon_host); +exit: return r; } diff --git a/block/sheepdog.c b/block/sheepdog.c index a18315a1ca..5ebf5d9fbb 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -698,7 +698,8 @@ out: srco->co = NULL; srco->ret = ret; - srco->finished = true; + /* Set srco->finished before reading bs->wakeup. */ + atomic_mb_set(&srco->finished, true); if (srco->bs) { bdrv_wakeup(srco->bs); } diff --git a/block/throttle-groups.c b/block/throttle-groups.c index b73e7a800b..a181cb1dee 100644 --- a/block/throttle-groups.c +++ b/block/throttle-groups.c @@ -240,7 +240,7 @@ static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write) ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); bool must_wait; - if (blkp->io_limits_disabled) { + if (atomic_read(&blkp->io_limits_disabled)) { return false; } @@ -260,6 +260,25 @@ static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write) return must_wait; } +/* Start the next pending I/O request for a BlockBackend. Return whether + * any request was actually pending. + * + * @blk: the current BlockBackend + * @is_write: the type of operation (read/write) + */ +static bool coroutine_fn throttle_group_co_restart_queue(BlockBackend *blk, + bool is_write) +{ + BlockBackendPublic *blkp = blk_get_public(blk); + bool ret; + + qemu_co_mutex_lock(&blkp->throttled_reqs_lock); + ret = qemu_co_queue_next(&blkp->throttled_reqs[is_write]); + qemu_co_mutex_unlock(&blkp->throttled_reqs_lock); + + return ret; +} + /* Look for the next pending I/O request and schedule it. * * This assumes that tg->lock is held. @@ -287,12 +306,12 @@ static void schedule_next_request(BlockBackend *blk, bool is_write) if (!must_wait) { /* Give preference to requests from the current blk */ if (qemu_in_coroutine() && - qemu_co_queue_next(&blkp->throttled_reqs[is_write])) { + throttle_group_co_restart_queue(blk, is_write)) { token = blk; } else { ThrottleTimers *tt = &blk_get_public(token)->throttle_timers; int64_t now = qemu_clock_get_ns(tt->clock_type); - timer_mod(tt->timers[is_write], now + 1); + timer_mod(tt->timers[is_write], now); tg->any_timer_armed[is_write] = true; } tg->tokens[is_write] = token; @@ -326,7 +345,10 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk, if (must_wait || blkp->pending_reqs[is_write]) { blkp->pending_reqs[is_write]++; qemu_mutex_unlock(&tg->lock); - qemu_co_queue_wait(&blkp->throttled_reqs[is_write], NULL); + qemu_co_mutex_lock(&blkp->throttled_reqs_lock); + qemu_co_queue_wait(&blkp->throttled_reqs[is_write], + &blkp->throttled_reqs_lock); + qemu_co_mutex_unlock(&blkp->throttled_reqs_lock); qemu_mutex_lock(&tg->lock); blkp->pending_reqs[is_write]--; } @@ -340,15 +362,50 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk, qemu_mutex_unlock(&tg->lock); } +typedef struct { + BlockBackend *blk; + bool is_write; +} RestartData; + +static void coroutine_fn throttle_group_restart_queue_entry(void *opaque) +{ + RestartData *data = opaque; + BlockBackend *blk = data->blk; + bool is_write = data->is_write; + BlockBackendPublic *blkp = blk_get_public(blk); + ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts); + bool empty_queue; + + empty_queue = !throttle_group_co_restart_queue(blk, is_write); + + /* If the request queue was empty then we have to take care of + * scheduling the next one */ + if (empty_queue) { + qemu_mutex_lock(&tg->lock); + schedule_next_request(blk, is_write); + qemu_mutex_unlock(&tg->lock); + } +} + +static void throttle_group_restart_queue(BlockBackend *blk, bool is_write) +{ + Coroutine *co; + RestartData rd = { + .blk = blk, + .is_write = is_write + }; + + co = qemu_coroutine_create(throttle_group_restart_queue_entry, &rd); + aio_co_enter(blk_get_aio_context(blk), co); +} + void throttle_group_restart_blk(BlockBackend *blk) { BlockBackendPublic *blkp = blk_get_public(blk); - int i; - for (i = 0; i < 2; i++) { - while (qemu_co_enter_next(&blkp->throttled_reqs[i])) { - ; - } + if (blkp->throttle_state) { + throttle_group_restart_queue(blk, 0); + throttle_group_restart_queue(blk, 1); } } @@ -376,8 +433,7 @@ void throttle_group_config(BlockBackend *blk, ThrottleConfig *cfg) throttle_config(ts, tt, cfg); qemu_mutex_unlock(&tg->lock); - qemu_co_enter_next(&blkp->throttled_reqs[0]); - qemu_co_enter_next(&blkp->throttled_reqs[1]); + throttle_group_restart_blk(blk); } /* Get the throttle configuration from a particular group. Similar to @@ -408,7 +464,6 @@ static void timer_cb(BlockBackend *blk, bool is_write) BlockBackendPublic *blkp = blk_get_public(blk); ThrottleState *ts = blkp->throttle_state; ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); - bool empty_queue; /* The timer has just been fired, so we can update the flag */ qemu_mutex_lock(&tg->lock); @@ -416,17 +471,7 @@ static void timer_cb(BlockBackend *blk, bool is_write) qemu_mutex_unlock(&tg->lock); /* Run the request that was waiting for this timer */ - aio_context_acquire(blk_get_aio_context(blk)); - empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]); - aio_context_release(blk_get_aio_context(blk)); - - /* If the request queue was empty then we have to take care of - * scheduling the next one */ - if (empty_queue) { - qemu_mutex_lock(&tg->lock); - schedule_next_request(blk, is_write); - qemu_mutex_unlock(&tg->lock); - } + throttle_group_restart_queue(blk, is_write); } static void read_timer_cb(void *opaque) diff --git a/blockdev-nbd.c b/blockdev-nbd.c index dd0860f4a6..28f551a7b0 100644 --- a/blockdev-nbd.c +++ b/blockdev-nbd.c @@ -27,6 +27,10 @@ typedef struct NBDServerData { static NBDServerData *nbd_server; +static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) +{ + nbd_client_put(client); +} static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition, gpointer opaque) @@ -46,7 +50,7 @@ static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition, qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server"); nbd_client_new(NULL, cioc, nbd_server->tlscreds, NULL, - nbd_client_put); + nbd_blockdev_client_closed); object_unref(OBJECT(cioc)); return TRUE; } diff --git a/blockdev.c b/blockdev.c index 6472548186..39d6b9712b 100644 --- a/blockdev.c +++ b/blockdev.c @@ -595,7 +595,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, autostart = 0; } - block_acct_init(blk_get_stats(blk), account_invalid, account_failed); + block_acct_setup(blk_get_stats(blk), account_invalid, account_failed); if (!parse_stats_intervals(blk_get_stats(blk), interval_list, errp)) { blk_unref(blk); @@ -1362,12 +1362,10 @@ out_aio_context: static BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, const char *name, BlockDriverState **pbs, - AioContext **paio, Error **errp) { BlockDriverState *bs; BdrvDirtyBitmap *bitmap; - AioContext *aio_context; if (!node) { error_setg(errp, "Node cannot be NULL"); @@ -1383,29 +1381,17 @@ static BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, return NULL; } - aio_context = bdrv_get_aio_context(bs); - aio_context_acquire(aio_context); - bitmap = bdrv_find_dirty_bitmap(bs, name); if (!bitmap) { error_setg(errp, "Dirty bitmap '%s' not found", name); - goto fail; + return NULL; } if (pbs) { *pbs = bs; } - if (paio) { - *paio = aio_context; - } else { - aio_context_release(aio_context); - } return bitmap; - - fail: - aio_context_release(aio_context); - return NULL; } /* New and old BlockDriverState structs for atomic group operations */ @@ -1791,7 +1777,7 @@ static void external_snapshot_commit(BlkActionState *common) /* We don't need (or want) to use the transactional * bdrv_reopen_multiple() across all the entries at once, because we * don't want to abort all of them if one of them fails the reopen */ - if (!state->old_bs->copy_on_read) { + if (!atomic_read(&state->old_bs->copy_on_read)) { bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR, NULL); } @@ -2025,7 +2011,6 @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common, state->bitmap = block_dirty_bitmap_lookup(action->node, action->name, &state->bs, - &state->aio_context, errp); if (!state->bitmap) { return; @@ -2733,7 +2718,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, bool has_granularity, uint32_t granularity, Error **errp) { - AioContext *aio_context; BlockDriverState *bs; if (!name || name[0] == '\0') { @@ -2746,14 +2730,11 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, return; } - aio_context = bdrv_get_aio_context(bs); - aio_context_acquire(aio_context); - if (has_granularity) { if (granularity < 512 || !is_power_of_2(granularity)) { error_setg(errp, "Granularity must be power of 2 " "and at least 512"); - goto out; + return; } } else { /* Default to cluster size, if available: */ @@ -2761,19 +2742,15 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, } bdrv_create_dirty_bitmap(bs, granularity, name, errp); - - out: - aio_context_release(aio_context); } void qmp_block_dirty_bitmap_remove(const char *node, const char *name, Error **errp) { - AioContext *aio_context; BlockDriverState *bs; BdrvDirtyBitmap *bitmap; - bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp); + bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); if (!bitmap || !bs) { return; } @@ -2782,13 +2759,10 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name, error_setg(errp, "Bitmap '%s' is currently frozen and cannot be removed", name); - goto out; + return; } bdrv_dirty_bitmap_make_anon(bitmap); bdrv_release_dirty_bitmap(bs, bitmap); - - out: - aio_context_release(aio_context); } /** @@ -2798,11 +2772,10 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name, void qmp_block_dirty_bitmap_clear(const char *node, const char *name, Error **errp) { - AioContext *aio_context; BdrvDirtyBitmap *bitmap; BlockDriverState *bs; - bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp); + bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); if (!bitmap || !bs) { return; } @@ -2811,18 +2784,15 @@ void qmp_block_dirty_bitmap_clear(const char *node, const char *name, error_setg(errp, "Bitmap '%s' is currently frozen and cannot be modified", name); - goto out; + return; } else if (!bdrv_dirty_bitmap_enabled(bitmap)) { error_setg(errp, "Bitmap '%s' is currently disabled and cannot be cleared", name); - goto out; + return; } bdrv_clear_dirty_bitmap(bitmap, NULL); - - out: - aio_context_release(aio_context); } void hmp_drive_del(Monitor *mon, const QDict *qdict) @@ -407,7 +407,7 @@ QEMU_CFLAGS="-fno-strict-aliasing -fno-common -fwrapv $QEMU_CFLAGS" QEMU_CFLAGS="-Wall -Wundef -Wwrite-strings -Wmissing-prototypes $QEMU_CFLAGS" QEMU_CFLAGS="-Wstrict-prototypes -Wredundant-decls $QEMU_CFLAGS" QEMU_CFLAGS="-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE $QEMU_CFLAGS" -QEMU_INCLUDES="-I. -I\$(SRC_PATH) -I\$(SRC_PATH)/include" +QEMU_INCLUDES="-I. -I\$(SRC_PATH) -I\$(SRC_PATH)/accel/tcg -I\$(SRC_PATH)/include" if test "$debug_info" = "yes"; then CFLAGS="-g $CFLAGS" LDFLAGS="-g $LDFLAGS" @@ -2301,14 +2301,14 @@ fi # GTK probe if test "$gtkabi" = ""; then - # The GTK ABI was not specified explicitly, so try whether 2.0 is available. - # Use 3.0 as a fallback if that is available. - if $pkg_config --exists "gtk+-2.0 >= 2.18.0"; then - gtkabi=2.0 - elif $pkg_config --exists "gtk+-3.0 >= 3.0.0"; then + # The GTK ABI was not specified explicitly, so try whether 3.0 is available. + # Use 2.0 as a fallback if that is available. + if $pkg_config --exists "gtk+-3.0 >= 3.0.0"; then gtkabi=3.0 - else + elif $pkg_config --exists "gtk+-2.0 >= 2.18.0"; then gtkabi=2.0 + else + gtkabi=3.0 fi fi @@ -2331,7 +2331,7 @@ if test "$gtk" != "no"; then libs_softmmu="$gtk_libs $libs_softmmu" gtk="yes" elif test "$gtk" = "yes"; then - feature_not_found "gtk" "Install gtk2 or gtk3 devel" + feature_not_found "gtk" "Install gtk3-devel" else gtk="no" fi @@ -2598,12 +2598,12 @@ fi # sdl-config even without cross prefix, and favour pkg-config over sdl-config. if test "$sdlabi" = ""; then - if $pkg_config --exists "sdl"; then - sdlabi=1.2 - elif $pkg_config --exists "sdl2"; then + if $pkg_config --exists "sdl2"; then sdlabi=2.0 - else + elif $pkg_config --exists "sdl"; then sdlabi=1.2 + else + sdlabi=2.0 fi fi @@ -2630,7 +2630,7 @@ elif has ${sdl_config}; then sdlversion=$($sdlconfig --version) else if test "$sdl" = "yes" ; then - feature_not_found "sdl" "Install SDL devel" + feature_not_found "sdl" "Install SDL2-devel" fi sdl=no fi @@ -6374,7 +6374,7 @@ fi # build tree in object directory in case the source is not in the current directory DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests" -DIRS="$DIRS docs fsdev" +DIRS="$DIRS docs docs/interop fsdev" DIRS="$DIRS pc-bios/optionrom pc-bios/spapr-rtas pc-bios/s390-ccw" DIRS="$DIRS roms/seabios roms/vgabios" DIRS="$DIRS qapi-generated" diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h index af02a31ebe..53ef222c0b 100644 --- a/contrib/libvhost-user/libvhost-user.h +++ b/contrib/libvhost-user/libvhost-user.h @@ -17,6 +17,7 @@ #include <stdint.h> #include <stdbool.h> #include <stddef.h> +#include <sys/poll.h> #include <linux/vhost.h> #include "standard-headers/linux/virtio_ring.h" @@ -192,11 +193,11 @@ typedef struct VuVirtq { } VuVirtq; enum VuWatchCondtion { - VU_WATCH_IN = 1 << 0, - VU_WATCH_OUT = 1 << 1, - VU_WATCH_PRI = 1 << 2, - VU_WATCH_ERR = 1 << 3, - VU_WATCH_HUP = 1 << 4, + VU_WATCH_IN = POLLIN, + VU_WATCH_OUT = POLLOUT, + VU_WATCH_PRI = POLLPRI, + VU_WATCH_ERR = POLLERR, + VU_WATCH_HUP = POLLHUP, }; typedef void (*vu_panic_cb) (VuDev *dev, const char *err); diff --git a/contrib/vhost-user-scsi/Makefile.objs b/contrib/vhost-user-scsi/Makefile.objs new file mode 100644 index 0000000000..e83a38a85b --- /dev/null +++ b/contrib/vhost-user-scsi/Makefile.objs @@ -0,0 +1 @@ +vhost-user-scsi-obj-y = vhost-user-scsi.o diff --git a/contrib/vhost-user-scsi/vhost-user-scsi.c b/contrib/vhost-user-scsi/vhost-user-scsi.c new file mode 100644 index 0000000000..b5ae02c96f --- /dev/null +++ b/contrib/vhost-user-scsi/vhost-user-scsi.c @@ -0,0 +1,886 @@ +/* + * vhost-user-scsi sample application + * + * Copyright (c) 2016 Nutanix Inc. All rights reserved. + * + * Author: + * Felipe Franciosi <felipe@nutanix.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 only. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "contrib/libvhost-user/libvhost-user.h" +#include "hw/virtio/virtio-scsi.h" +#include "iscsi/iscsi.h" + +#include <glib.h> + +/* Small compat shim from glib 2.32 */ +#ifndef G_SOURCE_CONTINUE +#define G_SOURCE_CONTINUE TRUE +#endif +#ifndef G_SOURCE_REMOVE +#define G_SOURCE_REMOVE FALSE +#endif + +/* #define VUS_DEBUG 1 */ + +/** Log helpers **/ + +#define PPRE \ + struct timespec ts; \ + char timebuf[64]; \ + struct tm tm; \ + (void)clock_gettime(CLOCK_REALTIME, &ts); \ + (void)strftime(timebuf, 64, "%Y%m%d %T", gmtime_r(&ts.tv_sec, &tm)) + +#define PEXT(lvl, msg, ...) do { \ + PPRE; \ + fprintf(stderr, "%s.%06ld " lvl ": %s:%s():%d: " msg "\n", \ + timebuf, ts.tv_nsec / 1000, \ + __FILE__, __func__, __LINE__, ## __VA_ARGS__); \ +} while (0) + +#define PNOR(lvl, msg, ...) do { \ + PPRE; \ + fprintf(stderr, "%s.%06ld " lvl ": " msg "\n", \ + timebuf, ts.tv_nsec / 1000, ## __VA_ARGS__); \ +} while (0) + +#ifdef VUS_DEBUG +#define PDBG(msg, ...) PEXT("DBG", msg, ## __VA_ARGS__) +#define PERR(msg, ...) PEXT("ERR", msg, ## __VA_ARGS__) +#define PLOG(msg, ...) PEXT("LOG", msg, ## __VA_ARGS__) +#else +#define PDBG(msg, ...) { } +#define PERR(msg, ...) PNOR("ERR", msg, ## __VA_ARGS__) +#define PLOG(msg, ...) PNOR("LOG", msg, ## __VA_ARGS__) +#endif + +/** vhost-user-scsi specific definitions **/ + + /* Only 1 LUN and device supported today */ +#define VUS_MAX_LUNS 1 +#define VUS_MAX_DEVS 1 + +#define VUS_ISCSI_INITIATOR "iqn.2016-11.com.nutanix:vhost-user-scsi" + +typedef struct iscsi_lun { + struct iscsi_context *iscsi_ctx; + int iscsi_lun; +} iscsi_lun_t; + +typedef struct vhost_scsi_dev { + VuDev vu_dev; + int server_sock; + GMainLoop *loop; + GTree *fdmap; /* fd -> gsource context id */ + iscsi_lun_t luns[VUS_MAX_LUNS]; +} vhost_scsi_dev_t; + +static vhost_scsi_dev_t *vhost_scsi_devs[VUS_MAX_DEVS]; + +/** glib event loop integration for libvhost-user and misc callbacks **/ + +QEMU_BUILD_BUG_ON((int)G_IO_IN != (int)VU_WATCH_IN); +QEMU_BUILD_BUG_ON((int)G_IO_OUT != (int)VU_WATCH_OUT); +QEMU_BUILD_BUG_ON((int)G_IO_PRI != (int)VU_WATCH_PRI); +QEMU_BUILD_BUG_ON((int)G_IO_ERR != (int)VU_WATCH_ERR); +QEMU_BUILD_BUG_ON((int)G_IO_HUP != (int)VU_WATCH_HUP); + +typedef struct vus_gsrc { + GSource parent; + vhost_scsi_dev_t *vdev_scsi; + GPollFD gfd; + vu_watch_cb vu_cb; +} vus_gsrc_t; + +static gint vus_fdmap_compare(gconstpointer a, gconstpointer b) +{ + return (b > a) - (b < a); +} + +static gboolean vus_gsrc_prepare(GSource *src, gint *timeout) +{ + assert(timeout); + + *timeout = -1; + return FALSE; +} + +static gboolean vus_gsrc_check(GSource *src) +{ + vus_gsrc_t *vus_src = (vus_gsrc_t *)src; + + assert(vus_src); + + return vus_src->gfd.revents & vus_src->gfd.events; +} + +static gboolean vus_gsrc_dispatch(GSource *src, GSourceFunc cb, gpointer data) +{ + vhost_scsi_dev_t *vdev_scsi; + vus_gsrc_t *vus_src = (vus_gsrc_t *)src; + + assert(vus_src); + assert(!(vus_src->vu_cb && cb)); + + vdev_scsi = vus_src->vdev_scsi; + + assert(vdev_scsi); + + if (cb) { + return cb(data); + } + if (vus_src->vu_cb) { + vus_src->vu_cb(&vdev_scsi->vu_dev, vus_src->gfd.revents, data); + } + return G_SOURCE_CONTINUE; +} + +static GSourceFuncs vus_gsrc_funcs = { + vus_gsrc_prepare, + vus_gsrc_check, + vus_gsrc_dispatch, + NULL +}; + +static int vus_gsrc_new(vhost_scsi_dev_t *vdev_scsi, int fd, GIOCondition cond, + vu_watch_cb vu_cb, GSourceFunc gsrc_cb, gpointer data) +{ + GSource *vus_gsrc; + vus_gsrc_t *vus_src; + guint id; + + assert(vdev_scsi); + assert(fd >= 0); + assert(vu_cb || gsrc_cb); + assert(!(vu_cb && gsrc_cb)); + + vus_gsrc = g_source_new(&vus_gsrc_funcs, sizeof(vus_gsrc_t)); + if (!vus_gsrc) { + PERR("Error creating GSource for new watch"); + return -1; + } + vus_src = (vus_gsrc_t *)vus_gsrc; + + vus_src->vdev_scsi = vdev_scsi; + vus_src->gfd.fd = fd; + vus_src->gfd.events = cond; + vus_src->vu_cb = vu_cb; + + g_source_add_poll(vus_gsrc, &vus_src->gfd); + g_source_set_callback(vus_gsrc, gsrc_cb, data, NULL); + id = g_source_attach(vus_gsrc, NULL); + assert(id); + g_source_unref(vus_gsrc); + + g_tree_insert(vdev_scsi->fdmap, (gpointer)(uintptr_t)fd, + (gpointer)(uintptr_t)id); + + return 0; +} + +/* from libiscsi's scsi-lowlevel.h ** + * + * nb. We can't directly include scsi-lowlevel.h due to a namespace conflict: + * QEMU's scsi.h also defines "SCSI_XFER_NONE". + */ + +#define SCSI_CDB_MAX_SIZE 16 + +struct scsi_iovector { + struct scsi_iovec *iov; + int niov; + int nalloc; + size_t offset; + int consumed; +}; + +struct scsi_allocated_memory { + struct scsi_allocated_memory *next; + char buf[0]; +}; + +struct scsi_data { + int size; + unsigned char *data; +}; + +enum scsi_sense_key { + SCSI_SENSE_NO_SENSE = 0x00, + SCSI_SENSE_RECOVERED_ERROR = 0x01, + SCSI_SENSE_NOT_READY = 0x02, + SCSI_SENSE_MEDIUM_ERROR = 0x03, + SCSI_SENSE_HARDWARE_ERROR = 0x04, + SCSI_SENSE_ILLEGAL_REQUEST = 0x05, + SCSI_SENSE_UNIT_ATTENTION = 0x06, + SCSI_SENSE_DATA_PROTECTION = 0x07, + SCSI_SENSE_BLANK_CHECK = 0x08, + SCSI_SENSE_VENDOR_SPECIFIC = 0x09, + SCSI_SENSE_COPY_ABORTED = 0x0a, + SCSI_SENSE_COMMAND_ABORTED = 0x0b, + SCSI_SENSE_OBSOLETE_ERROR_CODE = 0x0c, + SCSI_SENSE_OVERFLOW_COMMAND = 0x0d, + SCSI_SENSE_MISCOMPARE = 0x0e +}; + +struct scsi_sense { + unsigned char error_type; + enum scsi_sense_key key; + int ascq; + unsigned sense_specific:1; + unsigned ill_param_in_cdb:1; + unsigned bit_pointer_valid:1; + unsigned char bit_pointer; + uint16_t field_pointer; +}; + +enum scsi_residual { + SCSI_RESIDUAL_NO_RESIDUAL = 0, + SCSI_RESIDUAL_UNDERFLOW, + SCSI_RESIDUAL_OVERFLOW +}; + +struct scsi_task { + int status; + int cdb_size; + int xfer_dir; + int expxferlen; + unsigned char cdb[SCSI_CDB_MAX_SIZE]; + enum scsi_residual residual_status; + size_t residual; + struct scsi_sense sense; + struct scsi_data datain; + struct scsi_allocated_memory *mem; + void *ptr; + + uint32_t itt; + uint32_t cmdsn; + uint32_t lun; + + struct scsi_iovector iovector_in; + struct scsi_iovector iovector_out; +}; + +/** libiscsi integration **/ + +static int iscsi_add_lun(iscsi_lun_t *lun, char *iscsi_uri) +{ + struct iscsi_url *iscsi_url; + struct iscsi_context *iscsi_ctx; + int ret = 0; + + assert(lun); + assert(iscsi_uri); + + iscsi_ctx = iscsi_create_context(VUS_ISCSI_INITIATOR); + if (!iscsi_ctx) { + PERR("Unable to create iSCSI context"); + return -1; + } + + iscsi_url = iscsi_parse_full_url(iscsi_ctx, iscsi_uri); + if (!iscsi_url) { + PERR("Unable to parse iSCSI URL: %s", iscsi_get_error(iscsi_ctx)); + goto fail; + } + + iscsi_set_session_type(iscsi_ctx, ISCSI_SESSION_NORMAL); + iscsi_set_header_digest(iscsi_ctx, ISCSI_HEADER_DIGEST_NONE_CRC32C); + if (iscsi_full_connect_sync(iscsi_ctx, iscsi_url->portal, iscsi_url->lun)) { + PERR("Unable to login to iSCSI portal: %s", iscsi_get_error(iscsi_ctx)); + goto fail; + } + + lun->iscsi_ctx = iscsi_ctx; + lun->iscsi_lun = iscsi_url->lun; + + PDBG("Context %p created for lun 0: %s", iscsi_ctx, iscsi_uri); + +out: + if (iscsi_url) { + iscsi_destroy_url(iscsi_url); + } + return ret; + +fail: + (void)iscsi_destroy_context(iscsi_ctx); + ret = -1; + goto out; +} + +static struct scsi_task *scsi_task_new(int cdb_len, uint8_t *cdb, int dir, + int xfer_len) { + struct scsi_task *task; + + assert(cdb_len > 0); + assert(cdb); + + task = calloc(1, sizeof(struct scsi_task)); + if (!task) { + PERR("Error allocating task: %s", strerror(errno)); + return NULL; + } + + memcpy(task->cdb, cdb, cdb_len); + task->cdb_size = cdb_len; + task->xfer_dir = dir; + task->expxferlen = xfer_len; + + return task; +} + +static int get_cdb_len(uint8_t *cdb) +{ + assert(cdb); + + switch (cdb[0] >> 5) { + case 0: return 6; + case 1: /* fall through */ + case 2: return 10; + case 4: return 16; + case 5: return 12; + } + PERR("Unable to determine cdb len (0x%02hhX)", cdb[0] >> 5); + return -1; +} + +static int handle_cmd_sync(struct iscsi_context *ctx, + VirtIOSCSICmdReq *req, + struct iovec *out, unsigned int out_len, + VirtIOSCSICmdResp *rsp, + struct iovec *in, unsigned int in_len) { + struct scsi_task *task; + uint32_t dir; + uint32_t len; + int cdb_len; + int i; + + assert(ctx); + assert(req); + assert(rsp); + + if (!(!req->lun[1] && req->lun[2] == 0x40 && !req->lun[3])) { + /* Ignore anything different than target=0, lun=0 */ + PDBG("Ignoring unconnected lun (0x%hhX, 0x%hhX)", + req->lun[1], req->lun[3]); + rsp->status = SCSI_STATUS_CHECK_CONDITION; + memset(rsp->sense, 0, sizeof(rsp->sense)); + rsp->sense_len = 18; + rsp->sense[0] = 0x70; + rsp->sense[2] = SCSI_SENSE_ILLEGAL_REQUEST; + rsp->sense[7] = 10; + rsp->sense[12] = 0x24; + + return 0; + } + + cdb_len = get_cdb_len(req->cdb); + if (cdb_len == -1) { + return -1; + } + + len = 0; + if (!out_len && !in_len) { + dir = SCSI_XFER_NONE; + } else if (out_len) { + dir = SCSI_XFER_TO_DEV; + for (i = 0; i < out_len; i++) { + len += out[i].iov_len; + } + } else { + dir = SCSI_XFER_FROM_DEV; + for (i = 0; i < in_len; i++) { + len += in[i].iov_len; + } + } + + task = scsi_task_new(cdb_len, req->cdb, dir, len); + if (!task) { + PERR("Unable to create iscsi task"); + return -1; + } + + if (dir == SCSI_XFER_TO_DEV) { + task->iovector_out.iov = (struct scsi_iovec *)out; + task->iovector_out.niov = out_len; + } else if (dir == SCSI_XFER_FROM_DEV) { + task->iovector_in.iov = (struct scsi_iovec *)in; + task->iovector_in.niov = in_len; + } + + PDBG("Sending iscsi cmd (cdb_len=%d, dir=%d, task=%p)", + cdb_len, dir, task); + if (!iscsi_scsi_command_sync(ctx, 0, task, NULL)) { + PERR("Error serving SCSI command"); + free(task); + return -1; + } + + memset(rsp, 0, sizeof(*rsp)); + + rsp->status = task->status; + rsp->resid = task->residual; + + if (task->status == SCSI_STATUS_CHECK_CONDITION) { + rsp->response = VIRTIO_SCSI_S_FAILURE; + rsp->sense_len = task->datain.size - 2; + memcpy(rsp->sense, &task->datain.data[2], rsp->sense_len); + } + + free(task); + + PDBG("Filled in rsp: status=%hhX, resid=%u, response=%hhX, sense_len=%u", + rsp->status, rsp->resid, rsp->response, rsp->sense_len); + + return 0; +} + +/** libvhost-user callbacks **/ + +static vhost_scsi_dev_t *vdev_scsi_find_by_vu(VuDev *vu_dev); + +static void vus_panic_cb(VuDev *vu_dev, const char *buf) +{ + vhost_scsi_dev_t *vdev_scsi; + + assert(vu_dev); + + vdev_scsi = vdev_scsi_find_by_vu(vu_dev); + + if (buf) { + PERR("vu_panic: %s", buf); + } + + if (vdev_scsi) { + assert(vdev_scsi->loop); + g_main_loop_quit(vdev_scsi->loop); + } +} + +static void vus_add_watch_cb(VuDev *vu_dev, int fd, int vu_evt, vu_watch_cb cb, + void *pvt) { + vhost_scsi_dev_t *vdev_scsi; + guint id; + + assert(vu_dev); + assert(fd >= 0); + assert(cb); + + vdev_scsi = vdev_scsi_find_by_vu(vu_dev); + if (!vdev_scsi) { + vus_panic_cb(vu_dev, NULL); + return; + } + + id = (guint)(uintptr_t)g_tree_lookup(vdev_scsi->fdmap, + (gpointer)(uintptr_t)fd); + if (id) { + GSource *vus_src = g_main_context_find_source_by_id(NULL, id); + assert(vus_src); + g_source_destroy(vus_src); + (void)g_tree_remove(vdev_scsi->fdmap, (gpointer)(uintptr_t)fd); + } + + if (vus_gsrc_new(vdev_scsi, fd, vu_evt, cb, NULL, pvt)) { + vus_panic_cb(vu_dev, NULL); + } +} + +static void vus_del_watch_cb(VuDev *vu_dev, int fd) +{ + vhost_scsi_dev_t *vdev_scsi; + guint id; + + assert(vu_dev); + assert(fd >= 0); + + vdev_scsi = vdev_scsi_find_by_vu(vu_dev); + if (!vdev_scsi) { + vus_panic_cb(vu_dev, NULL); + return; + } + + id = (guint)(uintptr_t)g_tree_lookup(vdev_scsi->fdmap, + (gpointer)(uintptr_t)fd); + if (id) { + GSource *vus_src = g_main_context_find_source_by_id(NULL, id); + assert(vus_src); + g_source_destroy(vus_src); + (void)g_tree_remove(vdev_scsi->fdmap, (gpointer)(uintptr_t)fd); + } +} + +static void vus_proc_ctl(VuDev *vu_dev, int idx) +{ + /* Control VQ not implemented */ +} + +static void vus_proc_evt(VuDev *vu_dev, int idx) +{ + /* Event VQ not implemented */ +} + +static void vus_proc_req(VuDev *vu_dev, int idx) +{ + vhost_scsi_dev_t *vdev_scsi; + VuVirtq *vq; + + assert(vu_dev); + + vdev_scsi = vdev_scsi_find_by_vu(vu_dev); + if (!vdev_scsi) { + vus_panic_cb(vu_dev, NULL); + return; + } + + if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) { + PERR("VQ Index out of range: %d", idx); + vus_panic_cb(vu_dev, NULL); + return; + } + + vq = vu_get_queue(vu_dev, idx); + if (!vq) { + PERR("Error fetching VQ (dev=%p, idx=%d)", vu_dev, idx); + vus_panic_cb(vu_dev, NULL); + return; + } + + PDBG("Got kicked on vq[%d]@%p", idx, vq); + + while (1) { + VuVirtqElement *elem; + VirtIOSCSICmdReq *req; + VirtIOSCSICmdResp *rsp; + + elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement)); + if (!elem) { + PDBG("No more elements pending on vq[%d]@%p", idx, vq); + break; + } + PDBG("Popped elem@%p", elem); + + assert(!((elem->out_num > 1) && (elem->in_num > 1))); + assert((elem->out_num > 0) && (elem->in_num > 0)); + + if (elem->out_sg[0].iov_len < sizeof(VirtIOSCSICmdReq)) { + PERR("Invalid virtio-scsi req header"); + vus_panic_cb(vu_dev, NULL); + break; + } + req = (VirtIOSCSICmdReq *)elem->out_sg[0].iov_base; + + if (elem->in_sg[0].iov_len < sizeof(VirtIOSCSICmdResp)) { + PERR("Invalid virtio-scsi rsp header"); + vus_panic_cb(vu_dev, NULL); + break; + } + rsp = (VirtIOSCSICmdResp *)elem->in_sg[0].iov_base; + + if (handle_cmd_sync(vdev_scsi->luns[0].iscsi_ctx, + req, &elem->out_sg[1], elem->out_num - 1, + rsp, &elem->in_sg[1], elem->in_num - 1) != 0) { + vus_panic_cb(vu_dev, NULL); + break; + } + + vu_queue_push(vu_dev, vq, elem, 0); + vu_queue_notify(vu_dev, vq); + + free(elem); + } +} + +static void vus_queue_set_started(VuDev *vu_dev, int idx, bool started) +{ + VuVirtq *vq; + + assert(vu_dev); + + if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) { + PERR("VQ Index out of range: %d", idx); + vus_panic_cb(vu_dev, NULL); + return; + } + + vq = vu_get_queue(vu_dev, idx); + + switch (idx) { + case 0: + vu_set_queue_handler(vu_dev, vq, started ? vus_proc_ctl : NULL); + break; + case 1: + vu_set_queue_handler(vu_dev, vq, started ? vus_proc_evt : NULL); + break; + default: + vu_set_queue_handler(vu_dev, vq, started ? vus_proc_req : NULL); + } +} + +static const VuDevIface vus_iface = { + .queue_set_started = vus_queue_set_started, +}; + +static gboolean vus_vhost_cb(gpointer data) +{ + VuDev *vu_dev = (VuDev *)data; + + assert(vu_dev); + + if (!vu_dispatch(vu_dev) != 0) { + PERR("Error processing vhost message"); + vus_panic_cb(vu_dev, NULL); + return G_SOURCE_REMOVE; + } + + return G_SOURCE_CONTINUE; +} + +/** misc helpers **/ + +static int unix_sock_new(char *unix_fn) +{ + int sock; + struct sockaddr_un un; + size_t len; + + assert(unix_fn); + + sock = socket(AF_UNIX, SOCK_STREAM, 0); + if (sock <= 0) { + perror("socket"); + return -1; + } + + un.sun_family = AF_UNIX; + (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn); + len = sizeof(un.sun_family) + strlen(un.sun_path); + + (void)unlink(unix_fn); + if (bind(sock, (struct sockaddr *)&un, len) < 0) { + perror("bind"); + goto fail; + } + + if (listen(sock, 1) < 0) { + perror("listen"); + goto fail; + } + + return sock; + +fail: + (void)close(sock); + + return -1; +} + +/** vhost-user-scsi **/ + +static vhost_scsi_dev_t *vdev_scsi_find_by_vu(VuDev *vu_dev) +{ + int i; + + assert(vu_dev); + + for (i = 0; i < VUS_MAX_DEVS; i++) { + if (&vhost_scsi_devs[i]->vu_dev == vu_dev) { + return vhost_scsi_devs[i]; + } + } + + PERR("Unknown VuDev %p", vu_dev); + return NULL; +} + +static void vdev_scsi_deinit(vhost_scsi_dev_t *vdev_scsi) +{ + if (!vdev_scsi) { + return; + } + + if (vdev_scsi->server_sock >= 0) { + struct sockaddr_storage ss; + socklen_t sslen = sizeof(ss); + + if (getsockname(vdev_scsi->server_sock, (struct sockaddr *)&ss, + &sslen) == 0) { + struct sockaddr_un *su = (struct sockaddr_un *)&ss; + (void)unlink(su->sun_path); + } + + (void)close(vdev_scsi->server_sock); + vdev_scsi->server_sock = -1; + } + + if (vdev_scsi->loop) { + g_main_loop_unref(vdev_scsi->loop); + vdev_scsi->loop = NULL; + } +} + +static vhost_scsi_dev_t *vdev_scsi_new(char *unix_fn) +{ + vhost_scsi_dev_t *vdev_scsi = NULL; + + assert(unix_fn); + + vdev_scsi = calloc(1, sizeof(vhost_scsi_dev_t)); + if (!vdev_scsi) { + PERR("calloc: %s", strerror(errno)); + return NULL; + } + + vdev_scsi->server_sock = unix_sock_new(unix_fn); + if (vdev_scsi->server_sock < 0) { + goto err; + } + + vdev_scsi->loop = g_main_loop_new(NULL, FALSE); + if (!vdev_scsi->loop) { + PERR("Error creating glib event loop"); + goto err; + } + + vdev_scsi->fdmap = g_tree_new(vus_fdmap_compare); + if (!vdev_scsi->fdmap) { + PERR("Error creating glib tree for fdmap"); + goto err; + } + + return vdev_scsi; + +err: + vdev_scsi_deinit(vdev_scsi); + free(vdev_scsi); + + return NULL; +} + +static int vdev_scsi_add_iscsi_lun(vhost_scsi_dev_t *vdev_scsi, + char *iscsi_uri, uint32_t lun) { + assert(vdev_scsi); + assert(iscsi_uri); + assert(lun < VUS_MAX_LUNS); + + if (vdev_scsi->luns[lun].iscsi_ctx) { + PERR("Lun %d already configured", lun); + return -1; + } + + if (iscsi_add_lun(&vdev_scsi->luns[lun], iscsi_uri) != 0) { + return -1; + } + + return 0; +} + +static int vdev_scsi_run(vhost_scsi_dev_t *vdev_scsi) +{ + int cli_sock; + int ret = 0; + + assert(vdev_scsi); + assert(vdev_scsi->server_sock >= 0); + assert(vdev_scsi->loop); + + cli_sock = accept(vdev_scsi->server_sock, (void *)0, (void *)0); + if (cli_sock < 0) { + perror("accept"); + return -1; + } + + vu_init(&vdev_scsi->vu_dev, + cli_sock, + vus_panic_cb, + vus_add_watch_cb, + vus_del_watch_cb, + &vus_iface); + + if (vus_gsrc_new(vdev_scsi, cli_sock, G_IO_IN, NULL, vus_vhost_cb, + &vdev_scsi->vu_dev)) { + goto fail; + } + + g_main_loop_run(vdev_scsi->loop); + +out: + vu_deinit(&vdev_scsi->vu_dev); + + return ret; + +fail: + ret = -1; + goto out; +} + +int main(int argc, char **argv) +{ + vhost_scsi_dev_t *vdev_scsi = NULL; + char *unix_fn = NULL; + char *iscsi_uri = NULL; + int opt, err = EXIT_SUCCESS; + + while ((opt = getopt(argc, argv, "u:i:")) != -1) { + switch (opt) { + case 'h': + goto help; + case 'u': + unix_fn = strdup(optarg); + break; + case 'i': + iscsi_uri = strdup(optarg); + break; + default: + goto help; + } + } + if (!unix_fn || !iscsi_uri) { + goto help; + } + + vdev_scsi = vdev_scsi_new(unix_fn); + if (!vdev_scsi) { + goto err; + } + vhost_scsi_devs[0] = vdev_scsi; + + if (vdev_scsi_add_iscsi_lun(vdev_scsi, iscsi_uri, 0) != 0) { + goto err; + } + + if (vdev_scsi_run(vdev_scsi) != 0) { + goto err; + } + +out: + if (vdev_scsi) { + vdev_scsi_deinit(vdev_scsi); + free(vdev_scsi); + } + if (unix_fn) { + free(unix_fn); + } + if (iscsi_uri) { + free(iscsi_uri); + } + + return err; + +err: + err = EXIT_FAILURE; + goto out; + +help: + fprintf(stderr, "Usage: %s [ -u unix_sock_path -i iscsi_uri ] | [ -h ]\n", + argv[0]); + fprintf(stderr, " -u path to unix socket\n"); + fprintf(stderr, " -i iscsi uri for lun 0\n"); + fprintf(stderr, " -h print help and quit\n"); + + goto err; +} diff --git a/default-configs/pci.mak b/default-configs/pci.mak index 3bbeb62d9a..53ff10975c 100644 --- a/default-configs/pci.mak +++ b/default-configs/pci.mak @@ -43,3 +43,4 @@ CONFIG_VGA=y CONFIG_VGA_PCI=y CONFIG_IVSHMEM=$(CONFIG_EVENTFD) CONFIG_ROCKER=y +CONFIG_VHOST_USER_SCSI=$(CONFIG_LINUX) diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak index 18aed56fc0..b227a36179 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak @@ -1,5 +1,6 @@ CONFIG_PCI=y CONFIG_VIRTIO_PCI=y +CONFIG_VHOST_USER_SCSI=$(CONFIG_LINUX) CONFIG_VIRTIO=y CONFIG_SCLPCONSOLE=y CONFIG_TERMINAL3270=y diff --git a/docs/specs/parallels.txt b/docs/interop/parallels.txt index e9271eba5d..e9271eba5d 100644 --- a/docs/specs/parallels.txt +++ b/docs/interop/parallels.txt diff --git a/docs/specs/qcow2.txt b/docs/interop/qcow2.txt index 80cdfd0e91..80cdfd0e91 100644 --- a/docs/specs/qcow2.txt +++ b/docs/interop/qcow2.txt diff --git a/docs/specs/qed_spec.txt b/docs/interop/qed_spec.txt index 7982e058b2..7982e058b2 100644 --- a/docs/specs/qed_spec.txt +++ b/docs/interop/qed_spec.txt diff --git a/docs/qemu-ga-ref.texi b/docs/interop/qemu-ga-ref.texi index ddb76ce1c2..ddb76ce1c2 100644 --- a/docs/qemu-ga-ref.texi +++ b/docs/interop/qemu-ga-ref.texi diff --git a/docs/qemu-qmp-ref.texi b/docs/interop/qemu-qmp-ref.texi index bb25758bd0..bb25758bd0 100644 --- a/docs/qemu-qmp-ref.texi +++ b/docs/interop/qemu-qmp-ref.texi diff --git a/docs/qmp-intro.txt b/docs/interop/qmp-intro.txt index 60deafbae6..60deafbae6 100644 --- a/docs/qmp-intro.txt +++ b/docs/interop/qmp-intro.txt diff --git a/docs/qmp-spec.txt b/docs/interop/qmp-spec.txt index f8b5356015..f8b5356015 100644 --- a/docs/qmp-spec.txt +++ b/docs/interop/qmp-spec.txt diff --git a/docs/specs/vhost-user.txt b/docs/interop/vhost-user.txt index 481ab56e35..481ab56e35 100644 --- a/docs/specs/vhost-user.txt +++ b/docs/interop/vhost-user.txt diff --git a/docs/vnc-ledstate-Pseudo-encoding.txt b/docs/interop/vnc-ledstate-Pseudo-encoding.txt index 0f124f68b1..0f124f68b1 100644 --- a/docs/vnc-ledstate-Pseudo-encoding.txt +++ b/docs/interop/vnc-ledstate-Pseudo-encoding.txt @@ -1482,25 +1482,17 @@ static int64_t get_file_size(int fd) return size; } -static void *file_ram_alloc(RAMBlock *block, - ram_addr_t memory, - const char *path, - Error **errp) +static int file_ram_open(const char *path, + const char *region_name, + bool *created, + Error **errp) { - bool unlink_on_error = false; char *filename; char *sanitized_name; char *c; - void *area = MAP_FAILED; int fd = -1; - int64_t file_size; - - if (kvm_enabled() && !kvm_has_sync_mmu()) { - error_setg(errp, - "host lacks kvm mmu notifiers, -mem-path unsupported"); - return NULL; - } + *created = false; for (;;) { fd = open(path, O_RDWR); if (fd >= 0) { @@ -1511,13 +1503,13 @@ static void *file_ram_alloc(RAMBlock *block, /* @path names a file that doesn't exist, create it */ fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644); if (fd >= 0) { - unlink_on_error = true; + *created = true; break; } } else if (errno == EISDIR) { /* @path names a directory, create a file there */ /* Make name safe to use with mkstemp by replacing '/' with '_'. */ - sanitized_name = g_strdup(memory_region_name(block->mr)); + sanitized_name = g_strdup(region_name); for (c = sanitized_name; *c != '\0'; c++) { if (*c == '/') { *c = '_'; @@ -1540,7 +1532,7 @@ static void *file_ram_alloc(RAMBlock *block, error_setg_errno(errp, errno, "can't open backing store %s for guest RAM", path); - goto error; + return -1; } /* * Try again on EINTR and EEXIST. The latter happens when @@ -1548,6 +1540,17 @@ static void *file_ram_alloc(RAMBlock *block, */ } + return fd; +} + +static void *file_ram_alloc(RAMBlock *block, + ram_addr_t memory, + int fd, + bool truncate, + Error **errp) +{ + void *area; + block->page_size = qemu_fd_getpagesize(fd); block->mr->align = block->page_size; #if defined(__s390x__) @@ -1556,20 +1559,11 @@ static void *file_ram_alloc(RAMBlock *block, } #endif - file_size = get_file_size(fd); - if (memory < block->page_size) { error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to " "or larger than page size 0x%zx", memory, block->page_size); - goto error; - } - - if (file_size > 0 && file_size < memory) { - error_setg(errp, "backing store %s size 0x%" PRIx64 - " does not match 'size' option 0x" RAM_ADDR_FMT, - path, file_size, memory); - goto error; + return NULL; } memory = ROUND_UP(memory, block->page_size); @@ -1588,7 +1582,7 @@ static void *file_ram_alloc(RAMBlock *block, * those labels. Therefore, extending the non-empty backend file * is disabled as well. */ - if (!file_size && ftruncate(fd, memory)) { + if (truncate && ftruncate(fd, memory)) { perror("ftruncate"); } @@ -1597,30 +1591,19 @@ static void *file_ram_alloc(RAMBlock *block, if (area == MAP_FAILED) { error_setg_errno(errp, errno, "unable to map backing store for guest RAM"); - goto error; + return NULL; } if (mem_prealloc) { os_mem_prealloc(fd, area, memory, smp_cpus, errp); if (errp && *errp) { - goto error; + qemu_ram_munmap(area, memory); + return NULL; } } block->fd = fd; return area; - -error: - if (area != MAP_FAILED) { - qemu_ram_munmap(area, memory); - } - if (unlink_on_error) { - unlink(path); - } - if (fd != -1) { - close(fd); - } - return NULL; } #endif @@ -1931,18 +1914,25 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) } #ifdef __linux__ -RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, - bool share, const char *mem_path, - Error **errp) +RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, + bool share, int fd, + Error **errp) { RAMBlock *new_block; Error *local_err = NULL; + int64_t file_size; if (xen_enabled()) { error_setg(errp, "-mem-path not supported with Xen"); return NULL; } + if (kvm_enabled() && !kvm_has_sync_mmu()) { + error_setg(errp, + "host lacks kvm mmu notifiers, -mem-path unsupported"); + return NULL; + } + if (phys_mem_alloc != qemu_anon_ram_alloc) { /* * file_ram_alloc() needs to allocate just like @@ -1955,13 +1945,20 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, } size = HOST_PAGE_ALIGN(size); + file_size = get_file_size(fd); + if (file_size > 0 && file_size < size) { + error_setg(errp, "backing store %s size 0x%" PRIx64 + " does not match 'size' option 0x" RAM_ADDR_FMT, + mem_path, file_size, size); + return NULL; + } + new_block = g_malloc0(sizeof(*new_block)); new_block->mr = mr; new_block->used_length = size; new_block->max_length = size; new_block->flags = share ? RAM_SHARED : 0; - new_block->host = file_ram_alloc(new_block, size, - mem_path, errp); + new_block->host = file_ram_alloc(new_block, size, fd, !file_size, errp); if (!new_block->host) { g_free(new_block); return NULL; @@ -1974,6 +1971,33 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, return NULL; } return new_block; + +} + + +RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, + bool share, const char *mem_path, + Error **errp) +{ + int fd; + bool created; + RAMBlock *block; + + fd = file_ram_open(mem_path, memory_region_name(mr), &created, errp); + if (fd < 0) { + return NULL; + } + + block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp); + if (!block) { + if (created) { + unlink(mem_path); + } + close(fd); + return NULL; + } + + return block; } #endif diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h index 100c8a98bf..de2c5d5702 100644 --- a/fpu/softfloat-specialize.h +++ b/fpu/softfloat-specialize.h @@ -111,7 +111,7 @@ float16 float16_default_nan(float_status *status) *----------------------------------------------------------------------------*/ float32 float32_default_nan(float_status *status) { -#if defined(TARGET_SPARC) +#if defined(TARGET_SPARC) || defined(TARGET_M68K) return const_float32(0x7FFFFFFF); #elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) || \ defined(TARGET_XTENSA) || defined(TARGET_S390X) || defined(TARGET_TRICORE) @@ -136,7 +136,7 @@ float32 float32_default_nan(float_status *status) *----------------------------------------------------------------------------*/ float64 float64_default_nan(float_status *status) { -#if defined(TARGET_SPARC) +#if defined(TARGET_SPARC) || defined(TARGET_M68K) return const_float64(LIT64(0x7FFFFFFFFFFFFFFF)); #elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) || \ defined(TARGET_S390X) @@ -162,7 +162,10 @@ float64 float64_default_nan(float_status *status) floatx80 floatx80_default_nan(float_status *status) { floatx80 r; - +#if defined(TARGET_M68K) + r.low = LIT64(0xFFFFFFFFFFFFFFFF); + r.high = 0x7FFF; +#else if (status->snan_bit_is_one) { r.low = LIT64(0xBFFFFFFFFFFFFFFF); r.high = 0x7FFF; @@ -170,6 +173,7 @@ floatx80 floatx80_default_nan(float_status *status) r.low = LIT64(0xC000000000000000); r.high = 0xFFFF; } +#endif return r; } @@ -502,6 +506,30 @@ static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, return 1; } } +#elif defined(TARGET_M68K) +static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, + flag aIsLargerSignificand) +{ + /* M68000 FAMILY PROGRAMMER'S REFERENCE MANUAL + * 3.4 FLOATING-POINT INSTRUCTION DETAILS + * If either operand, but not both operands, of an operation is a + * nonsignaling NaN, then that NaN is returned as the result. If both + * operands are nonsignaling NaNs, then the destination operand + * nonsignaling NaN is returned as the result. + * If either operand to an operation is a signaling NaN (SNaN), then the + * SNaN bit is set in the FPSR EXC byte. If the SNaN exception enable bit + * is set in the FPCR ENABLE byte, then the exception is taken and the + * destination is not modified. If the SNaN exception enable bit is not + * set, setting the SNaN bit in the operand to a one converts the SNaN to + * a nonsignaling NaN. The operation then continues as described in the + * preceding paragraph for nonsignaling NaNs. + */ + if (aIsQNaN || aIsSNaN) { /* a is the destination operand */ + return 0; /* return the destination operand */ + } else { + return 1; /* return b */ + } +} #else static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, flag aIsLargerSignificand) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 15610b9de8..a9b59bdce5 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -37,24 +37,6 @@ #include "kvm_i386.h" #include "trace.h" -/*#define DEBUG_INTEL_IOMMU*/ -#ifdef DEBUG_INTEL_IOMMU -enum { - DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG, - DEBUG_CACHE, DEBUG_IR, -}; -#define VTD_DBGBIT(x) (1 << DEBUG_##x) -static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR); - -#define VTD_DPRINTF(what, fmt, ...) do { \ - if (vtd_dbgflags & VTD_DBGBIT(what)) { \ - fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \ - ## __VA_ARGS__); } \ - } while (0) -#else -#define VTD_DPRINTF(what, fmt, ...) do {} while (0) -#endif - static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val, uint64_t wmask, uint64_t w1cmask) { @@ -199,9 +181,10 @@ static void vtd_reset_context_cache(IntelIOMMUState *s) GHashTableIter bus_it; uint32_t devfn_it; + trace_vtd_context_cache_reset(); + g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr); - VTD_DPRINTF(CACHE, "global context_cache_gen=1"); while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) { for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) { vtd_as = vtd_bus->dev_as[devfn_it]; @@ -291,8 +274,8 @@ static void vtd_generate_interrupt(IntelIOMMUState *s, hwaddr mesg_addr_reg, msi.address = vtd_get_long_raw(s, mesg_addr_reg); msi.data = vtd_get_long_raw(s, mesg_data_reg); - VTD_DPRINTF(FLOG, "msi: addr 0x%"PRIx64 " data 0x%"PRIx32, - msi.address, msi.data); + trace_vtd_irq_generate(msi.address, msi.data); + apic_get_class()->send_msi(&msi); } @@ -304,14 +287,14 @@ static void vtd_generate_fault_event(IntelIOMMUState *s, uint32_t pre_fsts) { if (pre_fsts & VTD_FSTS_PPF || pre_fsts & VTD_FSTS_PFO || pre_fsts & VTD_FSTS_IQE) { - VTD_DPRINTF(FLOG, "there are previous interrupt conditions " - "to be serviced by software, fault event is not generated " - "(FSTS_REG 0x%"PRIx32 ")", pre_fsts); + trace_vtd_err("There are previous interrupt conditions " + "to be serviced by software, fault event " + "is not generated."); return; } vtd_set_clear_mask_long(s, DMAR_FECTL_REG, 0, VTD_FECTL_IP); if (vtd_get_long_raw(s, DMAR_FECTL_REG) & VTD_FECTL_IM) { - VTD_DPRINTF(FLOG, "Interrupt Mask set, fault event is not generated"); + trace_vtd_err("Interrupt Mask set, irq is not generated."); } else { vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG); vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0); @@ -348,7 +331,7 @@ static void vtd_update_fsts_ppf(IntelIOMMUState *s) } } vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_PPF, ppf_mask); - VTD_DPRINTF(FLOG, "set PPF of FSTS_REG to %d", ppf_mask ? 1 : 0); + trace_vtd_fsts_ppf(!!ppf_mask); } static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index) @@ -380,8 +363,8 @@ static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index, } vtd_set_quad_raw(s, frcd_reg_addr, lo); vtd_set_quad_raw(s, frcd_reg_addr + 8, hi); - VTD_DPRINTF(FLOG, "record to FRCD_REG #%"PRIu16 ": hi 0x%"PRIx64 - ", lo 0x%"PRIx64, index, hi, lo); + + trace_vtd_frr_new(index, hi, lo); } /* Try to collapse multiple pending faults from the same requester */ @@ -393,7 +376,6 @@ static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id) for (i = 0; i < DMAR_FRCD_REG_NR; i++) { frcd_reg = vtd_get_quad_raw(s, addr); - VTD_DPRINTF(FLOG, "frcd_reg #%d 0x%"PRIx64, i, frcd_reg); if ((frcd_reg & VTD_FRCD_F) && ((frcd_reg & VTD_FRCD_SID_MASK) == source_id)) { return true; @@ -416,21 +398,24 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, /* This is not a normal fault reason case. Drop it. */ return; } - VTD_DPRINTF(FLOG, "sid 0x%"PRIx16 ", fault %d, addr 0x%"PRIx64 - ", is_write %d", source_id, fault, addr, is_write); + + trace_vtd_dmar_fault(source_id, fault, addr, is_write); + if (fsts_reg & VTD_FSTS_PFO) { - VTD_DPRINTF(FLOG, "new fault is not recorded due to " - "Primary Fault Overflow"); + trace_vtd_err("New fault is not recorded due to " + "Primary Fault Overflow."); return; } + if (vtd_try_collapse_fault(s, source_id)) { - VTD_DPRINTF(FLOG, "new fault is not recorded due to " - "compression of faults"); + trace_vtd_err("New fault is not recorded due to " + "compression of faults."); return; } + if (vtd_is_frcd_set(s, s->next_frcd_reg)) { - VTD_DPRINTF(FLOG, "Primary Fault Overflow and " - "new fault is not recorded, set PFO field"); + trace_vtd_err("Next Fault Recording Reg is used, " + "new fault is not recorded, set PFO field."); vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_PFO); return; } @@ -438,8 +423,8 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write); if (fsts_reg & VTD_FSTS_PPF) { - VTD_DPRINTF(FLOG, "there are pending faults already, " - "fault event is not generated"); + trace_vtd_err("There are pending faults already, " + "fault event is not generated."); vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg); s->next_frcd_reg++; if (s->next_frcd_reg == DMAR_FRCD_REG_NR) { @@ -702,7 +687,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, uint64_t access_right_check; if (!vtd_iova_range_check(iova, ce)) { - VTD_DPRINTF(GENERAL, "error: iova 0x%"PRIx64 " exceeds limits", iova); + trace_vtd_err_dmar_iova_overflow(iova); return -VTD_FR_ADDR_BEYOND_MGAW; } @@ -714,9 +699,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, slpte = vtd_get_slpte(addr, offset); if (slpte == (uint64_t)-1) { - VTD_DPRINTF(GENERAL, "error: fail to access second-level paging " - "entry at level %"PRIu32 " for iova 0x%"PRIx64, - level, iova); + trace_vtd_err_dmar_slpte_read_error(iova, level); if (level == vtd_ce_get_level(ce)) { /* Invalid programming of context-entry */ return -VTD_FR_CONTEXT_ENTRY_INV; @@ -727,15 +710,11 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, *reads = (*reads) && (slpte & VTD_SL_R); *writes = (*writes) && (slpte & VTD_SL_W); if (!(slpte & access_right_check)) { - VTD_DPRINTF(GENERAL, "error: lack of %s permission for " - "iova 0x%"PRIx64 " slpte 0x%"PRIx64, - (is_write ? "write" : "read"), iova, slpte); + trace_vtd_err_dmar_slpte_perm_error(iova, level, slpte, is_write); return is_write ? -VTD_FR_WRITE : -VTD_FR_READ; } if (vtd_slpte_nonzero_rsvd(slpte, level)) { - VTD_DPRINTF(GENERAL, "error: non-zero reserved field in second " - "level paging entry level %"PRIu32 " slpte 0x%"PRIx64, - level, slpte); + trace_vtd_err_dmar_slpte_resv_error(iova, level, slpte); return -VTD_FR_PAGING_ENTRY_RSVD; } @@ -1090,8 +1069,10 @@ out: * @devfn: The devfn, which is the combined of device and function number * @is_write: The access is a write operation * @entry: IOMMUTLBEntry that contain the addr to be translated and result + * + * Returns true if translation is successful, otherwise false. */ -static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, +static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, uint8_t devfn, hwaddr addr, bool is_write, IOMMUTLBEntry *entry) { @@ -1125,6 +1106,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, page_mask = iotlb_entry->mask; goto out; } + /* Try to fetch context-entry from cache first */ if (cc_entry->context_cache_gen == s->context_cache_gen) { trace_vtd_iotlb_cc_hit(bus_num, devfn, cc_entry->context_entry.hi, @@ -1142,7 +1124,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, } else { vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); } - return; + goto error; } /* Update context-cache */ trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo, @@ -1157,8 +1139,9 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, * Also, let's ignore IOTLB caching as well for PT devices. */ if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) { + entry->iova = addr & VTD_PAGE_MASK; entry->translated_addr = entry->iova; - entry->addr_mask = VTD_PAGE_SIZE - 1; + entry->addr_mask = VTD_PAGE_MASK; entry->perm = IOMMU_RW; trace_vtd_translate_pt(source_id, entry->iova); @@ -1173,7 +1156,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, */ vtd_pt_enable_fast_path(s, source_id); - return; + return true; } ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level, @@ -1185,7 +1168,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, } else { vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); } - return; + goto error; } page_mask = vtd_slpt_level_page_mask(level); @@ -1196,6 +1179,14 @@ out: entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask; entry->addr_mask = ~page_mask; entry->perm = IOMMU_ACCESS_FLAG(reads, writes); + return true; + +error: + entry->iova = 0; + entry->translated_addr = 0; + entry->addr_mask = 0; + entry->perm = IOMMU_NONE; + return false; } static void vtd_root_table_setup(IntelIOMMUState *s) @@ -1204,8 +1195,7 @@ static void vtd_root_table_setup(IntelIOMMUState *s) s->root_extended = s->root & VTD_RTADDR_RTT; s->root &= VTD_RTADDR_ADDR_MASK; - VTD_DPRINTF(CSR, "root_table addr 0x%"PRIx64 " %s", s->root, - (s->root_extended ? "(extended)" : "")); + trace_vtd_reg_dmar_root(s->root, s->root_extended); } static void vtd_iec_notify_all(IntelIOMMUState *s, bool global, @@ -1225,8 +1215,7 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s) /* Notify global invalidation */ vtd_iec_notify_all(s, true, 0, 0); - VTD_DPRINTF(CSR, "int remap table addr 0x%"PRIx64 " size %"PRIu32, - s->intr_root, s->intr_size); + trace_vtd_reg_ir_root(s->intr_root, s->intr_size); } static void vtd_iommu_replay_all(IntelIOMMUState *s) @@ -1328,11 +1317,8 @@ static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val) switch (type) { case VTD_CCMD_DOMAIN_INVL: - VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16, - (uint16_t)VTD_CCMD_DID(val)); /* Fall through */ case VTD_CCMD_GLOBAL_INVL: - VTD_DPRINTF(INV, "global invalidation"); caig = VTD_CCMD_GLOBAL_INVL_A; vtd_context_global_invalidate(s); break; @@ -1343,7 +1329,7 @@ static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val) break; default: - VTD_DPRINTF(GENERAL, "error: invalid granularity"); + trace_vtd_err("Context cache invalidate type error."); caig = 0; } return caig; @@ -1351,7 +1337,7 @@ static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val) static void vtd_iotlb_global_invalidate(IntelIOMMUState *s) { - trace_vtd_iotlb_reset("global invalidation recved"); + trace_vtd_inv_desc_iotlb_global(); vtd_reset_iotlb(s); vtd_iommu_replay_all(s); } @@ -1362,6 +1348,8 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) VTDContextEntry ce; VTDAddressSpace *vtd_as; + trace_vtd_inv_desc_iotlb_domain(domain_id); + g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain, &domain_id); @@ -1407,6 +1395,8 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, { VTDIOTLBPageInvInfo info; + trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am); + assert(am <= VTD_MAMV); info.domain_id = domain_id; info.addr = addr; @@ -1429,15 +1419,12 @@ static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val) switch (type) { case VTD_TLB_GLOBAL_FLUSH: - VTD_DPRINTF(INV, "global invalidation"); iaig = VTD_TLB_GLOBAL_FLUSH_A; vtd_iotlb_global_invalidate(s); break; case VTD_TLB_DSI_FLUSH: domain_id = VTD_TLB_DID(val); - VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16, - domain_id); iaig = VTD_TLB_DSI_FLUSH_A; vtd_iotlb_domain_invalidate(s, domain_id); break; @@ -1447,11 +1434,8 @@ static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val) addr = vtd_get_quad_raw(s, DMAR_IVA_REG); am = VTD_IVA_AM(addr); addr = VTD_IVA_ADDR(addr); - VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16 - " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am); if (am > VTD_MAMV) { - VTD_DPRINTF(GENERAL, "error: supported max address mask value is " - "%"PRIu8, (uint8_t)VTD_MAMV); + trace_vtd_err("IOTLB PSI flush: address mask overflow."); iaig = 0; break; } @@ -1460,7 +1444,7 @@ static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val) break; default: - VTD_DPRINTF(GENERAL, "error: invalid granularity"); + trace_vtd_err("IOTLB flush: invalid granularity."); iaig = 0; } return iaig; @@ -1481,21 +1465,19 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en) { uint64_t iqa_val = vtd_get_quad_raw(s, DMAR_IQA_REG); - VTD_DPRINTF(INV, "Queued Invalidation Enable %s", (en ? "on" : "off")); + trace_vtd_inv_qi_enable(en); + if (en) { if (vtd_queued_inv_enable_check(s)) { s->iq = iqa_val & VTD_IQA_IQA_MASK; /* 2^(x+8) entries */ s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8); s->qi_enabled = true; - VTD_DPRINTF(INV, "DMAR_IQA_REG 0x%"PRIx64, iqa_val); - VTD_DPRINTF(INV, "Invalidation Queue addr 0x%"PRIx64 " size %d", - s->iq, s->iq_size); + trace_vtd_inv_qi_setup(s->iq, s->iq_size); /* Ok - report back to driver */ vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES); } else { - VTD_DPRINTF(GENERAL, "error: can't enable Queued Invalidation: " - "tail %"PRIu16, s->iq_tail); + trace_vtd_err_qi_enable(s->iq_tail); } } else { if (vtd_queued_inv_disable_check(s)) { @@ -1506,10 +1488,7 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en) /* Ok - report back to driver */ vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_QIES, 0); } else { - VTD_DPRINTF(GENERAL, "error: can't disable Queued Invalidation: " - "head %"PRIu16 ", tail %"PRIu16 - ", last_descriptor %"PRIu8, - s->iq_head, s->iq_tail, s->iq_last_desc_type); + trace_vtd_err_qi_disable(s->iq_head, s->iq_tail, s->iq_last_desc_type); } } } @@ -1517,8 +1496,6 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en) /* Set Root Table Pointer */ static void vtd_handle_gcmd_srtp(IntelIOMMUState *s) { - VTD_DPRINTF(CSR, "set Root Table Pointer"); - vtd_root_table_setup(s); /* Ok - report back to driver */ vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS); @@ -1527,8 +1504,6 @@ static void vtd_handle_gcmd_srtp(IntelIOMMUState *s) /* Set Interrupt Remap Table Pointer */ static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s) { - VTD_DPRINTF(CSR, "set Interrupt Remap Table Pointer"); - vtd_interrupt_remap_table_setup(s); /* Ok - report back to driver */ vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS); @@ -1541,7 +1516,7 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) return; } - VTD_DPRINTF(CSR, "Translation Enable %s", (en ? "on" : "off")); + trace_vtd_dmar_enable(en); if (en) { s->dmar_enabled = true; @@ -1562,7 +1537,7 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) /* Handle Interrupt Remap Enable/Disable */ static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en) { - VTD_DPRINTF(CSR, "Interrupt Remap Enable %s", (en ? "on" : "off")); + trace_vtd_ir_enable(en); if (en) { s->intr_enabled = true; @@ -1582,7 +1557,7 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s) uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG); uint32_t changed = status ^ val; - VTD_DPRINTF(CSR, "value 0x%"PRIx32 " status 0x%"PRIx32, val, status); + trace_vtd_reg_write_gcmd(status, val); if (changed & VTD_GCMD_TE) { /* Translation enable/disable */ vtd_handle_gcmd_te(s, val & VTD_GCMD_TE); @@ -1614,8 +1589,8 @@ static void vtd_handle_ccmd_write(IntelIOMMUState *s) /* Context-cache invalidation request */ if (val & VTD_CCMD_ICC) { if (s->qi_enabled) { - VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, " - "should not use register-based invalidation"); + trace_vtd_err("Queued Invalidation enabled, " + "should not use register-based invalidation"); return; } ret = vtd_context_cache_invalidate(s, val); @@ -1623,7 +1598,6 @@ static void vtd_handle_ccmd_write(IntelIOMMUState *s) vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_ICC, 0ULL); ret = vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_CAIG_MASK, ret); - VTD_DPRINTF(INV, "CCMD_REG write-back val: 0x%"PRIx64, ret); } } @@ -1636,8 +1610,8 @@ static void vtd_handle_iotlb_write(IntelIOMMUState *s) /* IOTLB invalidation request */ if (val & VTD_TLB_IVT) { if (s->qi_enabled) { - VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, " - "should not use register-based invalidation"); + trace_vtd_err("Queued Invalidation enabled, " + "should not use register-based invalidation."); return; } ret = vtd_iotlb_flush(s, val); @@ -1645,7 +1619,6 @@ static void vtd_handle_iotlb_write(IntelIOMMUState *s) vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG, VTD_TLB_IVT, 0ULL); ret = vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG, VTD_TLB_FLUSH_GRANU_MASK_A, ret); - VTD_DPRINTF(INV, "IOTLB_REG write-back val: 0x%"PRIx64, ret); } } @@ -1656,11 +1629,9 @@ static bool vtd_get_inv_desc(dma_addr_t base_addr, uint32_t offset, dma_addr_t addr = base_addr + offset * sizeof(*inv_desc); if (dma_memory_read(&address_space_memory, addr, inv_desc, sizeof(*inv_desc))) { - VTD_DPRINTF(GENERAL, "error: fail to fetch Invalidation Descriptor " - "base_addr 0x%"PRIx64 " offset %"PRIu32, base_addr, offset); + trace_vtd_err("Read INV DESC failed."); inv_desc->lo = 0; inv_desc->hi = 0; - return false; } inv_desc->lo = le64_to_cpu(inv_desc->lo); @@ -1746,13 +1717,11 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) switch (inv_desc->lo & VTD_INV_DESC_IOTLB_G) { case VTD_INV_DESC_IOTLB_GLOBAL: - trace_vtd_inv_desc_iotlb_global(); vtd_iotlb_global_invalidate(s); break; case VTD_INV_DESC_IOTLB_DOMAIN: domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo); - trace_vtd_inv_desc_iotlb_domain(domain_id); vtd_iotlb_domain_invalidate(s, domain_id); break; @@ -1760,7 +1729,6 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo); addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi); am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi); - trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am); if (am > VTD_MAMV) { trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo); return false; @@ -1778,10 +1746,9 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) static bool vtd_process_inv_iec_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) { - VTD_DPRINTF(INV, "inv ir glob %d index %d mask %d", - inv_desc->iec.granularity, - inv_desc->iec.index, - inv_desc->iec.index_mask); + trace_vtd_inv_desc_iec(inv_desc->iec.granularity, + inv_desc->iec.index, + inv_desc->iec.index_mask); vtd_iec_notify_all(s, !inv_desc->iec.granularity, inv_desc->iec.index, @@ -1810,9 +1777,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) || (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) { - VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Device " - "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64, - inv_desc->hi, inv_desc->lo); + trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo); return false; } @@ -1857,7 +1822,7 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) VTDInvDesc inv_desc; uint8_t desc_type; - VTD_DPRINTF(INV, "iq head %"PRIu16, s->iq_head); + trace_vtd_inv_qi_head(s->iq_head); if (!vtd_get_inv_desc(s->iq, s->iq_head, &inv_desc)) { s->iq_last_desc_type = VTD_INV_DESC_NONE; return false; @@ -1896,8 +1861,7 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) break; case VTD_INV_DESC_DEVICE: - VTD_DPRINTF(INV, "Device IOTLB Invalidation Descriptor hi 0x%"PRIx64 - " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo); + trace_vtd_inv_desc("device", inv_desc.hi, inv_desc.lo); if (!vtd_process_device_iotlb_desc(s, &inv_desc)) { return false; } @@ -1917,11 +1881,11 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) /* Try to fetch and process more Invalidation Descriptors */ static void vtd_fetch_inv_desc(IntelIOMMUState *s) { - VTD_DPRINTF(INV, "fetch Invalidation Descriptors"); + trace_vtd_inv_qi_fetch(); + if (s->iq_tail >= s->iq_size) { /* Detects an invalid Tail pointer */ - VTD_DPRINTF(GENERAL, "error: iq_tail is %"PRIu16 - " while iq_size is %"PRIu16, s->iq_tail, s->iq_size); + trace_vtd_err_qi_tail(s->iq_tail, s->iq_size); vtd_handle_inv_queue_error(s); return; } @@ -1944,7 +1908,8 @@ static void vtd_handle_iqt_write(IntelIOMMUState *s) uint64_t val = vtd_get_quad_raw(s, DMAR_IQT_REG); s->iq_tail = VTD_IQT_QT(val); - VTD_DPRINTF(INV, "set iq tail %"PRIu16, s->iq_tail); + trace_vtd_inv_qi_tail(s->iq_tail); + if (s->qi_enabled && !(vtd_get_long_raw(s, DMAR_FSTS_REG) & VTD_FSTS_IQE)) { /* Process Invalidation Queue here */ vtd_fetch_inv_desc(s); @@ -1959,8 +1924,7 @@ static void vtd_handle_fsts_write(IntelIOMMUState *s) if ((fectl_reg & VTD_FECTL_IP) && !(fsts_reg & status_fields)) { vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0); - VTD_DPRINTF(FLOG, "all pending interrupt conditions serviced, clear " - "IP field of FECTL_REG"); + trace_vtd_fsts_clear_ip(); } /* FIXME: when IQE is Clear, should we try to fetch some Invalidation * Descriptors if there are any when Queued Invalidation is enabled? @@ -1975,11 +1939,12 @@ static void vtd_handle_fectl_write(IntelIOMMUState *s) * software clears the IM field? Or just check if the IM field is zero? */ fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG); + + trace_vtd_reg_write_fectl(fectl_reg); + if ((fectl_reg & VTD_FECTL_IP) && !(fectl_reg & VTD_FECTL_IM)) { vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG); vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0); - VTD_DPRINTF(FLOG, "IM field is cleared, generate " - "fault event interrupt"); } } @@ -1989,9 +1954,8 @@ static void vtd_handle_ics_write(IntelIOMMUState *s) uint32_t iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG); if ((iectl_reg & VTD_IECTL_IP) && !(ics_reg & VTD_ICS_IWC)) { + trace_vtd_reg_ics_clear_ip(); vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0); - VTD_DPRINTF(INV, "pending completion interrupt condition serviced, " - "clear IP field of IECTL_REG"); } } @@ -2003,11 +1967,12 @@ static void vtd_handle_iectl_write(IntelIOMMUState *s) * software clears the IM field? Or just check if the IM field is zero? */ iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG); + + trace_vtd_reg_write_iectl(iectl_reg); + if ((iectl_reg & VTD_IECTL_IP) && !(iectl_reg & VTD_IECTL_IM)) { vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG); vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0); - VTD_DPRINTF(INV, "IM field is cleared, generate " - "invalidation event interrupt"); } } @@ -2016,10 +1981,10 @@ static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size) IntelIOMMUState *s = opaque; uint64_t val; + trace_vtd_reg_read(addr, size); + if (addr + size > DMAR_REG_SIZE) { - VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64 - ", got 0x%"PRIx64 " %d", - (uint64_t)DMAR_REG_SIZE, addr, size); + trace_vtd_err("Read MMIO over range."); return (uint64_t)-1; } @@ -2058,8 +2023,7 @@ static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size) val = vtd_get_quad(s, addr); } } - VTD_DPRINTF(CSR, "addr 0x%"PRIx64 " size %d val 0x%"PRIx64, - addr, size, val); + return val; } @@ -2068,26 +2032,22 @@ static void vtd_mem_write(void *opaque, hwaddr addr, { IntelIOMMUState *s = opaque; + trace_vtd_reg_write(addr, size, val); + if (addr + size > DMAR_REG_SIZE) { - VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64 - ", got 0x%"PRIx64 " %d", - (uint64_t)DMAR_REG_SIZE, addr, size); + trace_vtd_err("Write MMIO over range."); return; } switch (addr) { /* Global Command Register, 32-bit */ case DMAR_GCMD_REG: - VTD_DPRINTF(CSR, "DMAR_GCMD_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); vtd_set_long(s, addr, val); vtd_handle_gcmd_write(s); break; /* Context Command Register, 64-bit */ case DMAR_CCMD_REG: - VTD_DPRINTF(CSR, "DMAR_CCMD_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); if (size == 4) { vtd_set_long(s, addr, val); } else { @@ -2097,8 +2057,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr, break; case DMAR_CCMD_REG_HI: - VTD_DPRINTF(CSR, "DMAR_CCMD_REG_HI write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); vtd_handle_ccmd_write(s); @@ -2106,8 +2064,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr, /* IOTLB Invalidation Register, 64-bit */ case DMAR_IOTLB_REG: - VTD_DPRINTF(INV, "DMAR_IOTLB_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); if (size == 4) { vtd_set_long(s, addr, val); } else { @@ -2117,8 +2073,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr, break; case DMAR_IOTLB_REG_HI: - VTD_DPRINTF(INV, "DMAR_IOTLB_REG_HI write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); vtd_handle_iotlb_write(s); @@ -2126,8 +2080,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr, /* Invalidate Address Register, 64-bit */ case DMAR_IVA_REG: - VTD_DPRINTF(INV, "DMAR_IVA_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); if (size == 4) { vtd_set_long(s, addr, val); } else { @@ -2136,16 +2088,12 @@ static void vtd_mem_write(void *opaque, hwaddr addr, break; case DMAR_IVA_REG_HI: - VTD_DPRINTF(INV, "DMAR_IVA_REG_HI write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); break; /* Fault Status Register, 32-bit */ case DMAR_FSTS_REG: - VTD_DPRINTF(FLOG, "DMAR_FSTS_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); vtd_handle_fsts_write(s); @@ -2153,8 +2101,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr, /* Fault Event Control Register, 32-bit */ case DMAR_FECTL_REG: - VTD_DPRINTF(FLOG, "DMAR_FECTL_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); vtd_handle_fectl_write(s); @@ -2162,40 +2108,30 @@ static void vtd_mem_write(void *opaque, hwaddr addr, /* Fault Event Data Register, 32-bit */ case DMAR_FEDATA_REG: - VTD_DPRINTF(FLOG, "DMAR_FEDATA_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); break; /* Fault Event Address Register, 32-bit */ case DMAR_FEADDR_REG: - VTD_DPRINTF(FLOG, "DMAR_FEADDR_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); break; /* Fault Event Upper Address Register, 32-bit */ case DMAR_FEUADDR_REG: - VTD_DPRINTF(FLOG, "DMAR_FEUADDR_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); break; /* Protected Memory Enable Register, 32-bit */ case DMAR_PMEN_REG: - VTD_DPRINTF(CSR, "DMAR_PMEN_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); break; /* Root Table Address Register, 64-bit */ case DMAR_RTADDR_REG: - VTD_DPRINTF(CSR, "DMAR_RTADDR_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); if (size == 4) { vtd_set_long(s, addr, val); } else { @@ -2204,16 +2140,12 @@ static void vtd_mem_write(void *opaque, hwaddr addr, break; case DMAR_RTADDR_REG_HI: - VTD_DPRINTF(CSR, "DMAR_RTADDR_REG_HI write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); break; /* Invalidation Queue Tail Register, 64-bit */ case DMAR_IQT_REG: - VTD_DPRINTF(INV, "DMAR_IQT_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); if (size == 4) { vtd_set_long(s, addr, val); } else { @@ -2223,8 +2155,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr, break; case DMAR_IQT_REG_HI: - VTD_DPRINTF(INV, "DMAR_IQT_REG_HI write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); /* 19:63 of IQT_REG is RsvdZ, do nothing here */ @@ -2232,8 +2162,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr, /* Invalidation Queue Address Register, 64-bit */ case DMAR_IQA_REG: - VTD_DPRINTF(INV, "DMAR_IQA_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); if (size == 4) { vtd_set_long(s, addr, val); } else { @@ -2242,16 +2170,12 @@ static void vtd_mem_write(void *opaque, hwaddr addr, break; case DMAR_IQA_REG_HI: - VTD_DPRINTF(INV, "DMAR_IQA_REG_HI write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); break; /* Invalidation Completion Status Register, 32-bit */ case DMAR_ICS_REG: - VTD_DPRINTF(INV, "DMAR_ICS_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); vtd_handle_ics_write(s); @@ -2259,8 +2183,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr, /* Invalidation Event Control Register, 32-bit */ case DMAR_IECTL_REG: - VTD_DPRINTF(INV, "DMAR_IECTL_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); vtd_handle_iectl_write(s); @@ -2268,32 +2190,24 @@ static void vtd_mem_write(void *opaque, hwaddr addr, /* Invalidation Event Data Register, 32-bit */ case DMAR_IEDATA_REG: - VTD_DPRINTF(INV, "DMAR_IEDATA_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); break; /* Invalidation Event Address Register, 32-bit */ case DMAR_IEADDR_REG: - VTD_DPRINTF(INV, "DMAR_IEADDR_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); break; /* Invalidation Event Upper Address Register, 32-bit */ case DMAR_IEUADDR_REG: - VTD_DPRINTF(INV, "DMAR_IEUADDR_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); break; /* Fault Recording Registers, 128-bit */ case DMAR_FRCD_REG_0_0: - VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_0 write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); if (size == 4) { vtd_set_long(s, addr, val); } else { @@ -2302,15 +2216,11 @@ static void vtd_mem_write(void *opaque, hwaddr addr, break; case DMAR_FRCD_REG_0_1: - VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_1 write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); break; case DMAR_FRCD_REG_0_2: - VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_2 write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); if (size == 4) { vtd_set_long(s, addr, val); } else { @@ -2321,8 +2231,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr, break; case DMAR_FRCD_REG_0_3: - VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_3 write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); /* May clear bit 127 (Fault), update PPF */ @@ -2330,8 +2238,6 @@ static void vtd_mem_write(void *opaque, hwaddr addr, break; case DMAR_IRTA_REG: - VTD_DPRINTF(IR, "DMAR_IRTA_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); if (size == 4) { vtd_set_long(s, addr, val); } else { @@ -2340,15 +2246,11 @@ static void vtd_mem_write(void *opaque, hwaddr addr, break; case DMAR_IRTA_REG_HI: - VTD_DPRINTF(IR, "DMAR_IRTA_REG_HI write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); assert(size == 4); vtd_set_long(s, addr, val); break; default: - VTD_DPRINTF(GENERAL, "error: unhandled reg write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); if (size == 4) { vtd_set_long(s, addr, val); } else { @@ -2362,31 +2264,38 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr, { VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); IntelIOMMUState *s = vtd_as->iommu_state; - IOMMUTLBEntry ret = { + IOMMUTLBEntry iotlb = { + /* We'll fill in the rest later. */ .target_as = &address_space_memory, - .iova = addr, - .translated_addr = 0, - .addr_mask = ~(hwaddr)0, - .perm = IOMMU_NONE, }; + bool success; - if (!s->dmar_enabled) { + if (likely(s->dmar_enabled)) { + success = vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, + addr, flag & IOMMU_WO, &iotlb); + } else { /* DMAR disabled, passthrough, use 4k-page*/ - ret.iova = addr & VTD_PAGE_MASK_4K; - ret.translated_addr = addr & VTD_PAGE_MASK_4K; - ret.addr_mask = ~VTD_PAGE_MASK_4K; - ret.perm = IOMMU_RW; - return ret; + iotlb.iova = addr & VTD_PAGE_MASK_4K; + iotlb.translated_addr = addr & VTD_PAGE_MASK_4K; + iotlb.addr_mask = ~VTD_PAGE_MASK_4K; + iotlb.perm = IOMMU_RW; + success = true; } - vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr, - flag & IOMMU_WO, &ret); - VTD_DPRINTF(MMU, - "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8 - " iova 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus), - VTD_PCI_SLOT(vtd_as->devfn), VTD_PCI_FUNC(vtd_as->devfn), - vtd_as->devfn, addr, ret.translated_addr); - return ret; + if (likely(success)) { + trace_vtd_dmar_translate(pci_bus_num(vtd_as->bus), + VTD_PCI_SLOT(vtd_as->devfn), + VTD_PCI_FUNC(vtd_as->devfn), + iotlb.iova, iotlb.translated_addr, + iotlb.addr_mask); + } else { + trace_vtd_err_dmar_translate(pci_bus_num(vtd_as->bus), + VTD_PCI_SLOT(vtd_as->devfn), + VTD_PCI_FUNC(vtd_as->devfn), + iotlb.iova); + } + + return iotlb; } static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu, @@ -2484,25 +2393,23 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, addr = iommu->intr_root + index * sizeof(*entry); if (dma_memory_read(&address_space_memory, addr, entry, sizeof(*entry))) { - VTD_DPRINTF(GENERAL, "error: fail to access IR root at 0x%"PRIx64 - " + %"PRIu16, iommu->intr_root, index); + trace_vtd_err("Memory read failed for IRTE."); return -VTD_FR_IR_ROOT_INVAL; } + trace_vtd_ir_irte_get(index, le64_to_cpu(entry->data[1]), + le64_to_cpu(entry->data[0])); + if (!entry->irte.present) { - VTD_DPRINTF(GENERAL, "error: present flag not set in IRTE" - " entry index %u value 0x%"PRIx64 " 0x%"PRIx64, - index, le64_to_cpu(entry->data[1]), - le64_to_cpu(entry->data[0])); + trace_vtd_err_irte(index, le64_to_cpu(entry->data[1]), + le64_to_cpu(entry->data[0])); return -VTD_FR_IR_ENTRY_P; } if (entry->irte.__reserved_0 || entry->irte.__reserved_1 || entry->irte.__reserved_2) { - VTD_DPRINTF(GENERAL, "error: IRTE entry index %"PRIu16 - " reserved fields non-zero: 0x%"PRIx64 " 0x%"PRIx64, - index, le64_to_cpu(entry->data[1]), - le64_to_cpu(entry->data[0])); + trace_vtd_err_irte(index, le64_to_cpu(entry->data[1]), + le64_to_cpu(entry->data[0])); return -VTD_FR_IR_IRTE_RSVD; } @@ -2511,15 +2418,12 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, source_id = le32_to_cpu(entry->irte.source_id); switch (entry->irte.sid_vtype) { case VTD_SVT_NONE: - VTD_DPRINTF(IR, "No SID validation for IRTE index %d", index); break; case VTD_SVT_ALL: mask = vtd_svt_mask[entry->irte.sid_q]; if ((source_id & mask) != (sid & mask)) { - VTD_DPRINTF(GENERAL, "SID validation for IRTE index " - "%d failed (reqid 0x%04x sid 0x%04x)", index, - sid, source_id); + trace_vtd_err_irte_sid(index, sid, source_id); return -VTD_FR_IR_SID_ERR; } break; @@ -2529,16 +2433,13 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, bus_min = source_id & 0xff; bus = sid >> 8; if (bus > bus_max || bus < bus_min) { - VTD_DPRINTF(GENERAL, "SID validation for IRTE index %d " - "failed (bus %d outside %d-%d)", index, bus, - bus_min, bus_max); + trace_vtd_err_irte_sid_bus(index, bus, bus_min, bus_max); return -VTD_FR_IR_SID_ERR; } break; default: - VTD_DPRINTF(GENERAL, "Invalid SVT bits (0x%x) in IRTE index " - "%d", entry->irte.sid_vtype, index); + trace_vtd_err_irte_svt(index, entry->irte.sid_vtype); /* Take this as verification failure. */ return -VTD_FR_IR_SID_ERR; break; @@ -2573,10 +2474,8 @@ static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index, irq->dest_mode = irte.irte.dest_mode; irq->redir_hint = irte.irte.redir_hint; - VTD_DPRINTF(IR, "remapping interrupt index %d: trig:%u,vec:%u," - "deliver:%u,dest:%u,dest_mode:%u", index, - irq->trigger_mode, irq->vector, irq->delivery_mode, - irq->dest, irq->dest_mode); + trace_vtd_ir_remap(index, irq->trigger_mode, irq->vector, + irq->delivery_mode, irq->dest, irq->dest_mode); return 0; } @@ -2618,28 +2517,29 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, assert(origin && translated); + trace_vtd_ir_remap_msi_req(origin->address, origin->data); + if (!iommu || !iommu->intr_enabled) { - goto do_not_translate; + memcpy(translated, origin, sizeof(*origin)); + goto out; } if (origin->address & VTD_MSI_ADDR_HI_MASK) { - VTD_DPRINTF(GENERAL, "error: MSI addr high 32 bits nonzero" - " during interrupt remapping: 0x%"PRIx32, - (uint32_t)((origin->address & VTD_MSI_ADDR_HI_MASK) >> \ - VTD_MSI_ADDR_HI_SHIFT)); + trace_vtd_err("MSI address high 32 bits non-zero when " + "Interrupt Remapping enabled."); return -VTD_FR_IR_REQ_RSVD; } addr.data = origin->address & VTD_MSI_ADDR_LO_MASK; if (addr.addr.__head != 0xfee) { - VTD_DPRINTF(GENERAL, "error: MSI addr low 32 bits invalid: " - "0x%"PRIx32, addr.data); + trace_vtd_err("MSI addr low 32 bit invalid."); return -VTD_FR_IR_REQ_RSVD; } /* This is compatible mode. */ if (addr.addr.int_mode != VTD_IR_INT_FORMAT_REMAP) { - goto do_not_translate; + memcpy(translated, origin, sizeof(*origin)); + goto out; } index = addr.addr.index_h << 15 | le16_to_cpu(addr.addr.index_l); @@ -2658,34 +2558,28 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, } if (addr.addr.sub_valid) { - VTD_DPRINTF(IR, "received MSI interrupt"); + trace_vtd_ir_remap_type("MSI"); if (origin->data & VTD_IR_MSI_DATA_RESERVED) { - VTD_DPRINTF(GENERAL, "error: MSI data bits non-zero for " - "interrupt remappable entry: 0x%"PRIx32, - origin->data); + trace_vtd_err_ir_msi_invalid(sid, origin->address, origin->data); return -VTD_FR_IR_REQ_RSVD; } } else { uint8_t vector = origin->data & 0xff; uint8_t trigger_mode = (origin->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1; - VTD_DPRINTF(IR, "received IOAPIC interrupt"); + trace_vtd_ir_remap_type("IOAPIC"); /* IOAPIC entry vector should be aligned with IRTE vector * (see vt-d spec 5.1.5.1). */ if (vector != irq.vector) { - VTD_DPRINTF(GENERAL, "IOAPIC vector inconsistent: " - "entry: %d, IRTE: %d, index: %d", - vector, irq.vector, index); + trace_vtd_warn_ir_vector(sid, index, vector, irq.vector); } /* The Trigger Mode field must match the Trigger Mode in the IRTE. * (see vt-d spec 5.1.5.1). */ if (trigger_mode != irq.trigger_mode) { - VTD_DPRINTF(GENERAL, "IOAPIC trigger mode inconsistent: " - "entry: %u, IRTE: %u, index: %d", - trigger_mode, irq.trigger_mode, index); + trace_vtd_warn_ir_trigger(sid, index, trigger_mode, + irq.trigger_mode); } - } /* @@ -2697,13 +2591,9 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, /* Translate VTDIrq to MSI message */ vtd_generate_msi_message(&irq, translated); - VTD_DPRINTF(IR, "mapping MSI 0x%"PRIx64":0x%"PRIx32 " -> " - "0x%"PRIx64":0x%"PRIx32, origin->address, origin->data, - translated->address, translated->data); - return 0; - -do_not_translate: - memcpy(translated, origin, sizeof(*origin)); +out: + trace_vtd_ir_remap_msi(origin->address, origin->data, + translated->address, translated->data); return 0; } @@ -2740,16 +2630,10 @@ static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr, ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid); if (ret) { /* TODO: report error */ - VTD_DPRINTF(GENERAL, "int remap fail for addr 0x%"PRIx64 - " data 0x%"PRIx32, from.address, from.data); /* Drop this interrupt */ return MEMTX_ERROR; } - VTD_DPRINTF(IR, "delivering MSI 0x%"PRIx64":0x%"PRIx32 - " for device sid 0x%04x", - to.address, to.data, sid); - apic_get_class()->send_msi(&to); return MEMTX_OK; @@ -3052,7 +2936,6 @@ static void vtd_reset(DeviceState *dev) { IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); - VTD_DPRINTF(GENERAL, ""); vtd_init(s); /* @@ -3125,7 +3008,6 @@ static void vtd_realize(DeviceState *dev, Error **errp) } bus = pcms->bus; - VTD_DPRINTF(GENERAL, ""); x86_iommu->type = TYPE_INTEL; if (!vtd_decide_config(s, errp)) { @@ -3173,7 +3055,6 @@ static const TypeInfo vtd_info = { static void vtd_register_types(void) { - VTD_DPRINTF(GENERAL, ""); type_register_static(&vtd_info); } diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 0e73a65bf2..f50ecd8b73 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -384,6 +384,7 @@ typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo; /* Pagesize of VTD paging structures, including root and context tables */ #define VTD_PAGE_SHIFT 12 #define VTD_PAGE_SIZE (1ULL << VTD_PAGE_SHIFT) +#define VTD_PAGE_MASK (VTD_PAGE_SIZE - 1) #define VTD_PAGE_SHIFT_4K 12 #define VTD_PAGE_MASK_4K (~((1ULL << VTD_PAGE_SHIFT_4K) - 1)) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 5b8c6fbbea..db41cca063 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1692,6 +1692,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev, PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); MemoryRegion *mr = ddc->get_memory_region(dimm); uint64_t align = TARGET_PAGE_SIZE; + bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); if (memory_region_get_alignment(mr) && pcmc->enforce_aligned_dimm) { align = memory_region_get_alignment(mr); @@ -1703,17 +1704,18 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev, goto out; } + if (is_nvdimm && !pcms->acpi_nvdimm_state.is_enabled) { + error_setg(&local_err, + "nvdimm is not enabled: missing 'nvdimm' in '-M'"); + goto out; + } + pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err); if (local_err) { goto out; } - if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) { - if (!pcms->acpi_nvdimm_state.is_enabled) { - error_setg(&local_err, - "nvdimm is not enabled: missing 'nvdimm' in '-M'"); - goto out; - } + if (is_nvdimm) { nvdimm_plug(&pcms->acpi_nvdimm_state); } diff --git a/hw/i386/trace-events b/hw/i386/trace-events index 72556dad48..5f111d6dde 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -19,6 +19,13 @@ vtd_inv_desc_wait_sw(uint64_t addr, uint32_t data) "wait invalidate status write vtd_inv_desc_wait_irq(const char *msg) "%s" vtd_inv_desc_wait_invalid(uint64_t hi, uint64_t lo) "invalid wait desc hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_inv_desc_wait_write_fail(uint64_t hi, uint64_t lo) "write fail for wait desc hi 0x%"PRIx64" lo 0x%"PRIx64 +vtd_inv_desc_iec(uint32_t granularity, uint32_t index, uint32_t mask) "granularity 0x%"PRIx32" index 0x%"PRIx32" mask 0x%"PRIx32 +vtd_inv_qi_enable(bool enable) "enabled %d" +vtd_inv_qi_setup(uint64_t addr, int size) "addr 0x%"PRIx64" size %d" +vtd_inv_qi_head(uint16_t head) "read head %d" +vtd_inv_qi_tail(uint16_t head) "write tail %d" +vtd_inv_qi_fetch(void) "" +vtd_context_cache_reset(void) "" vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present" vtd_re_invalid(uint64_t hi, uint64_t lo) "invalid root entry hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present" @@ -40,6 +47,43 @@ vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64 vtd_translate_pt(uint16_t sid, uint64_t addr) "source id 0x%"PRIu16", iova 0x%"PRIx64 vtd_pt_enable_fast_path(uint16_t sid, bool success) "sid 0x%"PRIu16" %d" +vtd_irq_generate(uint64_t addr, uint64_t data) "addr 0x%"PRIx64" data 0x%"PRIx64 +vtd_reg_read(uint64_t addr, uint64_t size) "addr 0x%"PRIx64" size 0x%"PRIx64 +vtd_reg_write(uint64_t addr, uint64_t size, uint64_t val) "addr 0x%"PRIx64" size 0x%"PRIx64" value 0x%"PRIx64 +vtd_reg_dmar_root(uint64_t addr, bool extended) "addr 0x%"PRIx64" extended %d" +vtd_reg_ir_root(uint64_t addr, uint32_t size) "addr 0x%"PRIx64" size 0x%"PRIx32 +vtd_reg_write_gcmd(uint32_t status, uint32_t val) "status 0x%"PRIx32" value 0x%"PRIx32 +vtd_reg_write_fectl(uint32_t value) "value 0x%"PRIx32 +vtd_reg_write_iectl(uint32_t value) "value 0x%"PRIx32 +vtd_reg_ics_clear_ip(void) "" +vtd_dmar_translate(uint8_t bus, uint8_t slot, uint8_t func, uint64_t iova, uint64_t gpa, uint64_t mask) "dev %02x:%02x.%02x iova 0x%"PRIx64" -> gpa 0x%"PRIx64" mask 0x%"PRIx64 +vtd_dmar_enable(bool en) "enable %d" +vtd_dmar_fault(uint16_t sid, int fault, uint64_t addr, bool is_write) "sid 0x%"PRIx16" fault %d addr 0x%"PRIx64" write %d" +vtd_ir_enable(bool en) "enable %d" +vtd_ir_irte_get(int index, uint64_t lo, uint64_t hi) "index %d low 0x%"PRIx64" high 0x%"PRIx64 +vtd_ir_remap(int index, int tri, int vec, int deliver, uint32_t dest, int dest_mode) "index %d trigger %d vector %d deliver %d dest 0x%"PRIx32" mode %d" +vtd_ir_remap_type(const char *type) "%s" +vtd_ir_remap_msi(uint64_t addr, uint64_t data, uint64_t addr2, uint64_t data2) "(addr 0x%"PRIx64", data 0x%"PRIx64") -> (addr 0x%"PRIx64", data 0x%"PRIx64")" +vtd_ir_remap_msi_req(uint64_t addr, uint64_t data) "addr 0x%"PRIx64" data 0x%"PRIx64 +vtd_fsts_ppf(bool set) "FSTS PPF bit set to %d" +vtd_fsts_clear_ip(void) "" +vtd_frr_new(int index, uint64_t hi, uint64_t lo) "index %d high 0x%"PRIx64" low 0x%"PRIx64 +vtd_err(const char *str) "%s" +vtd_err_dmar_iova_overflow(uint64_t iova) "iova 0x%"PRIx64 +vtd_err_dmar_slpte_read_error(uint64_t iova, int level) "iova 0x%"PRIx64" level %d" +vtd_err_dmar_slpte_perm_error(uint64_t iova, int level, uint64_t slpte, bool is_write) "iova 0x%"PRIx64" level %d slpte 0x%"PRIx64" write %d" +vtd_err_dmar_slpte_resv_error(uint64_t iova, int level, uint64_t slpte) "iova 0x%"PRIx64" level %d slpte 0x%"PRIx64 +vtd_err_dmar_translate(uint8_t bus, uint8_t slot, uint8_t func, uint64_t iova) "dev %02x:%02x.%02x iova 0x%"PRIx64 +vtd_err_qi_enable(uint16_t tail) "tail 0x%"PRIx16 +vtd_err_qi_disable(uint16_t head, uint16_t tail, int type) "head 0x%"PRIx16" tail 0x%"PRIx16" last_desc_type %d" +vtd_err_qi_tail(uint16_t tail, uint16_t size) "tail 0x%"PRIx16" size 0x%"PRIx16 +vtd_err_irte(int index, uint64_t lo, uint64_t hi) "index %d low 0x%"PRIx64" high 0x%"PRIx64 +vtd_err_irte_sid(int index, uint16_t req, uint16_t target) "index %d SVT_ALL sid 0x%"PRIx16" (should be: 0x%"PRIx16")" +vtd_err_irte_sid_bus(int index, uint8_t bus, uint8_t min, uint8_t max) "index %d SVT_BUS bus 0x%"PRIx8" (should be: 0x%"PRIx8"-0x%"PRIx8")" +vtd_err_irte_svt(int index, int type) "index %d SVT type %d" +vtd_err_ir_msi_invalid(uint16_t sid, uint64_t addr, uint64_t data) "sid 0x%"PRIx16" addr 0x%"PRIx64" data 0x%"PRIx64 +vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target) "sid 0x%"PRIx16" index %d vec %d (should be: %d)" +vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target) "sid 0x%"PRIx16" index %d trigger %d (should be: %d)" # hw/i386/amd_iommu.c amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32 diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 6367d041f0..2f0819d977 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -491,9 +491,9 @@ static void setup_interrupt(IVShmemState *s, int vector, Error **errp) static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) { + Error *local_err = NULL; struct stat buf; size_t size; - void *ptr; if (s->ivshmem_bar2) { error_setg(errp, "server sent unexpected shared memory message"); @@ -522,15 +522,13 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) } /* mmap the region and map into the BAR2 */ - ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (ptr == MAP_FAILED) { - error_setg_errno(errp, errno, "Failed to mmap shared memory"); - close(fd); + memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s), + "ivshmem.bar2", size, true, fd, &local_err); + if (local_err) { + error_propagate(errp, local_err); return; } - memory_region_init_ram_ptr(&s->server_bar2, OBJECT(s), - "ivshmem.bar2", size, ptr); - memory_region_set_fd(&s->server_bar2, fd); + s->ivshmem_bar2 = &s->server_bar2; } diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index cd5c49616e..28cb97b60f 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -134,7 +134,7 @@ static void q35_host_get_mmcfg_size(Object *obj, Visitor *v, const char *name, visit_type_uint32(v, name, &value, errp); } -static Property mch_props[] = { +static Property q35_host_props[] = { DEFINE_PROP_UINT64(PCIE_HOST_MCFG_BASE, Q35PCIHost, parent_obj.base_addr, MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT), DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, Q35PCIHost, @@ -154,7 +154,7 @@ static void q35_host_class_init(ObjectClass *klass, void *data) hc->root_bus_path = q35_host_root_bus_path; dc->realize = q35_host_realize; - dc->props = mch_props; + dc->props = q35_host_props; /* Reason: needs to be wired up by pc_q35_init */ dc->user_creatable = false; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); @@ -369,7 +369,7 @@ static void mch_update_smram(MCHPCIState *mch) tseg_size = 1024 * 1024 * 8; break; default: - tseg_size = 0; + tseg_size = 1024 * 1024 * (uint32_t)mch->ext_tseg_mbytes; break; } } else { @@ -392,6 +392,17 @@ static void mch_update_smram(MCHPCIState *mch) memory_region_transaction_commit(); } +static void mch_update_ext_tseg_mbytes(MCHPCIState *mch) +{ + PCIDevice *pd = PCI_DEVICE(mch); + uint8_t *reg = pd->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES; + + if (mch->ext_tseg_mbytes > 0 && + pci_get_word(reg) == MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY) { + pci_set_word(reg, mch->ext_tseg_mbytes); + } +} + static void mch_write_config(PCIDevice *d, uint32_t address, uint32_t val, int len) { @@ -413,6 +424,11 @@ static void mch_write_config(PCIDevice *d, MCH_HOST_BRIDGE_SMRAM_SIZE)) { mch_update_smram(mch); } + + if (ranges_overlap(address, len, MCH_HOST_BRIDGE_EXT_TSEG_MBYTES, + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_SIZE)) { + mch_update_ext_tseg_mbytes(mch); + } } static void mch_update(MCHPCIState *mch) @@ -420,6 +436,7 @@ static void mch_update(MCHPCIState *mch) mch_update_pciexbar(mch); mch_update_pam(mch); mch_update_smram(mch); + mch_update_ext_tseg_mbytes(mch); } static int mch_post_load(void *opaque, int version_id) @@ -457,6 +474,11 @@ static void mch_reset(DeviceState *qdev) d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK; d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK; + if (mch->ext_tseg_mbytes > 0) { + pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES, + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY); + } + mch_update(mch); } @@ -465,6 +487,12 @@ static void mch_realize(PCIDevice *d, Error **errp) int i; MCHPCIState *mch = MCH_PCI_DEVICE(d); + if (mch->ext_tseg_mbytes > MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_MAX) { + error_setg(errp, "invalid extended-tseg-mbytes value: %" PRIu16, + mch->ext_tseg_mbytes); + return; + } + /* setup pci memory mapping */ pc_pci_as_mapping_init(OBJECT(mch), mch->system_memory, mch->pci_address_space); @@ -530,6 +558,12 @@ uint64_t mch_mcfg_base(void) return MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT; } +static Property mch_props[] = { + DEFINE_PROP_UINT16("extended-tseg-mbytes", MCHPCIState, ext_tseg_mbytes, + 16), + DEFINE_PROP_END_OF_LIST(), +}; + static void mch_class_init(ObjectClass *klass, void *data) { PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); @@ -538,6 +572,7 @@ static void mch_class_init(ObjectClass *klass, void *data) k->realize = mch_realize; k->config_write = mch_write_config; dc->reset = mch_reset; + dc->props = mch_props; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->desc = "Host bridge"; dc->vmsd = &vmstate_mch; diff --git a/hw/scsi/Makefile.objs b/hw/scsi/Makefile.objs index 54d8754e9a..b188f7242b 100644 --- a/hw/scsi/Makefile.objs +++ b/hw/scsi/Makefile.objs @@ -11,4 +11,5 @@ obj-$(CONFIG_PSERIES) += spapr_vscsi.o ifeq ($(CONFIG_VIRTIO),y) obj-y += virtio-scsi.o virtio-scsi-dataplane.o obj-$(CONFIG_VHOST_SCSI) += vhost-scsi-common.o vhost-scsi.o +obj-$(CONFIG_VHOST_USER_SCSI) += vhost-scsi-common.o vhost-user-scsi.o endif diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index 804122ab05..734fdaef90 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -63,6 +63,7 @@ typedef struct MegasasCmd { hwaddr pa; hwaddr pa_size; + uint32_t dcmd_opcode; union mfi_frame *frame; SCSIRequest *req; QEMUSGList qsg; @@ -309,9 +310,11 @@ static int megasas_build_sense(MegasasCmd *cmd, uint8_t *sense_ptr, PCIDevice *pcid = PCI_DEVICE(cmd->state); uint32_t pa_hi = 0, pa_lo; hwaddr pa; + int frame_sense_len; - if (sense_len > cmd->frame->header.sense_len) { - sense_len = cmd->frame->header.sense_len; + frame_sense_len = cmd->frame->header.sense_len; + if (sense_len > frame_sense_len) { + sense_len = frame_sense_len; } if (sense_len) { pa_lo = le32_to_cpu(cmd->frame->pass.sense_addr_lo); @@ -511,6 +514,7 @@ static MegasasCmd *megasas_enqueue_frame(MegasasState *s, cmd->context &= (uint64_t)0xFFFFFFFF; } cmd->count = count; + cmd->dcmd_opcode = -1; s->busy++; if (s->consumer_pa) { @@ -605,6 +609,9 @@ static void megasas_reset_frames(MegasasState *s) static void megasas_abort_command(MegasasCmd *cmd) { /* Never abort internal commands. */ + if (cmd->dcmd_opcode != -1) { + return; + } if (cmd->req != NULL) { scsi_req_cancel(cmd->req); } @@ -673,15 +680,16 @@ out: static int megasas_map_dcmd(MegasasState *s, MegasasCmd *cmd) { dma_addr_t iov_pa, iov_size; + int iov_count; cmd->flags = le16_to_cpu(cmd->frame->header.flags); - if (!cmd->frame->header.sge_count) { + iov_count = cmd->frame->header.sge_count; + if (!iov_count) { trace_megasas_dcmd_zero_sge(cmd->index); cmd->iov_size = 0; return 0; - } else if (cmd->frame->header.sge_count > 1) { - trace_megasas_dcmd_invalid_sge(cmd->index, - cmd->frame->header.sge_count); + } else if (iov_count > 1) { + trace_megasas_dcmd_invalid_sge(cmd->index, iov_count); cmd->iov_size = 0; return -EINVAL; } @@ -1012,7 +1020,6 @@ static int megasas_pd_get_info_submit(SCSIDevice *sdev, int lun, uint64_t pd_size; uint16_t pd_id = ((sdev->id & 0xFF) << 8) | (lun & 0xFF); uint8_t cmdbuf[6]; - SCSIRequest *req; size_t len, resid; if (!cmd->iov_buf) { @@ -1021,8 +1028,8 @@ static int megasas_pd_get_info_submit(SCSIDevice *sdev, int lun, info->inquiry_data[0] = 0x7f; /* Force PQual 0x3, PType 0x1f */ info->vpd_page83[0] = 0x7f; megasas_setup_inquiry(cmdbuf, 0, sizeof(info->inquiry_data)); - req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd); - if (!req) { + cmd->req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd); + if (!cmd->req) { trace_megasas_dcmd_req_alloc_failed(cmd->index, "PD get info std inquiry"); g_free(cmd->iov_buf); @@ -1031,26 +1038,26 @@ static int megasas_pd_get_info_submit(SCSIDevice *sdev, int lun, } trace_megasas_dcmd_internal_submit(cmd->index, "PD get info std inquiry", lun); - len = scsi_req_enqueue(req); + len = scsi_req_enqueue(cmd->req); if (len > 0) { cmd->iov_size = len; - scsi_req_continue(req); + scsi_req_continue(cmd->req); } return MFI_STAT_INVALID_STATUS; } else if (info->inquiry_data[0] != 0x7f && info->vpd_page83[0] == 0x7f) { megasas_setup_inquiry(cmdbuf, 0x83, sizeof(info->vpd_page83)); - req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd); - if (!req) { + cmd->req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd); + if (!cmd->req) { trace_megasas_dcmd_req_alloc_failed(cmd->index, "PD get info vpd inquiry"); return MFI_STAT_FLASH_ALLOC_FAIL; } trace_megasas_dcmd_internal_submit(cmd->index, "PD get info vpd inquiry", lun); - len = scsi_req_enqueue(req); + len = scsi_req_enqueue(cmd->req); if (len > 0) { cmd->iov_size = len; - scsi_req_continue(req); + scsi_req_continue(cmd->req); } return MFI_STAT_INVALID_STATUS; } @@ -1212,7 +1219,6 @@ static int megasas_ld_get_info_submit(SCSIDevice *sdev, int lun, struct mfi_ld_info *info = cmd->iov_buf; size_t dcmd_size = sizeof(struct mfi_ld_info); uint8_t cdb[6]; - SCSIRequest *req; ssize_t len, resid; uint16_t sdev_id = ((sdev->id & 0xFF) << 8) | (lun & 0xFF); uint64_t ld_size; @@ -1221,8 +1227,8 @@ static int megasas_ld_get_info_submit(SCSIDevice *sdev, int lun, cmd->iov_buf = g_malloc0(dcmd_size); info = cmd->iov_buf; megasas_setup_inquiry(cdb, 0x83, sizeof(info->vpd_page83)); - req = scsi_req_new(sdev, cmd->index, lun, cdb, cmd); - if (!req) { + cmd->req = scsi_req_new(sdev, cmd->index, lun, cdb, cmd); + if (!cmd->req) { trace_megasas_dcmd_req_alloc_failed(cmd->index, "LD get info vpd inquiry"); g_free(cmd->iov_buf); @@ -1231,10 +1237,10 @@ static int megasas_ld_get_info_submit(SCSIDevice *sdev, int lun, } trace_megasas_dcmd_internal_submit(cmd->index, "LD get info vpd inquiry", lun); - len = scsi_req_enqueue(req); + len = scsi_req_enqueue(cmd->req); if (len > 0) { cmd->iov_size = len; - scsi_req_continue(req); + scsi_req_continue(cmd->req); } return MFI_STAT_INVALID_STATUS; } @@ -1559,22 +1565,21 @@ static const struct dcmd_cmd_tbl_t { static int megasas_handle_dcmd(MegasasState *s, MegasasCmd *cmd) { - int opcode; int retval = 0; size_t len; const struct dcmd_cmd_tbl_t *cmdptr = dcmd_cmd_tbl; - opcode = le32_to_cpu(cmd->frame->dcmd.opcode); - trace_megasas_handle_dcmd(cmd->index, opcode); + cmd->dcmd_opcode = le32_to_cpu(cmd->frame->dcmd.opcode); + trace_megasas_handle_dcmd(cmd->index, cmd->dcmd_opcode); if (megasas_map_dcmd(s, cmd) < 0) { return MFI_STAT_MEMORY_NOT_AVAILABLE; } - while (cmdptr->opcode != -1 && cmdptr->opcode != opcode) { + while (cmdptr->opcode != -1 && cmdptr->opcode != cmd->dcmd_opcode) { cmdptr++; } len = cmd->iov_size; if (cmdptr->opcode == -1) { - trace_megasas_dcmd_unhandled(cmd->index, opcode, len); + trace_megasas_dcmd_unhandled(cmd->index, cmd->dcmd_opcode, len); retval = megasas_dcmd_dummy(s, cmd); } else { trace_megasas_dcmd_enter(cmd->index, cmdptr->desc, len); @@ -1587,15 +1592,14 @@ static int megasas_handle_dcmd(MegasasState *s, MegasasCmd *cmd) } static int megasas_finish_internal_dcmd(MegasasCmd *cmd, - SCSIRequest *req) + SCSIRequest *req, size_t resid) { - int opcode; int retval = MFI_STAT_OK; int lun = req->lun; - opcode = le32_to_cpu(cmd->frame->dcmd.opcode); - trace_megasas_dcmd_internal_finish(cmd->index, opcode, lun); - switch (opcode) { + trace_megasas_dcmd_internal_finish(cmd->index, cmd->dcmd_opcode, lun); + cmd->iov_size -= resid; + switch (cmd->dcmd_opcode) { case MFI_DCMD_PD_GET_INFO: retval = megasas_pd_get_info_submit(req->dev, lun, cmd); break; @@ -1603,7 +1607,7 @@ static int megasas_finish_internal_dcmd(MegasasCmd *cmd, retval = megasas_ld_get_info_submit(req->dev, lun, cmd); break; default: - trace_megasas_dcmd_internal_invalid(cmd->index, opcode); + trace_megasas_dcmd_internal_invalid(cmd->index, cmd->dcmd_opcode); retval = MFI_STAT_INVALID_DCMD; break; } @@ -1647,43 +1651,42 @@ static int megasas_enqueue_req(MegasasCmd *cmd, bool is_write) } static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd, - bool is_logical) + int frame_cmd) { uint8_t *cdb; + int target_id, lun_id, cdb_len; bool is_write; struct SCSIDevice *sdev = NULL; + bool is_logical = (frame_cmd == MFI_CMD_LD_SCSI_IO); cdb = cmd->frame->pass.cdb; + target_id = cmd->frame->header.target_id; + lun_id = cmd->frame->header.lun_id; + cdb_len = cmd->frame->header.cdb_len; if (is_logical) { - if (cmd->frame->header.target_id >= MFI_MAX_LD || - cmd->frame->header.lun_id != 0) { + if (target_id >= MFI_MAX_LD || lun_id != 0) { trace_megasas_scsi_target_not_present( - mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical, - cmd->frame->header.target_id, cmd->frame->header.lun_id); + mfi_frame_desc[frame_cmd], is_logical, target_id, lun_id); return MFI_STAT_DEVICE_NOT_FOUND; } } - sdev = scsi_device_find(&s->bus, 0, cmd->frame->header.target_id, - cmd->frame->header.lun_id); + sdev = scsi_device_find(&s->bus, 0, target_id, lun_id); cmd->iov_size = le32_to_cpu(cmd->frame->header.data_len); - trace_megasas_handle_scsi(mfi_frame_desc[cmd->frame->header.frame_cmd], - is_logical, cmd->frame->header.target_id, - cmd->frame->header.lun_id, sdev, cmd->iov_size); + trace_megasas_handle_scsi(mfi_frame_desc[frame_cmd], is_logical, + target_id, lun_id, sdev, cmd->iov_size); if (!sdev || (megasas_is_jbod(s) && is_logical)) { trace_megasas_scsi_target_not_present( - mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical, - cmd->frame->header.target_id, cmd->frame->header.lun_id); + mfi_frame_desc[frame_cmd], is_logical, target_id, lun_id); return MFI_STAT_DEVICE_NOT_FOUND; } - if (cmd->frame->header.cdb_len > 16) { + if (cdb_len > 16) { trace_megasas_scsi_invalid_cdb_len( - mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical, - cmd->frame->header.target_id, cmd->frame->header.lun_id, - cmd->frame->header.cdb_len); + mfi_frame_desc[frame_cmd], is_logical, + target_id, lun_id, cdb_len); megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE)); cmd->frame->header.scsi_status = CHECK_CONDITION; s->event_count++; @@ -1697,12 +1700,10 @@ static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd, return MFI_STAT_SCSI_DONE_WITH_ERROR; } - cmd->req = scsi_req_new(sdev, cmd->index, - cmd->frame->header.lun_id, cdb, cmd); + cmd->req = scsi_req_new(sdev, cmd->index, lun_id, cdb, cmd); if (!cmd->req) { trace_megasas_scsi_req_alloc_failed( - mfi_frame_desc[cmd->frame->header.frame_cmd], - cmd->frame->header.target_id, cmd->frame->header.lun_id); + mfi_frame_desc[frame_cmd], target_id, lun_id); megasas_write_sense(cmd, SENSE_CODE(NO_SENSE)); cmd->frame->header.scsi_status = BUSY; s->event_count++; @@ -1723,43 +1724,41 @@ static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd, return MFI_STAT_INVALID_STATUS; } -static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd) +static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd) { uint32_t lba_count, lba_start_hi, lba_start_lo; uint64_t lba_start; - bool is_write = (cmd->frame->header.frame_cmd == MFI_CMD_LD_WRITE); + bool is_write = (frame_cmd == MFI_CMD_LD_WRITE); uint8_t cdb[16]; int len; struct SCSIDevice *sdev = NULL; + int target_id, lun_id, cdb_len; lba_count = le32_to_cpu(cmd->frame->io.header.data_len); lba_start_lo = le32_to_cpu(cmd->frame->io.lba_lo); lba_start_hi = le32_to_cpu(cmd->frame->io.lba_hi); lba_start = ((uint64_t)lba_start_hi << 32) | lba_start_lo; - if (cmd->frame->header.target_id < MFI_MAX_LD && - cmd->frame->header.lun_id == 0) { - sdev = scsi_device_find(&s->bus, 0, cmd->frame->header.target_id, - cmd->frame->header.lun_id); + target_id = cmd->frame->header.target_id; + lun_id = cmd->frame->header.lun_id; + cdb_len = cmd->frame->header.cdb_len; + + if (target_id < MFI_MAX_LD && lun_id == 0) { + sdev = scsi_device_find(&s->bus, 0, target_id, lun_id); } trace_megasas_handle_io(cmd->index, - mfi_frame_desc[cmd->frame->header.frame_cmd], - cmd->frame->header.target_id, - cmd->frame->header.lun_id, + mfi_frame_desc[frame_cmd], target_id, lun_id, (unsigned long)lba_start, (unsigned long)lba_count); if (!sdev) { trace_megasas_io_target_not_present(cmd->index, - mfi_frame_desc[cmd->frame->header.frame_cmd], - cmd->frame->header.target_id, cmd->frame->header.lun_id); + mfi_frame_desc[frame_cmd], target_id, lun_id); return MFI_STAT_DEVICE_NOT_FOUND; } - if (cmd->frame->header.cdb_len > 16) { + if (cdb_len > 16) { trace_megasas_scsi_invalid_cdb_len( - mfi_frame_desc[cmd->frame->header.frame_cmd], 1, - cmd->frame->header.target_id, cmd->frame->header.lun_id, - cmd->frame->header.cdb_len); + mfi_frame_desc[frame_cmd], 1, target_id, lun_id, cdb_len); megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE)); cmd->frame->header.scsi_status = CHECK_CONDITION; s->event_count++; @@ -1776,11 +1775,10 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd) megasas_encode_lba(cdb, lba_start, lba_count, is_write); cmd->req = scsi_req_new(sdev, cmd->index, - cmd->frame->header.lun_id, cdb, cmd); + lun_id, cdb, cmd); if (!cmd->req) { trace_megasas_scsi_req_alloc_failed( - mfi_frame_desc[cmd->frame->header.frame_cmd], - cmd->frame->header.target_id, cmd->frame->header.lun_id); + mfi_frame_desc[frame_cmd], target_id, lun_id); megasas_write_sense(cmd, SENSE_CODE(NO_SENSE)); cmd->frame->header.scsi_status = BUSY; s->event_count++; @@ -1797,23 +1795,11 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd) return MFI_STAT_INVALID_STATUS; } -static int megasas_finish_internal_command(MegasasCmd *cmd, - SCSIRequest *req, size_t resid) -{ - int retval = MFI_STAT_INVALID_CMD; - - if (cmd->frame->header.frame_cmd == MFI_CMD_DCMD) { - cmd->iov_size -= resid; - retval = megasas_finish_internal_dcmd(cmd, req); - } - return retval; -} - static QEMUSGList *megasas_get_sg_list(SCSIRequest *req) { MegasasCmd *cmd = req->hba_private; - if (cmd->frame->header.frame_cmd == MFI_CMD_DCMD) { + if (cmd->dcmd_opcode != -1) { return NULL; } else { return &cmd->qsg; @@ -1824,18 +1810,16 @@ static void megasas_xfer_complete(SCSIRequest *req, uint32_t len) { MegasasCmd *cmd = req->hba_private; uint8_t *buf; - uint32_t opcode; trace_megasas_io_complete(cmd->index, len); - if (cmd->frame->header.frame_cmd != MFI_CMD_DCMD) { + if (cmd->dcmd_opcode != -1) { scsi_req_continue(req); return; } buf = scsi_req_get_buf(req); - opcode = le32_to_cpu(cmd->frame->dcmd.opcode); - if (opcode == MFI_DCMD_PD_GET_INFO && cmd->iov_buf) { + if (cmd->dcmd_opcode == MFI_DCMD_PD_GET_INFO && cmd->iov_buf) { struct mfi_pd_info *info = cmd->iov_buf; if (info->inquiry_data[0] == 0x7f) { @@ -1846,7 +1830,7 @@ static void megasas_xfer_complete(SCSIRequest *req, uint32_t len) memcpy(info->vpd_page83, buf, len); } scsi_req_continue(req); - } else if (opcode == MFI_DCMD_LD_GET_INFO) { + } else if (cmd->dcmd_opcode == MFI_DCMD_LD_GET_INFO) { struct mfi_ld_info *info = cmd->iov_buf; if (cmd->iov_buf) { @@ -1868,11 +1852,11 @@ static void megasas_command_complete(SCSIRequest *req, uint32_t status, return; } - if (cmd->req == NULL) { + if (cmd->dcmd_opcode != -1) { /* * Internal command complete */ - cmd_status = megasas_finish_internal_command(cmd, req, resid); + cmd_status = megasas_finish_internal_dcmd(cmd, req, resid); if (cmd_status == MFI_STAT_INVALID_STATUS) { return; } @@ -1943,6 +1927,7 @@ static void megasas_handle_frame(MegasasState *s, uint64_t frame_addr, { uint8_t frame_status = MFI_STAT_INVALID_CMD; uint64_t frame_context; + int frame_cmd; MegasasCmd *cmd; /* @@ -1961,7 +1946,8 @@ static void megasas_handle_frame(MegasasState *s, uint64_t frame_addr, s->event_count++; return; } - switch (cmd->frame->header.frame_cmd) { + frame_cmd = cmd->frame->header.frame_cmd; + switch (frame_cmd) { case MFI_CMD_INIT: frame_status = megasas_init_firmware(s, cmd); break; @@ -1972,18 +1958,15 @@ static void megasas_handle_frame(MegasasState *s, uint64_t frame_addr, frame_status = megasas_handle_abort(s, cmd); break; case MFI_CMD_PD_SCSI_IO: - frame_status = megasas_handle_scsi(s, cmd, 0); - break; case MFI_CMD_LD_SCSI_IO: - frame_status = megasas_handle_scsi(s, cmd, 1); + frame_status = megasas_handle_scsi(s, cmd, frame_cmd); break; case MFI_CMD_LD_READ: case MFI_CMD_LD_WRITE: - frame_status = megasas_handle_io(s, cmd); + frame_status = megasas_handle_io(s, cmd, frame_cmd); break; default: - trace_megasas_unhandled_frame_cmd(cmd->index, - cmd->frame->header.frame_cmd); + trace_megasas_unhandled_frame_cmd(cmd->index, frame_cmd); s->event_count++; break; } diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c index e41c0314db..d434b3e99a 100644 --- a/hw/scsi/vhost-scsi-common.c +++ b/hw/scsi/vhost-scsi-common.c @@ -16,7 +16,6 @@ */ #include "qemu/osdep.h" -#include <linux/vhost.h> #include "qapi/error.h" #include "qemu/error-report.h" #include "migration/migration.h" diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c new file mode 100644 index 0000000000..500fa6a067 --- /dev/null +++ b/hw/scsi/vhost-user-scsi.c @@ -0,0 +1,205 @@ +/* + * vhost-user-scsi host device + * + * Copyright (c) 2016 Nutanix Inc. All rights reserved. + * + * Author: + * Felipe Franciosi <felipe@nutanix.com> + * + * This work is largely based on the "vhost-scsi" implementation by: + * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> + * Nicholas Bellinger <nab@risingtidesystems.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/typedefs.h" +#include "qom/object.h" +#include "hw/fw-path-provider.h" +#include "hw/qdev-core.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-backend.h" +#include "hw/virtio/vhost-user-scsi.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-access.h" +#include "chardev/char-fe.h" + +/* Features supported by the host application */ +static const int user_feature_bits[] = { + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_SCSI_F_HOTPLUG, + VHOST_INVALID_FEATURE_BIT +}; + +static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostUserSCSI *s = (VHostUserSCSI *)vdev; + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + bool start = (status & VIRTIO_CONFIG_S_DRIVER_OK) && vdev->vm_running; + + if (vsc->dev.started == start) { + return; + } + + if (start) { + int ret; + + ret = vhost_scsi_common_start(vsc); + if (ret < 0) { + error_report("unable to start vhost-user-scsi: %s", strerror(-ret)); + exit(1); + } + } else { + vhost_scsi_common_stop(vsc); + } +} + +static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ +} + +static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) +{ + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); + VHostUserSCSI *s = VHOST_USER_SCSI(dev); + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + Error *err = NULL; + int ret; + + if (!vs->conf.chardev.chr) { + error_setg(errp, "vhost-user-scsi: missing chardev"); + return; + } + + virtio_scsi_common_realize(dev, vhost_dummy_handle_output, + vhost_dummy_handle_output, + vhost_dummy_handle_output, &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + + vsc->dev.nvqs = 2 + vs->conf.num_queues; + vsc->dev.vqs = g_new(struct vhost_virtqueue, vsc->dev.nvqs); + vsc->dev.vq_index = 0; + vsc->dev.backend_features = 0; + + ret = vhost_dev_init(&vsc->dev, (void *)&vs->conf.chardev, + VHOST_BACKEND_TYPE_USER, 0); + if (ret < 0) { + error_setg(errp, "vhost-user-scsi: vhost initialization failed: %s", + strerror(-ret)); + return; + } + + /* Channel and lun both are 0 for bootable vhost-user-scsi disk */ + vsc->channel = 0; + vsc->lun = 0; + vsc->target = vs->conf.boot_tpgt; +} + +static void vhost_user_scsi_unrealize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserSCSI *s = VHOST_USER_SCSI(dev); + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + + /* This will stop the vhost backend. */ + vhost_user_scsi_set_status(vdev, 0); + + vhost_dev_cleanup(&vsc->dev); + g_free(vsc->dev.vqs); + + virtio_scsi_common_unrealize(dev, errp); +} + +static uint64_t vhost_user_scsi_get_features(VirtIODevice *vdev, + uint64_t features, Error **errp) +{ + VHostUserSCSI *s = VHOST_USER_SCSI(vdev); + + /* Turn on predefined features supported by this device */ + features |= s->host_features; + + return vhost_scsi_common_get_features(vdev, features, errp); +} + +static Property vhost_user_scsi_properties[] = { + DEFINE_PROP_CHR("chardev", VirtIOSCSICommon, conf.chardev), + DEFINE_PROP_UINT32("boot_tpgt", VirtIOSCSICommon, conf.boot_tpgt, 0), + DEFINE_PROP_UINT32("num_queues", VirtIOSCSICommon, conf.num_queues, 1), + DEFINE_PROP_UINT32("max_sectors", VirtIOSCSICommon, conf.max_sectors, + 0xFFFF), + DEFINE_PROP_UINT32("cmd_per_lun", VirtIOSCSICommon, conf.cmd_per_lun, 128), + DEFINE_PROP_BIT64("hotplug", VHostUserSCSI, host_features, + VIRTIO_SCSI_F_HOTPLUG, + true), + DEFINE_PROP_BIT64("param_change", VHostUserSCSI, host_features, + VIRTIO_SCSI_F_CHANGE, + true), + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription vmstate_vhost_scsi = { + .name = "virtio-scsi", + .minimum_version_id = 1, + .version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, +}; + +static void vhost_user_scsi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(klass); + + dc->props = vhost_user_scsi_properties; + dc->vmsd = &vmstate_vhost_scsi; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + vdc->realize = vhost_user_scsi_realize; + vdc->unrealize = vhost_user_scsi_unrealize; + vdc->get_features = vhost_user_scsi_get_features; + vdc->set_config = vhost_scsi_common_set_config; + vdc->set_status = vhost_user_scsi_set_status; + fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path; +} + +static void vhost_user_scsi_instance_init(Object *obj) +{ + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(obj); + + vsc->feature_bits = user_feature_bits; + + /* Add the bootindex property for this object */ + device_add_bootindex_property(obj, &vsc->bootindex, "bootindex", NULL, + DEVICE(vsc), NULL); +} + +static const TypeInfo vhost_user_scsi_info = { + .name = TYPE_VHOST_USER_SCSI, + .parent = TYPE_VHOST_SCSI_COMMON, + .instance_size = sizeof(VHostUserSCSI), + .class_init = vhost_user_scsi_class_init, + .instance_init = vhost_user_scsi_instance_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_FW_PATH_PROVIDER }, + { } + }, +}; + +static void virtio_register_types(void) +{ + type_register_static(&vhost_user_scsi_info); +} + +type_init(virtio_register_types) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index f9b7244808..20d6a08616 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -2135,6 +2135,61 @@ static const TypeInfo vhost_scsi_pci_info = { }; #endif +#ifdef CONFIG_LINUX +/* vhost-user-scsi-pci */ +static Property vhost_user_scsi_pci_properties[] = { + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, + DEV_NVECTORS_UNSPECIFIED), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_user_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserSCSIPCI *dev = VHOST_USER_SCSI_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); + + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { + vpci_dev->nvectors = vs->conf.num_queues + 3; + } + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); + object_property_set_bool(OBJECT(vdev), true, "realized", errp); +} + +static void vhost_user_scsi_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_user_scsi_pci_realize; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->props = vhost_user_scsi_pci_properties; + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_SCSI; + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_STORAGE_SCSI; +} + +static void vhost_user_scsi_pci_instance_init(Object *obj) +{ + VHostUserSCSIPCI *dev = VHOST_USER_SCSI_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_USER_SCSI); + object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex", &error_abort); +} + +static const TypeInfo vhost_user_scsi_pci_info = { + .name = TYPE_VHOST_USER_SCSI_PCI, + .parent = TYPE_VIRTIO_PCI, + .instance_size = sizeof(VHostUserSCSIPCI), + .instance_init = vhost_user_scsi_pci_instance_init, + .class_init = vhost_user_scsi_pci_class_init, +}; +#endif + /* vhost-vsock-pci */ #ifdef CONFIG_VHOST_VSOCK @@ -2612,6 +2667,9 @@ static void virtio_pci_register_types(void) #ifdef CONFIG_VHOST_SCSI type_register_static(&vhost_scsi_pci_info); #endif +#ifdef CONFIG_LINUX + type_register_static(&vhost_user_scsi_pci_info); +#endif #ifdef CONFIG_VHOST_VSOCK type_register_static(&vhost_vsock_pci_info); #endif diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index b095dfc6d9..69f5959623 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -26,6 +26,7 @@ #include "hw/virtio/virtio-input.h" #include "hw/virtio/virtio-gpu.h" #include "hw/virtio/virtio-crypto.h" +#include "hw/virtio/vhost-user-scsi.h" #ifdef CONFIG_VIRTFS #include "hw/9pfs/virtio-9p.h" @@ -44,6 +45,7 @@ typedef struct VirtIOBalloonPCI VirtIOBalloonPCI; typedef struct VirtIOSerialPCI VirtIOSerialPCI; typedef struct VirtIONetPCI VirtIONetPCI; typedef struct VHostSCSIPCI VHostSCSIPCI; +typedef struct VHostUserSCSIPCI VHostUserSCSIPCI; typedef struct VirtIORngPCI VirtIORngPCI; typedef struct VirtIOInputPCI VirtIOInputPCI; typedef struct VirtIOInputHIDPCI VirtIOInputHIDPCI; @@ -230,6 +232,15 @@ struct VHostSCSIPCI { }; #endif +#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci" +#define VHOST_USER_SCSI_PCI(obj) \ + OBJECT_CHECK(VHostUserSCSIPCI, (obj), TYPE_VHOST_USER_SCSI_PCI) + +struct VHostUserSCSIPCI { + VirtIOPCIProxy parent_obj; + VHostUserSCSI vdev; +}; + /* * virtio-blk-pci: This extends VirtioPCIProxy. */ diff --git a/include/block/accounting.h b/include/block/accounting.h index 20891639d5..b833d26d6c 100644 --- a/include/block/accounting.h +++ b/include/block/accounting.h @@ -26,8 +26,10 @@ #define BLOCK_ACCOUNTING_H #include "qemu/timed-average.h" +#include "qemu/thread.h" typedef struct BlockAcctTimedStats BlockAcctTimedStats; +typedef struct BlockAcctStats BlockAcctStats; enum BlockAcctType { BLOCK_ACCT_READ, @@ -37,12 +39,14 @@ enum BlockAcctType { }; struct BlockAcctTimedStats { + BlockAcctStats *stats; TimedAverage latency[BLOCK_MAX_IOTYPE]; unsigned interval_length; /* in seconds */ QSLIST_ENTRY(BlockAcctTimedStats) entries; }; -typedef struct BlockAcctStats { +struct BlockAcctStats { + QemuMutex lock; uint64_t nr_bytes[BLOCK_MAX_IOTYPE]; uint64_t nr_ops[BLOCK_MAX_IOTYPE]; uint64_t invalid_ops[BLOCK_MAX_IOTYPE]; @@ -53,7 +57,7 @@ typedef struct BlockAcctStats { QSLIST_HEAD(, BlockAcctTimedStats) intervals; bool account_invalid; bool account_failed; -} BlockAcctStats; +}; typedef struct BlockAcctCookie { int64_t bytes; @@ -61,7 +65,8 @@ typedef struct BlockAcctCookie { enum BlockAcctType type; } BlockAcctCookie; -void block_acct_init(BlockAcctStats *stats, bool account_invalid, +void block_acct_init(BlockAcctStats *stats); +void block_acct_setup(BlockAcctStats *stats, bool account_invalid, bool account_failed); void block_acct_cleanup(BlockAcctStats *stats); void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length); diff --git a/include/block/block.h b/include/block/block.h index 9b355e92d8..a4f09df95a 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -402,7 +402,8 @@ void bdrv_drain_all(void); * block_job_defer_to_main_loop for how to do it). \ */ \ assert(!bs_->wakeup); \ - bs_->wakeup = true; \ + /* Set bs->wakeup before evaluating cond. */ \ + atomic_mb_set(&bs_->wakeup, true); \ while (busy_) { \ if ((cond)) { \ waited_ = busy_ = true; \ @@ -414,7 +415,7 @@ void bdrv_drain_all(void); waited_ |= busy_; \ } \ } \ - bs_->wakeup = false; \ + atomic_set(&bs_->wakeup, false); \ } \ waited_; }) diff --git a/include/block/block_int.h b/include/block/block_int.h index cb78c4fa82..748970055e 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -29,6 +29,7 @@ #include "qemu/option.h" #include "qemu/queue.h" #include "qemu/coroutine.h" +#include "qemu/stats64.h" #include "qemu/timer.h" #include "qapi-types.h" #include "qemu/hbitmap.h" @@ -595,11 +596,6 @@ struct BlockDriverState { /* Protected by AioContext lock */ - /* If true, copy read backing sectors into image. Can be >1 if more - * than one client has requested copy-on-read. - */ - int copy_on_read; - /* If we are reading a disk image, give its size in sectors. * Generally read-only; it is written to by load_snapshot and * save_snaphost, but the block layer is quiescent during those. @@ -609,34 +605,57 @@ struct BlockDriverState { /* Callback before write request is processed */ NotifierWithReturnList before_write_notifiers; - /* number of in-flight requests; overall and serialising */ - unsigned int in_flight; - unsigned int serialising_in_flight; + /* threshold limit for writes, in bytes. "High water mark". */ + uint64_t write_threshold_offset; + NotifierWithReturn write_threshold_notifier; - bool wakeup; + /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex. + * Reading from the list can be done with either the BQL or the + * dirty_bitmap_mutex. Modifying a bitmap only requires + * dirty_bitmap_mutex. */ + QemuMutex dirty_bitmap_mutex; + QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; /* Offset after the highest byte written to */ - uint64_t wr_highest_offset; + Stat64 wr_highest_offset; - /* threshold limit for writes, in bytes. "High water mark". */ - uint64_t write_threshold_offset; - NotifierWithReturn write_threshold_notifier; + /* If true, copy read backing sectors into image. Can be >1 if more + * than one client has requested copy-on-read. Accessed with atomic + * ops. + */ + int copy_on_read; - /* counter for nested bdrv_io_plug */ - unsigned io_plugged; + /* number of in-flight requests; overall and serialising. + * Accessed with atomic ops. + */ + unsigned int in_flight; + unsigned int serialising_in_flight; - QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; - CoQueue flush_queue; /* Serializing flush queue */ - bool active_flush_req; /* Flush request in flight? */ - unsigned int write_gen; /* Current data generation */ - unsigned int flushed_gen; /* Flushed write generation */ + /* Internal to BDRV_POLL_WHILE and bdrv_wakeup. Accessed with atomic + * ops. + */ + bool wakeup; - QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; + /* counter for nested bdrv_io_plug. + * Accessed with atomic ops. + */ + unsigned io_plugged; /* do we need to tell the quest if we have a volatile write cache? */ int enable_write_cache; + /* Accessed with atomic ops. */ int quiesce_counter; + unsigned int write_gen; /* Current data generation */ + + /* Protected by reqs_lock. */ + CoMutex reqs_lock; + QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; + CoQueue flush_queue; /* Serializing flush queue */ + bool active_flush_req; /* Flush request in flight? */ + + /* Only read/written by whoever has set active_flush_req to true. */ + unsigned int flushed_gen; /* Flushed write generation */ }; struct BlockBackendRootState { diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 9dea14ba03..ad6558af56 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -36,8 +36,6 @@ bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap); const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap); int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap); DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap); -int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, - int64_t sector); void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors); void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, @@ -45,6 +43,9 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector, int nb_sectors); +int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, int64_t sector, + int nb_sectors); void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector, int nb_sectors); @@ -52,11 +53,6 @@ BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap); BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, uint64_t first_sector); void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter); -int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter); -void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num); -int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); -int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap); -void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, uint64_t start, uint64_t count); @@ -72,4 +68,19 @@ void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap, bool finish); void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap); +/* Functions that require manual locking. */ +void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap); +void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap); +int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t sector); +void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int64_t nr_sectors); +void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int64_t nr_sectors); +int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter); +void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num); +int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); +int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap); +void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); + #endif diff --git a/include/block/nbd.h b/include/block/nbd.h index 416257abca..6d75d5a670 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -123,12 +123,8 @@ enum { * aren't overflowing some other buffer. */ #define NBD_MAX_NAME_SIZE 256 -ssize_t nbd_wr_syncv(QIOChannel *ioc, - struct iovec *iov, - size_t niov, - size_t length, - bool do_read, - Error **errp); +ssize_t nbd_rwv(QIOChannel *ioc, struct iovec *iov, size_t niov, size_t length, + bool do_read, Error **errp); int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, QCryptoTLSCreds *tlscreds, const char *hostname, QIOChannel **outioc, @@ -162,7 +158,7 @@ void nbd_client_new(NBDExport *exp, QIOChannelSocket *sioc, QCryptoTLSCreds *tlscreds, const char *tlsaclname, - void (*close)(NBDClient *)); + void (*close_fn)(NBDClient *, bool)); void nbd_client_get(NBDClient *client); void nbd_client_put(NBDClient *client); diff --git a/include/exec/memory.h b/include/exec/memory.h index 80e605a96a..37f8e78e71 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -456,6 +456,26 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, bool share, const char *path, Error **errp); + +/** + * memory_region_init_ram_from_fd: Initialize RAM memory region with a + * mmap-ed backend. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @name: the name of the region. + * @size: size of the region. + * @share: %true if memory must be mmaped with the MAP_SHARED flag + * @fd: the fd to mmap. + * @errp: pointer to Error*, to store an error if it happens. + */ +void memory_region_init_ram_from_fd(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + bool share, + int fd, + Error **errp); #endif /** @@ -805,17 +825,6 @@ static inline bool memory_region_is_rom(MemoryRegion *mr) int memory_region_get_fd(MemoryRegion *mr); /** - * memory_region_set_fd: Mark a RAM memory region as backed by a - * file descriptor. - * - * This function is typically used after memory_region_init_ram_ptr(). - * - * @mr: the memory region being queried. - * @fd: the file descriptor that backs @mr. - */ -void memory_region_set_fd(MemoryRegion *mr, int fd); - -/** * memory_region_from_host: Convert a pointer into a RAM memory region * and an offset within it. * diff --git a/include/exec/poison.h b/include/exec/poison.h index 3ca7929cce..5ffed4d56e 100644 --- a/include/exec/poison.h +++ b/include/exec/poison.h @@ -12,17 +12,28 @@ #pragma GCC poison TARGET_CRIS #pragma GCC poison TARGET_LM32 #pragma GCC poison TARGET_M68K +#pragma GCC poison TARGET_MICROBLAZE #pragma GCC poison TARGET_MIPS +#pragma GCC poison TARGET_ABI_MIPSO32 #pragma GCC poison TARGET_MIPS64 +#pragma GCC poison TARGET_ABI_MIPSN64 +#pragma GCC poison TARGET_MOXIE +#pragma GCC poison TARGET_NIOS2 #pragma GCC poison TARGET_OPENRISC #pragma GCC poison TARGET_PPC #pragma GCC poison TARGET_PPCEMB #pragma GCC poison TARGET_PPC64 #pragma GCC poison TARGET_ABI32 +#pragma GCC poison TARGET_S390X #pragma GCC poison TARGET_SH4 #pragma GCC poison TARGET_SPARC #pragma GCC poison TARGET_SPARC64 +#pragma GCC poison TARGET_TRICORE +#pragma GCC poison TARGET_UNICORE32 +#pragma GCC poison TARGET_XTENSA +#pragma GCC poison TARGET_NAME +#pragma GCC poison TARGET_SUPPORTS_MTTCG #pragma GCC poison TARGET_WORDS_BIGENDIAN #pragma GCC poison BSWAP_NEEDED @@ -50,5 +61,25 @@ #pragma GCC poison CPU_INTERRUPT_TGT_INT_1 #pragma GCC poison CPU_INTERRUPT_TGT_INT_2 +#pragma GCC poison CONFIG_ALPHA_DIS +#pragma GCC poison CONFIG_ARM_A64_DIS +#pragma GCC poison CONFIG_ARM_DIS +#pragma GCC poison CONFIG_CRIS_DIS +#pragma GCC poison CONFIG_I386_DIS +#pragma GCC poison CONFIG_LM32_DIS +#pragma GCC poison CONFIG_M68K_DIS +#pragma GCC poison CONFIG_MICROBLAZE_DIS +#pragma GCC poison CONFIG_MIPS_DIS +#pragma GCC poison CONFIG_MOXIE_DIS +#pragma GCC poison CONFIG_NIOS2_DIS +#pragma GCC poison CONFIG_PPC_DIS +#pragma GCC poison CONFIG_S390_DIS +#pragma GCC poison CONFIG_SH4_DIS +#pragma GCC poison CONFIG_SPARC_DIS +#pragma GCC poison CONFIG_XTENSA_DIS + +#pragma GCC poison CONFIG_LINUX_USER +#pragma GCC poison CONFIG_VHOST_NET + #endif #endif diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 140efa840c..73d1bea8b6 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -65,6 +65,9 @@ unsigned long last_ram_page(void); RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, bool share, const char *mem_path, Error **errp); +RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, + bool share, int fd, + Error **errp); RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, MemoryRegion *mr, Error **errp); RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp); diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index d071c9c0e9..233216abdc 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -384,6 +384,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); #define PC_COMPAT_2_9 \ HW_COMPAT_2_9 \ + {\ + .driver = "mch",\ + .property = "extended-tseg-mbytes",\ + .value = stringify(0),\ + },\ #define PC_COMPAT_2_8 \ HW_COMPAT_2_8 \ diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index 53b6760c16..58983c00b3 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -60,6 +60,7 @@ typedef struct MCHPCIState { uint64_t above_4g_mem_size; uint64_t pci_hole64_size; uint32_t short_root_bus; + uint16_t ext_tseg_mbytes; } MCHPCIState; typedef struct Q35PCIHost { @@ -91,6 +92,11 @@ typedef struct Q35PCIHost { /* D0:F0 configuration space */ #define MCH_HOST_BRIDGE_REVISION_DEFAULT 0x0 +#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES 0x50 +#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_SIZE 2 +#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY 0xffff +#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_MAX 0xfff + #define MCH_HOST_BRIDGE_PCIEXBAR 0x60 /* 64bit register */ #define MCH_HOST_BRIDGE_PCIEXBAR_SIZE 8 /* 64bit register */ #define MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT 0xb0000000 diff --git a/include/hw/virtio/vhost-user-scsi.h b/include/hw/virtio/vhost-user-scsi.h new file mode 100644 index 0000000000..01861f78d0 --- /dev/null +++ b/include/hw/virtio/vhost-user-scsi.h @@ -0,0 +1,35 @@ +/* + * vhost-user-scsi host device + * + * Copyright (c) 2016 Nutanix Inc. All rights reserved. + * + * Author: + * Felipe Franciosi <felipe@nutanix.com> + * + * This file is largely based on "vhost-scsi.h" by: + * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef VHOST_USER_SCSI_H +#define VHOST_USER_SCSI_H + +#include "qemu-common.h" +#include "hw/qdev.h" +#include "hw/virtio/virtio-scsi.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-scsi-common.h" + +#define TYPE_VHOST_USER_SCSI "vhost-user-scsi" +#define VHOST_USER_SCSI(obj) \ + OBJECT_CHECK(VHostUserSCSI, (obj), TYPE_VHOST_USER_SCSI) + +typedef struct VHostUserSCSI { + VHostSCSICommon parent_obj; + uint64_t host_features; +} VHostUserSCSI; + +#endif /* VHOST_USER_SCSI_H */ diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index eac2013ddd..de6ae5a9f6 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -21,6 +21,7 @@ #include "hw/virtio/virtio.h" #include "hw/pci/pci.h" #include "hw/scsi/scsi.h" +#include "chardev/char-fe.h" #include "sysemu/iothread.h" #define TYPE_VIRTIO_SCSI_COMMON "virtio-scsi-common" @@ -53,6 +54,7 @@ struct VirtIOSCSIConf { char *vhostfd; char *wwpn; #endif + CharBackend chardev; uint32_t boot_tpgt; IOThread *iothread; }; diff --git a/include/qemu/stats64.h b/include/qemu/stats64.h new file mode 100644 index 0000000000..4a357b3e9d --- /dev/null +++ b/include/qemu/stats64.h @@ -0,0 +1,193 @@ +/* + * Atomic operations on 64-bit quantities. + * + * Copyright (C) 2017 Red Hat, Inc. + * + * Author: Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_STATS64_H +#define QEMU_STATS64_H 1 + +#include "qemu/atomic.h" + +/* This provides atomic operations on 64-bit type, using a reader-writer + * spinlock on architectures that do not have 64-bit accesses. Even on + * those architectures, it tries hard not to take the lock. + */ + +typedef struct Stat64 { +#ifdef CONFIG_ATOMIC64 + uint64_t value; +#else + uint32_t low, high; + uint32_t lock; +#endif +} Stat64; + +#ifdef CONFIG_ATOMIC64 +static inline void stat64_init(Stat64 *s, uint64_t value) +{ + /* This is not guaranteed to be atomic! */ + *s = (Stat64) { value }; +} + +static inline uint64_t stat64_get(const Stat64 *s) +{ + return atomic_read__nocheck(&s->value); +} + +static inline void stat64_add(Stat64 *s, uint64_t value) +{ + atomic_add(&s->value, value); +} + +static inline void stat64_min(Stat64 *s, uint64_t value) +{ + uint64_t orig = atomic_read__nocheck(&s->value); + while (orig > value) { + orig = atomic_cmpxchg__nocheck(&s->value, orig, value); + } +} + +static inline void stat64_max(Stat64 *s, uint64_t value) +{ + uint64_t orig = atomic_read__nocheck(&s->value); + while (orig < value) { + orig = atomic_cmpxchg__nocheck(&s->value, orig, value); + } +} +#else +uint64_t stat64_get(const Stat64 *s); +bool stat64_min_slow(Stat64 *s, uint64_t value); +bool stat64_max_slow(Stat64 *s, uint64_t value); +bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high); + +static inline void stat64_init(Stat64 *s, uint64_t value) +{ + /* This is not guaranteed to be atomic! */ + *s = (Stat64) { .low = value, .high = value >> 32, .lock = 0 }; +} + +static inline void stat64_add(Stat64 *s, uint64_t value) +{ + uint32_t low, high; + high = value >> 32; + low = (uint32_t) value; + if (!low) { + if (high) { + atomic_add(&s->high, high); + } + return; + } + + for (;;) { + uint32_t orig = s->low; + uint32_t result = orig + low; + uint32_t old; + + if (result < low || high) { + /* If the high part is affected, take the lock. */ + if (stat64_add32_carry(s, low, high)) { + return; + } + continue; + } + + /* No carry, try with a 32-bit cmpxchg. The result is independent of + * the high 32 bits, so it can race just fine with stat64_add32_carry + * and even stat64_get! + */ + old = atomic_cmpxchg(&s->low, orig, result); + if (orig == old) { + return; + } + } +} + +static inline void stat64_min(Stat64 *s, uint64_t value) +{ + uint32_t low, high; + uint32_t orig_low, orig_high; + + high = value >> 32; + low = (uint32_t) value; + do { + orig_high = atomic_read(&s->high); + if (orig_high < high) { + return; + } + + if (orig_high == high) { + /* High 32 bits are equal. Read low after high, otherwise we + * can get a false positive (e.g. 0x1235,0x0000 changes to + * 0x1234,0x8000 and we read it as 0x1234,0x0000). Pairs with + * the write barrier in stat64_min_slow. + */ + smp_rmb(); + orig_low = atomic_read(&s->low); + if (orig_low <= low) { + return; + } + + /* See if we were lucky and a writer raced against us. The + * barrier is theoretically unnecessary, but if we remove it + * we may miss being lucky. + */ + smp_rmb(); + orig_high = atomic_read(&s->high); + if (orig_high < high) { + return; + } + } + + /* If the value changes in any way, we have to take the lock. */ + } while (!stat64_min_slow(s, value)); +} + +static inline void stat64_max(Stat64 *s, uint64_t value) +{ + uint32_t low, high; + uint32_t orig_low, orig_high; + + high = value >> 32; + low = (uint32_t) value; + do { + orig_high = atomic_read(&s->high); + if (orig_high > high) { + return; + } + + if (orig_high == high) { + /* High 32 bits are equal. Read low after high, otherwise we + * can get a false positive (e.g. 0x1234,0x8000 changes to + * 0x1235,0x0000 and we read it as 0x1235,0x8000). Pairs with + * the write barrier in stat64_max_slow. + */ + smp_rmb(); + orig_low = atomic_read(&s->low); + if (orig_low >= low) { + return; + } + + /* See if we were lucky and a writer raced against us. The + * barrier is theoretically unnecessary, but if we remove it + * we may miss being lucky. + */ + smp_rmb(); + orig_high = atomic_read(&s->high); + if (orig_high > high) { + return; + } + } + + /* If the value changes in any way, we have to take the lock. */ + } while (!stat64_max_slow(s, value)); +} + +#endif + +#endif diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 840ad6134c..999eb2333a 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -72,15 +72,13 @@ typedef struct BlockDevOps { * fields that must be public. This is in particular for QLIST_ENTRY() and * friends so that BlockBackends can be kept in lists outside block-backend.c */ typedef struct BlockBackendPublic { - /* I/O throttling has its own locking, but also some fields are - * protected by the AioContext lock. - */ - - /* Protected by AioContext lock. */ + /* throttled_reqs_lock protects the CoQueues for throttled requests. */ + CoMutex throttled_reqs_lock; CoQueue throttled_reqs[2]; /* Nonzero if the I/O limits are currently being ignored; generally - * it is zero. */ + * it is zero. Accessed with atomic operations. + */ unsigned int io_limits_disabled; /* The following fields are protected by the ThrottleGroup lock. diff --git a/include/ui/spice-display.h b/include/ui/spice-display.h index 184d4c373a..4ba9444dba 100644 --- a/include/ui/spice-display.h +++ b/include/ui/spice-display.h @@ -140,6 +140,8 @@ struct SimpleSpiceCursor { QXLCursor cursor; }; +extern bool spice_opengl; + int qemu_spice_rect_is_empty(const QXLRect* r); void qemu_spice_rect_union(QXLRect *dest, const QXLRect *r); @@ -1397,6 +1397,22 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, errp); mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; } + +void memory_region_init_ram_from_fd(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + bool share, + int fd, + Error **errp) +{ + memory_region_init(mr, owner, name, size); + mr->ram = true; + mr->terminates = true; + mr->destructor = memory_region_destructor_ram; + mr->ram_block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp); + mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; +} #endif void memory_region_init_ram_ptr(MemoryRegion *mr, @@ -1835,16 +1851,6 @@ int memory_region_get_fd(MemoryRegion *mr) return fd; } -void memory_region_set_fd(MemoryRegion *mr, int fd) -{ - rcu_read_lock(); - while (mr->alias) { - mr = mr->alias; - } - mr->ram_block->fd = fd; - rcu_read_unlock(); -} - void *memory_region_get_ram_ptr(MemoryRegion *mr) { void *ptr; diff --git a/migration/block.c b/migration/block.c index 3aae5a375e..7674ae1078 100644 --- a/migration/block.c +++ b/migration/block.c @@ -341,10 +341,8 @@ static int set_dirty_tracking(void) int ret; QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { - aio_context_acquire(blk_get_aio_context(bmds->blk)); bmds->dirty_bitmap = bdrv_create_dirty_bitmap(blk_bs(bmds->blk), BLOCK_SIZE, NULL, NULL); - aio_context_release(blk_get_aio_context(bmds->blk)); if (!bmds->dirty_bitmap) { ret = -errno; goto fail; @@ -355,9 +353,7 @@ static int set_dirty_tracking(void) fail: QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { if (bmds->dirty_bitmap) { - aio_context_acquire(blk_get_aio_context(bmds->blk)); bdrv_release_dirty_bitmap(blk_bs(bmds->blk), bmds->dirty_bitmap); - aio_context_release(blk_get_aio_context(bmds->blk)); } } return ret; @@ -370,9 +366,7 @@ static void unset_dirty_tracking(void) BlkMigDevState *bmds; QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { - aio_context_acquire(blk_get_aio_context(bmds->blk)); bdrv_release_dirty_bitmap(blk_bs(bmds->blk), bmds->dirty_bitmap); - aio_context_release(blk_get_aio_context(bmds->blk)); } } @@ -531,13 +525,16 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, } else { blk_mig_unlock(); } - if (bdrv_get_dirty(bs, bmds->dirty_bitmap, sector)) { - + bdrv_dirty_bitmap_lock(bmds->dirty_bitmap); + if (bdrv_get_dirty_locked(bs, bmds->dirty_bitmap, sector)) { if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) { nr_sectors = total_sectors - sector; } else { nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK; } + bdrv_reset_dirty_bitmap_locked(bmds->dirty_bitmap, sector, nr_sectors); + bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap); + blk = g_new(BlkMigBlock, 1); blk->buf = g_malloc(BLOCK_SIZE); blk->bmds = bmds; @@ -570,12 +567,12 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, g_free(blk); } - bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, sector, nr_sectors); sector += nr_sectors; bmds->cur_dirty = sector; - break; } + + bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap); sector += BDRV_SECTORS_PER_DIRTY_CHUNK; bmds->cur_dirty = sector; } diff --git a/nbd/client.c b/nbd/client.c index 595d99ed30..b97143fa60 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -86,32 +86,6 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); */ -/* Discard length bytes from channel. Return -errno on failure and 0 on - * success*/ -static int drop_sync(QIOChannel *ioc, size_t size, Error **errp) -{ - ssize_t ret = 0; - char small[1024]; - char *buffer; - - buffer = sizeof(small) >= size ? small : g_malloc(MIN(65536, size)); - while (size > 0) { - ssize_t count = MIN(65536, size); - ret = read_sync(ioc, buffer, MIN(65536, size), errp); - - if (ret < 0) { - goto cleanup; - } - size -= count; - } - - cleanup: - if (buffer != small) { - g_free(buffer); - } - return ret; -} - /* Send an option request. * * The request is for option @opt, with @data containing @len bytes of @@ -135,12 +109,12 @@ static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt, stl_be_p(&req.option, opt); stl_be_p(&req.length, len); - if (write_sync(ioc, &req, sizeof(req), errp) < 0) { + if (nbd_write(ioc, &req, sizeof(req), errp) < 0) { error_prepend(errp, "Failed to send option request header"); return -1; } - if (len && write_sync(ioc, (char *) data, len, errp) < 0) { + if (len && nbd_write(ioc, (char *) data, len, errp) < 0) { error_prepend(errp, "Failed to send option request data"); return -1; } @@ -169,7 +143,7 @@ static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt, nbd_opt_reply *reply, Error **errp) { QEMU_BUILD_BUG_ON(sizeof(*reply) != 20); - if (read_sync(ioc, reply, sizeof(*reply), errp) < 0) { + if (nbd_read(ioc, reply, sizeof(*reply), errp) < 0) { error_prepend(errp, "failed to read option reply"); nbd_send_opt_abort(ioc); return -1; @@ -218,7 +192,7 @@ static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply, goto cleanup; } msg = g_malloc(reply->length + 1); - if (read_sync(ioc, msg, reply->length, errp) < 0) { + if (nbd_read(ioc, msg, reply->length, errp) < 0) { error_prepend(errp, "failed to read option error message"); goto cleanup; } @@ -320,7 +294,7 @@ static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match, nbd_send_opt_abort(ioc); return -1; } - if (read_sync(ioc, &namelen, sizeof(namelen), errp) < 0) { + if (nbd_read(ioc, &namelen, sizeof(namelen), errp) < 0) { error_prepend(errp, "failed to read option name length"); nbd_send_opt_abort(ioc); return -1; @@ -333,7 +307,7 @@ static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match, return -1; } if (namelen != strlen(want)) { - if (drop_sync(ioc, len, errp) < 0) { + if (nbd_drop(ioc, len, errp) < 0) { error_prepend(errp, "failed to skip export name with wrong length"); nbd_send_opt_abort(ioc); return -1; @@ -342,14 +316,14 @@ static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match, } assert(namelen < sizeof(name)); - if (read_sync(ioc, name, namelen, errp) < 0) { + if (nbd_read(ioc, name, namelen, errp) < 0) { error_prepend(errp, "failed to read export name"); nbd_send_opt_abort(ioc); return -1; } name[namelen] = '\0'; len -= namelen; - if (drop_sync(ioc, len, errp) < 0) { + if (nbd_drop(ioc, len, errp) < 0) { error_prepend(errp, "failed to read export description"); nbd_send_opt_abort(ioc); return -1; @@ -476,7 +450,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, goto fail; } - if (read_sync(ioc, buf, 8, errp) < 0) { + if (nbd_read(ioc, buf, 8, errp) < 0) { error_prepend(errp, "Failed to read data"); goto fail; } @@ -502,7 +476,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, goto fail; } - if (read_sync(ioc, &magic, sizeof(magic), errp) < 0) { + if (nbd_read(ioc, &magic, sizeof(magic), errp) < 0) { error_prepend(errp, "Failed to read magic"); goto fail; } @@ -514,7 +488,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, uint16_t globalflags; bool fixedNewStyle = false; - if (read_sync(ioc, &globalflags, sizeof(globalflags), errp) < 0) { + if (nbd_read(ioc, &globalflags, sizeof(globalflags), errp) < 0) { error_prepend(errp, "Failed to read server flags"); goto fail; } @@ -532,7 +506,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, } /* client requested flags */ clientflags = cpu_to_be32(clientflags); - if (write_sync(ioc, &clientflags, sizeof(clientflags), errp) < 0) { + if (nbd_write(ioc, &clientflags, sizeof(clientflags), errp) < 0) { error_prepend(errp, "Failed to send clientflags field"); goto fail; } @@ -570,13 +544,13 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, } /* Read the response */ - if (read_sync(ioc, &s, sizeof(s), errp) < 0) { + if (nbd_read(ioc, &s, sizeof(s), errp) < 0) { error_prepend(errp, "Failed to read export length"); goto fail; } *size = be64_to_cpu(s); - if (read_sync(ioc, flags, sizeof(*flags), errp) < 0) { + if (nbd_read(ioc, flags, sizeof(*flags), errp) < 0) { error_prepend(errp, "Failed to read export flags"); goto fail; } @@ -593,14 +567,14 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, goto fail; } - if (read_sync(ioc, &s, sizeof(s), errp) < 0) { + if (nbd_read(ioc, &s, sizeof(s), errp) < 0) { error_prepend(errp, "Failed to read export length"); goto fail; } *size = be64_to_cpu(s); TRACE("Size is %" PRIu64, *size); - if (read_sync(ioc, &oldflags, sizeof(oldflags), errp) < 0) { + if (nbd_read(ioc, &oldflags, sizeof(oldflags), errp) < 0) { error_prepend(errp, "Failed to read export flags"); goto fail; } @@ -616,7 +590,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, } TRACE("Size is %" PRIu64 ", export flags %" PRIx16, *size, *flags); - if (zeroes && drop_sync(ioc, 124, errp) < 0) { + if (zeroes && nbd_drop(ioc, 124, errp) < 0) { error_prepend(errp, "Failed to read reserved block"); goto fail; } @@ -759,7 +733,7 @@ ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request) stq_be_p(buf + 16, request->from); stl_be_p(buf + 24, request->len); - return write_sync(ioc, buf, sizeof(buf), NULL); + return nbd_write(ioc, buf, sizeof(buf), NULL); } ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp) @@ -768,7 +742,7 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp) uint32_t magic; ssize_t ret; - ret = read_sync_eof(ioc, buf, sizeof(buf), errp); + ret = nbd_read_eof(ioc, buf, sizeof(buf), errp); if (ret <= 0) { return ret; } diff --git a/nbd/common.c b/nbd/common.c index bd81637ab9..6b5c1b7b02 100644 --- a/nbd/common.c +++ b/nbd/common.c @@ -24,12 +24,8 @@ * The function may be called from coroutine or from non-coroutine context. * When called from non-coroutine context @ioc must be in blocking mode. */ -ssize_t nbd_wr_syncv(QIOChannel *ioc, - struct iovec *iov, - size_t niov, - size_t length, - bool do_read, - Error **errp) +ssize_t nbd_rwv(QIOChannel *ioc, struct iovec *iov, size_t niov, size_t length, + bool do_read, Error **errp) { ssize_t done = 0; struct iovec *local_iov = g_new(struct iovec, niov); @@ -69,6 +65,32 @@ ssize_t nbd_wr_syncv(QIOChannel *ioc, return done; } +/* Discard length bytes from channel. Return -errno on failure and 0 on + * success */ +int nbd_drop(QIOChannel *ioc, size_t size, Error **errp) +{ + ssize_t ret = 0; + char small[1024]; + char *buffer; + + buffer = sizeof(small) >= size ? small : g_malloc(MIN(65536, size)); + while (size > 0) { + ssize_t count = MIN(65536, size); + ret = nbd_read(ioc, buffer, MIN(65536, size), errp); + + if (ret < 0) { + goto cleanup; + } + size -= count; + } + + cleanup: + if (buffer != small) { + g_free(buffer); + } + return ret; +} + void nbd_tls_handshake(QIOTask *task, void *opaque) diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h index d6071640a0..39bfed177c 100644 --- a/nbd/nbd-internal.h +++ b/nbd/nbd-internal.h @@ -94,14 +94,14 @@ #define NBD_ENOSPC 28 #define NBD_ESHUTDOWN 108 -/* read_sync_eof +/* nbd_read_eof * Tries to read @size bytes from @ioc. Returns number of bytes actually read. * May return a value >= 0 and < size only on EOF, i.e. when iteratively called - * qio_channel_readv() returns 0. So, there are no needs to call read_sync_eof + * qio_channel_readv() returns 0. So, there is no need to call nbd_read_eof * iteratively. */ -static inline ssize_t read_sync_eof(QIOChannel *ioc, void *buffer, size_t size, - Error **errp) +static inline ssize_t nbd_read_eof(QIOChannel *ioc, void *buffer, size_t size, + Error **errp) { struct iovec iov = { .iov_base = buffer, .iov_len = size }; /* Sockets are kept in blocking mode in the negotiation phase. After @@ -109,16 +109,16 @@ static inline ssize_t read_sync_eof(QIOChannel *ioc, void *buffer, size_t size, * our request/reply. Synchronization is done with recv_coroutine, so * that this is coroutine-safe. */ - return nbd_wr_syncv(ioc, &iov, 1, size, true, errp); + return nbd_rwv(ioc, &iov, 1, size, true, errp); } -/* read_sync +/* nbd_read * Reads @size bytes from @ioc. Returns 0 on success. */ -static inline int read_sync(QIOChannel *ioc, void *buffer, size_t size, - Error **errp) +static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size, + Error **errp) { - ssize_t ret = read_sync_eof(ioc, buffer, size, errp); + ssize_t ret = nbd_read_eof(ioc, buffer, size, errp); if (ret >= 0 && ret != size) { ret = -EINVAL; @@ -128,15 +128,15 @@ static inline int read_sync(QIOChannel *ioc, void *buffer, size_t size, return ret < 0 ? ret : 0; } -/* write_sync +/* nbd_write * Writes @size bytes to @ioc. Returns 0 on success. */ -static inline int write_sync(QIOChannel *ioc, const void *buffer, size_t size, - Error **errp) +static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size, + Error **errp) { struct iovec iov = { .iov_base = (void *) buffer, .iov_len = size }; - ssize_t ret = nbd_wr_syncv(ioc, &iov, 1, size, false, errp); + ssize_t ret = nbd_rwv(ioc, &iov, 1, size, false, errp); assert(ret < 0 || ret == size); @@ -153,4 +153,6 @@ struct NBDTLSHandshakeData { void nbd_tls_handshake(QIOTask *task, void *opaque); +int nbd_drop(QIOChannel *ioc, size_t size, Error **errp); + #endif diff --git a/nbd/server.c b/nbd/server.c index 49b55f6ede..8a70c054a6 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -81,7 +81,7 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); struct NBDClient { int refcount; - void (*close)(NBDClient *client); + void (*close_fn)(NBDClient *client, bool negotiated); bool no_zeroes; NBDExport *exp; @@ -104,69 +104,6 @@ struct NBDClient { static void nbd_client_receive_next_request(NBDClient *client); -static gboolean nbd_negotiate_continue(QIOChannel *ioc, - GIOCondition condition, - void *opaque) -{ - qemu_coroutine_enter(opaque); - return TRUE; -} - -static int nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size) -{ - ssize_t ret; - guint watch; - - assert(qemu_in_coroutine()); - /* Negotiation are always in main loop. */ - watch = qio_channel_add_watch(ioc, - G_IO_IN, - nbd_negotiate_continue, - qemu_coroutine_self(), - NULL); - ret = read_sync(ioc, buffer, size, NULL); - g_source_remove(watch); - return ret; - -} - -static int nbd_negotiate_write(QIOChannel *ioc, const void *buffer, size_t size) -{ - ssize_t ret; - guint watch; - - assert(qemu_in_coroutine()); - /* Negotiation are always in main loop. */ - watch = qio_channel_add_watch(ioc, - G_IO_OUT, - nbd_negotiate_continue, - qemu_coroutine_self(), - NULL); - ret = write_sync(ioc, buffer, size, NULL); - g_source_remove(watch); - return ret; -} - -static int nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size) -{ - ssize_t ret; - uint8_t *buffer = g_malloc(MIN(65536, size)); - - while (size > 0) { - size_t count = MIN(65536, size); - ret = nbd_negotiate_read(ioc, buffer, count); - if (ret < 0) { - g_free(buffer); - return ret; - } - - size -= count; - } - - g_free(buffer); - return 0; -} - /* Basic flow for negotiation Server Client @@ -205,22 +142,22 @@ static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type, type, opt, len); magic = cpu_to_be64(NBD_REP_MAGIC); - if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) < 0) { + if (nbd_write(ioc, &magic, sizeof(magic), NULL) < 0) { LOG("write failed (rep magic)"); return -EINVAL; } opt = cpu_to_be32(opt); - if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) < 0) { + if (nbd_write(ioc, &opt, sizeof(opt), NULL) < 0) { LOG("write failed (rep opt)"); return -EINVAL; } type = cpu_to_be32(type); - if (nbd_negotiate_write(ioc, &type, sizeof(type)) < 0) { + if (nbd_write(ioc, &type, sizeof(type), NULL) < 0) { LOG("write failed (rep type)"); return -EINVAL; } len = cpu_to_be32(len); - if (nbd_negotiate_write(ioc, &len, sizeof(len)) < 0) { + if (nbd_write(ioc, &len, sizeof(len), NULL) < 0) { LOG("write failed (rep data length)"); return -EINVAL; } @@ -255,7 +192,7 @@ nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type, if (ret < 0) { goto out; } - if (nbd_negotiate_write(ioc, msg, len) < 0) { + if (nbd_write(ioc, msg, len, NULL) < 0) { LOG("write failed (error message)"); ret = -EIO; } else { @@ -274,27 +211,27 @@ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp) uint32_t len; const char *name = exp->name ? exp->name : ""; const char *desc = exp->description ? exp->description : ""; - int rc; + int ret; TRACE("Advertising export name '%s' description '%s'", name, desc); name_len = strlen(name); desc_len = strlen(desc); len = name_len + desc_len + sizeof(len); - rc = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len); - if (rc < 0) { - return rc; + ret = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len); + if (ret < 0) { + return ret; } len = cpu_to_be32(name_len); - if (nbd_negotiate_write(ioc, &len, sizeof(len)) < 0) { + if (nbd_write(ioc, &len, sizeof(len), NULL) < 0) { LOG("write failed (name length)"); return -EINVAL; } - if (nbd_negotiate_write(ioc, name, name_len) < 0) { + if (nbd_write(ioc, name, name_len, NULL) < 0) { LOG("write failed (name buffer)"); return -EINVAL; } - if (nbd_negotiate_write(ioc, desc, desc_len) < 0) { + if (nbd_write(ioc, desc, desc_len, NULL) < 0) { LOG("write failed (description buffer)"); return -EINVAL; } @@ -308,7 +245,7 @@ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) NBDExport *exp; if (length) { - if (nbd_negotiate_drop_sync(client->ioc, length) < 0) { + if (nbd_drop(client->ioc, length, NULL) < 0) { return -EIO; } return nbd_negotiate_send_rep_err(client->ioc, @@ -328,7 +265,6 @@ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length) { - int rc = -EINVAL; char name[NBD_MAX_NAME_SIZE + 1]; /* Client sends: @@ -337,11 +273,11 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length) TRACE("Checking length"); if (length >= sizeof(name)) { LOG("Bad length received"); - goto fail; + return -EINVAL; } - if (nbd_negotiate_read(client->ioc, name, length) < 0) { + if (nbd_read(client->ioc, name, length, NULL) < 0) { LOG("read failed"); - goto fail; + return -EINVAL; } name[length] = '\0'; @@ -350,14 +286,13 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length) client->exp = nbd_export_find(name); if (!client->exp) { LOG("export not found"); - goto fail; + return -EINVAL; } QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); nbd_export_get(client->exp); - rc = 0; -fail: - return rc; + + return 0; } /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the @@ -372,7 +307,7 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, TRACE("Setting up TLS"); ioc = client->ioc; if (length) { - if (nbd_negotiate_drop_sync(ioc, length) < 0) { + if (nbd_drop(ioc, length, NULL) < 0) { return NULL; } nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS, @@ -436,7 +371,7 @@ static int nbd_negotiate_options(NBDClient *client) ... Rest of request */ - if (nbd_negotiate_read(client->ioc, &flags, sizeof(flags)) < 0) { + if (nbd_read(client->ioc, &flags, sizeof(flags), NULL) < 0) { LOG("read failed"); return -EIO; } @@ -462,7 +397,7 @@ static int nbd_negotiate_options(NBDClient *client) uint32_t clientflags, length; uint64_t magic; - if (nbd_negotiate_read(client->ioc, &magic, sizeof(magic)) < 0) { + if (nbd_read(client->ioc, &magic, sizeof(magic), NULL) < 0) { LOG("read failed"); return -EINVAL; } @@ -472,15 +407,15 @@ static int nbd_negotiate_options(NBDClient *client) return -EINVAL; } - if (nbd_negotiate_read(client->ioc, &clientflags, - sizeof(clientflags)) < 0) + if (nbd_read(client->ioc, &clientflags, + sizeof(clientflags), NULL) < 0) { LOG("read failed"); return -EINVAL; } clientflags = be32_to_cpu(clientflags); - if (nbd_negotiate_read(client->ioc, &length, sizeof(length)) < 0) { + if (nbd_read(client->ioc, &length, sizeof(length), NULL) < 0) { LOG("read failed"); return -EINVAL; } @@ -510,7 +445,7 @@ static int nbd_negotiate_options(NBDClient *client) return -EINVAL; default: - if (nbd_negotiate_drop_sync(client->ioc, length) < 0) { + if (nbd_drop(client->ioc, length, NULL) < 0) { return -EIO; } ret = nbd_negotiate_send_rep_err(client->ioc, @@ -548,7 +483,7 @@ static int nbd_negotiate_options(NBDClient *client) return nbd_negotiate_handle_export_name(client, length); case NBD_OPT_STARTTLS: - if (nbd_negotiate_drop_sync(client->ioc, length) < 0) { + if (nbd_drop(client->ioc, length, NULL) < 0) { return -EIO; } if (client->tlscreds) { @@ -567,7 +502,7 @@ static int nbd_negotiate_options(NBDClient *client) } break; default: - if (nbd_negotiate_drop_sync(client->ioc, length) < 0) { + if (nbd_drop(client->ioc, length, NULL) < 0) { return -EIO; } ret = nbd_negotiate_send_rep_err(client->ioc, @@ -598,16 +533,10 @@ static int nbd_negotiate_options(NBDClient *client) } } -typedef struct { - NBDClient *client; - Coroutine *co; -} NBDClientNewData; - -static coroutine_fn int nbd_negotiate(NBDClientNewData *data) +static coroutine_fn int nbd_negotiate(NBDClient *client) { - NBDClient *client = data->client; char buf[8 + 8 + 8 + 128]; - int rc; + int ret; const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_WRITE_ZEROES); @@ -633,7 +562,6 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) */ qio_channel_set_blocking(client->ioc, false, NULL); - rc = -EINVAL; TRACE("Beginning negotiation."); memset(buf, 0, sizeof(buf)); @@ -654,21 +582,21 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) if (oldStyle) { if (client->tlscreds) { TRACE("TLS cannot be enabled with oldstyle protocol"); - goto fail; + return -EINVAL; } - if (nbd_negotiate_write(client->ioc, buf, sizeof(buf)) < 0) { + if (nbd_write(client->ioc, buf, sizeof(buf), NULL) < 0) { LOG("write failed"); - goto fail; + return -EINVAL; } } else { - if (nbd_negotiate_write(client->ioc, buf, 18) < 0) { + if (nbd_write(client->ioc, buf, 18, NULL) < 0) { LOG("write failed"); - goto fail; + return -EINVAL; } - rc = nbd_negotiate_options(client); - if (rc != 0) { + ret = nbd_negotiate_options(client); + if (ret != 0) { LOG("option negotiation failed"); - goto fail; + return ret; } TRACE("advertising size %" PRIu64 " and flags %x", @@ -676,25 +604,25 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) stq_be_p(buf + 18, client->exp->size); stw_be_p(buf + 26, client->exp->nbdflags | myflags); len = client->no_zeroes ? 10 : sizeof(buf) - 18; - if (nbd_negotiate_write(client->ioc, buf + 18, len) < 0) { + ret = nbd_write(client->ioc, buf + 18, len, NULL); + if (ret < 0) { LOG("write failed"); - goto fail; + return ret; } } TRACE("Negotiation succeeded."); - rc = 0; -fail: - return rc; + + return 0; } -static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request) +static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request) { uint8_t buf[NBD_REQUEST_SIZE]; uint32_t magic; - ssize_t ret; + int ret; - ret = read_sync(ioc, buf, sizeof(buf), NULL); + ret = nbd_read(ioc, buf, sizeof(buf), NULL); if (ret < 0) { return ret; } @@ -726,7 +654,7 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request) return 0; } -static ssize_t nbd_send_reply(QIOChannel *ioc, NBDReply *reply) +static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply) { uint8_t buf[NBD_REPLY_SIZE]; @@ -745,7 +673,7 @@ static ssize_t nbd_send_reply(QIOChannel *ioc, NBDReply *reply) stl_be_p(buf + 4, reply->error); stq_be_p(buf + 8, reply->handle); - return write_sync(ioc, buf, sizeof(buf), NULL); + return nbd_write(ioc, buf, sizeof(buf), NULL); } #define MAX_NBD_REQUESTS 16 @@ -778,7 +706,7 @@ void nbd_client_put(NBDClient *client) } } -static void client_close(NBDClient *client) +static void client_close(NBDClient *client, bool negotiated) { if (client->closing) { return; @@ -793,8 +721,8 @@ static void client_close(NBDClient *client) NULL); /* Also tell the client, so that they release their reference. */ - if (client->close) { - client->close(client); + if (client->close_fn) { + client->close_fn(client, negotiated); } } @@ -975,7 +903,7 @@ void nbd_export_close(NBDExport *exp) nbd_export_get(exp); QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) { - client_close(client); + client_close(client, true); } nbd_export_set_name(exp, NULL); nbd_export_set_description(exp, NULL); @@ -1032,25 +960,24 @@ void nbd_export_close_all(void) } } -static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, - int len) +static int nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len) { NBDClient *client = req->client; - ssize_t rc, ret; + int ret; g_assert(qemu_in_coroutine()); qemu_co_mutex_lock(&client->send_lock); client->send_coroutine = qemu_coroutine_self(); if (!len) { - rc = nbd_send_reply(client->ioc, reply); + ret = nbd_send_reply(client->ioc, reply); } else { qio_channel_set_cork(client->ioc, true); - rc = nbd_send_reply(client->ioc, reply); - if (rc >= 0) { - ret = write_sync(client->ioc, req->data, len, NULL); + ret = nbd_send_reply(client->ioc, reply); + if (ret == 0) { + ret = nbd_write(client->ioc, req->data, len, NULL); if (ret < 0) { - rc = -EIO; + ret = -EIO; } } qio_channel_set_cork(client->ioc, false); @@ -1058,28 +985,23 @@ static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, client->send_coroutine = NULL; qemu_co_mutex_unlock(&client->send_lock); - return rc; + return ret; } -/* Collect a client request. Return 0 if request looks valid, -EAGAIN - * to keep trying the collection, -EIO to drop connection right away, - * and any other negative value to report an error to the client - * (although the caller may still need to disconnect after reporting - * the error). */ -static ssize_t nbd_co_receive_request(NBDRequestData *req, - NBDRequest *request) +/* nbd_co_receive_request + * Collect a client request. Return 0 if request looks valid, -EIO to drop + * connection right away, and any other negative value to report an error to + * the client (although the caller may still need to disconnect after reporting + * the error). + */ +static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request) { NBDClient *client = req->client; - ssize_t rc; g_assert(qemu_in_coroutine()); assert(client->recv_coroutine == qemu_coroutine_self()); - rc = nbd_receive_request(client->ioc, request); - if (rc < 0) { - if (rc != -EAGAIN) { - rc = -EIO; - } - goto out; + if (nbd_receive_request(client->ioc, request) < 0) { + return -EIO; } TRACE("Decoding type"); @@ -1093,8 +1015,7 @@ static ssize_t nbd_co_receive_request(NBDRequestData *req, /* Special case: we're going to disconnect without a reply, * whether or not flags, from, or len are bogus */ TRACE("Request type is DISCONNECT"); - rc = -EIO; - goto out; + return -EIO; } /* Check for sanity in the parameters, part 1. Defer as many @@ -1102,31 +1023,27 @@ static ssize_t nbd_co_receive_request(NBDRequestData *req, * payload, so we can try and keep the connection alive. */ if ((request->from + request->len) < request->from) { LOG("integer overflow detected, you're probably being attacked"); - rc = -EINVAL; - goto out; + return -EINVAL; } if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) { if (request->len > NBD_MAX_BUFFER_SIZE) { LOG("len (%" PRIu32" ) is larger than max len (%u)", request->len, NBD_MAX_BUFFER_SIZE); - rc = -EINVAL; - goto out; + return -EINVAL; } req->data = blk_try_blockalign(client->exp->blk, request->len); if (req->data == NULL) { - rc = -ENOMEM; - goto out; + return -ENOMEM; } } if (request->type == NBD_CMD_WRITE) { TRACE("Reading %" PRIu32 " byte(s)", request->len); - if (read_sync(client->ioc, req->data, request->len, NULL) < 0) { + if (nbd_read(client->ioc, req->data, request->len, NULL) < 0) { LOG("reading from socket failed"); - rc = -EIO; - goto out; + return -EIO; } req->complete = true; } @@ -1136,28 +1053,19 @@ static ssize_t nbd_co_receive_request(NBDRequestData *req, LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32 ", Size: %" PRIu64, request->from, request->len, (uint64_t)client->exp->size); - rc = request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL; - goto out; + return request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL; } if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) { LOG("unsupported flags (got 0x%x)", request->flags); - rc = -EINVAL; - goto out; + return -EINVAL; } if (request->type != NBD_CMD_WRITE_ZEROES && (request->flags & NBD_CMD_FLAG_NO_HOLE)) { LOG("unexpected flags (got 0x%x)", request->flags); - rc = -EINVAL; - goto out; + return -EINVAL; } - rc = 0; - -out: - client->recv_coroutine = NULL; - nbd_client_receive_next_request(client); - - return rc; + return 0; } /* Owns a reference to the NBDClient passed as opaque. */ @@ -1168,8 +1076,9 @@ static coroutine_fn void nbd_trip(void *opaque) NBDRequestData *req; NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ NBDReply reply; - ssize_t ret; + int ret; int flags; + int reply_data_len = 0; TRACE("Reading request."); if (client->closing) { @@ -1179,11 +1088,10 @@ static coroutine_fn void nbd_trip(void *opaque) req = nbd_request_get(client); ret = nbd_co_receive_request(req, &request); - if (ret == -EAGAIN) { - goto done; - } + client->recv_coroutine = NULL; + nbd_client_receive_next_request(client); if (ret == -EIO) { - goto out; + goto disconnect; } reply.handle = request.handle; @@ -1191,7 +1099,7 @@ static coroutine_fn void nbd_trip(void *opaque) if (ret < 0) { reply.error = -ret; - goto error_reply; + goto reply; } if (client->closing) { @@ -1212,7 +1120,7 @@ static coroutine_fn void nbd_trip(void *opaque) if (ret < 0) { LOG("flush failed"); reply.error = -ret; - goto error_reply; + break; } } @@ -1221,12 +1129,12 @@ static coroutine_fn void nbd_trip(void *opaque) if (ret < 0) { LOG("reading from file failed"); reply.error = -ret; - goto error_reply; + break; } + reply_data_len = request.len; TRACE("Read %" PRIu32" byte(s)", request.len); - if (nbd_co_send_reply(req, &reply, request.len) < 0) - goto out; + break; case NBD_CMD_WRITE: TRACE("Request type is WRITE"); @@ -1234,7 +1142,7 @@ static coroutine_fn void nbd_trip(void *opaque) if (exp->nbdflags & NBD_FLAG_READ_ONLY) { TRACE("Server is read-only, return error"); reply.error = EROFS; - goto error_reply; + break; } TRACE("Writing to device"); @@ -1248,21 +1156,16 @@ static coroutine_fn void nbd_trip(void *opaque) if (ret < 0) { LOG("writing to file failed"); reply.error = -ret; - goto error_reply; } - if (nbd_co_send_reply(req, &reply, 0) < 0) { - goto out; - } break; - case NBD_CMD_WRITE_ZEROES: TRACE("Request type is WRITE_ZEROES"); if (exp->nbdflags & NBD_FLAG_READ_ONLY) { TRACE("Server is read-only, return error"); reply.error = EROFS; - goto error_reply; + break; } TRACE("Writing to device"); @@ -1279,14 +1182,9 @@ static coroutine_fn void nbd_trip(void *opaque) if (ret < 0) { LOG("writing to file failed"); reply.error = -ret; - goto error_reply; } - if (nbd_co_send_reply(req, &reply, 0) < 0) { - goto out; - } break; - case NBD_CMD_DISC: /* unreachable, thanks to special case in nbd_co_receive_request() */ abort(); @@ -1299,9 +1197,7 @@ static coroutine_fn void nbd_trip(void *opaque) LOG("flush failed"); reply.error = -ret; } - if (nbd_co_send_reply(req, &reply, 0) < 0) { - goto out; - } + break; case NBD_CMD_TRIM: TRACE("Request type is TRIM"); @@ -1311,21 +1207,19 @@ static coroutine_fn void nbd_trip(void *opaque) LOG("discard failed"); reply.error = -ret; } - if (nbd_co_send_reply(req, &reply, 0) < 0) { - goto out; - } + break; default: LOG("invalid request type (%" PRIu32 ") received", request.type); reply.error = EINVAL; - error_reply: - /* We must disconnect after NBD_CMD_WRITE if we did not - * read the payload. - */ - if (nbd_co_send_reply(req, &reply, 0) < 0 || !req->complete) { - goto out; - } - break; + } + +reply: + /* We must disconnect after NBD_CMD_WRITE if we did not + * read the payload. + */ + if (nbd_co_send_reply(req, &reply, reply_data_len) < 0 || !req->complete) { + goto disconnect; } TRACE("Request/Reply complete"); @@ -1335,9 +1229,9 @@ done: nbd_client_put(client); return; -out: +disconnect: nbd_request_put(req); - client_close(client); + client_close(client, true); nbd_client_put(client); } @@ -1352,8 +1246,7 @@ static void nbd_client_receive_next_request(NBDClient *client) static coroutine_fn void nbd_co_client_start(void *opaque) { - NBDClientNewData *data = opaque; - NBDClient *client = data->client; + NBDClient *client = opaque; NBDExport *exp = client->exp; if (exp) { @@ -1362,25 +1255,28 @@ static coroutine_fn void nbd_co_client_start(void *opaque) } qemu_co_mutex_init(&client->send_lock); - if (nbd_negotiate(data)) { - client_close(client); - goto out; + if (nbd_negotiate(client)) { + client_close(client, false); + return; } nbd_client_receive_next_request(client); - -out: - g_free(data); } +/* + * Create a new client listener on the given export @exp, using the + * given channel @sioc. Begin servicing it in a coroutine. When the + * connection closes, call @close_fn with an indication of whether the + * client completed negotiation. + */ void nbd_client_new(NBDExport *exp, QIOChannelSocket *sioc, QCryptoTLSCreds *tlscreds, const char *tlsaclname, - void (*close_fn)(NBDClient *)) + void (*close_fn)(NBDClient *, bool)) { NBDClient *client; - NBDClientNewData *data = g_new(NBDClientNewData, 1); + Coroutine *co; client = g_malloc0(sizeof(NBDClient)); client->refcount = 1; @@ -1394,9 +1290,8 @@ void nbd_client_new(NBDExport *exp, object_ref(OBJECT(client->sioc)); client->ioc = QIO_CHANNEL(sioc); object_ref(OBJECT(client->ioc)); - client->close = close_fn; + client->close_fn = close_fn; - data->client = client; - data->co = qemu_coroutine_create(nbd_co_client_start, data); - qemu_coroutine_enter(data->co); + co = qemu_coroutine_create(nbd_co_client_start, client); + qemu_coroutine_enter(co); } diff --git a/qemu-doc.texi b/qemu-doc.texi index 965ba5929e..21079fd675 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -1,11 +1,12 @@ \input texinfo @c -*- texinfo -*- @c %**start of header @setfilename qemu-doc.info +@include version.texi @documentlanguage en @documentencoding UTF-8 -@settitle QEMU Emulator User Documentation +@settitle QEMU version @value{VERSION} User Documentation @exampleindent 0 @paragraphindent 0 @c %**end of header @@ -19,7 +20,7 @@ @iftex @titlepage @sp 7 -@center @titlefont{QEMU Emulator} +@center @titlefont{QEMU version @value{VERSION}} @sp 1 @center @titlefont{User Documentation} @sp 3 diff --git a/qemu-nbd.c b/qemu-nbd.c index 651f85ecc1..4dd3fd4732 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -336,10 +336,10 @@ static void nbd_export_closed(NBDExport *exp) static void nbd_update_server_watch(void); -static void nbd_client_closed(NBDClient *client) +static void nbd_client_closed(NBDClient *client, bool negotiated) { nb_fds--; - if (nb_fds == 0 && !persistent && state == RUNNING) { + if (negotiated && nb_fds == 0 && !persistent && state == RUNNING) { state = TERMINATE; } nbd_update_server_watch(); @@ -581,6 +581,10 @@ int main(int argc, char **argv) sa_sigterm.sa_handler = termsig_handler; sigaction(SIGTERM, &sa_sigterm, NULL); +#ifdef CONFIG_POSIX + signal(SIGPIPE, SIG_IGN); +#endif + module_call_init(MODULE_INIT_TRACE); qcrypto_init(&error_fatal); @@ -377,7 +377,7 @@ define unnest-vars endef TEXI2MAN = $(call quiet-command, \ - perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl -I docs $< $@.pod && \ + perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $(TEXI2PODFLAGS) $< $@.pod && \ $(POD2MAN) --section=$(subst .,,$(suffix $@)) --center=" " --release=" " $@.pod > $@, \ "GEN","$@") diff --git a/target/i386/hax-all.c b/target/i386/hax-all.c index 097db5cae1..ba6117d7de 100644 --- a/target/i386/hax-all.c +++ b/target/i386/hax-all.c @@ -514,9 +514,10 @@ static int hax_vcpu_hax_exec(CPUArchState *env) hax_vcpu_interrupt(env); qemu_mutex_unlock_iothread(); + cpu_exec_start(cpu); hax_ret = hax_vcpu_run(vcpu); + cpu_exec_end(cpu); qemu_mutex_lock_iothread(); - current_cpu = cpu; /* Simply continue the vcpu_run if system call interrupted */ if (hax_ret == -EINTR || hax_ret == -EAGAIN) { diff --git a/target/m68k/Makefile.objs b/target/m68k/Makefile.objs index 02cf616a78..39141ab93d 100644 --- a/target/m68k/Makefile.objs +++ b/target/m68k/Makefile.objs @@ -1,3 +1,3 @@ obj-y += m68k-semi.o -obj-y += translate.o op_helper.o helper.o cpu.o +obj-y += translate.o op_helper.o helper.o cpu.o fpu_helper.o obj-y += gdbstub.o diff --git a/target/m68k/fpu_helper.c b/target/m68k/fpu_helper.c new file mode 100644 index 0000000000..5bf2576c2b --- /dev/null +++ b/target/m68k/fpu_helper.c @@ -0,0 +1,112 @@ +/* + * m68k FPU helpers + * + * Copyright (c) 2006-2007 CodeSourcery + * Written by Paul Brook + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/helper-proto.h" + +uint32_t HELPER(f64_to_i32)(CPUM68KState *env, float64 val) +{ + return float64_to_int32(val, &env->fp_status); +} + +float32 HELPER(f64_to_f32)(CPUM68KState *env, float64 val) +{ + return float64_to_float32(val, &env->fp_status); +} + +float64 HELPER(i32_to_f64)(CPUM68KState *env, uint32_t val) +{ + return int32_to_float64(val, &env->fp_status); +} + +float64 HELPER(f32_to_f64)(CPUM68KState *env, float32 val) +{ + return float32_to_float64(val, &env->fp_status); +} + +float64 HELPER(iround_f64)(CPUM68KState *env, float64 val) +{ + return float64_round_to_int(val, &env->fp_status); +} + +float64 HELPER(itrunc_f64)(CPUM68KState *env, float64 val) +{ + return float64_trunc_to_int(val, &env->fp_status); +} + +float64 HELPER(sqrt_f64)(CPUM68KState *env, float64 val) +{ + return float64_sqrt(val, &env->fp_status); +} + +float64 HELPER(abs_f64)(float64 val) +{ + return float64_abs(val); +} + +float64 HELPER(chs_f64)(float64 val) +{ + return float64_chs(val); +} + +float64 HELPER(add_f64)(CPUM68KState *env, float64 a, float64 b) +{ + return float64_add(a, b, &env->fp_status); +} + +float64 HELPER(sub_f64)(CPUM68KState *env, float64 a, float64 b) +{ + return float64_sub(a, b, &env->fp_status); +} + +float64 HELPER(mul_f64)(CPUM68KState *env, float64 a, float64 b) +{ + return float64_mul(a, b, &env->fp_status); +} + +float64 HELPER(div_f64)(CPUM68KState *env, float64 a, float64 b) +{ + return float64_div(a, b, &env->fp_status); +} + +float64 HELPER(sub_cmp_f64)(CPUM68KState *env, float64 a, float64 b) +{ + /* ??? This may incorrectly raise exceptions. */ + /* ??? Should flush denormals to zero. */ + float64 res; + res = float64_sub(a, b, &env->fp_status); + if (float64_is_quiet_nan(res, &env->fp_status)) { + /* +/-inf compares equal against itself, but sub returns nan. */ + if (!float64_is_quiet_nan(a, &env->fp_status) + && !float64_is_quiet_nan(b, &env->fp_status)) { + res = float64_zero; + if (float64_lt_quiet(a, res, &env->fp_status)) { + res = float64_chs(res); + } + } + } + return res; +} + +uint32_t HELPER(compare_f64)(CPUM68KState *env, float64 val) +{ + return float64_compare_quiet(val, float64_zero, &env->fp_status); +} diff --git a/target/m68k/helper.c b/target/m68k/helper.c index f750d3dbaa..5ca9911657 100644 --- a/target/m68k/helper.c +++ b/target/m68k/helper.c @@ -284,94 +284,6 @@ void HELPER(set_sr)(CPUM68KState *env, uint32_t val) m68k_switch_sp(env); } -/* FPU helpers. */ -uint32_t HELPER(f64_to_i32)(CPUM68KState *env, float64 val) -{ - return float64_to_int32(val, &env->fp_status); -} - -float32 HELPER(f64_to_f32)(CPUM68KState *env, float64 val) -{ - return float64_to_float32(val, &env->fp_status); -} - -float64 HELPER(i32_to_f64)(CPUM68KState *env, uint32_t val) -{ - return int32_to_float64(val, &env->fp_status); -} - -float64 HELPER(f32_to_f64)(CPUM68KState *env, float32 val) -{ - return float32_to_float64(val, &env->fp_status); -} - -float64 HELPER(iround_f64)(CPUM68KState *env, float64 val) -{ - return float64_round_to_int(val, &env->fp_status); -} - -float64 HELPER(itrunc_f64)(CPUM68KState *env, float64 val) -{ - return float64_trunc_to_int(val, &env->fp_status); -} - -float64 HELPER(sqrt_f64)(CPUM68KState *env, float64 val) -{ - return float64_sqrt(val, &env->fp_status); -} - -float64 HELPER(abs_f64)(float64 val) -{ - return float64_abs(val); -} - -float64 HELPER(chs_f64)(float64 val) -{ - return float64_chs(val); -} - -float64 HELPER(add_f64)(CPUM68KState *env, float64 a, float64 b) -{ - return float64_add(a, b, &env->fp_status); -} - -float64 HELPER(sub_f64)(CPUM68KState *env, float64 a, float64 b) -{ - return float64_sub(a, b, &env->fp_status); -} - -float64 HELPER(mul_f64)(CPUM68KState *env, float64 a, float64 b) -{ - return float64_mul(a, b, &env->fp_status); -} - -float64 HELPER(div_f64)(CPUM68KState *env, float64 a, float64 b) -{ - return float64_div(a, b, &env->fp_status); -} - -float64 HELPER(sub_cmp_f64)(CPUM68KState *env, float64 a, float64 b) -{ - /* ??? This may incorrectly raise exceptions. */ - /* ??? Should flush denormals to zero. */ - float64 res; - res = float64_sub(a, b, &env->fp_status); - if (float64_is_quiet_nan(res, &env->fp_status)) { - /* +/-inf compares equal against itself, but sub returns nan. */ - if (!float64_is_quiet_nan(a, &env->fp_status) - && !float64_is_quiet_nan(b, &env->fp_status)) { - res = float64_zero; - if (float64_lt_quiet(a, res, &env->fp_status)) - res = float64_chs(res); - } - } - return res; -} - -uint32_t HELPER(compare_f64)(CPUM68KState *env, float64 val) -{ - return float64_compare_quiet(val, float64_zero, &env->fp_status); -} /* MAC unit. */ /* FIXME: The MAC unit implementation is a bit of a mess. Some helpers diff --git a/target/m68k/translate.c b/target/m68k/translate.c index ad4d4efb8d..dfecfb6e5f 100644 --- a/target/m68k/translate.c +++ b/target/m68k/translate.c @@ -565,7 +565,7 @@ static void gen_flush_flags(DisasContext *s) t1 = tcg_temp_new(); tcg_gen_add_i32(t0, QREG_CC_N, QREG_CC_V); gen_ext(t0, t0, s->cc_op - CC_OP_SUBB, 1); - tcg_gen_xor_i32(t1, QREG_CC_N, QREG_CC_V); + tcg_gen_xor_i32(t1, QREG_CC_N, t0); tcg_gen_xor_i32(QREG_CC_V, QREG_CC_V, t0); tcg_temp_free(t0); tcg_gen_and_i32(QREG_CC_V, QREG_CC_V, t1); @@ -669,6 +669,21 @@ static inline int insn_opsize(int insn) } } +static inline int ext_opsize(int ext, int pos) +{ + switch ((ext >> pos) & 7) { + case 0: return OS_LONG; + case 1: return OS_SINGLE; + case 2: return OS_EXTENDED; + case 3: return OS_PACKED; + case 4: return OS_WORD; + case 5: return OS_DOUBLE; + case 6: return OS_BYTE; + default: + g_assert_not_reached(); + } +} + /* Assign value to a register. If the width is less than the register width only the low part of the register is set. */ static void gen_partset_reg(int opsize, TCGv reg, TCGv val) @@ -4111,20 +4126,19 @@ DISAS_INSN(fpu) tmp32 = tcg_temp_new_i32(); /* fmove */ /* ??? TODO: Proper behavior on overflow. */ - switch ((ext >> 10) & 7) { - case 0: - opsize = OS_LONG; + + opsize = ext_opsize(ext, 10); + switch (opsize) { + case OS_LONG: gen_helper_f64_to_i32(tmp32, cpu_env, src); break; - case 1: - opsize = OS_SINGLE; + case OS_SINGLE: gen_helper_f64_to_f32(tmp32, cpu_env, src); break; - case 4: - opsize = OS_WORD; + case OS_WORD: gen_helper_f64_to_i32(tmp32, cpu_env, src); break; - case 5: /* OS_DOUBLE */ + case OS_DOUBLE: tcg_gen_mov_i32(tmp32, AREG(insn, 0)); switch ((insn >> 3) & 7) { case 2: @@ -4153,8 +4167,7 @@ DISAS_INSN(fpu) } tcg_temp_free_i32(tmp32); return; - case 6: - opsize = OS_BYTE; + case OS_BYTE: gen_helper_f64_to_i32(tmp32, cpu_env, src); break; default: @@ -4227,15 +4240,7 @@ DISAS_INSN(fpu) } if (ext & (1 << 14)) { /* Source effective address. */ - switch ((ext >> 10) & 7) { - case 0: opsize = OS_LONG; break; - case 1: opsize = OS_SINGLE; break; - case 4: opsize = OS_WORD; break; - case 5: opsize = OS_DOUBLE; break; - case 6: opsize = OS_BYTE; break; - default: - goto undef; - } + opsize = ext_opsize(ext, 10); if (opsize == OS_DOUBLE) { tmp32 = tcg_temp_new_i32(); tcg_gen_mov_i32(tmp32, AREG(insn, 0)); diff --git a/tcg-runtime.c b/tcg/tcg-runtime.c index ec3a34e461..ec3a34e461 100644 --- a/tcg-runtime.c +++ b/tcg/tcg-runtime.c diff --git a/tests/Makefile.include b/tests/Makefile.include index f42f3dfa72..77da9b7f4b 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -205,6 +205,8 @@ check-qtest-pci-y += tests/intel-hda-test$(EXESUF) gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c check-qtest-pci-$(CONFIG_EVENTFD) += tests/ivshmem-test$(EXESUF) gcov-files-pci-y += hw/misc/ivshmem.c +check-qtest-pci-y += tests/megasas-test$(EXESUF) +gcov-files-pci-y += hw/scsi/megasas.c check-qtest-i386-y = tests/endianness-test$(EXESUF) check-qtest-i386-y += tests/fdc-test$(EXESUF) @@ -755,6 +757,7 @@ tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y) tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y) tests/test-x86-cpuid-compat$(EXESUF): tests/test-x86-cpuid-compat.o $(qtest-obj-y) tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y) $(libqos-spapr-obj-y) +tests/megasas-test$(EXESUF): tests/megasas-test.o $(libqos-spapr-obj-y) $(libqos-pc-obj-y) tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o contrib/libvhost-user/libvhost-user.o $(test-util-obj-y) tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y) tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include index 03eda37bf4..0ed8c3d323 100644 --- a/tests/docker/Makefile.include +++ b/tests/docker/Makefile.include @@ -126,7 +126,7 @@ docker-run: docker-qemu-src " COPYING $(EXECUTABLE) to $(IMAGE)")) $(call quiet-command, \ $(SRC_PATH)/tests/docker/docker.py run \ - -t \ + $(if $(NOUSER),,-u $(shell id -u)) -t \ $(if $V,,--rm) \ $(if $(DEBUG),-i,--net=none) \ -e TARGET_LIST=$(TARGET_LIST) \ diff --git a/tests/docker/dockerfiles/centos6.docker b/tests/docker/dockerfiles/centos6.docker index 34e0d3b91e..17a4d24d54 100644 --- a/tests/docker/dockerfiles/centos6.docker +++ b/tests/docker/dockerfiles/centos6.docker @@ -1,7 +1,7 @@ FROM centos:6 RUN yum install -y epel-release ENV PACKAGES libfdt-devel ccache \ - tar git make gcc g++ \ + tar git make gcc g++ flex bison \ zlib-devel glib2-devel SDL-devel pixman-devel \ epel-release RUN yum install -y $PACKAGES diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker index c4f80ad3d8..4eaa8ed2a5 100644 --- a/tests/docker/dockerfiles/fedora.docker +++ b/tests/docker/dockerfiles/fedora.docker @@ -1,8 +1,8 @@ FROM fedora:latest ENV PACKAGES \ - ccache git tar PyYAML sparse flex bison python2 \ + ccache git tar PyYAML sparse flex bison python2 bzip2 hostname \ glib2-devel pixman-devel zlib-devel SDL-devel libfdt-devel \ - gcc gcc-c++ clang make perl which bc findutils \ + gcc gcc-c++ clang make perl which bc findutils libaio-devel \ mingw32-pixman mingw32-glib2 mingw32-gmp mingw32-SDL mingw32-pkg-config \ mingw32-gtk2 mingw32-gtk3 mingw32-gnutls mingw32-nettle mingw32-libtasn1 \ mingw32-libjpeg-turbo mingw32-libpng mingw32-curl mingw32-libssh2 \ diff --git a/tests/megasas-test.c b/tests/megasas-test.c new file mode 100644 index 0000000000..ce960e7f81 --- /dev/null +++ b/tests/megasas-test.c @@ -0,0 +1,86 @@ +/* + * QTest testcase for LSI MegaRAID + * + * Copyright (c) 2017 Red Hat Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "libqtest.h" +#include "qemu/bswap.h" +#include "libqos/libqos-pc.h" +#include "libqos/libqos-spapr.h" + +static QOSState *qmegasas_start(const char *extra_opts) +{ + const char *arch = qtest_get_arch(); + const char *cmd = "-drive id=hd0,if=none,file=null-co://,format=raw " + "-device megasas,id=scsi0,addr=04.0 " + "-device scsi-hd,bus=scsi0.0,drive=hd0 %s"; + + if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { + return qtest_pc_boot(cmd, extra_opts ? : ""); + } + + g_printerr("virtio-scsi tests are only available on x86 or ppc64\n"); + exit(EXIT_FAILURE); +} + +static void qmegasas_stop(QOSState *qs) +{ + qtest_shutdown(qs); +} + +/* Tests only initialization so far. TODO: Replace with functional tests */ +static void pci_nop(void) +{ + QOSState *qs; + + qs = qmegasas_start(NULL); + qmegasas_stop(qs); +} + +/* This used to cause a NULL pointer dereference. */ +static void megasas_pd_get_info_fuzz(void) +{ + QPCIDevice *dev; + QOSState *qs; + QPCIBar bar; + uint32_t context[256]; + uint64_t context_pa; + int i; + + qs = qmegasas_start(NULL); + dev = qpci_device_find(qs->pcibus, QPCI_DEVFN(4,0)); + g_assert(dev != NULL); + + qpci_device_enable(dev); + bar = qpci_iomap(dev, 0, NULL); + + memset(context, 0, sizeof(context)); + context[0] = cpu_to_le32(0x05050505); + context[1] = cpu_to_le32(0x01010101); + for (i = 2; i < ARRAY_SIZE(context); i++) { + context[i] = cpu_to_le32(0x41414141); + } + context[6] = cpu_to_le32(0x02020000); + context[7] = cpu_to_le32(0); + + context_pa = qmalloc(qs, sizeof(context)); + memwrite(context_pa, context, sizeof(context)); + qpci_io_writel(dev, bar, 0x40, context_pa); + + g_free(dev); + qmegasas_stop(qs); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + qtest_add_func("/megasas/pci/nop", pci_nop); + qtest_add_func("/megasas/dcmd/pd-get-info/fuzz", megasas_pd_get_info_fuzz); + + return g_test_run(); +} diff --git a/tests/q35-test.c b/tests/q35-test.c index cc58f3ecf4..f98bed7a2d 100644 --- a/tests/q35-test.c +++ b/tests/q35-test.c @@ -15,6 +15,48 @@ #include "libqos/pci-pc.h" #include "hw/pci-host/q35.h" +#define TSEG_SIZE_TEST_GUEST_RAM_MBYTES 128 + +/* @esmramc_tseg_sz: ESMRAMC.TSEG_SZ bitmask for selecting the requested TSEG + * size. Must be a subset of + * MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK. + * + * @extended_tseg_mbytes: Size of the extended TSEG. Only consulted if + * @esmramc_tseg_sz equals + * MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK precisely. + * + * @expected_tseg_mbytes: Expected guest-visible TSEG size in megabytes, + * matching @esmramc_tseg_sz and @extended_tseg_mbytes + * above. + */ +struct TsegSizeArgs { + uint8_t esmramc_tseg_sz; + uint16_t extended_tseg_mbytes; + uint16_t expected_tseg_mbytes; +}; +typedef struct TsegSizeArgs TsegSizeArgs; + +static const TsegSizeArgs tseg_1mb = { + .esmramc_tseg_sz = MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_1MB, + .extended_tseg_mbytes = 0, + .expected_tseg_mbytes = 1, +}; +static const TsegSizeArgs tseg_2mb = { + .esmramc_tseg_sz = MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_2MB, + .extended_tseg_mbytes = 0, + .expected_tseg_mbytes = 2, +}; +static const TsegSizeArgs tseg_8mb = { + .esmramc_tseg_sz = MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_8MB, + .extended_tseg_mbytes = 0, + .expected_tseg_mbytes = 8, +}; +static const TsegSizeArgs tseg_ext_16mb = { + .esmramc_tseg_sz = MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK, + .extended_tseg_mbytes = 16, + .expected_tseg_mbytes = 16, +}; + static void smram_set_bit(QPCIDevice *pcidev, uint8_t mask, bool enabled) { uint8_t smram; @@ -42,6 +84,8 @@ static void test_smram_lock(void) QPCIDevice *pcidev; QDict *response; + qtest_start("-M q35"); + pcibus = qpci_init_pc(NULL); g_assert(pcibus != NULL); @@ -74,19 +118,86 @@ static void test_smram_lock(void) g_free(pcidev); qpci_free_pc(pcibus); + + qtest_end(); } -int main(int argc, char **argv) +static void test_tseg_size(const void *data) { - int ret; + const TsegSizeArgs *args = data; + char *cmdline; + QPCIBus *pcibus; + QPCIDevice *pcidev; + uint8_t smram_val; + uint8_t esmramc_val; + uint32_t ram_offs; + + if (args->esmramc_tseg_sz == MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK) { + cmdline = g_strdup_printf("-M q35 -m %uM " + "-global mch.extended-tseg-mbytes=%u", + TSEG_SIZE_TEST_GUEST_RAM_MBYTES, + args->extended_tseg_mbytes); + } else { + cmdline = g_strdup_printf("-M q35 -m %uM", + TSEG_SIZE_TEST_GUEST_RAM_MBYTES); + } + qtest_start(cmdline); + g_free(cmdline); - g_test_init(&argc, &argv, NULL); + /* locate the DRAM controller */ + pcibus = qpci_init_pc(NULL); + g_assert(pcibus != NULL); + pcidev = qpci_device_find(pcibus, 0); + g_assert(pcidev != NULL); - qtest_add_func("/q35/smram/lock", test_smram_lock); + /* Set TSEG size. Restrict TSEG visibility to SMM by setting T_EN. */ + esmramc_val = qpci_config_readb(pcidev, MCH_HOST_BRIDGE_ESMRAMC); + esmramc_val &= ~MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK; + esmramc_val |= args->esmramc_tseg_sz; + esmramc_val |= MCH_HOST_BRIDGE_ESMRAMC_T_EN; + qpci_config_writeb(pcidev, MCH_HOST_BRIDGE_ESMRAMC, esmramc_val); + + /* Enable TSEG by setting G_SMRAME. Close TSEG by setting D_CLS. */ + smram_val = qpci_config_readb(pcidev, MCH_HOST_BRIDGE_SMRAM); + smram_val &= ~(MCH_HOST_BRIDGE_SMRAM_D_OPEN | + MCH_HOST_BRIDGE_SMRAM_D_LCK); + smram_val |= (MCH_HOST_BRIDGE_SMRAM_D_CLS | + MCH_HOST_BRIDGE_SMRAM_G_SMRAME); + qpci_config_writeb(pcidev, MCH_HOST_BRIDGE_SMRAM, smram_val); + + /* lock TSEG */ + smram_val |= MCH_HOST_BRIDGE_SMRAM_D_LCK; + qpci_config_writeb(pcidev, MCH_HOST_BRIDGE_SMRAM, smram_val); + + /* Now check that the byte right before the TSEG is r/w, and that the first + * byte in the TSEG always reads as 0xff. + */ + ram_offs = (TSEG_SIZE_TEST_GUEST_RAM_MBYTES - args->expected_tseg_mbytes) * + 1024 * 1024 - 1; + g_assert_cmpint(readb(ram_offs), ==, 0); + writeb(ram_offs, 1); + g_assert_cmpint(readb(ram_offs), ==, 1); + + ram_offs++; + g_assert_cmpint(readb(ram_offs), ==, 0xff); + writeb(ram_offs, 1); + g_assert_cmpint(readb(ram_offs), ==, 0xff); - qtest_start("-M q35"); - ret = g_test_run(); + g_free(pcidev); + qpci_free_pc(pcibus); qtest_end(); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + qtest_add_func("/q35/smram/lock", test_smram_lock); - return ret; + qtest_add_data_func("/q35/tseg-size/1mb", &tseg_1mb, test_tseg_size); + qtest_add_data_func("/q35/tseg-size/2mb", &tseg_2mb, test_tseg_size); + qtest_add_data_func("/q35/tseg-size/8mb", &tseg_8mb, test_tseg_size); + qtest_add_data_func("/q35/tseg-size/ext/16mb", &tseg_ext_16mb, + test_tseg_size); + return g_test_run(); } diff --git a/trace-events b/trace-events index fd83087e39..bae63fdb1d 100644 --- a/trace-events +++ b/trace-events @@ -55,28 +55,6 @@ dma_complete(void *dbs, int ret, void *cb) "dbs=%p ret=%d cb=%p" dma_blk_cb(void *dbs, int ret) "dbs=%p ret=%d" dma_map_wait(void *dbs) "dbs=%p" -# kvm-all.c -kvm_ioctl(int type, void *arg) "type 0x%x, arg %p" -kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p" -kvm_vcpu_ioctl(int cpu_index, int type, void *arg) "cpu_index %d, type 0x%x, arg %p" -kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d" -kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p" -kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" -kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s" -kvm_irqchip_commit_routes(void) "" -kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d" -kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" -kvm_irqchip_release_virq(int virq) "virq %d" - -# TCG related tracing (mostly disabled by default) -# cpu-exec.c -disable exec_tb(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR -disable exec_tb_nocache(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR -disable exec_tb_exit(void *last_tb, unsigned int flags) "tb:%p flags=%x" - -# translate-all.c -translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p" - # memory.c memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u" memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u" diff --git a/ui/cocoa.m b/ui/cocoa.m index 004ec2711c..1f010d3ae7 100644 --- a/ui/cocoa.m +++ b/ui/cocoa.m @@ -52,6 +52,8 @@ /* macOS 10.12 deprecated many constants, #define the new names for older SDKs */ #if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_12 #define NSEventMaskAny NSAnyEventMask +#define NSEventModifierFlagCapsLock NSAlphaShiftKeyMask +#define NSEventModifierFlagShift NSShiftKeyMask #define NSEventModifierFlagCommand NSCommandKeyMask #define NSEventModifierFlagControl NSControlKeyMask #define NSEventModifierFlagOption NSAlternateKeyMask @@ -268,7 +270,7 @@ static void handleAnyDeviceErrors(Error * err) NSWindow *fullScreenWindow; float cx,cy,cw,ch,cdx,cdy; CGDataProviderRef dataProviderRef; - int modifiers_state[256]; + BOOL modifiers_state[256]; BOOL isMouseGrabbed; BOOL isFullscreen; BOOL isAbsoluteEnabled; @@ -536,18 +538,59 @@ QemuCocoaView *cocoaView; } } +- (void) toggleModifier: (int)keycode { + // Toggle the stored state. + modifiers_state[keycode] = !modifiers_state[keycode]; + // Send a keyup or keydown depending on the state. + qemu_input_event_send_key_qcode(dcl->con, keycode, modifiers_state[keycode]); +} + +- (void) toggleStatefulModifier: (int)keycode { + // Toggle the stored state. + modifiers_state[keycode] = !modifiers_state[keycode]; + // Generate keydown and keyup. + qemu_input_event_send_key_qcode(dcl->con, keycode, true); + qemu_input_event_send_key_qcode(dcl->con, keycode, false); +} + - (void) handleEvent:(NSEvent *)event { COCOA_DEBUG("QemuCocoaView: handleEvent\n"); int buttons = 0; - int keycode; + int keycode = 0; bool mouse_event = false; NSPoint p = [event locationInWindow]; switch ([event type]) { case NSEventTypeFlagsChanged: - keycode = cocoa_keycode_to_qemu([event keyCode]); + if ([event keyCode] == 0) { + // When the Cocoa keyCode is zero that means keys should be + // synthesized based on the values in in the eventModifiers + // bitmask. + + if (qemu_console_is_graphic(NULL)) { + NSEventModifierFlags modifiers = [event modifierFlags]; + + if (!!(modifiers & NSEventModifierFlagCapsLock) != !!modifiers_state[Q_KEY_CODE_CAPS_LOCK]) { + [self toggleStatefulModifier:Q_KEY_CODE_CAPS_LOCK]; + } + if (!!(modifiers & NSEventModifierFlagShift) != !!modifiers_state[Q_KEY_CODE_SHIFT]) { + [self toggleModifier:Q_KEY_CODE_SHIFT]; + } + if (!!(modifiers & NSEventModifierFlagControl) != !!modifiers_state[Q_KEY_CODE_CTRL]) { + [self toggleModifier:Q_KEY_CODE_CTRL]; + } + if (!!(modifiers & NSEventModifierFlagOption) != !!modifiers_state[Q_KEY_CODE_ALT]) { + [self toggleModifier:Q_KEY_CODE_ALT]; + } + if (!!(modifiers & NSEventModifierFlagCommand) != !!modifiers_state[Q_KEY_CODE_META_L]) { + [self toggleModifier:Q_KEY_CODE_META_L]; + } + } + } else { + keycode = cocoa_keycode_to_qemu([event keyCode]); + } if ((keycode == Q_KEY_CODE_META_L || keycode == Q_KEY_CODE_META_R) && !isMouseGrabbed) { @@ -559,16 +602,9 @@ QemuCocoaView *cocoaView; // emulate caps lock and num lock keydown and keyup if (keycode == Q_KEY_CODE_CAPS_LOCK || keycode == Q_KEY_CODE_NUM_LOCK) { - qemu_input_event_send_key_qcode(dcl->con, keycode, true); - qemu_input_event_send_key_qcode(dcl->con, keycode, false); + [self toggleStatefulModifier:keycode]; } else if (qemu_console_is_graphic(NULL)) { - if (modifiers_state[keycode] == 0) { // keydown - qemu_input_event_send_key_qcode(dcl->con, keycode, true); - modifiers_state[keycode] = 1; - } else { // keyup - qemu_input_event_send_key_qcode(dcl->con, keycode, false); - modifiers_state[keycode] = 0; - } + [self toggleModifier:keycode]; } } diff --git a/ui/spice-core.c b/ui/spice-core.c index a087ad5da9..182f550f1f 100644 --- a/ui/spice-core.c +++ b/ui/spice-core.c @@ -847,6 +847,7 @@ void qemu_spice_init(void) exit(1); } display_opengl = 1; + spice_opengl = 1; } #endif } diff --git a/ui/spice-display.c b/ui/spice-display.c index b353445f58..042292cc90 100644 --- a/ui/spice-display.c +++ b/ui/spice-display.c @@ -27,6 +27,7 @@ #include "ui/spice-display.h" static int debug = 0; +bool spice_opengl; static void GCC_FMT_ATTR(2, 3) dprint(int level, const char *fmt, ...) { @@ -1013,7 +1014,7 @@ static void qemu_spice_display_init_one(QemuConsole *con) ssd->dcl.ops = &display_listener_ops; #ifdef HAVE_SPICE_GL - if (display_opengl) { + if (spice_opengl) { ssd->dcl.ops = &display_listener_gl_ops; ssd->gl_unblock_bh = qemu_bh_new(qemu_spice_gl_unblock_bh, ssd); ssd->gl_unblock_timer = timer_new_ms(QEMU_CLOCK_REALTIME, diff --git a/ui/spice-input.c b/ui/spice-input.c index 86293dd2ce..918580239d 100644 --- a/ui/spice-input.c +++ b/ui/spice-input.c @@ -87,7 +87,7 @@ static void kbd_leds(void *opaque, int ledstate) if (ledstate & QEMU_CAPS_LOCK_LED) { kbd->ledstate |= SPICE_KEYBOARD_MODIFIER_FLAGS_CAPS_LOCK; } - spice_server_kbd_leds(&kbd->sin, ledstate); + spice_server_kbd_leds(&kbd->sin, kbd->ledstate); } /* mouse bits */ diff --git a/util/Makefile.objs b/util/Makefile.objs index 94d9477209..50a55ecc75 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -43,4 +43,5 @@ util-obj-y += log.o util-obj-y += qdist.o util-obj-y += qht.o util-obj-y += range.o +util-obj-y += stats64.o util-obj-y += systemd.o diff --git a/util/stats64.c b/util/stats64.c new file mode 100644 index 0000000000..9968fcceac --- /dev/null +++ b/util/stats64.c @@ -0,0 +1,137 @@ +/* + * Atomic operations on 64-bit quantities. + * + * Copyright (C) 2017 Red Hat, Inc. + * + * Author: Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/atomic.h" +#include "qemu/stats64.h" +#include "qemu/processor.h" + +#ifndef CONFIG_ATOMIC64 +static inline void stat64_rdlock(Stat64 *s) +{ + /* Keep out incoming writers to avoid them starving us. */ + atomic_add(&s->lock, 2); + + /* If there is a concurrent writer, wait for it. */ + while (atomic_read(&s->lock) & 1) { + cpu_relax(); + } +} + +static inline void stat64_rdunlock(Stat64 *s) +{ + atomic_sub(&s->lock, 2); +} + +static inline bool stat64_wrtrylock(Stat64 *s) +{ + return atomic_cmpxchg(&s->lock, 0, 1) == 0; +} + +static inline void stat64_wrunlock(Stat64 *s) +{ + atomic_dec(&s->lock); +} + +uint64_t stat64_get(const Stat64 *s) +{ + uint32_t high, low; + + stat64_rdlock((Stat64 *)s); + + /* 64-bit writes always take the lock, so we can read in + * any order. + */ + high = atomic_read(&s->high); + low = atomic_read(&s->low); + stat64_rdunlock((Stat64 *)s); + + return ((uint64_t)high << 32) | low; +} + +bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high) +{ + uint32_t old; + + if (!stat64_wrtrylock(s)) { + cpu_relax(); + return false; + } + + /* 64-bit reads always take the lock, so they don't care about the + * order of our update. By updating s->low first, we can check + * whether we have to carry into s->high. + */ + old = atomic_fetch_add(&s->low, low); + high += (old + low) < old; + atomic_add(&s->high, high); + stat64_wrunlock(s); + return true; +} + +bool stat64_min_slow(Stat64 *s, uint64_t value) +{ + uint32_t high, low; + uint64_t orig; + + if (!stat64_wrtrylock(s)) { + cpu_relax(); + return false; + } + + high = atomic_read(&s->high); + low = atomic_read(&s->low); + + orig = ((uint64_t)high << 32) | low; + if (orig < value) { + /* We have to set low before high, just like stat64_min reads + * high before low. The value may become higher temporarily, but + * stat64_get does not notice (it takes the lock) and the only ill + * effect on stat64_min is that the slow path may be triggered + * unnecessarily. + */ + atomic_set(&s->low, (uint32_t)value); + smp_wmb(); + atomic_set(&s->high, value >> 32); + } + stat64_wrunlock(s); + return true; +} + +bool stat64_max_slow(Stat64 *s, uint64_t value) +{ + uint32_t high, low; + uint64_t orig; + + if (!stat64_wrtrylock(s)) { + cpu_relax(); + return false; + } + + high = atomic_read(&s->high); + low = atomic_read(&s->low); + + orig = ((uint64_t)high << 32) | low; + if (orig > value) { + /* We have to set low before high, just like stat64_max reads + * high before low. The value may become lower temporarily, but + * stat64_get does not notice (it takes the lock) and the only ill + * effect on stat64_max is that the slow path may be triggered + * unnecessarily. + */ + atomic_set(&s->low, (uint32_t)value); + smp_wmb(); + atomic_set(&s->high, value >> 32); + } + stat64_wrunlock(s); + return true; +} +#endif @@ -3757,21 +3757,18 @@ int main(int argc, char **argv, char **envp) qdev_prop_register_global(&kvm_pit_lost_tick_policy); break; } - case QEMU_OPTION_accel: { - QemuOpts *accel_opts; - + case QEMU_OPTION_accel: accel_opts = qemu_opts_parse_noisily(qemu_find_opts("accel"), optarg, true); optarg = qemu_opt_get(accel_opts, "accel"); if (!optarg || is_help_option(optarg)) { error_printf("Possible accelerators: kvm, xen, hax, tcg\n"); - exit(1); + exit(0); } - accel_opts = qemu_opts_create(qemu_find_opts("machine"), NULL, - false, &error_abort); - qemu_opt_set(accel_opts, "accel", optarg, &error_abort); + opts = qemu_opts_create(qemu_find_opts("machine"), NULL, + false, &error_abort); + qemu_opt_set(opts, "accel", optarg, &error_abort); break; - } case QEMU_OPTION_usb: olist = qemu_find_opts("machine"); qemu_opts_parse_noisily(olist, "usb=on", false); |