diff options
83 files changed, 2833 insertions, 1553 deletions
diff --git a/.gitlab-ci-edk2.yml b/.gitlab-ci-edk2.yml new file mode 100644 index 0000000000..088ba4b43a --- /dev/null +++ b/.gitlab-ci-edk2.yml @@ -0,0 +1,49 @@ +docker-edk2: + stage: build + rules: # Only run this job when the Dockerfile is modified + - changes: + - .gitlab-ci-edk2.yml + - .gitlab-ci.d/edk2/Dockerfile + when: always + image: docker:19.03.1 + services: + - docker:19.03.1-dind + variables: + GIT_DEPTH: 3 + IMAGE_TAG: $CI_REGISTRY_IMAGE:edk2-cross-build + # We don't use TLS + DOCKER_HOST: tcp://docker:2375 + DOCKER_TLS_CERTDIR: "" + before_script: + - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY + script: + - docker pull $IMAGE_TAG || true + - docker build --cache-from $IMAGE_TAG --tag $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA + --tag $IMAGE_TAG .gitlab-ci.d/edk2 + - docker push $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA + - docker push $IMAGE_TAG + +build-edk2: + rules: # Only run this job when ... + - changes: # ... roms/edk2/ is modified (submodule updated) + - roms/edk2/* + when: always + - if: '$CI_COMMIT_REF_NAME =~ /^edk2/' # or the branch/tag starts with 'edk2' + when: always + - if: '$CI_COMMIT_MESSAGE =~ /edk2/i' # or last commit description contains 'EDK2' + when: always + artifacts: + paths: # 'artifacts.zip' will contains the following files: + - pc-bios/edk2*bz2 + - pc-bios/edk2-licenses.txt + - edk2-stdout.log + - edk2-stderr.log + image: $CI_REGISTRY_IMAGE:edk2-cross-build + variables: + GIT_DEPTH: 3 + script: # Clone the required submodules and build EDK2 + - git submodule update --init roms/edk2 + - git -C roms/edk2 submodule update --init + - export JOBS=$(($(getconf _NPROCESSORS_ONLN) + 1)) + - echo "=== Using ${JOBS} simultaneous jobs ===" + - make -j${JOBS} -C roms efi 2>&1 1>edk2-stdout.log | tee -a edk2-stderr.log >&2 diff --git a/.gitlab-ci.d/edk2/Dockerfile b/.gitlab-ci.d/edk2/Dockerfile new file mode 100644 index 0000000000..b4584d1cf6 --- /dev/null +++ b/.gitlab-ci.d/edk2/Dockerfile @@ -0,0 +1,27 @@ +# +# Docker image to cross-compile EDK2 firmware binaries +# +FROM ubuntu:16.04 + +MAINTAINER Philippe Mathieu-Daudé <philmd@redhat.com> + +# Install packages required to build EDK2 +RUN apt update \ + && \ + \ + DEBIAN_FRONTEND=noninteractive \ + apt install --assume-yes --no-install-recommends \ + build-essential \ + ca-certificates \ + dos2unix \ + gcc-aarch64-linux-gnu \ + gcc-arm-linux-gnueabi \ + git \ + iasl \ + make \ + nasm \ + python \ + uuid-dev \ + && \ + \ + rm -rf /var/lib/apt/lists/* diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d9a90f795d..228783993e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,3 +1,6 @@ +include: + - local: '/.gitlab-ci-edk2.yml' + before_script: - apt-get update -qq - apt-get install -y -qq flex bison libglib2.0-dev libpixman-1-dev genisoimage diff --git a/MAINTAINERS b/MAINTAINERS index 55d3642e6c..d9798c20c0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2368,6 +2368,8 @@ F: roms/edk2 F: roms/edk2-* F: tests/data/uefi-boot-images/ F: tests/uefi-test-tools/ +F: .gitlab-ci-edk2.yml +F: .gitlab-ci.d/edk2/ Usermode Emulation ------------------ @@ -2517,6 +2519,7 @@ F: include/block/nbd* F: qemu-nbd.* F: blockdev-nbd.c F: docs/interop/nbd.txt +F: docs/interop/qemu-nbd.rst T: git https://repo.or.cz/qemu/ericb.git nbd NFS @@ -339,10 +339,12 @@ MANUAL_BUILDDIR := docs endif ifdef BUILD_DOCS -DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 qemu-nbd.8 $(MANUAL_BUILDDIR)/interop/qemu-ga.8 +DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 +DOCS+=$(MANUAL_BUILDDIR)/interop/qemu-nbd.8 +DOCS+=$(MANUAL_BUILDDIR)/interop/qemu-ga.8 +DOCS+=$(MANUAL_BUILDDIR)/system/qemu-block-drivers.7 DOCS+=docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7 DOCS+=docs/interop/qemu-ga-ref.html docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7 -DOCS+=docs/qemu-block-drivers.7 DOCS+=docs/qemu-cpu-models.7 DOCS+=$(MANUAL_BUILDDIR)/index.html ifdef CONFIG_VIRTFS @@ -749,12 +751,12 @@ distclean: clean rm -f docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt rm -f docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf rm -f docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html - rm -f docs/qemu-block-drivers.7 rm -f docs/qemu-cpu-models.7 rm -rf .doctrees $(call clean-manual,devel) $(call clean-manual,interop) $(call clean-manual,specs) + $(call clean-manual,system) for d in $(TARGET_DIRS); do \ rm -rf $$d || exit 1 ; \ done @@ -811,6 +813,7 @@ endef install-sphinxdocs: sphinxdocs $(call install-manual,interop) $(call install-manual,specs) + $(call install-manual,system) install-doc: $(DOCS) install-sphinxdocs $(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)" @@ -824,12 +827,12 @@ ifdef CONFIG_POSIX $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1" $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" - $(INSTALL_DATA) docs/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" + $(INSTALL_DATA) $(MANUAL_BUILDDIR)/system/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) docs/qemu-cpu-models.7 "$(DESTDIR)$(mandir)/man7" ifeq ($(CONFIG_TOOLS),y) $(INSTALL_DATA) qemu-img.1 "$(DESTDIR)$(mandir)/man1" $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8" - $(INSTALL_DATA) qemu-nbd.8 "$(DESTDIR)$(mandir)/man8" + $(INSTALL_DATA) $(MANUAL_BUILDDIR)/interop/qemu-nbd.8 "$(DESTDIR)$(mandir)/man8" endif ifdef CONFIG_TRACE_SYSTEMTAP $(INSTALL_DATA) scripts/qemu-trace-stap.1 "$(DESTDIR)$(mandir)/man1" @@ -998,7 +1001,10 @@ docs/version.texi: $(SRC_PATH)/VERSION config-host.mak # and handles "don't rebuild things unless necessary" itself. # The '.doctrees' files are cached information to speed this up. .PHONY: sphinxdocs -sphinxdocs: $(MANUAL_BUILDDIR)/devel/index.html $(MANUAL_BUILDDIR)/interop/index.html $(MANUAL_BUILDDIR)/specs/index.html +sphinxdocs: $(MANUAL_BUILDDIR)/devel/index.html \ + $(MANUAL_BUILDDIR)/interop/index.html \ + $(MANUAL_BUILDDIR)/specs/index.html \ + $(MANUAL_BUILDDIR)/system/index.html # Canned command to build a single manual # Arguments: $1 = manual name, $2 = Sphinx builder ('html' or 'man') @@ -1007,7 +1013,9 @@ sphinxdocs: $(MANUAL_BUILDDIR)/devel/index.html $(MANUAL_BUILDDIR)/interop/index # a single doctree: https://github.com/sphinx-doc/sphinx/issues/2946 build-manual = $(call quiet-command,CONFDIR="$(qemu_confdir)" sphinx-build $(if $(V),,-q) -W -b $2 -D version=$(VERSION) -D release="$(FULL_VERSION)" -d .doctrees/$1-$2 $(SRC_PATH)/docs/$1 $(MANUAL_BUILDDIR)/$1 ,"SPHINX","$(MANUAL_BUILDDIR)/$1") # We assume all RST files in the manual's directory are used in it -manual-deps = $(wildcard $(SRC_PATH)/docs/$1/*.rst) $(SRC_PATH)/docs/$1/conf.py $(SRC_PATH)/docs/conf.py +manual-deps = $(wildcard $(SRC_PATH)/docs/$1/*.rst) \ + $(wildcard $(SRC_PATH)/docs/$1/*.rst.inc) \ + $(SRC_PATH)/docs/$1/conf.py $(SRC_PATH)/docs/conf.py $(MANUAL_BUILDDIR)/devel/index.html: $(call manual-deps,devel) $(call build-manual,devel,html) @@ -1018,10 +1026,20 @@ $(MANUAL_BUILDDIR)/interop/index.html: $(call manual-deps,interop) $(MANUAL_BUILDDIR)/specs/index.html: $(call manual-deps,specs) $(call build-manual,specs,html) +$(MANUAL_BUILDDIR)/system/index.html: $(call manual-deps,system) + $(call build-manual,system,html) + $(MANUAL_BUILDDIR)/interop/qemu-ga.8: $(call manual-deps,interop) $(call build-manual,interop,man) +$(MANUAL_BUILDDIR)/interop/qemu-nbd.8: $(call manual-deps,interop) + $(call build-manual,interop,man) + +$(MANUAL_BUILDDIR)/system/qemu-block-drivers.7: $(call manual-deps,system) + $(call build-manual,system,man) + $(MANUAL_BUILDDIR)/index.html: $(SRC_PATH)/docs/index.html.in qemu-version.h + @mkdir -p "$(MANUAL_BUILDDIR)" $(call quiet-command, sed "s|@@VERSION@@|${VERSION}|g" $< >$@, \ "GEN","$@") @@ -1047,8 +1065,6 @@ qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi qemu.1: qemu-option-trace.texi qemu-img.1: qemu-img.texi qemu-option-trace.texi qemu-img-cmds.texi fsdev/virtfs-proxy-helper.1: fsdev/virtfs-proxy-helper.texi -qemu-nbd.8: qemu-nbd.texi qemu-option-trace.texi -docs/qemu-block-drivers.7: docs/qemu-block-drivers.texi docs/qemu-cpu-models.7: docs/qemu-cpu-models.texi scripts/qemu-trace-stap.1: scripts/qemu-trace-stap.texi @@ -1058,10 +1074,10 @@ pdf: qemu-doc.pdf docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf txt: qemu-doc.txt docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \ - qemu-img.texi qemu-nbd.texi qemu-options.texi \ + qemu-img.texi qemu-options.texi \ qemu-tech.texi qemu-option-trace.texi \ qemu-deprecated.texi qemu-monitor.texi qemu-img-cmds.texi \ - qemu-monitor-info.texi docs/qemu-block-drivers.texi \ + qemu-monitor-info.texi \ docs/qemu-cpu-models.texi docs/security.texi docs/interop/qemu-ga-ref.dvi docs/interop/qemu-ga-ref.html \ diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index a991ea2964..e3b5750c3b 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -80,9 +80,14 @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data)); QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1) -static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx) +static inline size_t tlb_n_entries(CPUTLBDescFast *fast) { - return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS); + return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1; +} + +static inline size_t sizeof_tlb(CPUTLBDescFast *fast) +{ + return fast->mask + (1 << CPU_TLB_ENTRY_BITS); } static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, @@ -92,26 +97,10 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, desc->window_max_entries = max_entries; } -static void tlb_dyn_init(CPUArchState *env) -{ - int i; - - for (i = 0; i < NB_MMU_MODES; i++) { - CPUTLBDesc *desc = &env_tlb(env)->d[i]; - size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS; - - tlb_window_reset(desc, get_clock_realtime(), 0); - desc->n_used_entries = 0; - env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS; - env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries); - env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries); - } -} - /** * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary - * @env: CPU that owns the TLB - * @mmu_idx: MMU index of the TLB + * @desc: The CPUTLBDesc portion of the TLB + * @fast: The CPUTLBDescFast portion of the same TLB * * Called with tlb_lock_held. * @@ -148,13 +137,12 @@ static void tlb_dyn_init(CPUArchState *env) * high), since otherwise we are likely to have a significant amount of * conflict misses. */ -static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx) +static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast, + int64_t now) { - CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx]; - size_t old_size = tlb_n_entries(env, mmu_idx); + size_t old_size = tlb_n_entries(fast); size_t rate; size_t new_size = old_size; - int64_t now = get_clock_realtime(); int64_t window_len_ms = 100; int64_t window_len_ns = window_len_ms * 1000 * 1000; bool window_expired = now > desc->window_begin_ns + window_len_ns; @@ -193,14 +181,15 @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx) return; } - g_free(env_tlb(env)->f[mmu_idx].table); - g_free(env_tlb(env)->d[mmu_idx].iotlb); + g_free(fast->table); + g_free(desc->iotlb); tlb_window_reset(desc, now, 0); /* desc->n_used_entries is cleared by the caller */ - env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; - env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size); - env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size); + fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; + fast->table = g_try_new(CPUTLBEntry, new_size); + desc->iotlb = g_try_new(CPUIOTLBEntry, new_size); + /* * If the allocations fail, try smaller sizes. We just freed some * memory, so going back to half of new_size has a good chance of working. @@ -208,27 +197,51 @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx) * allocations to fail though, so we progressively reduce the allocation * size, aborting if we cannot even allocate the smallest TLB we support. */ - while (env_tlb(env)->f[mmu_idx].table == NULL || - env_tlb(env)->d[mmu_idx].iotlb == NULL) { + while (fast->table == NULL || desc->iotlb == NULL) { if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) { error_report("%s: %s", __func__, strerror(errno)); abort(); } new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS); - env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; + fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; - g_free(env_tlb(env)->f[mmu_idx].table); - g_free(env_tlb(env)->d[mmu_idx].iotlb); - env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size); - env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size); + g_free(fast->table); + g_free(desc->iotlb); + fast->table = g_try_new(CPUTLBEntry, new_size); + desc->iotlb = g_try_new(CPUIOTLBEntry, new_size); } } -static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx) +static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast) { - tlb_mmu_resize_locked(env, mmu_idx); - memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx)); - env_tlb(env)->d[mmu_idx].n_used_entries = 0; + desc->n_used_entries = 0; + desc->large_page_addr = -1; + desc->large_page_mask = -1; + desc->vindex = 0; + memset(fast->table, -1, sizeof_tlb(fast)); + memset(desc->vtable, -1, sizeof(desc->vtable)); +} + +static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx, + int64_t now) +{ + CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx]; + CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx]; + + tlb_mmu_resize_locked(desc, fast, now); + tlb_mmu_flush_locked(desc, fast); +} + +static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now) +{ + size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS; + + tlb_window_reset(desc, now, 0); + desc->n_used_entries = 0; + fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS; + fast->table = g_new(CPUTLBEntry, n_entries); + desc->iotlb = g_new(CPUIOTLBEntry, n_entries); + tlb_mmu_flush_locked(desc, fast); } static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx) @@ -244,13 +257,17 @@ static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx) void tlb_init(CPUState *cpu) { CPUArchState *env = cpu->env_ptr; + int64_t now = get_clock_realtime(); + int i; qemu_spin_init(&env_tlb(env)->c.lock); - /* Ensure that cpu_reset performs a full flush. */ - env_tlb(env)->c.dirty = ALL_MMUIDX_BITS; + /* All tlbs are initialized flushed. */ + env_tlb(env)->c.dirty = 0; - tlb_dyn_init(env); + for (i = 0; i < NB_MMU_MODES; i++) { + tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now); + } } /* flush_all_helper: run fn across all cpus @@ -289,21 +306,12 @@ void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide) *pelide = elide; } -static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx) -{ - tlb_table_flush_by_mmuidx(env, mmu_idx); - env_tlb(env)->d[mmu_idx].large_page_addr = -1; - env_tlb(env)->d[mmu_idx].large_page_mask = -1; - env_tlb(env)->d[mmu_idx].vindex = 0; - memset(env_tlb(env)->d[mmu_idx].vtable, -1, - sizeof(env_tlb(env)->d[0].vtable)); -} - static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) { CPUArchState *env = cpu->env_ptr; uint16_t asked = data.host_int; uint16_t all_dirty, work, to_clean; + int64_t now = get_clock_realtime(); assert_cpu_is_self(cpu); @@ -318,7 +326,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) for (work = to_clean; work != 0; work &= work - 1) { int mmu_idx = ctz32(work); - tlb_flush_one_mmuidx_locked(env, mmu_idx); + tlb_flush_one_mmuidx_locked(env, mmu_idx, now); } qemu_spin_unlock(&env_tlb(env)->c.lock); @@ -440,7 +448,7 @@ static void tlb_flush_page_locked(CPUArchState *env, int midx, tlb_debug("forcing full flush midx %d (" TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", midx, lp_addr, lp_mask); - tlb_flush_one_mmuidx_locked(env, midx); + tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); } else { if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) { tlb_n_used_entries_dec(env, midx); @@ -449,28 +457,29 @@ static void tlb_flush_page_locked(CPUArchState *env, int midx, } } -/* As we are going to hijack the bottom bits of the page address for a - * mmuidx bit mask we need to fail to build if we can't do that +/** + * tlb_flush_page_by_mmuidx_async_0: + * @cpu: cpu on which to flush + * @addr: page of virtual address to flush + * @idxmap: set of mmu_idx to flush + * + * Helper for tlb_flush_page_by_mmuidx and friends, flush one page + * at @addr from the tlbs indicated by @idxmap from @cpu. */ -QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN); - -static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu, - run_on_cpu_data data) +static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu, + target_ulong addr, + uint16_t idxmap) { CPUArchState *env = cpu->env_ptr; - target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr; - target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK; - unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS; int mmu_idx; assert_cpu_is_self(cpu); - tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n", - addr, mmu_idx_bitmap); + tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap); qemu_spin_lock(&env_tlb(env)->c.lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { - if (test_bit(mmu_idx, &mmu_idx_bitmap)) { + if ((idxmap >> mmu_idx) & 1) { tlb_flush_page_locked(env, mmu_idx, addr); } } @@ -479,22 +488,75 @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu, tb_flush_jmp_cache(cpu, addr); } -void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) +/** + * tlb_flush_page_by_mmuidx_async_1: + * @cpu: cpu on which to flush + * @data: encoded addr + idxmap + * + * Helper for tlb_flush_page_by_mmuidx and friends, called through + * async_run_on_cpu. The idxmap parameter is encoded in the page + * offset of the target_ptr field. This limits the set of mmu_idx + * that can be passed via this method. + */ +static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu, + run_on_cpu_data data) +{ + target_ulong addr_and_idxmap = (target_ulong) data.target_ptr; + target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK; + uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK; + + tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap); +} + +typedef struct { + target_ulong addr; + uint16_t idxmap; +} TLBFlushPageByMMUIdxData; + +/** + * tlb_flush_page_by_mmuidx_async_2: + * @cpu: cpu on which to flush + * @data: allocated addr + idxmap + * + * Helper for tlb_flush_page_by_mmuidx and friends, called through + * async_run_on_cpu. The addr+idxmap parameters are stored in a + * TLBFlushPageByMMUIdxData structure that has been allocated + * specifically for this helper. Free the structure when done. + */ +static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu, + run_on_cpu_data data) { - target_ulong addr_and_mmu_idx; + TLBFlushPageByMMUIdxData *d = data.host_ptr; + + tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap); + g_free(d); +} +void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) +{ tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap); /* This should already be page aligned */ - addr_and_mmu_idx = addr & TARGET_PAGE_MASK; - addr_and_mmu_idx |= idxmap; + addr &= TARGET_PAGE_MASK; - if (!qemu_cpu_is_self(cpu)) { - async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work, - RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); + if (qemu_cpu_is_self(cpu)) { + tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap); + } else if (idxmap < TARGET_PAGE_SIZE) { + /* + * Most targets have only a few mmu_idx. In the case where + * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid + * allocating memory for this operation. + */ + async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1, + RUN_ON_CPU_TARGET_PTR(addr | idxmap)); } else { - tlb_flush_page_by_mmuidx_async_work( - cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); + TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1); + + /* Otherwise allocate a structure, freed by the worker. */ + d->addr = addr; + d->idxmap = idxmap; + async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2, + RUN_ON_CPU_HOST_PTR(d)); } } @@ -506,17 +568,36 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr) void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr, uint16_t idxmap) { - const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work; - target_ulong addr_and_mmu_idx; - tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); /* This should already be page aligned */ - addr_and_mmu_idx = addr & TARGET_PAGE_MASK; - addr_and_mmu_idx |= idxmap; + addr &= TARGET_PAGE_MASK; + + /* + * Allocate memory to hold addr+idxmap only when needed. + * See tlb_flush_page_by_mmuidx for details. + */ + if (idxmap < TARGET_PAGE_SIZE) { + flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1, + RUN_ON_CPU_TARGET_PTR(addr | idxmap)); + } else { + CPUState *dst_cpu; + + /* Allocate a separate data block for each destination cpu. */ + CPU_FOREACH(dst_cpu) { + if (dst_cpu != src_cpu) { + TLBFlushPageByMMUIdxData *d + = g_new(TLBFlushPageByMMUIdxData, 1); + + d->addr = addr; + d->idxmap = idxmap; + async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2, + RUN_ON_CPU_HOST_PTR(d)); + } + } + } - flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); - fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); + tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap); } void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr) @@ -528,17 +609,41 @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, target_ulong addr, uint16_t idxmap) { - const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work; - target_ulong addr_and_mmu_idx; - tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); /* This should already be page aligned */ - addr_and_mmu_idx = addr & TARGET_PAGE_MASK; - addr_and_mmu_idx |= idxmap; + addr &= TARGET_PAGE_MASK; + + /* + * Allocate memory to hold addr+idxmap only when needed. + * See tlb_flush_page_by_mmuidx for details. + */ + if (idxmap < TARGET_PAGE_SIZE) { + flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1, + RUN_ON_CPU_TARGET_PTR(addr | idxmap)); + async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1, + RUN_ON_CPU_TARGET_PTR(addr | idxmap)); + } else { + CPUState *dst_cpu; + TLBFlushPageByMMUIdxData *d; + + /* Allocate a separate data block for each destination cpu. */ + CPU_FOREACH(dst_cpu) { + if (dst_cpu != src_cpu) { + d = g_new(TLBFlushPageByMMUIdxData, 1); + d->addr = addr; + d->idxmap = idxmap; + async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2, + RUN_ON_CPU_HOST_PTR(d)); + } + } - flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); - async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); + d = g_new(TLBFlushPageByMMUIdxData, 1); + d->addr = addr; + d->idxmap = idxmap; + async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2, + RUN_ON_CPU_HOST_PTR(d)); + } } void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr) @@ -622,7 +727,7 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) qemu_spin_lock(&env_tlb(env)->c.lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { unsigned int i; - unsigned int n = tlb_n_entries(env, mmu_idx); + unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]); for (i = 0; i < n; i++) { tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i], @@ -4761,6 +4761,12 @@ if compile_prog "" "" ; then syncfs=yes fi +# check for kcov support (kernel must be 4.4+, compiled with certain options) +kcov=no +if check_include sys/kcov.h ; then + kcov=yes +fi + # Check we have a new enough version of sphinx-build has_sphinx_build() { # This is a bit awkward but works: create a trivial document and @@ -6874,6 +6880,9 @@ fi if test "$syncfs" = "yes" ; then echo "CONFIG_SYNCFS=y" >> $config_host_mak fi +if test "$kcov" = "yes" ; then + echo "CONFIG_KCOV=y" >> $config_host_mak +fi if test "$inotify" = "yes" ; then echo "CONFIG_INOTIFY=y" >> $config_host_mak fi diff --git a/docs/index.html.in b/docs/index.html.in index 94eb782cf7..8512933d14 100644 --- a/docs/index.html.in +++ b/docs/index.html.in @@ -12,6 +12,7 @@ <li><a href="qemu-ga-ref.html">Guest Agent Protocol Reference</a></li> <li><a href="interop/index.html">System Emulation Management and Interoperability Guide</a></li> <li><a href="specs/index.html">System Emulation Guest Hardware Specifications</a></li> + <li><a href="system/index.html">System Emulation User's Guide</a></li> </ul> </body> </html> diff --git a/docs/index.rst b/docs/index.rst index baa5791c17..46405d4f07 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -13,4 +13,4 @@ Welcome to QEMU's documentation! interop/index devel/index specs/index - + system/index diff --git a/docs/interop/conf.py b/docs/interop/conf.py index e87b8c22be..40b1ad811d 100644 --- a/docs/interop/conf.py +++ b/docs/interop/conf.py @@ -18,5 +18,7 @@ html_theme_options['description'] = u'System Emulation Management and Interopera # (source start file, name, description, authors, manual section). man_pages = [ ('qemu-ga', 'qemu-ga', u'QEMU Guest Agent', - ['Michael Roth <mdroth@linux.vnet.ibm.com>'], 8) + ['Michael Roth <mdroth@linux.vnet.ibm.com>'], 8), + ('qemu-nbd', 'qemu-nbd', u'QEMU Disk Network Block Device Server', + ['Anthony Liguori <anthony@codemonkey.ws>'], 8) ] diff --git a/docs/interop/index.rst b/docs/interop/index.rst index 049387ac6d..c28f7785a5 100644 --- a/docs/interop/index.rst +++ b/docs/interop/index.rst @@ -18,5 +18,6 @@ Contents: live-block-operations pr-helper qemu-ga + qemu-nbd vhost-user vhost-user-gpu diff --git a/docs/interop/qemu-nbd.rst b/docs/interop/qemu-nbd.rst new file mode 100644 index 0000000000..873bb9e17d --- /dev/null +++ b/docs/interop/qemu-nbd.rst @@ -0,0 +1,263 @@ +QEMU Disk Network Block Device Server +===================================== + +Synopsis +-------- + +**qemu-nbd** [*OPTION*]... *filename* + +**qemu-nbd** -L [*OPTION*]... + +**qemu-nbd** -d *dev* + +Description +----------- + +Export a QEMU disk image using the NBD protocol. + +Other uses: + +- Bind a /dev/nbdX block device to a QEMU server (on Linux). +- As a client to query exports of a remote NBD server. + +Options +------- + +.. program:: qemu-nbd + +*filename* is a disk image filename, or a set of block +driver options if ``--image-opts`` is specified. + +*dev* is an NBD device. + +.. option:: --object type,id=ID,...props... + + Define a new instance of the *type* object class identified by *ID*. + See the :manpage:`qemu(1)` manual page for full details of the properties + supported. The common object types that it makes sense to define are the + ``secret`` object, which is used to supply passwords and/or encryption + keys, and the ``tls-creds`` object, which is used to supply TLS + credentials for the qemu-nbd server or client. + +.. option:: -p, --port=PORT + + TCP port to listen on as a server, or connect to as a client + (default ``10809``). + +.. option:: -o, --offset=OFFSET + + The offset into the image. + +.. option:: -b, --bind=IFACE + + The interface to bind to as a server, or connect to as a client + (default ``0.0.0.0``). + +.. option:: -k, --socket=PATH + + Use a unix socket with path *PATH*. + +.. option:: --image-opts + + Treat *filename* as a set of image options, instead of a plain + filename. If this flag is specified, the ``-f`` flag should + not be used, instead the :option:`format=` option should be set. + +.. option:: -f, --format=FMT + + Force the use of the block driver for format *FMT* instead of + auto-detecting. + +.. option:: -r, --read-only + + Export the disk as read-only. + +.. option:: -P, --partition=NUM + + Deprecated: Only expose MBR partition *NUM*. Understands physical + partitions 1-4 and logical partition 5. New code should instead use + :option:`--image-opts` with the raw driver wrapping a subset of the + original image. + +.. option:: -B, --bitmap=NAME + + If *filename* has a qcow2 persistent bitmap *NAME*, expose + that bitmap via the ``qemu:dirty-bitmap:NAME`` context + accessible through NBD_OPT_SET_META_CONTEXT. + +.. option:: -s, --snapshot + + Use *filename* as an external snapshot, create a temporary + file with ``backing_file=``\ *filename*, redirect the write to + the temporary one. + +.. option:: -l, --load-snapshot=SNAPSHOT_PARAM + + Load an internal snapshot inside *filename* and export it + as an read-only device, SNAPSHOT_PARAM format is + ``snapshot.id=[ID],snapshot.name=[NAME]`` or ``[ID_OR_NAME]`` + +.. option:: --cache=CACHE + + The cache mode to be used with the file. See the documentation of + the emulator's ``-drive cache=...`` option for allowed values. + +.. option:: -n, --nocache + + Equivalent to :option:`--cache=none`. + +.. option:: --aio=AIO + + Set the asynchronous I/O mode between ``threads`` (the default) + and ``native`` (Linux only). + +.. option:: --discard=DISCARD + + Control whether ``discard`` (also known as ``trim`` or ``unmap``) + requests are ignored or passed to the filesystem. *DISCARD* is one of + ``ignore`` (or ``off``), ``unmap`` (or ``on``). The default is + ``ignore``. + +.. option:: --detect-zeroes=DETECT_ZEROES + + Control the automatic conversion of plain zero writes by the OS to + driver-specific optimized zero write commands. *DETECT_ZEROES* is one of + ``off``, ``on``, or ``unmap``. ``unmap`` + converts a zero write to an unmap operation and can only be used if + *DISCARD* is set to ``unmap``. The default is ``off``. + +.. option:: -c, --connect=DEV + + Connect *filename* to NBD device *DEV* (Linux only). + +.. option:: -d, --disconnect + + Disconnect the device *DEV* (Linux only). + +.. option:: -e, --shared=NUM + + Allow up to *NUM* clients to share the device (default + ``1``). Safe for readers, but for now, consistency is not + guaranteed between multiple writers. + +.. option:: -t, --persistent + + Don't exit on the last connection. + +.. option:: -x, --export-name=NAME + + Set the NBD volume export name (default of a zero-length string). + +.. option:: -D, --description=DESCRIPTION + + Set the NBD volume export description, as a human-readable + string. + +.. option:: -L, --list + + Connect as a client and list all details about the exports exposed by + a remote NBD server. This enables list mode, and is incompatible + with options that change behavior related to a specific export (such as + :option:`--export-name`, :option:`--offset`, ...). + +.. option:: --tls-creds=ID + + Enable mandatory TLS encryption for the server by setting the ID + of the TLS credentials object previously created with the --object + option; or provide the credentials needed for connecting as a client + in list mode. + +.. option:: --fork + + Fork off the server process and exit the parent once the server is running. + +.. option:: --pid-file=PATH + + Store the server's process ID in the given file. + +.. option:: --tls-authz=ID + + Specify the ID of a qauthz object previously created with the + :option:`--object` option. This will be used to authorize connecting users + against their x509 distinguished name. + +.. option:: -v, --verbose + + Display extra debugging information. + +.. option:: -h, --help + + Display this help and exit. + +.. option:: -V, --version + + Display version information and exit. + +.. option:: -T, --trace [[enable=]PATTERN][,events=FILE][,file=FILE] + + .. include:: qemu-option-trace.rst.inc + +Examples +-------- + +Start a server listening on port 10809 that exposes only the +guest-visible contents of a qcow2 file, with no TLS encryption, and +with the default export name (an empty string). The command is +one-shot, and will block until the first successful client +disconnects: + +:: + + qemu-nbd -f qcow2 file.qcow2 + +Start a long-running server listening with encryption on port 10810, +and whitelist clients with a specific X.509 certificate to connect to +a 1 megabyte subset of a raw file, using the export name 'subset': + +:: + + qemu-nbd \ + --object tls-creds-x509,id=tls0,endpoint=server,dir=/path/to/qemutls \ + --object 'authz-simple,id=auth0,identity=CN=laptop.example.com,,\ + O=Example Org,,L=London,,ST=London,,C=GB' \ + --tls-creds tls0 --tls-authz auth0 \ + -t -x subset -p 10810 \ + --image-opts driver=raw,offset=1M,size=1M,file.driver=file,file.filename=file.raw + +Serve a read-only copy of just the first MBR partition of a guest +image over a Unix socket with as many as 5 simultaneous readers, with +a persistent process forked as a daemon: + +:: + + qemu-nbd --fork --persistent --shared=5 --socket=/path/to/sock \ + --partition=1 --read-only --format=qcow2 file.qcow2 + +Expose the guest-visible contents of a qcow2 file via a block device +/dev/nbd0 (and possibly creating /dev/nbd0p1 and friends for +partitions found within), then disconnect the device when done. +Access to bind qemu-nbd to an /dev/nbd device generally requires root +privileges, and may also require the execution of ``modprobe nbd`` +to enable the kernel NBD client module. *CAUTION*: Do not use +this method to mount filesystems from an untrusted guest image - a +malicious guest may have prepared the image to attempt to trigger +kernel bugs in partition probing or file system mounting. + +:: + + qemu-nbd -c /dev/nbd0 -f qcow2 file.qcow2 + qemu-nbd -d /dev/nbd0 + +Query a remote server to see details about what export(s) it is +serving on port 10809, and authenticating via PSK: + +:: + + qemu-nbd \ + --object tls-creds-psk,id=tls0,dir=/tmp/keys,username=eblake,endpoint=client \ + --tls-creds tls0 -L -b remote.example.com + +See also +-------- + +:manpage:`qemu(1)`, :manpage:`qemu-img(1)` diff --git a/docs/interop/qemu-option-trace.rst.inc b/docs/interop/qemu-option-trace.rst.inc new file mode 100644 index 0000000000..23cfcb4853 --- /dev/null +++ b/docs/interop/qemu-option-trace.rst.inc @@ -0,0 +1,30 @@ +.. + The contents of this file must be kept in sync with qemu-option-trace.texi + until all the users of the texi file have been converted to rst and + the texi file can be removed. + +Specify tracing options. + +.. option:: [enable=]PATTERN + + Immediately enable events matching *PATTERN* + (either event name or a globbing pattern). This option is only + available if QEMU has been compiled with the ``simple``, ``log`` + or ``ftrace`` tracing backend. To specify multiple events or patterns, + specify the :option:`-trace` option multiple times. + + Use :option:`-trace help` to print a list of names of trace points. + +.. option:: events=FILE + + Immediately enable events listed in *FILE*. + The file must contain one event name (as listed in the ``trace-events-all`` + file) per line; globbing patterns are accepted too. This option is only + available if QEMU has been compiled with the ``simple``, ``log`` or + ``ftrace`` tracing backend. + +.. option:: file=FILE + + Log output traces to *FILE*. + This option is only available if QEMU has been compiled with + the ``simple`` tracing backend. diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi deleted file mode 100644 index 2c7ea49c32..0000000000 --- a/docs/qemu-block-drivers.texi +++ /dev/null @@ -1,889 +0,0 @@ -@c man begin SYNOPSIS -QEMU block driver reference manual -@c man end - -@set qemu_system qemu-system-x86_64 - -@c man begin DESCRIPTION - -@node disk_images_formats -@subsection Disk image file formats - -QEMU supports many image file formats that can be used with VMs as well as with -any of the tools (like @code{qemu-img}). This includes the preferred formats -raw and qcow2 as well as formats that are supported for compatibility with -older QEMU versions or other hypervisors. - -Depending on the image format, different options can be passed to -@code{qemu-img create} and @code{qemu-img convert} using the @code{-o} option. -This section describes each format and the options that are supported for it. - -@table @option -@item raw - -Raw disk image format. This format has the advantage of -being simple and easily exportable to all other emulators. If your -file system supports @emph{holes} (for example in ext2 or ext3 on -Linux or NTFS on Windows), then only the written sectors will reserve -space. Use @code{qemu-img info} to know the real size used by the -image or @code{ls -ls} on Unix/Linux. - -Supported options: -@table @code -@item preallocation -Preallocation mode (allowed values: @code{off}, @code{falloc}, @code{full}). -@code{falloc} mode preallocates space for image by calling posix_fallocate(). -@code{full} mode preallocates space for image by writing data to underlying -storage. This data may or may not be zero, depending on the storage location. -@end table - -@item qcow2 -QEMU image format, the most versatile format. Use it to have smaller -images (useful if your filesystem does not supports holes, for example -on Windows), zlib based compression and support of multiple VM -snapshots. - -Supported options: -@table @code -@item compat -Determines the qcow2 version to use. @code{compat=0.10} uses the -traditional image format that can be read by any QEMU since 0.10. -@code{compat=1.1} enables image format extensions that only QEMU 1.1 and -newer understand (this is the default). Amongst others, this includes -zero clusters, which allow efficient copy-on-read for sparse images. - -@item backing_file -File name of a base image (see @option{create} subcommand) -@item backing_fmt -Image format of the base image -@item encryption -This option is deprecated and equivalent to @code{encrypt.format=aes} - -@item encrypt.format - -If this is set to @code{luks}, it requests that the qcow2 payload (not -qcow2 header) be encrypted using the LUKS format. The passphrase to -use to unlock the LUKS key slot is given by the @code{encrypt.key-secret} -parameter. LUKS encryption parameters can be tuned with the other -@code{encrypt.*} parameters. - -If this is set to @code{aes}, the image is encrypted with 128-bit AES-CBC. -The encryption key is given by the @code{encrypt.key-secret} parameter. -This encryption format is considered to be flawed by modern cryptography -standards, suffering from a number of design problems: - -@itemize @minus -@item The AES-CBC cipher is used with predictable initialization vectors based -on the sector number. This makes it vulnerable to chosen plaintext attacks -which can reveal the existence of encrypted data. -@item The user passphrase is directly used as the encryption key. A poorly -chosen or short passphrase will compromise the security of the encryption. -@item In the event of the passphrase being compromised there is no way to -change the passphrase to protect data in any qcow images. The files must -be cloned, using a different encryption passphrase in the new file. The -original file must then be securely erased using a program like shred, -though even this is ineffective with many modern storage technologies. -@end itemize - -The use of this is no longer supported in system emulators. Support only -remains in the command line utilities, for the purposes of data liberation -and interoperability with old versions of QEMU. The @code{luks} format -should be used instead. - -@item encrypt.key-secret - -Provides the ID of a @code{secret} object that contains the passphrase -(@code{encrypt.format=luks}) or encryption key (@code{encrypt.format=aes}). - -@item encrypt.cipher-alg - -Name of the cipher algorithm and key length. Currently defaults -to @code{aes-256}. Only used when @code{encrypt.format=luks}. - -@item encrypt.cipher-mode - -Name of the encryption mode to use. Currently defaults to @code{xts}. -Only used when @code{encrypt.format=luks}. - -@item encrypt.ivgen-alg - -Name of the initialization vector generator algorithm. Currently defaults -to @code{plain64}. Only used when @code{encrypt.format=luks}. - -@item encrypt.ivgen-hash-alg - -Name of the hash algorithm to use with the initialization vector generator -(if required). Defaults to @code{sha256}. Only used when @code{encrypt.format=luks}. - -@item encrypt.hash-alg - -Name of the hash algorithm to use for PBKDF algorithm -Defaults to @code{sha256}. Only used when @code{encrypt.format=luks}. - -@item encrypt.iter-time - -Amount of time, in milliseconds, to use for PBKDF algorithm per key slot. -Defaults to @code{2000}. Only used when @code{encrypt.format=luks}. - -@item cluster_size -Changes the qcow2 cluster size (must be between 512 and 2M). Smaller cluster -sizes can improve the image file size whereas larger cluster sizes generally -provide better performance. - -@item preallocation -Preallocation mode (allowed values: @code{off}, @code{metadata}, @code{falloc}, -@code{full}). An image with preallocated metadata is initially larger but can -improve performance when the image needs to grow. @code{falloc} and @code{full} -preallocations are like the same options of @code{raw} format, but sets up -metadata also. - -@item lazy_refcounts -If this option is set to @code{on}, reference count updates are postponed with -the goal of avoiding metadata I/O and improving performance. This is -particularly interesting with @option{cache=writethrough} which doesn't batch -metadata updates. The tradeoff is that after a host crash, the reference count -tables must be rebuilt, i.e. on the next open an (automatic) @code{qemu-img -check -r all} is required, which may take some time. - -This option can only be enabled if @code{compat=1.1} is specified. - -@item nocow -If this option is set to @code{on}, it will turn off COW of the file. It's only -valid on btrfs, no effect on other file systems. - -Btrfs has low performance when hosting a VM image file, even more when the guest -on the VM also using btrfs as file system. Turning off COW is a way to mitigate -this bad performance. Generally there are two ways to turn off COW on btrfs: -a) Disable it by mounting with nodatacow, then all newly created files will be -NOCOW. b) For an empty file, add the NOCOW file attribute. That's what this option -does. - -Note: this option is only valid to new or empty files. If there is an existing -file which is COW and has data blocks already, it couldn't be changed to NOCOW -by setting @code{nocow=on}. One can issue @code{lsattr filename} to check if -the NOCOW flag is set or not (Capital 'C' is NOCOW flag). - -@end table - -@item qed -Old QEMU image format with support for backing files and compact image files -(when your filesystem or transport medium does not support holes). - -When converting QED images to qcow2, you might want to consider using the -@code{lazy_refcounts=on} option to get a more QED-like behaviour. - -Supported options: -@table @code -@item backing_file -File name of a base image (see @option{create} subcommand). -@item backing_fmt -Image file format of backing file (optional). Useful if the format cannot be -autodetected because it has no header, like some vhd/vpc files. -@item cluster_size -Changes the cluster size (must be power-of-2 between 4K and 64K). Smaller -cluster sizes can improve the image file size whereas larger cluster sizes -generally provide better performance. -@item table_size -Changes the number of clusters per L1/L2 table (must be power-of-2 between 1 -and 16). There is normally no need to change this value but this option can be -used for performance benchmarking. -@end table - -@item qcow -Old QEMU image format with support for backing files, compact image files, -encryption and compression. - -Supported options: -@table @code -@item backing_file -File name of a base image (see @option{create} subcommand) -@item encryption -This option is deprecated and equivalent to @code{encrypt.format=aes} - -@item encrypt.format -If this is set to @code{aes}, the image is encrypted with 128-bit AES-CBC. -The encryption key is given by the @code{encrypt.key-secret} parameter. -This encryption format is considered to be flawed by modern cryptography -standards, suffering from a number of design problems enumerated previously -against the @code{qcow2} image format. - -The use of this is no longer supported in system emulators. Support only -remains in the command line utilities, for the purposes of data liberation -and interoperability with old versions of QEMU. - -Users requiring native encryption should use the @code{qcow2} format -instead with @code{encrypt.format=luks}. - -@item encrypt.key-secret - -Provides the ID of a @code{secret} object that contains the encryption -key (@code{encrypt.format=aes}). - -@end table - -@item luks - -LUKS v1 encryption format, compatible with Linux dm-crypt/cryptsetup - -Supported options: -@table @code - -@item key-secret - -Provides the ID of a @code{secret} object that contains the passphrase. - -@item cipher-alg - -Name of the cipher algorithm and key length. Currently defaults -to @code{aes-256}. - -@item cipher-mode - -Name of the encryption mode to use. Currently defaults to @code{xts}. - -@item ivgen-alg - -Name of the initialization vector generator algorithm. Currently defaults -to @code{plain64}. - -@item ivgen-hash-alg - -Name of the hash algorithm to use with the initialization vector generator -(if required). Defaults to @code{sha256}. - -@item hash-alg - -Name of the hash algorithm to use for PBKDF algorithm -Defaults to @code{sha256}. - -@item iter-time - -Amount of time, in milliseconds, to use for PBKDF algorithm per key slot. -Defaults to @code{2000}. - -@end table - -@item vdi -VirtualBox 1.1 compatible image format. -Supported options: -@table @code -@item static -If this option is set to @code{on}, the image is created with metadata -preallocation. -@end table - -@item vmdk -VMware 3 and 4 compatible image format. - -Supported options: -@table @code -@item backing_file -File name of a base image (see @option{create} subcommand). -@item compat6 -Create a VMDK version 6 image (instead of version 4) -@item hwversion -Specify vmdk virtual hardware version. Compat6 flag cannot be enabled -if hwversion is specified. -@item subformat -Specifies which VMDK subformat to use. Valid options are -@code{monolithicSparse} (default), -@code{monolithicFlat}, -@code{twoGbMaxExtentSparse}, -@code{twoGbMaxExtentFlat} and -@code{streamOptimized}. -@end table - -@item vpc -VirtualPC compatible image format (VHD). -Supported options: -@table @code -@item subformat -Specifies which VHD subformat to use. Valid options are -@code{dynamic} (default) and @code{fixed}. -@end table - -@item VHDX -Hyper-V compatible image format (VHDX). -Supported options: -@table @code -@item subformat -Specifies which VHDX subformat to use. Valid options are -@code{dynamic} (default) and @code{fixed}. -@item block_state_zero -Force use of payload blocks of type 'ZERO'. Can be set to @code{on} (default) -or @code{off}. When set to @code{off}, new blocks will be created as -@code{PAYLOAD_BLOCK_NOT_PRESENT}, which means parsers are free to return -arbitrary data for those blocks. Do not set to @code{off} when using -@code{qemu-img convert} with @code{subformat=dynamic}. -@item block_size -Block size; min 1 MB, max 256 MB. 0 means auto-calculate based on image size. -@item log_size -Log size; min 1 MB. -@end table -@end table - -@subsubsection Read-only formats -More disk image file formats are supported in a read-only mode. -@table @option -@item bochs -Bochs images of @code{growing} type. -@item cloop -Linux Compressed Loop image, useful only to reuse directly compressed -CD-ROM images present for example in the Knoppix CD-ROMs. -@item dmg -Apple disk image. -@item parallels -Parallels disk image format. -@end table - - -@node host_drives -@subsection Using host drives - -In addition to disk image files, QEMU can directly access host -devices. We describe here the usage for QEMU version >= 0.8.3. - -@subsubsection Linux - -On Linux, you can directly use the host device filename instead of a -disk image filename provided you have enough privileges to access -it. For example, use @file{/dev/cdrom} to access to the CDROM. - -@table @code -@item CD -You can specify a CDROM device even if no CDROM is loaded. QEMU has -specific code to detect CDROM insertion or removal. CDROM ejection by -the guest OS is supported. Currently only data CDs are supported. -@item Floppy -You can specify a floppy device even if no floppy is loaded. Floppy -removal is currently not detected accurately (if you change floppy -without doing floppy access while the floppy is not loaded, the guest -OS will think that the same floppy is loaded). -Use of the host's floppy device is deprecated, and support for it will -be removed in a future release. -@item Hard disks -Hard disks can be used. Normally you must specify the whole disk -(@file{/dev/hdb} instead of @file{/dev/hdb1}) so that the guest OS can -see it as a partitioned disk. WARNING: unless you know what you do, it -is better to only make READ-ONLY accesses to the hard disk otherwise -you may corrupt your host data (use the @option{-snapshot} command -line option or modify the device permissions accordingly). -@end table - -@subsubsection Windows - -@table @code -@item CD -The preferred syntax is the drive letter (e.g. @file{d:}). The -alternate syntax @file{\\.\d:} is supported. @file{/dev/cdrom} is -supported as an alias to the first CDROM drive. - -Currently there is no specific code to handle removable media, so it -is better to use the @code{change} or @code{eject} monitor commands to -change or eject media. -@item Hard disks -Hard disks can be used with the syntax: @file{\\.\PhysicalDrive@var{N}} -where @var{N} is the drive number (0 is the first hard disk). - -WARNING: unless you know what you do, it is better to only make -READ-ONLY accesses to the hard disk otherwise you may corrupt your -host data (use the @option{-snapshot} command line so that the -modifications are written in a temporary file). -@end table - - -@subsubsection Mac OS X - -@file{/dev/cdrom} is an alias to the first CDROM. - -Currently there is no specific code to handle removable media, so it -is better to use the @code{change} or @code{eject} monitor commands to -change or eject media. - -@node disk_images_fat_images -@subsection Virtual FAT disk images - -QEMU can automatically create a virtual FAT disk image from a -directory tree. In order to use it, just type: - -@example -@value{qemu_system} linux.img -hdb fat:/my_directory -@end example - -Then you access access to all the files in the @file{/my_directory} -directory without having to copy them in a disk image or to export -them via SAMBA or NFS. The default access is @emph{read-only}. - -Floppies can be emulated with the @code{:floppy:} option: - -@example -@value{qemu_system} linux.img -fda fat:floppy:/my_directory -@end example - -A read/write support is available for testing (beta stage) with the -@code{:rw:} option: - -@example -@value{qemu_system} linux.img -fda fat:floppy:rw:/my_directory -@end example - -What you should @emph{never} do: -@itemize -@item use non-ASCII filenames ; -@item use "-snapshot" together with ":rw:" ; -@item expect it to work when loadvm'ing ; -@item write to the FAT directory on the host system while accessing it with the guest system. -@end itemize - -@node disk_images_nbd -@subsection NBD access - -QEMU can access directly to block device exported using the Network Block Device -protocol. - -@example -@value{qemu_system} linux.img -hdb nbd://my_nbd_server.mydomain.org:1024/ -@end example - -If the NBD server is located on the same host, you can use an unix socket instead -of an inet socket: - -@example -@value{qemu_system} linux.img -hdb nbd+unix://?socket=/tmp/my_socket -@end example - -In this case, the block device must be exported using qemu-nbd: - -@example -qemu-nbd --socket=/tmp/my_socket my_disk.qcow2 -@end example - -The use of qemu-nbd allows sharing of a disk between several guests: -@example -qemu-nbd --socket=/tmp/my_socket --share=2 my_disk.qcow2 -@end example - -@noindent -and then you can use it with two guests: -@example -@value{qemu_system} linux1.img -hdb nbd+unix://?socket=/tmp/my_socket -@value{qemu_system} linux2.img -hdb nbd+unix://?socket=/tmp/my_socket -@end example - -If the nbd-server uses named exports (supported since NBD 2.9.18, or with QEMU's -own embedded NBD server), you must specify an export name in the URI: -@example -@value{qemu_system} -cdrom nbd://localhost/debian-500-ppc-netinst -@value{qemu_system} -cdrom nbd://localhost/openSUSE-11.1-ppc-netinst -@end example - -The URI syntax for NBD is supported since QEMU 1.3. An alternative syntax is -also available. Here are some example of the older syntax: -@example -@value{qemu_system} linux.img -hdb nbd:my_nbd_server.mydomain.org:1024 -@value{qemu_system} linux2.img -hdb nbd:unix:/tmp/my_socket -@value{qemu_system} -cdrom nbd:localhost:10809:exportname=debian-500-ppc-netinst -@end example - -@node disk_images_sheepdog -@subsection Sheepdog disk images - -Sheepdog is a distributed storage system for QEMU. It provides highly -available block level storage volumes that can be attached to -QEMU-based virtual machines. - -You can create a Sheepdog disk image with the command: -@example -qemu-img create sheepdog:///@var{image} @var{size} -@end example -where @var{image} is the Sheepdog image name and @var{size} is its -size. - -To import the existing @var{filename} to Sheepdog, you can use a -convert command. -@example -qemu-img convert @var{filename} sheepdog:///@var{image} -@end example - -You can boot from the Sheepdog disk image with the command: -@example -@value{qemu_system} sheepdog:///@var{image} -@end example - -You can also create a snapshot of the Sheepdog image like qcow2. -@example -qemu-img snapshot -c @var{tag} sheepdog:///@var{image} -@end example -where @var{tag} is a tag name of the newly created snapshot. - -To boot from the Sheepdog snapshot, specify the tag name of the -snapshot. -@example -@value{qemu_system} sheepdog:///@var{image}#@var{tag} -@end example - -You can create a cloned image from the existing snapshot. -@example -qemu-img create -b sheepdog:///@var{base}#@var{tag} sheepdog:///@var{image} -@end example -where @var{base} is an image name of the source snapshot and @var{tag} -is its tag name. - -You can use an unix socket instead of an inet socket: - -@example -@value{qemu_system} sheepdog+unix:///@var{image}?socket=@var{path} -@end example - -If the Sheepdog daemon doesn't run on the local host, you need to -specify one of the Sheepdog servers to connect to. -@example -qemu-img create sheepdog://@var{hostname}:@var{port}/@var{image} @var{size} -@value{qemu_system} sheepdog://@var{hostname}:@var{port}/@var{image} -@end example - -@node disk_images_iscsi -@subsection iSCSI LUNs - -iSCSI is a popular protocol used to access SCSI devices across a computer -network. - -There are two different ways iSCSI devices can be used by QEMU. - -The first method is to mount the iSCSI LUN on the host, and make it appear as -any other ordinary SCSI device on the host and then to access this device as a -/dev/sd device from QEMU. How to do this differs between host OSes. - -The second method involves using the iSCSI initiator that is built into -QEMU. This provides a mechanism that works the same way regardless of which -host OS you are running QEMU on. This section will describe this second method -of using iSCSI together with QEMU. - -In QEMU, iSCSI devices are described using special iSCSI URLs - -@example -URL syntax: -iscsi://[<username>[%<password>]@@]<host>[:<port>]/<target-iqn-name>/<lun> -@end example - -Username and password are optional and only used if your target is set up -using CHAP authentication for access control. -Alternatively the username and password can also be set via environment -variables to have these not show up in the process list - -@example -export LIBISCSI_CHAP_USERNAME=<username> -export LIBISCSI_CHAP_PASSWORD=<password> -iscsi://<host>/<target-iqn-name>/<lun> -@end example - -Various session related parameters can be set via special options, either -in a configuration file provided via '-readconfig' or directly on the -command line. - -If the initiator-name is not specified qemu will use a default name -of 'iqn.2008-11.org.linux-kvm[:<uuid>'] where <uuid> is the UUID of the -virtual machine. If the UUID is not specified qemu will use -'iqn.2008-11.org.linux-kvm[:<name>'] where <name> is the name of the -virtual machine. - -@example -Setting a specific initiator name to use when logging in to the target --iscsi initiator-name=iqn.qemu.test:my-initiator -@end example - -@example -Controlling which type of header digest to negotiate with the target --iscsi header-digest=CRC32C|CRC32C-NONE|NONE-CRC32C|NONE -@end example - -These can also be set via a configuration file -@example -[iscsi] - user = "CHAP username" - password = "CHAP password" - initiator-name = "iqn.qemu.test:my-initiator" - # header digest is one of CRC32C|CRC32C-NONE|NONE-CRC32C|NONE - header-digest = "CRC32C" -@end example - - -Setting the target name allows different options for different targets -@example -[iscsi "iqn.target.name"] - user = "CHAP username" - password = "CHAP password" - initiator-name = "iqn.qemu.test:my-initiator" - # header digest is one of CRC32C|CRC32C-NONE|NONE-CRC32C|NONE - header-digest = "CRC32C" -@end example - - -Howto use a configuration file to set iSCSI configuration options: -@example -cat >iscsi.conf <<EOF -[iscsi] - user = "me" - password = "my password" - initiator-name = "iqn.qemu.test:my-initiator" - header-digest = "CRC32C" -EOF - -@value{qemu_system} -drive file=iscsi://127.0.0.1/iqn.qemu.test/1 \ - -readconfig iscsi.conf -@end example - - -How to set up a simple iSCSI target on loopback and access it via QEMU: -@example -This example shows how to set up an iSCSI target with one CDROM and one DISK -using the Linux STGT software target. This target is available on Red Hat based -systems as the package 'scsi-target-utils'. - -tgtd --iscsi portal=127.0.0.1:3260 -tgtadm --lld iscsi --op new --mode target --tid 1 -T iqn.qemu.test -tgtadm --lld iscsi --mode logicalunit --op new --tid 1 --lun 1 \ - -b /IMAGES/disk.img --device-type=disk -tgtadm --lld iscsi --mode logicalunit --op new --tid 1 --lun 2 \ - -b /IMAGES/cd.iso --device-type=cd -tgtadm --lld iscsi --op bind --mode target --tid 1 -I ALL - -@value{qemu_system} -iscsi initiator-name=iqn.qemu.test:my-initiator \ - -boot d -drive file=iscsi://127.0.0.1/iqn.qemu.test/1 \ - -cdrom iscsi://127.0.0.1/iqn.qemu.test/2 -@end example - -@node disk_images_gluster -@subsection GlusterFS disk images - -GlusterFS is a user space distributed file system. - -You can boot from the GlusterFS disk image with the command: -@example -URI: -@value{qemu_system} -drive file=gluster[+@var{type}]://[@var{host}[:@var{port}]]/@var{volume}/@var{path} - [?socket=...][,file.debug=9][,file.logfile=...] - -JSON: -@value{qemu_system} 'json:@{"driver":"qcow2", - "file":@{"driver":"gluster", - "volume":"testvol","path":"a.img","debug":9,"logfile":"...", - "server":[@{"type":"tcp","host":"...","port":"..."@}, - @{"type":"unix","socket":"..."@}]@}@}' -@end example - -@var{gluster} is the protocol. - -@var{type} specifies the transport type used to connect to gluster -management daemon (glusterd). Valid transport types are -tcp and unix. In the URI form, if a transport type isn't specified, -then tcp type is assumed. - -@var{host} specifies the server where the volume file specification for -the given volume resides. This can be either a hostname or an ipv4 address. -If transport type is unix, then @var{host} field should not be specified. -Instead @var{socket} field needs to be populated with the path to unix domain -socket. - -@var{port} is the port number on which glusterd is listening. This is optional -and if not specified, it defaults to port 24007. If the transport type is unix, -then @var{port} should not be specified. - -@var{volume} is the name of the gluster volume which contains the disk image. - -@var{path} is the path to the actual disk image that resides on gluster volume. - -@var{debug} is the logging level of the gluster protocol driver. Debug levels -are 0-9, with 9 being the most verbose, and 0 representing no debugging output. -The default level is 4. The current logging levels defined in the gluster source -are 0 - None, 1 - Emergency, 2 - Alert, 3 - Critical, 4 - Error, 5 - Warning, -6 - Notice, 7 - Info, 8 - Debug, 9 - Trace - -@var{logfile} is a commandline option to mention log file path which helps in -logging to the specified file and also help in persisting the gfapi logs. The -default is stderr. - - - - -You can create a GlusterFS disk image with the command: -@example -qemu-img create gluster://@var{host}/@var{volume}/@var{path} @var{size} -@end example - -Examples -@example -@value{qemu_system} -drive file=gluster://1.2.3.4/testvol/a.img -@value{qemu_system} -drive file=gluster+tcp://1.2.3.4/testvol/a.img -@value{qemu_system} -drive file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img -@value{qemu_system} -drive file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img -@value{qemu_system} -drive file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img -@value{qemu_system} -drive file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img -@value{qemu_system} -drive file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket -@value{qemu_system} -drive file=gluster+rdma://1.2.3.4:24007/testvol/a.img -@value{qemu_system} -drive file=gluster://1.2.3.4/testvol/a.img,file.debug=9,file.logfile=/var/log/qemu-gluster.log -@value{qemu_system} 'json:@{"driver":"qcow2", - "file":@{"driver":"gluster", - "volume":"testvol","path":"a.img", - "debug":9,"logfile":"/var/log/qemu-gluster.log", - "server":[@{"type":"tcp","host":"1.2.3.4","port":24007@}, - @{"type":"unix","socket":"/var/run/glusterd.socket"@}]@}@}' -@value{qemu_system} -drive driver=qcow2,file.driver=gluster,file.volume=testvol,file.path=/path/a.img, - file.debug=9,file.logfile=/var/log/qemu-gluster.log, - file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007, - file.server.1.type=unix,file.server.1.socket=/var/run/glusterd.socket -@end example - -@node disk_images_ssh -@subsection Secure Shell (ssh) disk images - -You can access disk images located on a remote ssh server -by using the ssh protocol: - -@example -@value{qemu_system} -drive file=ssh://[@var{user}@@]@var{server}[:@var{port}]/@var{path}[?host_key_check=@var{host_key_check}] -@end example - -Alternative syntax using properties: - -@example -@value{qemu_system} -drive file.driver=ssh[,file.user=@var{user}],file.host=@var{server}[,file.port=@var{port}],file.path=@var{path}[,file.host_key_check=@var{host_key_check}] -@end example - -@var{ssh} is the protocol. - -@var{user} is the remote user. If not specified, then the local -username is tried. - -@var{server} specifies the remote ssh server. Any ssh server can be -used, but it must implement the sftp-server protocol. Most Unix/Linux -systems should work without requiring any extra configuration. - -@var{port} is the port number on which sshd is listening. By default -the standard ssh port (22) is used. - -@var{path} is the path to the disk image. - -The optional @var{host_key_check} parameter controls how the remote -host's key is checked. The default is @code{yes} which means to use -the local @file{.ssh/known_hosts} file. Setting this to @code{no} -turns off known-hosts checking. Or you can check that the host key -matches a specific fingerprint: -@code{host_key_check=md5:78:45:8e:14:57:4f:d5:45:83:0a:0e:f3:49:82:c9:c8} -(@code{sha1:} can also be used as a prefix, but note that OpenSSH -tools only use MD5 to print fingerprints). - -Currently authentication must be done using ssh-agent. Other -authentication methods may be supported in future. - -Note: Many ssh servers do not support an @code{fsync}-style operation. -The ssh driver cannot guarantee that disk flush requests are -obeyed, and this causes a risk of disk corruption if the remote -server or network goes down during writes. The driver will -print a warning when @code{fsync} is not supported: - -warning: ssh server @code{ssh.example.com:22} does not support fsync - -With sufficiently new versions of libssh and OpenSSH, @code{fsync} is -supported. - -@node disk_images_nvme -@subsection NVMe disk images - -NVM Express (NVMe) storage controllers can be accessed directly by a userspace -driver in QEMU. This bypasses the host kernel file system and block layers -while retaining QEMU block layer functionalities, such as block jobs, I/O -throttling, image formats, etc. Disk I/O performance is typically higher than -with @code{-drive file=/dev/sda} using either thread pool or linux-aio. - -The controller will be exclusively used by the QEMU process once started. To be -able to share storage between multiple VMs and other applications on the host, -please use the file based protocols. - -Before starting QEMU, bind the host NVMe controller to the host vfio-pci -driver. For example: - -@example -# modprobe vfio-pci -# lspci -n -s 0000:06:0d.0 -06:0d.0 0401: 1102:0002 (rev 08) -# echo 0000:06:0d.0 > /sys/bus/pci/devices/0000:06:0d.0/driver/unbind -# echo 1102 0002 > /sys/bus/pci/drivers/vfio-pci/new_id - -# @value{qemu_system} -drive file=nvme://@var{host}:@var{bus}:@var{slot}.@var{func}/@var{namespace} -@end example - -Alternative syntax using properties: - -@example -@value{qemu_system} -drive file.driver=nvme,file.device=@var{host}:@var{bus}:@var{slot}.@var{func},file.namespace=@var{namespace} -@end example - -@var{host}:@var{bus}:@var{slot}.@var{func} is the NVMe controller's PCI device -address on the host. - -@var{namespace} is the NVMe namespace number, starting from 1. - -@node disk_image_locking -@subsection Disk image file locking - -By default, QEMU tries to protect image files from unexpected concurrent -access, as long as it's supported by the block protocol driver and host -operating system. If multiple QEMU processes (including QEMU emulators and -utilities) try to open the same image with conflicting accessing modes, all but -the first one will get an error. - -This feature is currently supported by the file protocol on Linux with the Open -File Descriptor (OFD) locking API, and can be configured to fall back to POSIX -locking if the POSIX host doesn't support Linux OFD locking. - -To explicitly enable image locking, specify "locking=on" in the file protocol -driver options. If OFD locking is not possible, a warning will be printed and -the POSIX locking API will be used. In this case there is a risk that the lock -will get silently lost when doing hot plugging and block jobs, due to the -shortcomings of the POSIX locking API. - -QEMU transparently handles lock handover during shared storage migration. For -shared virtual disk images between multiple VMs, the "share-rw" device option -should be used. - -By default, the guest has exclusive write access to its disk image. If the -guest can safely share the disk image with other writers the @code{-device -...,share-rw=on} parameter can be used. This is only safe if the guest is -running software, such as a cluster file system, that coordinates disk accesses -to avoid corruption. - -Note that share-rw=on only declares the guest's ability to share the disk. -Some QEMU features, such as image file formats, require exclusive write access -to the disk image and this is unaffected by the share-rw=on option. - -Alternatively, locking can be fully disabled by "locking=off" block device -option. In the command line, the option is usually in the form of -"file.locking=off" as the protocol driver is normally placed as a "file" child -under a format driver. For example: - -@code{-blockdev driver=qcow2,file.filename=/path/to/image,file.locking=off,file.driver=file} - -To check if image locking is active, check the output of the "lslocks" command -on host and see if there are locks held by the QEMU process on the image file. -More than one byte could be locked by the QEMU instance, each byte of which -reflects a particular permission that is acquired or protected by the running -block driver. - -@c man end - -@ignore - -@setfilename qemu-block-drivers -@settitle QEMU block drivers reference - -@c man begin SEEALSO -The HTML documentation of QEMU for more precise information and Linux -user mode emulator invocation. -@c man end - -@c man begin AUTHOR -Fabrice Bellard and the QEMU Project developers -@c man end - -@end ignore diff --git a/docs/specs/acpi_cpu_hotplug.txt b/docs/specs/acpi_cpu_hotplug.txt index ee219c8358..a8ce5e7402 100644 --- a/docs/specs/acpi_cpu_hotplug.txt +++ b/docs/specs/acpi_cpu_hotplug.txt @@ -15,14 +15,14 @@ CPU present bitmap for: PIIX-PM (IO port 0xaf00-0xaf1f, 1-byte access) One bit per CPU. Bit position reflects corresponding CPU APIC ID. Read-only. The first DWORD in bitmap is used in write mode to switch from legacy - to new CPU hotplug interface, write 0 into it to do switch. + to modern CPU hotplug interface, write 0 into it to do switch. --------------------------------------------------------------- QEMU sets corresponding CPU bit on hot-add event and issues SCI with GPE.2 event set. CPU present map is read by ACPI BIOS GPE.2 handler to notify OS about CPU hot-add events. CPU hot-remove isn't supported. ===================================== -ACPI CPU hotplug interface registers: +Modern ACPI CPU hotplug interface registers: ------------------------------------- Register block base address: ICH9-LPC IO port 0x0cd8 @@ -30,9 +30,25 @@ Register block base address: Register block size: ACPI_CPU_HOTPLUG_REG_LEN = 12 +All accesses to registers described below, imply little-endian byte order. + +Reserved resisters behavior: + - write accesses are ignored + - read accesses return all bits set to 0. + +The last stored value in 'CPU selector' must refer to a possible CPU, otherwise + - reads from any register return 0 + - writes to any other register are ignored until valid value is stored into it +On QEMU start, 'CPU selector' is initialized to a valid value, on reset it +keeps the current value. + read access: offset: - [0x0-0x3] reserved + [0x0-0x3] Command data 2: (DWORD access) + if value last stored in 'Command field': + 0: reads as 0x0 + 3: upper 32 bits of architecture specific CPU ID value + other values: reserved [0x4] CPU device status fields: (1 byte access) bits: 0: Device is enabled and may be used by guest @@ -44,15 +60,17 @@ read access: 3-7: reserved and should be ignored by OSPM [0x5-0x7] reserved [0x8] Command data: (DWORD access) - in case of error or unsupported command reads is 0xFFFFFFFF - current 'Command field' value: - 0: returns PXM value corresponding to device + contains 0 unless value last stored in 'Command field' is one of: + 0: contains 'CPU selector' value of a CPU with pending event[s] + 3: lower 32 bits of architecture specific CPU ID value + (in x86 case: APIC ID) write access: offset: [0x0-0x3] CPU selector: (DWORD access) selects active CPU device. All following accesses to other registers will read/store data from/to selected CPU. + Valid values: [0 .. max_cpus) [0x4] CPU device control fields: (1 byte access) bits: 0: reserved, OSPM must clear it before writing to register. @@ -69,9 +87,9 @@ write access: value: 0: selects a CPU device with inserting/removing events and following reads from 'Command data' register return - selected CPU (CPU selector value). If no CPU with events - found, the current CPU selector doesn't change and - corresponding insert/remove event flags are not set. + selected CPU ('CPU selector' value). + If no CPU with events found, the current 'CPU selector' doesn't + change and corresponding insert/remove event flags are not modified. 1: following writes to 'Command data' register set OST event register in QEMU 2: following writes to 'Command data' register set OST status @@ -79,16 +97,53 @@ write access: other values: reserved [0x6-0x7] reserved [0x8] Command data: (DWORD access) - current 'Command field' value: - 0: OSPM reads value of CPU selector + if last stored 'Command field' value: 1: stores value into OST event register 2: stores value into OST status register, triggers ACPI_DEVICE_OST QMP event from QEMU to external applications with current values of OST event and status registers. - other values: reserved + other values: reserved + +Typical usecases: + - (x86) Detecting and enabling modern CPU hotplug interface. + QEMU starts with legacy CPU hotplug interface enabled. Detecting and + switching to modern interface is based on the 2 legacy CPU hotplug features: + 1. Writes into CPU bitmap are ignored. + 2. CPU bitmap always has bit#0 set, corresponding to boot CPU. + + Use following steps to detect and enable modern CPU hotplug interface: + 1. Store 0x0 to the 'CPU selector' register, + attempting to switch to modern mode + 2. Store 0x0 to the 'CPU selector' register, + to ensure valid selector value + 3. Store 0x0 to the 'Command field' register, + 4. Read the 'Command data 2' register. + If read value is 0x0, the modern interface is enabled. + Otherwise legacy or no CPU hotplug interface available + + - Get a cpu with pending event + 1. Store 0x0 to the 'CPU selector' register. + 2. Store 0x0 to the 'Command field' register. + 3. Read the 'CPU device status fields' register. + 4. If both bit#1 and bit#2 are clear in the value read, there is no CPU + with a pending event and selected CPU remains unchanged. + 5. Otherwise, read the 'Command data' register. The value read is the + selector of the CPU with the pending event (which is already + selected). -Selecting CPU device beyond possible range has no effect on platform: - - write accesses to CPU hot-plug registers not documented above are - ignored - - read accesses to CPU hot-plug registers not documented above return - all bits set to 0. + - Enumerate CPUs present/non present CPUs + 01. Set the present CPU count to 0. + 02. Set the iterator to 0. + 03. Store 0x0 to the 'CPU selector' register, to ensure that it's in + a valid state and that access to other registers won't be ignored. + 04. Store 0x0 to the 'Command field' register to make 'Command data' + register return 'CPU selector' value of selected CPU + 05. Read the 'CPU device status fields' register. + 06. If bit#0 is set, increment the present CPU count. + 07. Increment the iterator. + 08. Store the iterator to the 'CPU selector' register. + 09. Read the 'Command data' register. + 10. If the value read is not zero, goto 05. + 11. Otherwise store 0x0 to the 'CPU selector' register, to put it + into a valid state and exit. + The iterator at this point equals "max_cpus". diff --git a/docs/system/conf.py b/docs/system/conf.py new file mode 100644 index 0000000000..7ca115f5e0 --- /dev/null +++ b/docs/system/conf.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +# +# QEMU documentation build configuration file for the 'system' manual. +# +# This includes the top level conf file and then makes any necessary tweaks. +import sys +import os + +qemu_docdir = os.path.abspath("..") +parent_config = os.path.join(qemu_docdir, "conf.py") +exec(compile(open(parent_config, "rb").read(), parent_config, 'exec')) + +# This slightly misuses the 'description', but is the best way to get +# the manual title to appear in the sidebar. +html_theme_options['description'] = u'System Emulation User''s Guide' +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('qemu-block-drivers', 'qemu-block-drivers', + u'QEMU block drivers reference', + ['Fabrice Bellard and the QEMU Project developers'], 7) +] diff --git a/docs/system/index.rst b/docs/system/index.rst new file mode 100644 index 0000000000..f66e6ea585 --- /dev/null +++ b/docs/system/index.rst @@ -0,0 +1,17 @@ +.. This is the top level page for the 'system' manual. + + +QEMU System Emulation User's Guide +================================== + +This manual is the overall guide for users using QEMU +for full system emulation (as opposed to user-mode emulation). +This includes working with hypervisors such as KVM, Xen, Hax +or Hypervisor.Framework. + +Contents: + +.. toctree:: + :maxdepth: 2 + + qemu-block-drivers diff --git a/docs/system/qemu-block-drivers.rst b/docs/system/qemu-block-drivers.rst new file mode 100644 index 0000000000..388adbefbf --- /dev/null +++ b/docs/system/qemu-block-drivers.rst @@ -0,0 +1,985 @@ +QEMU block drivers reference +============================ + +.. |qemu_system| replace:: qemu-system-x86_64 + +.. + We put the 'Synopsis' and 'See also' sections into the manpage, but not + the HTML. This makes the HTML docs read better and means the ToC in + the index has a more useful set of entries. Ideally, the section + headings 'Disk image file formats' would be top-level headings for + the HTML, but sub-headings of the conventional manpage 'Description' + header for the manpage. Unfortunately, due to deficiencies in + the Sphinx 'only' directive, this isn't possible: they must be headers + at the same level as 'Synopsis' and 'See also', otherwise Sphinx's + identification of which header underline style is which gets confused. + +.. only:: man + + Synopsis + -------- + + QEMU block driver reference manual + +Disk image file formats +----------------------- + +QEMU supports many image file formats that can be used with VMs as well as with +any of the tools (like ``qemu-img``). This includes the preferred formats +raw and qcow2 as well as formats that are supported for compatibility with +older QEMU versions or other hypervisors. + +Depending on the image format, different options can be passed to +``qemu-img create`` and ``qemu-img convert`` using the ``-o`` option. +This section describes each format and the options that are supported for it. + +.. program:: image-formats +.. option:: raw + + Raw disk image format. This format has the advantage of + being simple and easily exportable to all other emulators. If your + file system supports *holes* (for example in ext2 or ext3 on + Linux or NTFS on Windows), then only the written sectors will reserve + space. Use ``qemu-img info`` to know the real size used by the + image or ``ls -ls`` on Unix/Linux. + + Supported options: + + .. program:: raw + .. option:: preallocation + + Preallocation mode (allowed values: ``off``, ``falloc``, + ``full``). ``falloc`` mode preallocates space for image by + calling ``posix_fallocate()``. ``full`` mode preallocates space + for image by writing data to underlying storage. This data may or + may not be zero, depending on the storage location. + +.. program:: image-formats +.. option:: qcow2 + + QEMU image format, the most versatile format. Use it to have smaller + images (useful if your filesystem does not supports holes, for example + on Windows), zlib based compression and support of multiple VM + snapshots. + + Supported options: + + .. program:: qcow2 + .. option:: compat + + Determines the qcow2 version to use. ``compat=0.10`` uses the + traditional image format that can be read by any QEMU since 0.10. + ``compat=1.1`` enables image format extensions that only QEMU 1.1 and + newer understand (this is the default). Amongst others, this includes + zero clusters, which allow efficient copy-on-read for sparse images. + + .. option:: backing_file + + File name of a base image (see ``create`` subcommand) + + .. option:: backing_fmt + + Image format of the base image + + .. option:: encryption + + This option is deprecated and equivalent to ``encrypt.format=aes`` + + .. option:: encrypt.format + + If this is set to ``luks``, it requests that the qcow2 payload (not + qcow2 header) be encrypted using the LUKS format. The passphrase to + use to unlock the LUKS key slot is given by the ``encrypt.key-secret`` + parameter. LUKS encryption parameters can be tuned with the other + ``encrypt.*`` parameters. + + If this is set to ``aes``, the image is encrypted with 128-bit AES-CBC. + The encryption key is given by the ``encrypt.key-secret`` parameter. + This encryption format is considered to be flawed by modern cryptography + standards, suffering from a number of design problems: + + - The AES-CBC cipher is used with predictable initialization vectors based + on the sector number. This makes it vulnerable to chosen plaintext attacks + which can reveal the existence of encrypted data. + - The user passphrase is directly used as the encryption key. A poorly + chosen or short passphrase will compromise the security of the encryption. + - In the event of the passphrase being compromised there is no way to + change the passphrase to protect data in any qcow images. The files must + be cloned, using a different encryption passphrase in the new file. The + original file must then be securely erased using a program like shred, + though even this is ineffective with many modern storage technologies. + + The use of this is no longer supported in system emulators. Support only + remains in the command line utilities, for the purposes of data liberation + and interoperability with old versions of QEMU. The ``luks`` format + should be used instead. + + .. option:: encrypt.key-secret + + Provides the ID of a ``secret`` object that contains the passphrase + (``encrypt.format=luks``) or encryption key (``encrypt.format=aes``). + + .. option:: encrypt.cipher-alg + + Name of the cipher algorithm and key length. Currently defaults + to ``aes-256``. Only used when ``encrypt.format=luks``. + + .. option:: encrypt.cipher-mode + + Name of the encryption mode to use. Currently defaults to ``xts``. + Only used when ``encrypt.format=luks``. + + .. option:: encrypt.ivgen-alg + + Name of the initialization vector generator algorithm. Currently defaults + to ``plain64``. Only used when ``encrypt.format=luks``. + + .. option:: encrypt.ivgen-hash-alg + + Name of the hash algorithm to use with the initialization vector generator + (if required). Defaults to ``sha256``. Only used when ``encrypt.format=luks``. + + .. option:: encrypt.hash-alg + + Name of the hash algorithm to use for PBKDF algorithm + Defaults to ``sha256``. Only used when ``encrypt.format=luks``. + + .. option:: encrypt.iter-time + + Amount of time, in milliseconds, to use for PBKDF algorithm per key slot. + Defaults to ``2000``. Only used when ``encrypt.format=luks``. + + .. option:: cluster_size + + Changes the qcow2 cluster size (must be between 512 and 2M). Smaller cluster + sizes can improve the image file size whereas larger cluster sizes generally + provide better performance. + + .. option:: preallocation + + Preallocation mode (allowed values: ``off``, ``metadata``, ``falloc``, + ``full``). An image with preallocated metadata is initially larger but can + improve performance when the image needs to grow. ``falloc`` and ``full`` + preallocations are like the same options of ``raw`` format, but sets up + metadata also. + + .. option:: lazy_refcounts + + If this option is set to ``on``, reference count updates are postponed with + the goal of avoiding metadata I/O and improving performance. This is + particularly interesting with :option:`cache=writethrough` which doesn't batch + metadata updates. The tradeoff is that after a host crash, the reference count + tables must be rebuilt, i.e. on the next open an (automatic) ``qemu-img + check -r all`` is required, which may take some time. + + This option can only be enabled if ``compat=1.1`` is specified. + + .. option:: nocow + + If this option is set to ``on``, it will turn off COW of the file. It's only + valid on btrfs, no effect on other file systems. + + Btrfs has low performance when hosting a VM image file, even more + when the guest on the VM also using btrfs as file system. Turning off + COW is a way to mitigate this bad performance. Generally there are two + ways to turn off COW on btrfs: + + - Disable it by mounting with nodatacow, then all newly created files + will be NOCOW. + - For an empty file, add the NOCOW file attribute. That's what this + option does. + + Note: this option is only valid to new or empty files. If there is + an existing file which is COW and has data blocks already, it couldn't + be changed to NOCOW by setting ``nocow=on``. One can issue ``lsattr + filename`` to check if the NOCOW flag is set or not (Capital 'C' is + NOCOW flag). + +.. program:: image-formats +.. option:: qed + + Old QEMU image format with support for backing files and compact image files + (when your filesystem or transport medium does not support holes). + + When converting QED images to qcow2, you might want to consider using the + ``lazy_refcounts=on`` option to get a more QED-like behaviour. + + Supported options: + + .. program:: qed + .. option:: backing_file + + File name of a base image (see ``create`` subcommand). + + .. option:: backing_fmt + + Image file format of backing file (optional). Useful if the format cannot be + autodetected because it has no header, like some vhd/vpc files. + + .. option:: cluster_size + + Changes the cluster size (must be power-of-2 between 4K and 64K). Smaller + cluster sizes can improve the image file size whereas larger cluster sizes + generally provide better performance. + + .. option:: table_size + + Changes the number of clusters per L1/L2 table (must be + power-of-2 between 1 and 16). There is normally no need to + change this value but this option can between used for + performance benchmarking. + +.. program:: image-formats +.. option:: qcow + + Old QEMU image format with support for backing files, compact image files, + encryption and compression. + + Supported options: + + .. program:: qcow + .. option:: backing_file + + File name of a base image (see ``create`` subcommand) + + .. option:: encryption + + This option is deprecated and equivalent to ``encrypt.format=aes`` + + .. option:: encrypt.format + + If this is set to ``aes``, the image is encrypted with 128-bit AES-CBC. + The encryption key is given by the ``encrypt.key-secret`` parameter. + This encryption format is considered to be flawed by modern cryptography + standards, suffering from a number of design problems enumerated previously + against the ``qcow2`` image format. + + The use of this is no longer supported in system emulators. Support only + remains in the command line utilities, for the purposes of data liberation + and interoperability with old versions of QEMU. + + Users requiring native encryption should use the ``qcow2`` format + instead with ``encrypt.format=luks``. + + .. option:: encrypt.key-secret + + Provides the ID of a ``secret`` object that contains the encryption + key (``encrypt.format=aes``). + +.. program:: image-formats +.. option:: luks + + LUKS v1 encryption format, compatible with Linux dm-crypt/cryptsetup + + Supported options: + + .. program:: luks + .. option:: key-secret + + Provides the ID of a ``secret`` object that contains the passphrase. + + .. option:: cipher-alg + + Name of the cipher algorithm and key length. Currently defaults + to ``aes-256``. + + .. option:: cipher-mode + + Name of the encryption mode to use. Currently defaults to ``xts``. + + .. option:: ivgen-alg + + Name of the initialization vector generator algorithm. Currently defaults + to ``plain64``. + + .. option:: ivgen-hash-alg + + Name of the hash algorithm to use with the initialization vector generator + (if required). Defaults to ``sha256``. + + .. option:: hash-alg + + Name of the hash algorithm to use for PBKDF algorithm + Defaults to ``sha256``. + + .. option:: iter-time + + Amount of time, in milliseconds, to use for PBKDF algorithm per key slot. + Defaults to ``2000``. + +.. program:: image-formats +.. option:: vdi + + VirtualBox 1.1 compatible image format. + + Supported options: + + .. program:: vdi + .. option:: static + + If this option is set to ``on``, the image is created with metadata + preallocation. + +.. program:: image-formats +.. option:: vmdk + + VMware 3 and 4 compatible image format. + + Supported options: + + .. program: vmdk + .. option:: backing_file + + File name of a base image (see ``create`` subcommand). + + .. option:: compat6 + + Create a VMDK version 6 image (instead of version 4) + + .. option:: hwversion + + Specify vmdk virtual hardware version. Compat6 flag cannot be enabled + if hwversion is specified. + + .. option:: subformat + + Specifies which VMDK subformat to use. Valid options are + ``monolithicSparse`` (default), + ``monolithicFlat``, + ``twoGbMaxExtentSparse``, + ``twoGbMaxExtentFlat`` and + ``streamOptimized``. + +.. program:: image-formats +.. option:: vpc + + VirtualPC compatible image format (VHD). + + Supported options: + + .. program:: vpc + .. option:: subformat + + Specifies which VHD subformat to use. Valid options are + ``dynamic`` (default) and ``fixed``. + +.. program:: image-formats +.. option:: VHDX + + Hyper-V compatible image format (VHDX). + + Supported options: + + .. program:: VHDX + .. option:: subformat + + Specifies which VHDX subformat to use. Valid options are + ``dynamic`` (default) and ``fixed``. + + .. option:: block_state_zero + + Force use of payload blocks of type 'ZERO'. Can be set to ``on`` (default) + or ``off``. When set to ``off``, new blocks will be created as + ``PAYLOAD_BLOCK_NOT_PRESENT``, which means parsers are free to return + arbitrary data for those blocks. Do not set to ``off`` when using + ``qemu-img convert`` with ``subformat=dynamic``. + + .. option:: block_size + + Block size; min 1 MB, max 256 MB. 0 means auto-calculate based on + image size. + + .. option:: log_size + + Log size; min 1 MB. + +Read-only formats +----------------- + +More disk image file formats are supported in a read-only mode. + +.. program:: image-formats +.. option:: bochs + + Bochs images of ``growing`` type. + +.. program:: image-formats +.. option:: cloop + + Linux Compressed Loop image, useful only to reuse directly compressed + CD-ROM images present for example in the Knoppix CD-ROMs. + +.. program:: image-formats +.. option:: dmg + + Apple disk image. + +.. program:: image-formats +.. option:: parallels + + Parallels disk image format. + +Using host drives +----------------- + +In addition to disk image files, QEMU can directly access host +devices. We describe here the usage for QEMU version >= 0.8.3. + +Linux +''''' + +On Linux, you can directly use the host device filename instead of a +disk image filename provided you have enough privileges to access +it. For example, use ``/dev/cdrom`` to access to the CDROM. + +CD + You can specify a CDROM device even if no CDROM is loaded. QEMU has + specific code to detect CDROM insertion or removal. CDROM ejection by + the guest OS is supported. Currently only data CDs are supported. + +Floppy + You can specify a floppy device even if no floppy is loaded. Floppy + removal is currently not detected accurately (if you change floppy + without doing floppy access while the floppy is not loaded, the guest + OS will think that the same floppy is loaded). + Use of the host's floppy device is deprecated, and support for it will + be removed in a future release. + +Hard disks + Hard disks can be used. Normally you must specify the whole disk + (``/dev/hdb`` instead of ``/dev/hdb1``) so that the guest OS can + see it as a partitioned disk. WARNING: unless you know what you do, it + is better to only make READ-ONLY accesses to the hard disk otherwise + you may corrupt your host data (use the ``-snapshot`` command + line option or modify the device permissions accordingly). + +Windows +''''''' + +CD + The preferred syntax is the drive letter (e.g. ``d:``). The + alternate syntax ``\\.\d:`` is supported. ``/dev/cdrom`` is + supported as an alias to the first CDROM drive. + + Currently there is no specific code to handle removable media, so it + is better to use the ``change`` or ``eject`` monitor commands to + change or eject media. + +Hard disks + Hard disks can be used with the syntax: ``\\.\PhysicalDriveN`` + where *N* is the drive number (0 is the first hard disk). + + WARNING: unless you know what you do, it is better to only make + READ-ONLY accesses to the hard disk otherwise you may corrupt your + host data (use the ``-snapshot`` command line so that the + modifications are written in a temporary file). + +Mac OS X +'''''''' + +``/dev/cdrom`` is an alias to the first CDROM. + +Currently there is no specific code to handle removable media, so it +is better to use the ``change`` or ``eject`` monitor commands to +change or eject media. + +Virtual FAT disk images +----------------------- + +QEMU can automatically create a virtual FAT disk image from a +directory tree. In order to use it, just type: + +.. parsed-literal:: + + |qemu_system| linux.img -hdb fat:/my_directory + +Then you access access to all the files in the ``/my_directory`` +directory without having to copy them in a disk image or to export +them via SAMBA or NFS. The default access is *read-only*. + +Floppies can be emulated with the ``:floppy:`` option: + +.. parsed-literal:: + + |qemu_system| linux.img -fda fat:floppy:/my_directory + +A read/write support is available for testing (beta stage) with the +``:rw:`` option: + +.. parsed-literal:: + + |qemu_system| linux.img -fda fat:floppy:rw:/my_directory + +What you should *never* do: + +- use non-ASCII filenames +- use "-snapshot" together with ":rw:" +- expect it to work when loadvm'ing +- write to the FAT directory on the host system while accessing it with the guest system + +NBD access +---------- + +QEMU can access directly to block device exported using the Network Block Device +protocol. + +.. parsed-literal:: + + |qemu_system| linux.img -hdb nbd://my_nbd_server.mydomain.org:1024/ + +If the NBD server is located on the same host, you can use an unix socket instead +of an inet socket: + +.. parsed-literal:: + + |qemu_system| linux.img -hdb nbd+unix://?socket=/tmp/my_socket + +In this case, the block device must be exported using qemu-nbd: + +.. parsed-literal:: + + qemu-nbd --socket=/tmp/my_socket my_disk.qcow2 + +The use of qemu-nbd allows sharing of a disk between several guests: + +.. parsed-literal:: + + qemu-nbd --socket=/tmp/my_socket --share=2 my_disk.qcow2 + +and then you can use it with two guests: + +.. parsed-literal:: + + |qemu_system| linux1.img -hdb nbd+unix://?socket=/tmp/my_socket + |qemu_system| linux2.img -hdb nbd+unix://?socket=/tmp/my_socket + +If the nbd-server uses named exports (supported since NBD 2.9.18, or with QEMU's +own embedded NBD server), you must specify an export name in the URI: + +.. parsed-literal:: + + |qemu_system| -cdrom nbd://localhost/debian-500-ppc-netinst + |qemu_system| -cdrom nbd://localhost/openSUSE-11.1-ppc-netinst + +The URI syntax for NBD is supported since QEMU 1.3. An alternative syntax is +also available. Here are some example of the older syntax: + +.. parsed-literal:: + + |qemu_system| linux.img -hdb nbd:my_nbd_server.mydomain.org:1024 + |qemu_system| linux2.img -hdb nbd:unix:/tmp/my_socket + |qemu_system| -cdrom nbd:localhost:10809:exportname=debian-500-ppc-netinst + + + +Sheepdog disk images +-------------------- + +Sheepdog is a distributed storage system for QEMU. It provides highly +available block level storage volumes that can be attached to +QEMU-based virtual machines. + +You can create a Sheepdog disk image with the command: + +.. parsed-literal:: + + qemu-img create sheepdog:///IMAGE SIZE + +where *IMAGE* is the Sheepdog image name and *SIZE* is its +size. + +To import the existing *FILENAME* to Sheepdog, you can use a +convert command. + +.. parsed-literal:: + + qemu-img convert FILENAME sheepdog:///IMAGE + +You can boot from the Sheepdog disk image with the command: + +.. parsed-literal:: + + |qemu_system| sheepdog:///IMAGE + +You can also create a snapshot of the Sheepdog image like qcow2. + +.. parsed-literal:: + + qemu-img snapshot -c TAG sheepdog:///IMAGE + +where *TAG* is a tag name of the newly created snapshot. + +To boot from the Sheepdog snapshot, specify the tag name of the +snapshot. + +.. parsed-literal:: + + |qemu_system| sheepdog:///IMAGE#TAG + +You can create a cloned image from the existing snapshot. + +.. parsed-literal:: + + qemu-img create -b sheepdog:///BASE#TAG sheepdog:///IMAGE + +where *BASE* is an image name of the source snapshot and *TAG* +is its tag name. + +You can use an unix socket instead of an inet socket: + +.. parsed-literal:: + + |qemu_system| sheepdog+unix:///IMAGE?socket=PATH + +If the Sheepdog daemon doesn't run on the local host, you need to +specify one of the Sheepdog servers to connect to. + +.. parsed-literal:: + + qemu-img create sheepdog://HOSTNAME:PORT/IMAGE SIZE + |qemu_system| sheepdog://HOSTNAME:PORT/IMAGE + +iSCSI LUNs +---------- + +iSCSI is a popular protocol used to access SCSI devices across a computer +network. + +There are two different ways iSCSI devices can be used by QEMU. + +The first method is to mount the iSCSI LUN on the host, and make it appear as +any other ordinary SCSI device on the host and then to access this device as a +/dev/sd device from QEMU. How to do this differs between host OSes. + +The second method involves using the iSCSI initiator that is built into +QEMU. This provides a mechanism that works the same way regardless of which +host OS you are running QEMU on. This section will describe this second method +of using iSCSI together with QEMU. + +In QEMU, iSCSI devices are described using special iSCSI URLs. URL syntax: + +:: + + iscsi://[<username>[%<password>]@]<host>[:<port>]/<target-iqn-name>/<lun> + +Username and password are optional and only used if your target is set up +using CHAP authentication for access control. +Alternatively the username and password can also be set via environment +variables to have these not show up in the process list: + +:: + + export LIBISCSI_CHAP_USERNAME=<username> + export LIBISCSI_CHAP_PASSWORD=<password> + iscsi://<host>/<target-iqn-name>/<lun> + +Various session related parameters can be set via special options, either +in a configuration file provided via '-readconfig' or directly on the +command line. + +If the initiator-name is not specified qemu will use a default name +of 'iqn.2008-11.org.linux-kvm[:<uuid>'] where <uuid> is the UUID of the +virtual machine. If the UUID is not specified qemu will use +'iqn.2008-11.org.linux-kvm[:<name>'] where <name> is the name of the +virtual machine. + +Setting a specific initiator name to use when logging in to the target: + +:: + + -iscsi initiator-name=iqn.qemu.test:my-initiator + +Controlling which type of header digest to negotiate with the target: + +:: + + -iscsi header-digest=CRC32C|CRC32C-NONE|NONE-CRC32C|NONE + +These can also be set via a configuration file: + +:: + + [iscsi] + user = "CHAP username" + password = "CHAP password" + initiator-name = "iqn.qemu.test:my-initiator" + # header digest is one of CRC32C|CRC32C-NONE|NONE-CRC32C|NONE + header-digest = "CRC32C" + +Setting the target name allows different options for different targets: + +:: + + [iscsi "iqn.target.name"] + user = "CHAP username" + password = "CHAP password" + initiator-name = "iqn.qemu.test:my-initiator" + # header digest is one of CRC32C|CRC32C-NONE|NONE-CRC32C|NONE + header-digest = "CRC32C" + +How to use a configuration file to set iSCSI configuration options: + +.. parsed-literal:: + + cat >iscsi.conf <<EOF + [iscsi] + user = "me" + password = "my password" + initiator-name = "iqn.qemu.test:my-initiator" + header-digest = "CRC32C" + EOF + + |qemu_system| -drive file=iscsi://127.0.0.1/iqn.qemu.test/1 \\ + -readconfig iscsi.conf + +How to set up a simple iSCSI target on loopback and access it via QEMU: +this example shows how to set up an iSCSI target with one CDROM and one DISK +using the Linux STGT software target. This target is available on Red Hat based +systems as the package 'scsi-target-utils'. + +.. parsed-literal:: + + tgtd --iscsi portal=127.0.0.1:3260 + tgtadm --lld iscsi --op new --mode target --tid 1 -T iqn.qemu.test + tgtadm --lld iscsi --mode logicalunit --op new --tid 1 --lun 1 \\ + -b /IMAGES/disk.img --device-type=disk + tgtadm --lld iscsi --mode logicalunit --op new --tid 1 --lun 2 \\ + -b /IMAGES/cd.iso --device-type=cd + tgtadm --lld iscsi --op bind --mode target --tid 1 -I ALL + + |qemu_system| -iscsi initiator-name=iqn.qemu.test:my-initiator \\ + -boot d -drive file=iscsi://127.0.0.1/iqn.qemu.test/1 \\ + -cdrom iscsi://127.0.0.1/iqn.qemu.test/2 + +GlusterFS disk images +--------------------- + +GlusterFS is a user space distributed file system. + +You can boot from the GlusterFS disk image with the command: + +URI: + +.. parsed-literal:: + + |qemu_system| -drive file=gluster[+TYPE]://[HOST}[:PORT]]/VOLUME/PATH + [?socket=...][,file.debug=9][,file.logfile=...] + +JSON: + +.. parsed-literal:: + + |qemu_system| 'json:{"driver":"qcow2", + "file":{"driver":"gluster", + "volume":"testvol","path":"a.img","debug":9,"logfile":"...", + "server":[{"type":"tcp","host":"...","port":"..."}, + {"type":"unix","socket":"..."}]}}' + +*gluster* is the protocol. + +*TYPE* specifies the transport type used to connect to gluster +management daemon (glusterd). Valid transport types are +tcp and unix. In the URI form, if a transport type isn't specified, +then tcp type is assumed. + +*HOST* specifies the server where the volume file specification for +the given volume resides. This can be either a hostname or an ipv4 address. +If transport type is unix, then *HOST* field should not be specified. +Instead *socket* field needs to be populated with the path to unix domain +socket. + +*PORT* is the port number on which glusterd is listening. This is optional +and if not specified, it defaults to port 24007. If the transport type is unix, +then *PORT* should not be specified. + +*VOLUME* is the name of the gluster volume which contains the disk image. + +*PATH* is the path to the actual disk image that resides on gluster volume. + +*debug* is the logging level of the gluster protocol driver. Debug levels +are 0-9, with 9 being the most verbose, and 0 representing no debugging output. +The default level is 4. The current logging levels defined in the gluster source +are 0 - None, 1 - Emergency, 2 - Alert, 3 - Critical, 4 - Error, 5 - Warning, +6 - Notice, 7 - Info, 8 - Debug, 9 - Trace + +*logfile* is a commandline option to mention log file path which helps in +logging to the specified file and also help in persisting the gfapi logs. The +default is stderr. + +You can create a GlusterFS disk image with the command: + +.. parsed-literal:: + + qemu-img create gluster://HOST/VOLUME/PATH SIZE + +Examples + +.. parsed-literal:: + + |qemu_system| -drive file=gluster://1.2.3.4/testvol/a.img + |qemu_system| -drive file=gluster+tcp://1.2.3.4/testvol/a.img + |qemu_system| -drive file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img + |qemu_system| -drive file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img + |qemu_system| -drive file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img + |qemu_system| -drive file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img + |qemu_system| -drive file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket + |qemu_system| -drive file=gluster+rdma://1.2.3.4:24007/testvol/a.img + |qemu_system| -drive file=gluster://1.2.3.4/testvol/a.img,file.debug=9,file.logfile=/var/log/qemu-gluster.log + |qemu_system| 'json:{"driver":"qcow2", + "file":{"driver":"gluster", + "volume":"testvol","path":"a.img", + "debug":9,"logfile":"/var/log/qemu-gluster.log", + "server":[{"type":"tcp","host":"1.2.3.4","port":24007}, + {"type":"unix","socket":"/var/run/glusterd.socket"}]}}' + |qemu_system| -drive driver=qcow2,file.driver=gluster,file.volume=testvol,file.path=/path/a.img, + file.debug=9,file.logfile=/var/log/qemu-gluster.log, + file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007, + file.server.1.type=unix,file.server.1.socket=/var/run/glusterd.socket + +Secure Shell (ssh) disk images +------------------------------ + +You can access disk images located on a remote ssh server +by using the ssh protocol: + +.. parsed-literal:: + + |qemu_system| -drive file=ssh://[USER@]SERVER[:PORT]/PATH[?host_key_check=HOST_KEY_CHECK] + +Alternative syntax using properties: + +.. parsed-literal:: + + |qemu_system| -drive file.driver=ssh[,file.user=USER],file.host=SERVER[,file.port=PORT],file.path=PATH[,file.host_key_check=HOST_KEY_CHECK] + +*ssh* is the protocol. + +*USER* is the remote user. If not specified, then the local +username is tried. + +*SERVER* specifies the remote ssh server. Any ssh server can be +used, but it must implement the sftp-server protocol. Most Unix/Linux +systems should work without requiring any extra configuration. + +*PORT* is the port number on which sshd is listening. By default +the standard ssh port (22) is used. + +*PATH* is the path to the disk image. + +The optional *HOST_KEY_CHECK* parameter controls how the remote +host's key is checked. The default is ``yes`` which means to use +the local ``.ssh/known_hosts`` file. Setting this to ``no`` +turns off known-hosts checking. Or you can check that the host key +matches a specific fingerprint: +``host_key_check=md5:78:45:8e:14:57:4f:d5:45:83:0a:0e:f3:49:82:c9:c8`` +(``sha1:`` can also be used as a prefix, but note that OpenSSH +tools only use MD5 to print fingerprints). + +Currently authentication must be done using ssh-agent. Other +authentication methods may be supported in future. + +Note: Many ssh servers do not support an ``fsync``-style operation. +The ssh driver cannot guarantee that disk flush requests are +obeyed, and this causes a risk of disk corruption if the remote +server or network goes down during writes. The driver will +print a warning when ``fsync`` is not supported: + +:: + + warning: ssh server ssh.example.com:22 does not support fsync + +With sufficiently new versions of libssh and OpenSSH, ``fsync`` is +supported. + +NVMe disk images +---------------- + +NVM Express (NVMe) storage controllers can be accessed directly by a userspace +driver in QEMU. This bypasses the host kernel file system and block layers +while retaining QEMU block layer functionalities, such as block jobs, I/O +throttling, image formats, etc. Disk I/O performance is typically higher than +with ``-drive file=/dev/sda`` using either thread pool or linux-aio. + +The controller will be exclusively used by the QEMU process once started. To be +able to share storage between multiple VMs and other applications on the host, +please use the file based protocols. + +Before starting QEMU, bind the host NVMe controller to the host vfio-pci +driver. For example: + +.. parsed-literal:: + + # modprobe vfio-pci + # lspci -n -s 0000:06:0d.0 + 06:0d.0 0401: 1102:0002 (rev 08) + # echo 0000:06:0d.0 > /sys/bus/pci/devices/0000:06:0d.0/driver/unbind + # echo 1102 0002 > /sys/bus/pci/drivers/vfio-pci/new_id + + # |qemu_system| -drive file=nvme://HOST:BUS:SLOT.FUNC/NAMESPACE + +Alternative syntax using properties: + +.. parsed-literal:: + + |qemu_system| -drive file.driver=nvme,file.device=HOST:BUS:SLOT.FUNC,file.namespace=NAMESPACE + +*HOST*:*BUS*:*SLOT*.\ *FUNC* is the NVMe controller's PCI device +address on the host. + +*NAMESPACE* is the NVMe namespace number, starting from 1. + +Disk image file locking +----------------------- + +By default, QEMU tries to protect image files from unexpected concurrent +access, as long as it's supported by the block protocol driver and host +operating system. If multiple QEMU processes (including QEMU emulators and +utilities) try to open the same image with conflicting accessing modes, all but +the first one will get an error. + +This feature is currently supported by the file protocol on Linux with the Open +File Descriptor (OFD) locking API, and can be configured to fall back to POSIX +locking if the POSIX host doesn't support Linux OFD locking. + +To explicitly enable image locking, specify "locking=on" in the file protocol +driver options. If OFD locking is not possible, a warning will be printed and +the POSIX locking API will be used. In this case there is a risk that the lock +will get silently lost when doing hot plugging and block jobs, due to the +shortcomings of the POSIX locking API. + +QEMU transparently handles lock handover during shared storage migration. For +shared virtual disk images between multiple VMs, the "share-rw" device option +should be used. + +By default, the guest has exclusive write access to its disk image. If the +guest can safely share the disk image with other writers the +``-device ...,share-rw=on`` parameter can be used. This is only safe if +the guest is running software, such as a cluster file system, that +coordinates disk accesses to avoid corruption. + +Note that share-rw=on only declares the guest's ability to share the disk. +Some QEMU features, such as image file formats, require exclusive write access +to the disk image and this is unaffected by the share-rw=on option. + +Alternatively, locking can be fully disabled by "locking=off" block device +option. In the command line, the option is usually in the form of +"file.locking=off" as the protocol driver is normally placed as a "file" child +under a format driver. For example: + +:: + + -blockdev driver=qcow2,file.filename=/path/to/image,file.locking=off,file.driver=file + +To check if image locking is active, check the output of the "lslocks" command +on host and see if there are locks held by the QEMU process on the image file. +More than one byte could be locked by the QEMU instance, each byte of which +reflects a particular permission that is acquired or protected by the running +block driver. + +.. only:: man + + See also + -------- + + The HTML documentation of QEMU for more precise information and Linux + user mode emulator invocation. diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c index 0d4de49dcf..aa1ab2590d 100644 --- a/fsdev/virtfs-proxy-helper.c +++ b/fsdev/virtfs-proxy-helper.c @@ -287,8 +287,7 @@ static int setugid(int uid, int gid, int *suid, int *sgid) *sgid = getegid(); if (setresgid(-1, gid, *sgid) == -1) { - retval = -errno; - goto err_out; + return -errno; } if (setresuid(-1, uid, *suid) == -1) { @@ -322,7 +321,6 @@ err_sgid: if (setresgid(-1, *sgid, *sgid) == -1) { abort(); } -err_out: return retval; } diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index ca641390fb..54e012e5b4 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -947,7 +947,7 @@ static int local_link(FsContext *ctx, V9fsPath *oldpath, if (ctx->export_flags & V9FS_SM_MAPPED_FILE && local_is_mapped_file_metadata(ctx, name)) { errno = EINVAL; - return -1; + goto out; } odirfd = local_opendir_nofollow(ctx, odirpath); @@ -1076,7 +1076,7 @@ out: static int local_unlinkat_common(FsContext *ctx, int dirfd, const char *name, int flags) { - int ret = -1; + int ret; if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { int map_dirfd; @@ -1094,12 +1094,12 @@ static int local_unlinkat_common(FsContext *ctx, int dirfd, const char *name, fd = openat_dir(dirfd, name); if (fd == -1) { - goto err_out; + return -1; } ret = unlinkat(fd, VIRTFS_META_DIR, AT_REMOVEDIR); close_preserve_errno(fd); if (ret < 0 && errno != ENOENT) { - goto err_out; + return -1; } } map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); @@ -1107,16 +1107,14 @@ static int local_unlinkat_common(FsContext *ctx, int dirfd, const char *name, ret = unlinkat(map_dirfd, name, 0); close_preserve_errno(map_dirfd); if (ret < 0 && errno != ENOENT) { - goto err_out; + return -1; } } else if (errno != ENOENT) { - goto err_out; + return -1; } } - ret = unlinkat(dirfd, name, flags); -err_out: - return ret; + return unlinkat(dirfd, name, flags); } static int local_remove(FsContext *ctx, const char *path) diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 520177f40c..b0e445d6bd 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -2090,22 +2090,29 @@ out_nofid: * with qemu_iovec_destroy(). */ static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu, - size_t skip, size_t size, + size_t skip, size_t *size, bool is_write) { QEMUIOVector elem; struct iovec *iov; unsigned int niov; + size_t alloc_size = *size + skip; if (is_write) { - pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, size + skip); + pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, alloc_size); } else { - pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, size + skip); + pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, &alloc_size); + } + + if (alloc_size < skip) { + *size = 0; + } else { + *size = alloc_size - skip; } qemu_iovec_init_external(&elem, iov, niov); qemu_iovec_init(qiov, niov); - qemu_iovec_concat(qiov, &elem, skip, size); + qemu_iovec_concat(qiov, &elem, skip, *size); } static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp, @@ -2113,15 +2120,14 @@ static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp, { ssize_t err; size_t offset = 7; - uint64_t read_count; + size_t read_count; QEMUIOVector qiov_full; if (fidp->fs.xattr.len < off) { read_count = 0; - } else { + } else if (fidp->fs.xattr.len - off < max_count) { read_count = fidp->fs.xattr.len - off; - } - if (read_count > max_count) { + } else { read_count = max_count; } err = pdu_marshal(pdu, offset, "d", read_count); @@ -2130,7 +2136,7 @@ static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp, } offset += err; - v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, read_count, false); + v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, &read_count, false); err = v9fs_pack(qiov_full.iov, qiov_full.niov, 0, ((char *)fidp->fs.xattr.value) + off, read_count); @@ -2259,9 +2265,11 @@ static void coroutine_fn v9fs_read(void *opaque) QEMUIOVector qiov_full; QEMUIOVector qiov; int32_t len; + size_t size = max_count; - v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false); + v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, &size, false); qemu_iovec_init(&qiov, qiov_full.niov); + max_count = size; do { qemu_iovec_reset(&qiov); qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count); @@ -2464,8 +2472,7 @@ static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp, if (fidp->fs.xattr.len < off) { - err = -ENOSPC; - goto out; + return -ENOSPC; } write_count = fidp->fs.xattr.len - off; if (write_count > count) { @@ -2491,7 +2498,7 @@ static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp, off += to_copy; write_count -= to_copy; } -out: + return err; } @@ -2504,6 +2511,7 @@ static void coroutine_fn v9fs_write(void *opaque) int32_t len = 0; int32_t total = 0; size_t offset = 7; + size_t size; V9fsFidState *fidp; V9fsPDU *pdu = opaque; V9fsState *s = pdu->s; @@ -2516,7 +2524,9 @@ static void coroutine_fn v9fs_write(void *opaque) return; } offset += err; - v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true); + size = count; + v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, &size, true); + count = size; trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov); fidp = get_fid(pdu, fid); @@ -3056,8 +3066,7 @@ static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp, if (newdirfid != -1) { dirfidp = get_fid(pdu, newdirfid); if (dirfidp == NULL) { - err = -ENOENT; - goto out_nofid; + return -ENOENT; } if (fidp->fid_type != P9_FID_NONE) { err = -EINVAL; @@ -3100,7 +3109,6 @@ out: put_fid(pdu, dirfidp); } v9fs_path_free(&new_path); -out_nofid: return err; } diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h index 3904f82901..8d07a0b301 100644 --- a/hw/9pfs/9p.h +++ b/hw/9pfs/9p.h @@ -425,7 +425,7 @@ struct V9fsTransport { ssize_t (*pdu_vunmarshal)(V9fsPDU *pdu, size_t offset, const char *fmt, va_list ap); void (*init_in_iov_from_pdu)(V9fsPDU *pdu, struct iovec **piov, - unsigned int *pniov, size_t size); + unsigned int *pniov, size_t *size); void (*init_out_iov_from_pdu)(V9fsPDU *pdu, struct iovec **piov, unsigned int *pniov, size_t size); void (*push_and_notify)(V9fsPDU *pdu); diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index b5a7c03f26..1d1c50409c 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -147,19 +147,22 @@ static ssize_t virtio_pdu_vunmarshal(V9fsPDU *pdu, size_t offset, } static void virtio_init_in_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov, - unsigned int *pniov, size_t size) + unsigned int *pniov, size_t *size) { V9fsState *s = pdu->s; V9fsVirtioState *v = container_of(s, V9fsVirtioState, state); VirtQueueElement *elem = v->elems[pdu->idx]; size_t buf_size = iov_size(elem->in_sg, elem->in_num); - if (buf_size < size) { + if (buf_size < P9_IOHDRSZ) { VirtIODevice *vdev = VIRTIO_DEVICE(v); virtio_error(vdev, - "VirtFS reply type %d needs %zu bytes, buffer has %zu", - pdu->id + 1, size, buf_size); + "VirtFS reply type %d needs %zu bytes, buffer has %zu, less than minimum", + pdu->id + 1, *size, buf_size); + } + if (buf_size < *size) { + *size = buf_size; } *piov = elem->in_sg; @@ -215,6 +218,7 @@ static void virtio_9p_device_unrealize(DeviceState *dev, Error **errp) V9fsVirtioState *v = VIRTIO_9P(dev); V9fsState *s = &v->state; + virtio_delete_queue(v->vq); virtio_cleanup(vdev); v9fs_device_unrealize_common(s, errp); } diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c index 71eebe12dd..18fe5b7c92 100644 --- a/hw/9pfs/xen-9p-backend.c +++ b/hw/9pfs/xen-9p-backend.c @@ -187,7 +187,7 @@ static void xen_9pfs_init_out_iov_from_pdu(V9fsPDU *pdu, static void xen_9pfs_init_in_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov, unsigned int *pniov, - size_t size) + size_t *size) { Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state); Xen9pfsRing *ring = &xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings]; @@ -197,16 +197,19 @@ static void xen_9pfs_init_in_iov_from_pdu(V9fsPDU *pdu, g_free(ring->sg); ring->sg = g_new0(struct iovec, 2); - xen_9pfs_in_sg(ring, ring->sg, &num, pdu->idx, size); + xen_9pfs_in_sg(ring, ring->sg, &num, pdu->idx, *size); buf_size = iov_size(ring->sg, num); - if (buf_size < size) { + if (buf_size < P9_IOHDRSZ) { xen_pv_printf(&xen_9pfs->xendev, 0, "Xen 9pfs request type %d" - "needs %zu bytes, buffer has %zu\n", pdu->id, size, - buf_size); + "needs %zu bytes, buffer has %zu, less than minimum\n", + pdu->id, *size, buf_size); xen_be_set_state(&xen_9pfs->xendev, XenbusStateClosing); xen_9pfs_disconnect(&xen_9pfs->xendev); } + if (buf_size < *size) { + *size = buf_size; + } *piov = ring->sg; *pniov = num; diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index 87f30a31d7..e2c957ce00 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -12,11 +12,13 @@ #define ACPI_CPU_FLAGS_OFFSET_RW 4 #define ACPI_CPU_CMD_OFFSET_WR 5 #define ACPI_CPU_CMD_DATA_OFFSET_RW 8 +#define ACPI_CPU_CMD_DATA2_OFFSET_R 0 enum { CPHP_GET_NEXT_CPU_WITH_EVENT_CMD = 0, CPHP_OST_EVENT_CMD = 1, CPHP_OST_STATUS_CMD = 2, + CPHP_GET_CPU_ID_CMD = 3, CPHP_CMD_MAX }; @@ -74,11 +76,27 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size) case CPHP_GET_NEXT_CPU_WITH_EVENT_CMD: val = cpu_st->selector; break; + case CPHP_GET_CPU_ID_CMD: + val = cdev->arch_id & 0xFFFFFFFF; + break; default: break; } trace_cpuhp_acpi_read_cmd_data(cpu_st->selector, val); break; + case ACPI_CPU_CMD_DATA2_OFFSET_R: + switch (cpu_st->command) { + case CPHP_GET_NEXT_CPU_WITH_EVENT_CMD: + val = 0; + break; + case CPHP_GET_CPU_ID_CMD: + val = cdev->arch_id >> 32; + break; + default: + break; + } + trace_cpuhp_acpi_read_cmd_data2(cpu_st->selector, val); + break; default: break; } diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c index 9cee90cc70..55eb29d80a 100644 --- a/hw/acpi/generic_event_device.c +++ b/hw/acpi/generic_event_device.c @@ -175,7 +175,7 @@ static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev, AcpiGedState *s = ACPI_GED(hotplug_dev); if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { - acpi_memory_plug_cb(hotplug_dev, &s->memhp_state, dev, errp); + acpi_memory_plug_cb(hotplug_dev, &s->memhp_state, dev, errp); } else { error_setg(errp, "virt: device plug request for unsupported device" " type: %s", object_get_typename(OBJECT(dev))); diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events index 96b8273297..afbc77de1c 100644 --- a/hw/acpi/trace-events +++ b/hw/acpi/trace-events @@ -23,6 +23,7 @@ cpuhp_acpi_read_flags(uint32_t idx, uint8_t flags) "idx[0x%"PRIx32"] flags: 0x%" cpuhp_acpi_write_idx(uint32_t idx) "set active cpu idx: 0x%"PRIx32 cpuhp_acpi_write_cmd(uint32_t idx, uint8_t cmd) "idx[0x%"PRIx32"] cmd: 0x%"PRIx8 cpuhp_acpi_read_cmd_data(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32 +cpuhp_acpi_read_cmd_data2(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32 cpuhp_acpi_cpu_has_events(uint32_t idx, bool ins, bool rm) "idx[0x%"PRIx32"] inserting: %d, removing: %d" cpuhp_acpi_clear_inserting_evt(uint32_t idx) "idx[0x%"PRIx32"]" cpuhp_acpi_clear_remove_evt(uint32_t idx) "idx[0x%"PRIx32"]" diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c index 77fbe1baab..59a27bdd68 100644 --- a/hw/arm/exynos4210.c +++ b/hw/arm/exynos4210.c @@ -166,17 +166,37 @@ static uint64_t exynos4210_calc_affinity(int cpu) return (0x9 << ARM_AFF1_SHIFT) | cpu; } -static void pl330_create(uint32_t base, qemu_irq irq, int nreq) +static DeviceState *pl330_create(uint32_t base, qemu_or_irq *orgate, + qemu_irq irq, int nreq, int nevents, int width) { SysBusDevice *busdev; DeviceState *dev; + int i; dev = qdev_create(NULL, "pl330"); + qdev_prop_set_uint8(dev, "num_events", nevents); + qdev_prop_set_uint8(dev, "num_chnls", 8); qdev_prop_set_uint8(dev, "num_periph_req", nreq); + + qdev_prop_set_uint8(dev, "wr_cap", 4); + qdev_prop_set_uint8(dev, "wr_q_dep", 8); + qdev_prop_set_uint8(dev, "rd_cap", 4); + qdev_prop_set_uint8(dev, "rd_q_dep", 8); + qdev_prop_set_uint8(dev, "data_width", width); + qdev_prop_set_uint16(dev, "data_buffer_dep", width); qdev_init_nofail(dev); busdev = SYS_BUS_DEVICE(dev); sysbus_mmio_map(busdev, 0, base); - sysbus_connect_irq(busdev, 0, irq); + + object_property_set_int(OBJECT(orgate), nevents + 1, "num-lines", + &error_abort); + object_property_set_bool(OBJECT(orgate), true, "realized", &error_abort); + + for (i = 0; i < nevents + 1; i++) { + sysbus_connect_irq(busdev, i, qdev_get_gpio_in(DEVICE(orgate), i)); + } + qdev_connect_gpio_out(DEVICE(orgate), 0, irq); + return dev; } static void exynos4210_realize(DeviceState *socdev, Error **errp) @@ -185,7 +205,7 @@ static void exynos4210_realize(DeviceState *socdev, Error **errp) MemoryRegion *system_mem = get_system_memory(); qemu_irq gate_irq[EXYNOS4210_NCPUS][EXYNOS4210_IRQ_GATE_NINPUTS]; SysBusDevice *busdev; - DeviceState *dev; + DeviceState *dev, *uart[4], *pl330[3]; int i, n; for (n = 0; n < EXYNOS4210_NCPUS; n++) { @@ -371,19 +391,19 @@ static void exynos4210_realize(DeviceState *socdev, Error **errp) /*** UARTs ***/ - exynos4210_uart_create(EXYNOS4210_UART0_BASE_ADDR, + uart[0] = exynos4210_uart_create(EXYNOS4210_UART0_BASE_ADDR, EXYNOS4210_UART0_FIFO_SIZE, 0, serial_hd(0), s->irq_table[exynos4210_get_irq(EXYNOS4210_UART_INT_GRP, 0)]); - exynos4210_uart_create(EXYNOS4210_UART1_BASE_ADDR, + uart[1] = exynos4210_uart_create(EXYNOS4210_UART1_BASE_ADDR, EXYNOS4210_UART1_FIFO_SIZE, 1, serial_hd(1), s->irq_table[exynos4210_get_irq(EXYNOS4210_UART_INT_GRP, 1)]); - exynos4210_uart_create(EXYNOS4210_UART2_BASE_ADDR, + uart[2] = exynos4210_uart_create(EXYNOS4210_UART2_BASE_ADDR, EXYNOS4210_UART2_FIFO_SIZE, 2, serial_hd(2), s->irq_table[exynos4210_get_irq(EXYNOS4210_UART_INT_GRP, 2)]); - exynos4210_uart_create(EXYNOS4210_UART3_BASE_ADDR, + uart[3] = exynos4210_uart_create(EXYNOS4210_UART3_BASE_ADDR, EXYNOS4210_UART3_FIFO_SIZE, 3, serial_hd(3), s->irq_table[exynos4210_get_irq(EXYNOS4210_UART_INT_GRP, 3)]); @@ -431,12 +451,42 @@ static void exynos4210_realize(DeviceState *socdev, Error **errp) s->irq_table[exynos4210_get_irq(28, 3)]); /*** DMA controllers ***/ - pl330_create(EXYNOS4210_PL330_BASE0_ADDR, - qemu_irq_invert(s->irq_table[exynos4210_get_irq(35, 1)]), 32); - pl330_create(EXYNOS4210_PL330_BASE1_ADDR, - qemu_irq_invert(s->irq_table[exynos4210_get_irq(36, 1)]), 32); - pl330_create(EXYNOS4210_PL330_BASE2_ADDR, - qemu_irq_invert(s->irq_table[exynos4210_get_irq(34, 1)]), 1); + pl330[0] = pl330_create(EXYNOS4210_PL330_BASE0_ADDR, + &s->pl330_irq_orgate[0], + s->irq_table[exynos4210_get_irq(21, 0)], + 32, 32, 32); + pl330[1] = pl330_create(EXYNOS4210_PL330_BASE1_ADDR, + &s->pl330_irq_orgate[1], + s->irq_table[exynos4210_get_irq(21, 1)], + 32, 32, 32); + pl330[2] = pl330_create(EXYNOS4210_PL330_BASE2_ADDR, + &s->pl330_irq_orgate[2], + s->irq_table[exynos4210_get_irq(20, 1)], + 1, 31, 64); + + sysbus_connect_irq(SYS_BUS_DEVICE(uart[0]), 1, + qdev_get_gpio_in(pl330[0], 15)); + sysbus_connect_irq(SYS_BUS_DEVICE(uart[1]), 1, + qdev_get_gpio_in(pl330[1], 15)); + sysbus_connect_irq(SYS_BUS_DEVICE(uart[2]), 1, + qdev_get_gpio_in(pl330[0], 17)); + sysbus_connect_irq(SYS_BUS_DEVICE(uart[3]), 1, + qdev_get_gpio_in(pl330[1], 17)); +} + +static void exynos4210_init(Object *obj) +{ + Exynos4210State *s = EXYNOS4210_SOC(obj); + int i; + + for (i = 0; i < ARRAY_SIZE(s->pl330_irq_orgate); i++) { + char *name = g_strdup_printf("pl330-irq-orgate%d", i); + qemu_or_irq *orgate = &s->pl330_irq_orgate[i]; + + object_initialize_child(obj, name, orgate, sizeof(*orgate), + TYPE_OR_IRQ, &error_abort, NULL); + g_free(name); + } } static void exynos4210_class_init(ObjectClass *klass, void *data) @@ -450,6 +500,7 @@ static const TypeInfo exynos4210_info = { .name = TYPE_EXYNOS4210_SOC, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(Exynos4210State), + .instance_init = exynos4210_init, .class_init = exynos4210_class_init, }; diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 39ab5f47e0..656b0081c2 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1934,7 +1934,6 @@ static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, static void virt_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - HotplugHandlerClass *hhc; VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); Error *local_err = NULL; @@ -1943,8 +1942,9 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev, goto out; } - hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); - hhc->plug(HOTPLUG_HANDLER(vms->acpi_dev), dev, &error_abort); + hotplug_handler_plug(HOTPLUG_HANDLER(vms->acpi_dev), + dev, &error_abort); + out: error_propagate(errp, local_err); } diff --git a/hw/char/exynos4210_uart.c b/hw/char/exynos4210_uart.c index 7e5c5ce789..20d8509107 100644 --- a/hw/char/exynos4210_uart.c +++ b/hw/char/exynos4210_uart.c @@ -24,6 +24,7 @@ #include "migration/vmstate.h" #include "qemu/error-report.h" #include "qemu/module.h" +#include "qemu/timer.h" #include "chardev/char-fe.h" #include "chardev/char-serial.h" @@ -31,45 +32,7 @@ #include "hw/irq.h" #include "hw/qdev-properties.h" -#undef DEBUG_UART -#undef DEBUG_UART_EXTEND -#undef DEBUG_IRQ -#undef DEBUG_Rx_DATA -#undef DEBUG_Tx_DATA - -#define DEBUG_UART 0 -#define DEBUG_UART_EXTEND 0 -#define DEBUG_IRQ 0 -#define DEBUG_Rx_DATA 0 -#define DEBUG_Tx_DATA 0 - -#if DEBUG_UART -#define PRINT_DEBUG(fmt, args...) \ - do { \ - fprintf(stderr, " [%s:%d] "fmt, __func__, __LINE__, ##args); \ - } while (0) - -#if DEBUG_UART_EXTEND -#define PRINT_DEBUG_EXTEND(fmt, args...) \ - do { \ - fprintf(stderr, " [%s:%d] "fmt, __func__, __LINE__, ##args); \ - } while (0) -#else -#define PRINT_DEBUG_EXTEND(fmt, args...) \ - do {} while (0) -#endif /* EXTEND */ - -#else -#define PRINT_DEBUG(fmt, args...) \ - do {} while (0) -#define PRINT_DEBUG_EXTEND(fmt, args...) \ - do {} while (0) -#endif - -#define PRINT_ERROR(fmt, args...) \ - do { \ - fprintf(stderr, " [%s:%d] "fmt, __func__, __LINE__, ##args); \ - } while (0) +#include "trace.h" /* * Offsets for UART registers relative to SFR base address @@ -156,6 +119,7 @@ static const Exynos4210UartReg exynos4210_uart_regs[] = { #define ULCON_STOP_BIT_SHIFT 1 /* UART Tx/Rx Status */ +#define UTRSTAT_Rx_TIMEOUT 0x8 #define UTRSTAT_TRANSMITTER_EMPTY 0x4 #define UTRSTAT_Tx_BUFFER_EMPTY 0x2 #define UTRSTAT_Rx_BUFFER_DATA_READY 0x1 @@ -185,16 +149,19 @@ typedef struct Exynos4210UartState { Exynos4210UartFIFO rx; Exynos4210UartFIFO tx; + QEMUTimer *fifo_timeout_timer; + uint64_t wordtime; /* word time in ns */ + CharBackend chr; qemu_irq irq; + qemu_irq dmairq; uint32_t channel; } Exynos4210UartState; -#if DEBUG_UART -/* Used only for debugging inside PRINT_DEBUG_... macros */ +/* Used only for tracing */ static const char *exynos4210_uart_regname(hwaddr offset) { @@ -208,7 +175,6 @@ static const char *exynos4210_uart_regname(hwaddr offset) return NULL; } -#endif static void fifo_store(Exynos4210UartFIFO *q, uint8_t ch) @@ -249,15 +215,12 @@ static void fifo_reset(Exynos4210UartFIFO *q) q->rp = 0; } -static uint32_t exynos4210_uart_Tx_FIFO_trigger_level(const Exynos4210UartState *s) +static uint32_t exynos4210_uart_FIFO_trigger_level(uint32_t channel, + uint32_t reg) { - uint32_t level = 0; - uint32_t reg; - - reg = (s->reg[I_(UFCON)] & UFCON_Tx_FIFO_TRIGGER_LEVEL) >> - UFCON_Tx_FIFO_TRIGGER_LEVEL_SHIFT; + uint32_t level; - switch (s->channel) { + switch (channel) { case 0: level = reg * 32; break; @@ -271,12 +234,52 @@ static uint32_t exynos4210_uart_Tx_FIFO_trigger_level(const Exynos4210UartState break; default: level = 0; - PRINT_ERROR("Wrong UART channel number: %d\n", s->channel); + trace_exynos_uart_channel_error(channel); + break; } - return level; } +static uint32_t +exynos4210_uart_Tx_FIFO_trigger_level(const Exynos4210UartState *s) +{ + uint32_t reg; + + reg = (s->reg[I_(UFCON)] & UFCON_Tx_FIFO_TRIGGER_LEVEL) >> + UFCON_Tx_FIFO_TRIGGER_LEVEL_SHIFT; + + return exynos4210_uart_FIFO_trigger_level(s->channel, reg); +} + +static uint32_t +exynos4210_uart_Rx_FIFO_trigger_level(const Exynos4210UartState *s) +{ + uint32_t reg; + + reg = ((s->reg[I_(UFCON)] & UFCON_Rx_FIFO_TRIGGER_LEVEL) >> + UFCON_Rx_FIFO_TRIGGER_LEVEL_SHIFT) + 1; + + return exynos4210_uart_FIFO_trigger_level(s->channel, reg); +} + +/* + * Update Rx DMA busy signal if Rx DMA is enabled. For simplicity, + * mark DMA as busy if DMA is enabled and the receive buffer is empty. + */ +static void exynos4210_uart_update_dmabusy(Exynos4210UartState *s) +{ + bool rx_dma_enabled = (s->reg[I_(UCON)] & 0x03) == 0x02; + uint32_t count = fifo_elements_number(&s->rx); + + if (rx_dma_enabled && !count) { + qemu_irq_raise(s->dmairq); + trace_exynos_uart_dmabusy(s->channel); + } else { + qemu_irq_lower(s->dmairq); + trace_exynos_uart_dmaready(s->channel); + } +} + static void exynos4210_uart_update_irq(Exynos4210UartState *s) { /* @@ -284,27 +287,53 @@ static void exynos4210_uart_update_irq(Exynos4210UartState *s) * transmit FIFO is smaller than the trigger level. */ if (s->reg[I_(UFCON)] & UFCON_FIFO_ENABLE) { - uint32_t count = (s->reg[I_(UFSTAT)] & UFSTAT_Tx_FIFO_COUNT) >> UFSTAT_Tx_FIFO_COUNT_SHIFT; if (count <= exynos4210_uart_Tx_FIFO_trigger_level(s)) { s->reg[I_(UINTSP)] |= UINTSP_TXD; } + + /* + * Rx interrupt if trigger level is reached or if rx timeout + * interrupt is disabled and there is data in the receive buffer + */ + count = fifo_elements_number(&s->rx); + if ((count && !(s->reg[I_(UCON)] & 0x80)) || + count >= exynos4210_uart_Rx_FIFO_trigger_level(s)) { + exynos4210_uart_update_dmabusy(s); + s->reg[I_(UINTSP)] |= UINTSP_RXD; + timer_del(s->fifo_timeout_timer); + } + } else if (s->reg[I_(UTRSTAT)] & UTRSTAT_Rx_BUFFER_DATA_READY) { + exynos4210_uart_update_dmabusy(s); + s->reg[I_(UINTSP)] |= UINTSP_RXD; } s->reg[I_(UINTP)] = s->reg[I_(UINTSP)] & ~s->reg[I_(UINTM)]; if (s->reg[I_(UINTP)]) { qemu_irq_raise(s->irq); - -#if DEBUG_IRQ - fprintf(stderr, "UART%d: IRQ has been raised: %08x\n", - s->channel, s->reg[I_(UINTP)]); -#endif - + trace_exynos_uart_irq_raised(s->channel, s->reg[I_(UINTP)]); } else { qemu_irq_lower(s->irq); + trace_exynos_uart_irq_lowered(s->channel); + } +} + +static void exynos4210_uart_timeout_int(void *opaque) +{ + Exynos4210UartState *s = opaque; + + trace_exynos_uart_rx_timeout(s->channel, s->reg[I_(UTRSTAT)], + s->reg[I_(UINTSP)]); + + if ((s->reg[I_(UTRSTAT)] & UTRSTAT_Rx_BUFFER_DATA_READY) || + (s->reg[I_(UCON)] & (1 << 11))) { + s->reg[I_(UINTSP)] |= UINTSP_RXD; + s->reg[I_(UTRSTAT)] |= UTRSTAT_Rx_TIMEOUT; + exynos4210_uart_update_dmabusy(s); + exynos4210_uart_update_irq(s); } } @@ -346,10 +375,24 @@ static void exynos4210_uart_update_parameters(Exynos4210UartState *s) ssp.data_bits = data_bits; ssp.stop_bits = stop_bits; + s->wordtime = NANOSECONDS_PER_SECOND * (data_bits + stop_bits + 1) / speed; + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_SET_PARAMS, &ssp); - PRINT_DEBUG("UART%d: speed: %d, parity: %c, data: %d, stop: %d\n", - s->channel, speed, parity, data_bits, stop_bits); + trace_exynos_uart_update_params( + s->channel, speed, parity, data_bits, stop_bits, s->wordtime); +} + +static void exynos4210_uart_rx_timeout_set(Exynos4210UartState *s) +{ + if (s->reg[I_(UCON)] & 0x80) { + uint32_t timeout = ((s->reg[I_(UCON)] >> 12) & 0x0f) * s->wordtime; + + timer_mod(s->fifo_timeout_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + timeout); + } else { + timer_del(s->fifo_timeout_timer); + } } static void exynos4210_uart_write(void *opaque, hwaddr offset, @@ -358,8 +401,8 @@ static void exynos4210_uart_write(void *opaque, hwaddr offset, Exynos4210UartState *s = (Exynos4210UartState *)opaque; uint8_t ch; - PRINT_DEBUG_EXTEND("UART%d: <0x%04x> %s <- 0x%08llx\n", s->channel, - offset, exynos4210_uart_regname(offset), (long long unsigned int)val); + trace_exynos_uart_write(s->channel, offset, + exynos4210_uart_regname(offset), val); switch (offset) { case ULCON: @@ -373,12 +416,12 @@ static void exynos4210_uart_write(void *opaque, hwaddr offset, if (val & UFCON_Rx_FIFO_RESET) { fifo_reset(&s->rx); s->reg[I_(UFCON)] &= ~UFCON_Rx_FIFO_RESET; - PRINT_DEBUG("UART%d: Rx FIFO Reset\n", s->channel); + trace_exynos_uart_rx_fifo_reset(s->channel); } if (val & UFCON_Tx_FIFO_RESET) { fifo_reset(&s->tx); s->reg[I_(UFCON)] &= ~UFCON_Tx_FIFO_RESET; - PRINT_DEBUG("UART%d: Tx FIFO Reset\n", s->channel); + trace_exynos_uart_tx_fifo_reset(s->channel); } break; @@ -390,9 +433,7 @@ static void exynos4210_uart_write(void *opaque, hwaddr offset, /* XXX this blocks entire thread. Rewrite to use * qemu_chr_fe_write and background I/O callbacks */ qemu_chr_fe_write_all(&s->chr, &ch, 1); -#if DEBUG_Tx_DATA - fprintf(stderr, "%c", ch); -#endif + trace_exynos_uart_tx(s->channel, ch); s->reg[I_(UTRSTAT)] |= UTRSTAT_TRANSMITTER_EMPTY | UTRSTAT_Tx_BUFFER_EMPTY; s->reg[I_(UINTSP)] |= UINTSP_TXD; @@ -403,16 +444,19 @@ static void exynos4210_uart_write(void *opaque, hwaddr offset, case UINTP: s->reg[I_(UINTP)] &= ~val; s->reg[I_(UINTSP)] &= ~val; - PRINT_DEBUG("UART%d: UINTP [%04x] have been cleared: %08x\n", - s->channel, offset, s->reg[I_(UINTP)]); + trace_exynos_uart_intclr(s->channel, s->reg[I_(UINTP)]); exynos4210_uart_update_irq(s); break; case UTRSTAT: + if (val & UTRSTAT_Rx_TIMEOUT) { + s->reg[I_(UTRSTAT)] &= ~UTRSTAT_Rx_TIMEOUT; + } + break; case UERSTAT: case UFSTAT: case UMSTAT: case URXH: - PRINT_DEBUG("UART%d: Trying to write into RO register: %s [%04x]\n", + trace_exynos_uart_ro_write( s->channel, exynos4210_uart_regname(offset), offset); break; case UINTSP: @@ -429,6 +473,7 @@ static void exynos4210_uart_write(void *opaque, hwaddr offset, break; } } + static uint64_t exynos4210_uart_read(void *opaque, hwaddr offset, unsigned size) { @@ -439,6 +484,8 @@ static uint64_t exynos4210_uart_read(void *opaque, hwaddr offset, case UERSTAT: /* Read Only */ res = s->reg[I_(UERSTAT)]; s->reg[I_(UERSTAT)] = 0; + trace_exynos_uart_read(s->channel, offset, + exynos4210_uart_regname(offset), res); return res; case UFSTAT: /* Read Only */ s->reg[I_(UFSTAT)] = fifo_elements_number(&s->rx) & 0xff; @@ -446,20 +493,22 @@ static uint64_t exynos4210_uart_read(void *opaque, hwaddr offset, s->reg[I_(UFSTAT)] |= UFSTAT_Rx_FIFO_FULL; s->reg[I_(UFSTAT)] &= ~0xff; } + trace_exynos_uart_read(s->channel, offset, + exynos4210_uart_regname(offset), + s->reg[I_(UFSTAT)]); return s->reg[I_(UFSTAT)]; case URXH: if (s->reg[I_(UFCON)] & UFCON_FIFO_ENABLE) { if (fifo_elements_number(&s->rx)) { res = fifo_retrieve(&s->rx); -#if DEBUG_Rx_DATA - fprintf(stderr, "%c", res); -#endif + trace_exynos_uart_rx(s->channel, res); if (!fifo_elements_number(&s->rx)) { s->reg[I_(UTRSTAT)] &= ~UTRSTAT_Rx_BUFFER_DATA_READY; } else { s->reg[I_(UTRSTAT)] |= UTRSTAT_Rx_BUFFER_DATA_READY; } } else { + trace_exynos_uart_rx_error(s->channel); s->reg[I_(UINTSP)] |= UINTSP_ERROR; exynos4210_uart_update_irq(s); res = 0; @@ -468,15 +517,23 @@ static uint64_t exynos4210_uart_read(void *opaque, hwaddr offset, s->reg[I_(UTRSTAT)] &= ~UTRSTAT_Rx_BUFFER_DATA_READY; res = s->reg[I_(URXH)]; } + exynos4210_uart_update_dmabusy(s); + trace_exynos_uart_read(s->channel, offset, + exynos4210_uart_regname(offset), res); return res; case UTXH: - PRINT_DEBUG("UART%d: Trying to read from WO register: %s [%04x]\n", - s->channel, exynos4210_uart_regname(offset), offset); + trace_exynos_uart_wo_read(s->channel, exynos4210_uart_regname(offset), + offset); break; default: + trace_exynos_uart_read(s->channel, offset, + exynos4210_uart_regname(offset), + s->reg[I_(offset)]); return s->reg[I_(offset)]; } + trace_exynos_uart_read(s->channel, offset, exynos4210_uart_regname(offset), + 0); return 0; } @@ -497,7 +554,6 @@ static int exynos4210_uart_can_receive(void *opaque) return fifo_empty_elements_number(&s->rx); } - static void exynos4210_uart_receive(void *opaque, const uint8_t *buf, int size) { Exynos4210UartState *s = (Exynos4210UartState *)opaque; @@ -505,24 +561,17 @@ static void exynos4210_uart_receive(void *opaque, const uint8_t *buf, int size) if (s->reg[I_(UFCON)] & UFCON_FIFO_ENABLE) { if (fifo_empty_elements_number(&s->rx) < size) { - for (i = 0; i < fifo_empty_elements_number(&s->rx); i++) { - fifo_store(&s->rx, buf[i]); - } + size = fifo_empty_elements_number(&s->rx); s->reg[I_(UINTSP)] |= UINTSP_ERROR; - s->reg[I_(UTRSTAT)] |= UTRSTAT_Rx_BUFFER_DATA_READY; - } else { - for (i = 0; i < size; i++) { - fifo_store(&s->rx, buf[i]); - } - s->reg[I_(UTRSTAT)] |= UTRSTAT_Rx_BUFFER_DATA_READY; } - /* XXX: Around here we maybe should check Rx trigger level */ - s->reg[I_(UINTSP)] |= UINTSP_RXD; + for (i = 0; i < size; i++) { + fifo_store(&s->rx, buf[i]); + } + exynos4210_uart_rx_timeout_set(s); } else { s->reg[I_(URXH)] = buf[0]; - s->reg[I_(UINTSP)] |= UINTSP_RXD; - s->reg[I_(UTRSTAT)] |= UTRSTAT_Rx_BUFFER_DATA_READY; } + s->reg[I_(UTRSTAT)] |= UTRSTAT_Rx_BUFFER_DATA_READY; exynos4210_uart_update_irq(s); } @@ -555,13 +604,24 @@ static void exynos4210_uart_reset(DeviceState *dev) fifo_reset(&s->rx); fifo_reset(&s->tx); - PRINT_DEBUG("UART%d: Rx FIFO size: %d\n", s->channel, s->rx.size); + trace_exynos_uart_rxsize(s->channel, s->rx.size); +} + +static int exynos4210_uart_post_load(void *opaque, int version_id) +{ + Exynos4210UartState *s = (Exynos4210UartState *)opaque; + + exynos4210_uart_update_parameters(s); + exynos4210_uart_rx_timeout_set(s); + + return 0; } static const VMStateDescription vmstate_exynos4210_uart_fifo = { .name = "exynos4210.uart.fifo", .version_id = 1, .minimum_version_id = 1, + .post_load = exynos4210_uart_post_load, .fields = (VMStateField[]) { VMSTATE_UINT32(sp, Exynos4210UartFIFO), VMSTATE_UINT32(rp, Exynos4210UartFIFO), @@ -614,12 +674,17 @@ static void exynos4210_uart_init(Object *obj) SysBusDevice *dev = SYS_BUS_DEVICE(obj); Exynos4210UartState *s = EXYNOS4210_UART(dev); + s->fifo_timeout_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + exynos4210_uart_timeout_int, s); + s->wordtime = NANOSECONDS_PER_SECOND * 10 / 9600; + /* memory mapping */ memory_region_init_io(&s->iomem, obj, &exynos4210_uart_ops, s, "exynos4210.uart", EXYNOS4210_UART_REGS_MEM_SIZE); sysbus_init_mmio(dev, &s->iomem); sysbus_init_irq(dev, &s->irq); + sysbus_init_irq(dev, &s->dmairq); } static void exynos4210_uart_realize(DeviceState *dev, Error **errp) diff --git a/hw/char/trace-events b/hw/char/trace-events index 2ce7f2f998..6f938301d9 100644 --- a/hw/char/trace-events +++ b/hw/char/trace-events @@ -77,3 +77,23 @@ cmsdk_apb_uart_set_params(int speed) "CMSDK APB UART: params set to %d 8N1" # nrf51_uart.c nrf51_uart_read(uint64_t addr, uint64_t r, unsigned int size) "addr 0x%" PRIx64 " value 0x%" PRIx64 " size %u" nrf51_uart_write(uint64_t addr, uint64_t value, unsigned int size) "addr 0x%" PRIx64 " value 0x%" PRIx64 " size %u" + +# exynos4210_uart.c +exynos_uart_dmabusy(uint32_t channel) "UART%d: DMA busy (Rx buffer empty)" +exynos_uart_dmaready(uint32_t channel) "UART%d: DMA ready" +exynos_uart_irq_raised(uint32_t channel, uint32_t reg) "UART%d: IRQ raised: 0x%08"PRIx32 +exynos_uart_irq_lowered(uint32_t channel) "UART%d: IRQ lowered" +exynos_uart_update_params(uint32_t channel, int speed, uint8_t parity, int data, int stop, uint64_t wordtime) "UART%d: speed: %d, parity: %c, data bits: %d, stop bits: %d wordtime: %"PRId64"ns" +exynos_uart_write(uint32_t channel, uint32_t offset, const char *name, uint64_t val) "UART%d: <0x%04x> %s <- 0x%" PRIx64 +exynos_uart_read(uint32_t channel, uint32_t offset, const char *name, uint64_t val) "UART%d: <0x%04x> %s -> 0x%" PRIx64 +exynos_uart_rx_fifo_reset(uint32_t channel) "UART%d: Rx FIFO Reset" +exynos_uart_tx_fifo_reset(uint32_t channel) "UART%d: Tx FIFO Reset" +exynos_uart_tx(uint32_t channel, uint8_t ch) "UART%d: Tx 0x%02"PRIx32 +exynos_uart_intclr(uint32_t channel, uint32_t reg) "UART%d: interrupts cleared: 0x%08"PRIx32 +exynos_uart_ro_write(uint32_t channel, const char *name, uint32_t reg) "UART%d: Trying to write into RO register: %s [0x%04"PRIx32"]" +exynos_uart_rx(uint32_t channel, uint8_t ch) "UART%d: Rx 0x%02"PRIx32 +exynos_uart_rx_error(uint32_t channel) "UART%d: Rx error" +exynos_uart_wo_read(uint32_t channel, const char *name, uint32_t reg) "UART%d: Trying to read from WO register: %s [0x%04"PRIx32"]" +exynos_uart_rxsize(uint32_t channel, uint32_t size) "UART%d: Rx FIFO size: %d" +exynos_uart_channel_error(uint32_t channel) "Wrong UART channel number: %d" +exynos_uart_rx_timeout(uint32_t channel, uint32_t stat, uint32_t intsp) "UART%d: Rx timeout stat=0x%x intsp=0x%x" diff --git a/hw/dma/pl330.c b/hw/dma/pl330.c index f2bb2d9ac1..64519971ef 100644 --- a/hw/dma/pl330.c +++ b/hw/dma/pl330.c @@ -25,19 +25,12 @@ #include "sysemu/dma.h" #include "qemu/log.h" #include "qemu/module.h" +#include "trace.h" #ifndef PL330_ERR_DEBUG #define PL330_ERR_DEBUG 0 #endif -#define DB_PRINT_L(lvl, fmt, args...) do {\ - if (PL330_ERR_DEBUG >= lvl) {\ - fprintf(stderr, "PL330: %s:" fmt, __func__, ## args);\ - } \ -} while (0) - -#define DB_PRINT(fmt, args...) DB_PRINT_L(1, fmt, ## args) - #define PL330_PERIPH_NUM 32 #define PL330_MAX_BURST_LEN 128 #define PL330_INSN_MAXSIZE 6 @@ -319,6 +312,26 @@ typedef struct PL330InsnDesc { void (*exec)(PL330Chan *, uint8_t opcode, uint8_t *args, int len); } PL330InsnDesc; +static void pl330_hexdump(uint8_t *buf, size_t size) +{ + unsigned int b, i, len; + char tmpbuf[80]; + + for (b = 0; b < size; b += 16) { + len = size - b; + if (len > 16) { + len = 16; + } + tmpbuf[0] = '\0'; + for (i = 0; i < len; i++) { + if ((i % 4) == 0) { + strcat(tmpbuf, " "); + } + sprintf(tmpbuf + strlen(tmpbuf), " %02x", buf[b + i]); + } + trace_pl330_hexdump(b, tmpbuf); + } +} /* MFIFO Implementation * @@ -582,7 +595,7 @@ static inline void pl330_queue_remove_tagged(PL330Queue *s, uint8_t tag) static inline void pl330_fault(PL330Chan *ch, uint32_t flags) { - DB_PRINT("ch: %p, flags: %" PRIx32 "\n", ch, flags); + trace_pl330_fault(ch, flags); ch->fault_type |= flags; if (ch->state == pl330_chan_fault) { return; @@ -590,7 +603,7 @@ static inline void pl330_fault(PL330Chan *ch, uint32_t flags) ch->state = pl330_chan_fault; ch->parent->num_faulting++; if (ch->parent->num_faulting == 1) { - DB_PRINT("abort interrupt raised\n"); + trace_pl330_fault_abort(); qemu_irq_raise(ch->parent->irq_abort); } } @@ -648,7 +661,7 @@ static void pl330_dmaend(PL330Chan *ch, uint8_t opcode, return; } } - DB_PRINT("DMA ending!\n"); + trace_pl330_dmaend(); pl330_fifo_tagged_remove(&s->fifo, ch->tag); pl330_queue_remove_tagged(&s->read_queue, ch->tag); pl330_queue_remove_tagged(&s->write_queue, ch->tag); @@ -683,7 +696,7 @@ static void pl330_dmago(PL330Chan *ch, uint8_t opcode, uint8_t *args, int len) uint32_t pc; PL330Chan *s; - DB_PRINT("\n"); + trace_pl330_dmago(); if (!ch->is_manager) { pl330_fault(ch, PL330_FAULT_UNDEF_INSTR); @@ -740,9 +753,7 @@ static void pl330_dmald(PL330Chan *ch, uint8_t opcode, uint8_t *args, int len) ch->stall = pl330_queue_put_insn(&ch->parent->read_queue, ch->src, size, num, inc, 0, ch->tag); if (!ch->stall) { - DB_PRINT("channel:%" PRId8 " address:%08" PRIx32 " size:%" PRIx32 - " num:%" PRId32 " %c\n", - ch->tag, ch->src, size, num, inc ? 'Y' : 'N'); + trace_pl330_dmald(ch->tag, ch->src, size, num, inc ? 'Y' : 'N'); ch->src += inc ? size * num - (ch->src & (size - 1)) : 0; } } @@ -782,7 +793,7 @@ static void pl330_dmakill(PL330Chan *ch, uint8_t opcode, uint8_t *args, int len) ch->fault_type = 0; ch->parent->num_faulting--; if (ch->parent->num_faulting == 0) { - DB_PRINT("abort interrupt lowered\n"); + trace_pl330_dmakill(); qemu_irq_lower(ch->parent->irq_abort); } } @@ -800,6 +811,8 @@ static void pl330_dmalpend(PL330Chan *ch, uint8_t opcode, uint8_t bs = opcode & 3; uint8_t lc = (opcode & 4) >> 2; + trace_pl330_dmalpend(nf, bs, lc, ch->lc[lc], ch->request_flag); + if (bs == 2) { pl330_fault(ch, PL330_FAULT_OPERAND_INVALID); return; @@ -813,12 +826,12 @@ static void pl330_dmalpend(PL330Chan *ch, uint8_t opcode, if (nf) { ch->lc[lc]--; } - DB_PRINT("loop reiteration\n"); + trace_pl330_dmalpiter(); ch->pc -= args[0]; ch->pc -= len + 1; /* "ch->pc -= args[0] + len + 1" is incorrect when args[0] == 256 */ } else { - DB_PRINT("loop fallthrough\n"); + trace_pl330_dmalpfallthrough(); } } @@ -886,10 +899,10 @@ static void pl330_dmasev(PL330Chan *ch, uint8_t opcode, uint8_t *args, int len) } if (ch->parent->inten & (1 << ev_id)) { ch->parent->int_status |= (1 << ev_id); - DB_PRINT("event interrupt raised %" PRId8 "\n", ev_id); + trace_pl330_dmasev_evirq(ev_id); qemu_irq_raise(ch->parent->irq[ev_id]); } - DB_PRINT("event raised %" PRId8 "\n", ev_id); + trace_pl330_dmasev_event(ev_id); ch->parent->ev_status |= (1 << ev_id); } @@ -914,9 +927,7 @@ static void pl330_dmast(PL330Chan *ch, uint8_t opcode, uint8_t *args, int len) ch->stall = pl330_queue_put_insn(&ch->parent->write_queue, ch->dst, size, num, inc, 0, ch->tag); if (!ch->stall) { - DB_PRINT("channel:%" PRId8 " address:%08" PRIx32 " size:%" PRIx32 - " num:%" PRId32 " %c\n", - ch->tag, ch->dst, size, num, inc ? 'Y' : 'N'); + trace_pl330_dmast(ch->tag, ch->dst, size, num, inc ? 'Y' : 'N'); ch->dst += inc ? size * num - (ch->dst & (size - 1)) : 0; } } @@ -992,7 +1003,7 @@ static void pl330_dmawfe(PL330Chan *ch, uint8_t opcode, } } ch->parent->ev_status &= ~(1 << ev_id); - DB_PRINT("event lowered %" PRIx8 "\n", ev_id); + trace_pl330_dmawfe(ev_id); } else { ch->stall = 1; } @@ -1135,7 +1146,7 @@ static int pl330_chan_exec(PL330Chan *ch) ch->stall = 0; insn = pl330_fetch_insn(ch); if (!insn) { - DB_PRINT("pl330 undefined instruction\n"); + trace_pl330_chan_exec_undef(); pl330_fault(ch, PL330_FAULT_UNDEF_INSTR); return 0; } @@ -1175,10 +1186,9 @@ static int pl330_exec_cycle(PL330Chan *channel) int len = q->len - (q->addr & (q->len - 1)); dma_memory_read(&address_space_memory, q->addr, buf, len); - if (PL330_ERR_DEBUG > 1) { - DB_PRINT("PL330 read from memory @%08" PRIx32 " (size = %08x):\n", - q->addr, len); - qemu_hexdump((char *)buf, stderr, "", len); + trace_pl330_exec_cycle(q->addr, len); + if (trace_event_get_state_backends(TRACE_PL330_HEXDUMP)) { + pl330_hexdump(buf, len); } fifo_res = pl330_fifo_push(&s->fifo, buf, len, q->tag); if (fifo_res == PL330_FIFO_OK) { @@ -1207,10 +1217,9 @@ static int pl330_exec_cycle(PL330Chan *channel) } if (fifo_res == PL330_FIFO_OK || q->z) { dma_memory_write(&address_space_memory, q->addr, buf, len); - if (PL330_ERR_DEBUG > 1) { - DB_PRINT("PL330 read from memory @%08" PRIx32 - " (size = %08x):\n", q->addr, len); - qemu_hexdump((char *)buf, stderr, "", len); + trace_pl330_exec_cycle(q->addr, len); + if (trace_event_get_state_backends(TRACE_PL330_HEXDUMP)) { + pl330_hexdump(buf, len); } if (q->inc) { q->addr += len; @@ -1252,8 +1261,8 @@ static int pl330_exec_channel(PL330Chan *channel) static inline void pl330_exec(PL330State *s) { - DB_PRINT("\n"); int i, insr_exec; + trace_pl330_exec(); do { insr_exec = pl330_exec_channel(&s->manager); @@ -1298,7 +1307,7 @@ static void pl330_debug_exec(PL330State *s) args[2] = (s->dbg[1] >> 8) & 0xff; args[3] = (s->dbg[1] >> 16) & 0xff; args[4] = (s->dbg[1] >> 24) & 0xff; - DB_PRINT("chan id: %" PRIx8 "\n", chan_id); + trace_pl330_debug_exec(chan_id); if (s->dbg[0] & 1) { ch = &s->chan[chan_id]; } else { @@ -1320,6 +1329,7 @@ static void pl330_debug_exec(PL330State *s) ch->fault_type |= PL330_FAULT_DBG_INSTR; } if (ch->stall) { + trace_pl330_debug_exec_stall(); qemu_log_mask(LOG_UNIMP, "pl330: stall of debug instruction not " "implemented\n"); } @@ -1334,7 +1344,7 @@ static void pl330_iomem_write(void *opaque, hwaddr offset, PL330State *s = (PL330State *) opaque; int i; - DB_PRINT("addr: %08x data: %08x\n", (unsigned)offset, (unsigned)value); + trace_pl330_iomem_write((unsigned)offset, (unsigned)value); switch (offset) { case PL330_REG_INTEN: @@ -1343,7 +1353,7 @@ static void pl330_iomem_write(void *opaque, hwaddr offset, case PL330_REG_INTCLR: for (i = 0; i < s->num_events; i++) { if (s->int_status & s->inten & value & (1 << i)) { - DB_PRINT("event interrupt lowered %d\n", i); + trace_pl330_iomem_write_clr(i); qemu_irq_lower(s->irq[i]); } } @@ -1361,11 +1371,9 @@ static void pl330_iomem_write(void *opaque, hwaddr offset, } break; case PL330_REG_DBGINST0: - DB_PRINT("s->dbg[0] = %08x\n", (unsigned)value); s->dbg[0] = value; break; case PL330_REG_DBGINST1: - DB_PRINT("s->dbg[1] = %08x\n", (unsigned)value); s->dbg[1] = value; break; default: @@ -1489,7 +1497,7 @@ static uint64_t pl330_iomem_read(void *opaque, hwaddr offset, unsigned size) { uint32_t ret = pl330_iomem_read_imp(opaque, offset); - DB_PRINT("addr: %08" HWADDR_PRIx " data: %08" PRIx32 "\n", offset, ret); + trace_pl330_iomem_read((uint32_t)offset, ret); return ret; } diff --git a/hw/dma/trace-events b/hw/dma/trace-events index e4498428c5..44893995f6 100644 --- a/hw/dma/trace-events +++ b/hw/dma/trace-events @@ -20,3 +20,27 @@ sparc32_dma_enable_lower(void) "Lower DMA enable" # i8257.c i8257_unregistered_dma(int nchan, int dma_pos, int dma_len) "unregistered DMA channel used nchan=%d dma_pos=%d dma_len=%d" + +# pl330.c +pl330_fault(void *ptr, uint32_t flags) "ch: %p, flags: 0x%"PRIx32 +pl330_fault_abort(void) "abort interrupt raised" +pl330_dmaend(void) "DMA ending" +pl330_dmago(void) "DMA run" +pl330_dmald(uint8_t chan, uint32_t addr, uint32_t size, uint32_t num, char ch) "channel:%"PRId8" address:0x%08"PRIx32" size:0x%"PRIx32" num:%"PRId32"%c" +pl330_dmakill(void) "abort interrupt lowered" +pl330_dmalpend(uint8_t nf, uint8_t bs, uint8_t lc, uint8_t ch, uint8_t flag) "nf=0x%02x bs=0x%02x lc=0x%02x ch=0x%02x flag=0x%02x" +pl330_dmalpiter(void) "loop reiteration" +pl330_dmalpfallthrough(void) "loop fallthrough" +pl330_dmasev_evirq(uint8_t ev_id) "event interrupt raised %"PRId8 +pl330_dmasev_event(uint8_t ev_id) "event raised %"PRId8 +pl330_dmast(uint8_t chan, uint32_t addr, uint32_t sz, uint32_t num, char ch) "channel:%"PRId8" address:0x%08"PRIx32" size:0x%"PRIx32" num:%"PRId32" %c" +pl330_dmawfe(uint8_t ev_id) "event lowered 0x%"PRIx8 +pl330_chan_exec_undef(void) "undefined instruction" +pl330_exec_cycle(uint32_t addr, uint32_t size) "PL330 read from memory @0x%08"PRIx32" (size = 0x%08"PRIx32")" +pl330_hexdump(uint32_t offset, char *str) " 0x%04"PRIx32":%s" +pl330_exec(void) "pl330_exec" +pl330_debug_exec(uint8_t ch) "chan id: 0x%"PRIx8 +pl330_debug_exec_stall(void) "stall of debug instruction not implemented" +pl330_iomem_write(uint32_t offset, uint32_t value) "addr: 0x%08"PRIx32" data: 0x%08"PRIx32 +pl330_iomem_write_clr(int i) "event interrupt lowered %d" +pl330_iomem_read(uint32_t addr, uint32_t data) "addr: 0x%08"PRIx32" data: 0x%08"PRIx32 diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index e25df838f0..9c4e46fa74 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1816,7 +1816,6 @@ static void build_smb0(Aml *table, I2CBus *smbus, int devnr, int func) Aml *scope = aml_scope("_SB.PCI0"); Aml *dev = aml_device("SMB0"); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("APP0005"))); aml_append(dev, aml_name_decl("_ADR", aml_int(devnr << 16 | func))); build_acpi_ipmi_devices(dev, BUS(smbus), "\\_SB.PCI0.SMB0"); aml_append(scope, dev); diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 8054bc4147..a6302a772d 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -93,7 +93,9 @@ #include "fw_cfg.h" #include "trace.h" -GlobalProperty pc_compat_4_2[] = {}; +GlobalProperty pc_compat_4_2[] = { + { "mch", "smbase-smram", "off" }, +}; const size_t pc_compat_4_2_len = G_N_ELEMENTS(pc_compat_4_2); GlobalProperty pc_compat_4_1[] = {}; diff --git a/hw/misc/stm32f4xx_syscfg.c b/hw/misc/stm32f4xx_syscfg.c index dbcdca59f8..f960e4ea1e 100644 --- a/hw/misc/stm32f4xx_syscfg.c +++ b/hw/misc/stm32f4xx_syscfg.c @@ -47,7 +47,7 @@ static void stm32f4xx_syscfg_set_irq(void *opaque, int irq, int level) STM32F4xxSyscfgState *s = opaque; int icrreg = irq / 4; int startbit = (irq & 3) * 4; - uint8_t config = config = irq / 16; + uint8_t config = irq / 16; trace_stm32f4xx_syscfg_set_irq(irq / 16, irq % 16, level); diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 158d270b9f..6342f73b9f 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -275,20 +275,20 @@ static const TypeInfo q35_host_info = { * MCH D0:F0 */ -static uint64_t tseg_blackhole_read(void *ptr, hwaddr reg, unsigned size) +static uint64_t blackhole_read(void *ptr, hwaddr reg, unsigned size) { return 0xffffffff; } -static void tseg_blackhole_write(void *opaque, hwaddr addr, uint64_t val, - unsigned width) +static void blackhole_write(void *opaque, hwaddr addr, uint64_t val, + unsigned width) { /* nothing */ } -static const MemoryRegionOps tseg_blackhole_ops = { - .read = tseg_blackhole_read, - .write = tseg_blackhole_write, +static const MemoryRegionOps blackhole_ops = { + .read = blackhole_read, + .write = blackhole_write, .endianness = DEVICE_NATIVE_ENDIAN, .valid.min_access_size = 1, .valid.max_access_size = 4, @@ -430,6 +430,46 @@ static void mch_update_ext_tseg_mbytes(MCHPCIState *mch) } } +static void mch_update_smbase_smram(MCHPCIState *mch) +{ + PCIDevice *pd = PCI_DEVICE(mch); + uint8_t *reg = pd->config + MCH_HOST_BRIDGE_F_SMBASE; + bool lck; + + if (!mch->has_smram_at_smbase) { + return; + } + + if (*reg == MCH_HOST_BRIDGE_F_SMBASE_QUERY) { + pd->wmask[MCH_HOST_BRIDGE_F_SMBASE] = + MCH_HOST_BRIDGE_F_SMBASE_LCK; + *reg = MCH_HOST_BRIDGE_F_SMBASE_IN_RAM; + return; + } + + /* + * default/reset state, discard written value + * which will disable SMRAM balackhole at SMBASE + */ + if (pd->wmask[MCH_HOST_BRIDGE_F_SMBASE] == 0xff) { + *reg = 0x00; + } + + memory_region_transaction_begin(); + if (*reg & MCH_HOST_BRIDGE_F_SMBASE_LCK) { + /* disable all writes */ + pd->wmask[MCH_HOST_BRIDGE_F_SMBASE] &= + ~MCH_HOST_BRIDGE_F_SMBASE_LCK; + *reg = MCH_HOST_BRIDGE_F_SMBASE_LCK; + lck = true; + } else { + lck = false; + } + memory_region_set_enabled(&mch->smbase_blackhole, lck); + memory_region_set_enabled(&mch->smbase_window, lck); + memory_region_transaction_commit(); +} + static void mch_write_config(PCIDevice *d, uint32_t address, uint32_t val, int len) { @@ -456,6 +496,10 @@ static void mch_write_config(PCIDevice *d, MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_SIZE)) { mch_update_ext_tseg_mbytes(mch); } + + if (ranges_overlap(address, len, MCH_HOST_BRIDGE_F_SMBASE, 1)) { + mch_update_smbase_smram(mch); + } } static void mch_update(MCHPCIState *mch) @@ -464,6 +508,7 @@ static void mch_update(MCHPCIState *mch) mch_update_pam(mch); mch_update_smram(mch); mch_update_ext_tseg_mbytes(mch); + mch_update_smbase_smram(mch); /* * pci hole goes from end-of-low-ram to io-apic. @@ -514,6 +559,9 @@ static void mch_reset(DeviceState *qdev) MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY); } + d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0; + d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff; + mch_update(mch); } @@ -563,7 +611,7 @@ static void mch_realize(PCIDevice *d, Error **errp) memory_region_add_subregion(&mch->smram, 0xfeda0000, &mch->high_smram); memory_region_init_io(&mch->tseg_blackhole, OBJECT(mch), - &tseg_blackhole_ops, NULL, + &blackhole_ops, NULL, "tseg-blackhole", 0); memory_region_set_enabled(&mch->tseg_blackhole, false); memory_region_add_subregion_overlap(mch->system_memory, @@ -575,6 +623,27 @@ static void mch_realize(PCIDevice *d, Error **errp) memory_region_set_enabled(&mch->tseg_window, false); memory_region_add_subregion(&mch->smram, mch->below_4g_mem_size, &mch->tseg_window); + + /* + * This is not what hardware does, so it's QEMU specific hack. + * See commit message for details. + */ + memory_region_init_io(&mch->smbase_blackhole, OBJECT(mch), &blackhole_ops, + NULL, "smbase-blackhole", + MCH_HOST_BRIDGE_SMBASE_SIZE); + memory_region_set_enabled(&mch->smbase_blackhole, false); + memory_region_add_subregion_overlap(mch->system_memory, + MCH_HOST_BRIDGE_SMBASE_ADDR, + &mch->smbase_blackhole, 1); + + memory_region_init_alias(&mch->smbase_window, OBJECT(mch), + "smbase-window", mch->ram_memory, + MCH_HOST_BRIDGE_SMBASE_ADDR, + MCH_HOST_BRIDGE_SMBASE_SIZE); + memory_region_set_enabled(&mch->smbase_window, false); + memory_region_add_subregion(&mch->smram, MCH_HOST_BRIDGE_SMBASE_ADDR, + &mch->smbase_window); + object_property_add_const_link(qdev_get_machine(), "smram", OBJECT(&mch->smram), &error_abort); @@ -601,6 +670,7 @@ uint64_t mch_mcfg_base(void) static Property mch_props[] = { DEFINE_PROP_UINT16("extended-tseg-mbytes", MCHPCIState, ext_tseg_mbytes, 16), + DEFINE_PROP_BOOL("smbase-smram", MCHPCIState, has_smram_at_smbase, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 4bc73a370e..d3af42ef92 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -943,7 +943,13 @@ void virtio_scsi_common_unrealize(DeviceState *dev) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); + int i; + virtio_delete_queue(vs->ctrl_vq); + virtio_delete_queue(vs->event_vq); + for (i = 0; i < vs->conf.num_queues; i++) { + virtio_delete_queue(vs->cmd_vqs[i]); + } g_free(vs->cmd_vqs); virtio_cleanup(vdev); } diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index f5744363a8..b6cee479bb 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -335,8 +335,10 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) sizeof(struct virtio_vsock_config)); /* Receive and transmit queues belong to vhost */ - virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, vhost_vsock_handle_output); - virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, vhost_vsock_handle_output); + vsock->recv_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, + vhost_vsock_handle_output); + vsock->trans_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, + vhost_vsock_handle_output); /* The event queue belongs to QEMU */ vsock->event_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, @@ -363,6 +365,9 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) err_vhost_dev: vhost_dev_cleanup(&vsock->vhost_dev); err_virtio: + virtio_delete_queue(vsock->recv_vq); + virtio_delete_queue(vsock->trans_vq); + virtio_delete_queue(vsock->event_vq); virtio_cleanup(vdev); close(vhostfd); return; @@ -379,6 +384,9 @@ static void vhost_vsock_device_unrealize(DeviceState *dev, Error **errp) vhost_vsock_set_status(vdev, 0); vhost_dev_cleanup(&vsock->vhost_dev); + virtio_delete_queue(vsock->recv_vq); + virtio_delete_queue(vsock->trans_vq); + virtio_delete_queue(vsock->event_vq); virtio_cleanup(vdev); } diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 4da0d5a6c5..9edfadc81d 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -547,26 +547,28 @@ static void vhost_region_add_section(struct vhost_dev *dev, uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + section->offset_within_region; RAMBlock *mrs_rb = section->mr->ram_block; - size_t mrs_page = qemu_ram_pagesize(mrs_rb); trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, mrs_host); - /* Round the section to it's page size */ - /* First align the start down to a page boundary */ - uint64_t alignage = mrs_host & (mrs_page - 1); - if (alignage) { - mrs_host -= alignage; - mrs_size += alignage; - mrs_gpa -= alignage; - } - /* Now align the size up to a page boundary */ - alignage = mrs_size & (mrs_page - 1); - if (alignage) { - mrs_size += mrs_page - alignage; + if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { + /* Round the section to it's page size */ + /* First align the start down to a page boundary */ + size_t mrs_page = qemu_ram_pagesize(mrs_rb); + uint64_t alignage = mrs_host & (mrs_page - 1); + if (alignage) { + mrs_host -= alignage; + mrs_size += alignage; + mrs_gpa -= alignage; + } + /* Now align the size up to a page boundary */ + alignage = mrs_size & (mrs_page - 1); + if (alignage) { + mrs_size += mrs_page - alignage; + } + trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, + mrs_size, mrs_host); } - trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, - mrs_host); if (dev->n_tmp_sections) { /* Since we already have at least one section, lets see if @@ -590,9 +592,10 @@ static void vhost_region_add_section(struct vhost_dev *dev, * match up in the same RAMBlock if they do. */ if (mrs_gpa < prev_gpa_start) { - error_report("%s:Section rounded to %"PRIx64 - " prior to previous %"PRIx64, - __func__, mrs_gpa, prev_gpa_start); + error_report("%s:Section '%s' rounded to %"PRIx64 + " prior to previous '%s' %"PRIx64, + __func__, section->mr->name, mrs_gpa, + prev_sec->mr->name, prev_gpa_start); /* A way to cleanly fail here would be better */ return; } diff --git a/include/elf.h b/include/elf.h index 3501e0c8d0..8fbfe60e09 100644 --- a/include/elf.h +++ b/include/elf.h @@ -1650,6 +1650,7 @@ typedef struct elf64_shdr { #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ +#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension regs */ /* * Physical entry point into the kernel. diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index a46116167c..53de19753a 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -234,11 +234,6 @@ static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, return (addr >> TARGET_PAGE_BITS) & size_mask; } -static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx) -{ - return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1; -} - /* Find the TLB entry corresponding to the mmu_idx + address pair. */ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, target_ulong addr) diff --git a/include/hw/arm/exynos4210.h b/include/hw/arm/exynos4210.h index f0f23b0e9b..55260394af 100644 --- a/include/hw/arm/exynos4210.h +++ b/include/hw/arm/exynos4210.h @@ -24,6 +24,7 @@ #ifndef EXYNOS4210_H #define EXYNOS4210_H +#include "hw/or-irq.h" #include "hw/sysbus.h" #include "target/arm/cpu-qom.h" @@ -74,6 +75,8 @@ #define EXYNOS4210_I2C_NUMBER 9 +#define EXYNOS4210_NUM_DMA 3 + typedef struct Exynos4210Irq { qemu_irq int_combiner_irq[EXYNOS4210_MAX_INT_COMBINER_IN_IRQ]; qemu_irq ext_combiner_irq[EXYNOS4210_MAX_EXT_COMBINER_IN_IRQ]; @@ -97,6 +100,7 @@ typedef struct Exynos4210State { MemoryRegion boot_secondary; MemoryRegion bootreg_mem; I2CBus *i2c_if[EXYNOS4210_I2C_NUMBER]; + qemu_or_irq pl330_irq_orgate[EXYNOS4210_NUM_DMA]; } Exynos4210State; #define TYPE_EXYNOS4210_SOC "exynos4210" diff --git a/include/hw/or-irq.h b/include/hw/or-irq.h index 3a3230dd84..0038bfbe3d 100644 --- a/include/hw/or-irq.h +++ b/include/hw/or-irq.h @@ -33,7 +33,7 @@ /* This can safely be increased if necessary without breaking * migration compatibility (as long as it remains greater than 15). */ -#define MAX_OR_LINES 32 +#define MAX_OR_LINES 48 typedef struct OrIRQState qemu_or_irq; diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index b3bcf2e632..976fbae599 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -32,6 +32,7 @@ #include "hw/acpi/ich9.h" #include "hw/pci-host/pam.h" #include "hw/i386/intel_iommu.h" +#include "qemu/units.h" #define TYPE_Q35_HOST_DEVICE "q35-pcihost" #define Q35_HOST_DEVICE(obj) \ @@ -54,6 +55,8 @@ typedef struct MCHPCIState { MemoryRegion smram_region, open_high_smram; MemoryRegion smram, low_smram, high_smram; MemoryRegion tseg_blackhole, tseg_window; + MemoryRegion smbase_blackhole, smbase_window; + bool has_smram_at_smbase; Range pci_hole; uint64_t below_4g_mem_size; uint64_t above_4g_mem_size; @@ -97,6 +100,13 @@ typedef struct Q35PCIHost { #define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY 0xffff #define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_MAX 0xfff +#define MCH_HOST_BRIDGE_SMBASE_SIZE (128 * KiB) +#define MCH_HOST_BRIDGE_SMBASE_ADDR 0x30000 +#define MCH_HOST_BRIDGE_F_SMBASE 0x9c +#define MCH_HOST_BRIDGE_F_SMBASE_QUERY 0xff +#define MCH_HOST_BRIDGE_F_SMBASE_IN_RAM 0x01 +#define MCH_HOST_BRIDGE_F_SMBASE_LCK 0x02 + #define MCH_HOST_BRIDGE_PCIEXBAR 0x60 /* 64bit register */ #define MCH_HOST_BRIDGE_PCIEXBAR_SIZE 8 /* 64bit register */ #define MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT 0xb0000000 diff --git a/include/hw/virtio/vhost-vsock.h b/include/hw/virtio/vhost-vsock.h index d509d67c4a..bc5a988ee5 100644 --- a/include/hw/virtio/vhost-vsock.h +++ b/include/hw/virtio/vhost-vsock.h @@ -33,6 +33,8 @@ typedef struct { struct vhost_virtqueue vhost_vqs[2]; struct vhost_dev vhost_dev; VirtQueue *event_vq; + VirtQueue *recv_vq; + VirtQueue *trans_vq; QEMUTimer *post_load_timer; /*< public >*/ diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 07b16cc0f4..f3080a1635 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -10,6 +10,7 @@ #include "qemu/path.h" #include "qemu/queue.h" #include "qemu/guest-random.h" +#include "qemu/units.h" #ifdef _ARCH_PPC64 #undef ARCH_DLINFO @@ -2191,7 +2192,7 @@ unsigned long init_guest_space(unsigned long host_start, * to where we need to put the commpage. */ munmap((void *)real_start, host_size); - real_size = aligned_size + qemu_host_page_size; + real_size = aligned_size + align; real_start = (unsigned long) mmap((void *)real_start, real_size, PROT_NONE, flags, -1, 0); if (real_start == (unsigned long)-1) { @@ -2364,24 +2365,51 @@ static void load_elf_image(const char *image_name, int image_fd, } } - load_addr = loaddr; - if (ehdr->e_type == ET_DYN) { - /* The image indicates that it can be loaded anywhere. Find a - location that can hold the memory space required. If the - image is pre-linked, LOADDR will be non-zero. Since we do - not supply MAP_FIXED here we'll use that address if and - only if it remains available. */ - load_addr = target_mmap(loaddr, hiaddr - loaddr, PROT_NONE, - MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, - -1, 0); - if (load_addr == -1) { - goto exit_perror; + if (pinterp_name != NULL) { + /* + * This is the main executable. + * + * Reserve extra space for brk. + * We hold on to this space while placing the interpreter + * and the stack, lest they be placed immediately after + * the data segment and block allocation from the brk. + * + * 16MB is chosen as "large enough" without being so large + * as to allow the result to not fit with a 32-bit guest on + * a 32-bit host. + */ + info->reserve_brk = 16 * MiB; + hiaddr += info->reserve_brk; + + if (ehdr->e_type == ET_EXEC) { + /* + * Make sure that the low address does not conflict with + * MMAP_MIN_ADDR or the QEMU application itself. + */ + probe_guest_base(image_name, loaddr, hiaddr); } - } else if (pinterp_name != NULL) { - /* This is the main executable. Make sure that the low - address does not conflict with MMAP_MIN_ADDR or the - QEMU application itself. */ - probe_guest_base(image_name, loaddr, hiaddr); + } + + /* + * Reserve address space for all of this. + * + * In the case of ET_EXEC, we supply MAP_FIXED so that we get + * exactly the address range that is required. + * + * Otherwise this is ET_DYN, and we are searching for a location + * that can hold the memory space required. If the image is + * pre-linked, LOADDR will be non-zero, and the kernel should + * honor that address if it happens to be free. + * + * In both cases, we will overwrite pages in this range with mappings + * from the executable. + */ + load_addr = target_mmap(loaddr, hiaddr - loaddr, PROT_NONE, + MAP_PRIVATE | MAP_ANON | MAP_NORESERVE | + (ehdr->e_type == ET_EXEC ? MAP_FIXED : 0), + -1, 0); + if (load_addr == -1) { + goto exit_perror; } load_bias = load_addr - loaddr; @@ -2860,6 +2888,17 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) bprm->core_dump = &elf_core_dump; #endif + /* + * If we reserved extra space for brk, release it now. + * The implementation of do_brk in syscalls.c expects to be able + * to mmap pages in this space. + */ + if (info->reserve_brk) { + abi_ulong start_brk = HOST_PAGE_ALIGN(info->brk); + abi_ulong end_brk = HOST_PAGE_ALIGN(info->brk + info->reserve_brk); + target_munmap(start_brk, end_brk - start_brk); + } + return 0; } diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h index c6b9d6ad66..73dcc761e6 100644 --- a/linux-user/ioctls.h +++ b/linux-user/ioctls.h @@ -69,6 +69,29 @@ IOCTL(KDSETLED, 0, TYPE_INT) IOCTL_SPECIAL(KDSIGACCEPT, 0, do_ioctl_kdsigaccept, TYPE_INT) + IOCTL(RTC_AIE_ON, 0, TYPE_NULL) + IOCTL(RTC_AIE_OFF, 0, TYPE_NULL) + IOCTL(RTC_UIE_ON, 0, TYPE_NULL) + IOCTL(RTC_UIE_OFF, 0, TYPE_NULL) + IOCTL(RTC_PIE_ON, 0, TYPE_NULL) + IOCTL(RTC_PIE_OFF, 0, TYPE_NULL) + IOCTL(RTC_WIE_ON, 0, TYPE_NULL) + IOCTL(RTC_WIE_OFF, 0, TYPE_NULL) + IOCTL(RTC_ALM_READ, IOC_R, MK_PTR(MK_STRUCT(STRUCT_rtc_time))) + IOCTL(RTC_ALM_SET, IOC_W, MK_PTR(MK_STRUCT(STRUCT_rtc_time))) + IOCTL(RTC_RD_TIME, IOC_R, MK_PTR(MK_STRUCT(STRUCT_rtc_time))) + IOCTL(RTC_SET_TIME, IOC_W, MK_PTR(MK_STRUCT(STRUCT_rtc_time))) + IOCTL(RTC_IRQP_READ, IOC_R, MK_PTR(TYPE_ULONG)) + IOCTL(RTC_IRQP_SET, IOC_W, TYPE_ULONG) + IOCTL(RTC_EPOCH_READ, IOC_R, MK_PTR(TYPE_ULONG)) + IOCTL(RTC_EPOCH_SET, IOC_W, TYPE_ULONG) + IOCTL(RTC_WKALM_RD, IOC_R, MK_PTR(MK_STRUCT(STRUCT_rtc_wkalrm))) + IOCTL(RTC_WKALM_SET, IOC_W, MK_PTR(MK_STRUCT(STRUCT_rtc_wkalrm))) + IOCTL(RTC_PLL_GET, IOC_R, MK_PTR(MK_STRUCT(STRUCT_rtc_pll_info))) + IOCTL(RTC_PLL_SET, IOC_W, MK_PTR(MK_STRUCT(STRUCT_rtc_pll_info))) + IOCTL(RTC_VL_READ, IOC_R, MK_PTR(TYPE_INT)) + IOCTL(RTC_VL_CLR, 0, TYPE_NULL) + IOCTL(BLKROSET, IOC_W, MK_PTR(TYPE_INT)) IOCTL(BLKROGET, IOC_R, MK_PTR(TYPE_INT)) IOCTL(BLKRRPART, 0, TYPE_NULL) @@ -114,7 +137,13 @@ IOCTL(FDMSGON, 0, TYPE_NULL) IOCTL(FDMSGOFF, 0, TYPE_NULL) + IOCTL(FDSETEMSGTRESH, 0, TYPE_NULL) + IOCTL(FDFMTBEG, 0, TYPE_NULL) + IOCTL(FDFMTTRK, IOC_W, MK_PTR(MK_STRUCT(STRUCT_format_descr))) + IOCTL(FDFMTEND, 0, TYPE_NULL) IOCTL(FDFLUSH, 0, TYPE_NULL) + IOCTL(FDSETMAXERRS, IOC_W, MK_PTR(MK_STRUCT(STRUCT_floppy_max_errors))) + IOCTL(FDGETMAXERRS, IOC_R, MK_PTR(MK_STRUCT(STRUCT_floppy_max_errors))) IOCTL(FDRESET, 0, TYPE_NULL) IOCTL(FDRAWCMD, 0, TYPE_NULL) IOCTL(FDTWADDLE, 0, TYPE_NULL) @@ -138,6 +167,12 @@ IOCTL(FS_IOC_GETFLAGS, IOC_R, MK_PTR(TYPE_INT)) IOCTL(FS_IOC_SETFLAGS, IOC_W, MK_PTR(TYPE_INT)) + IOCTL(FS_IOC_GETVERSION, IOC_R, MK_PTR(TYPE_INT)) + IOCTL(FS_IOC_SETVERSION, IOC_W, MK_PTR(TYPE_INT)) + IOCTL(FS_IOC32_GETFLAGS, IOC_R, MK_PTR(TYPE_INT)) + IOCTL(FS_IOC32_SETFLAGS, IOC_W, MK_PTR(TYPE_INT)) + IOCTL(FS_IOC32_GETVERSION, IOC_R, MK_PTR(TYPE_INT)) + IOCTL(FS_IOC32_SETVERSION, IOC_W, MK_PTR(TYPE_INT)) #ifdef CONFIG_USBFS /* USB ioctls */ @@ -522,3 +557,9 @@ IOCTL_IGNORE(TIOCSTART) IOCTL_IGNORE(TIOCSTOP) #endif + +#ifdef CONFIG_KCOV + IOCTL(KCOV_ENABLE, 0, TYPE_NULL) + IOCTL(KCOV_DISABLE, 0, TYPE_NULL) + IOCTL(KCOV_INIT_TRACE, IOC_R, TYPE_ULONG) +#endif diff --git a/linux-user/qemu.h b/linux-user/qemu.h index f6f5fe5fbb..560a68090e 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -35,6 +35,7 @@ struct image_info { abi_ulong end_data; abi_ulong start_brk; abi_ulong brk; + abi_ulong reserve_brk; abi_ulong start_mmap; abi_ulong start_stack; abi_ulong stack_limit; diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 249e4b95fc..d60142f069 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -73,6 +73,9 @@ #ifdef CONFIG_SENDFILE #include <sys/sendfile.h> #endif +#ifdef CONFIG_KCOV +#include <sys/kcov.h> +#endif #define termios host_termios #define winsize host_winsize @@ -107,6 +110,7 @@ #include <netpacket/packet.h> #include <linux/netlink.h> #include <linux/if_alg.h> +#include <linux/rtc.h> #include "linux_loop.h" #include "uname.h" @@ -5175,6 +5179,8 @@ static abi_long do_ioctl(int fd, int cmd, abi_long arg) break; case TYPE_PTRVOID: case TYPE_INT: + case TYPE_LONG: + case TYPE_ULONG: ret = get_errno(safe_ioctl(fd, ie->host_cmd, arg)); break; case TYPE_PTR: diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index 98c2119de9..9b61ae8547 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -763,6 +763,42 @@ struct target_pollfd { #define TARGET_KDSETLED 0x4B32 /* set led state [lights, not flags] */ #define TARGET_KDSIGACCEPT 0x4B4E +struct target_rtc_pll_info { + int pll_ctrl; + int pll_value; + int pll_max; + int pll_min; + int pll_posmult; + int pll_negmult; + abi_long pll_clock; +}; + +/* real time clock ioctls */ +#define TARGET_RTC_AIE_ON TARGET_IO('p', 0x01) +#define TARGET_RTC_AIE_OFF TARGET_IO('p', 0x02) +#define TARGET_RTC_UIE_ON TARGET_IO('p', 0x03) +#define TARGET_RTC_UIE_OFF TARGET_IO('p', 0x04) +#define TARGET_RTC_PIE_ON TARGET_IO('p', 0x05) +#define TARGET_RTC_PIE_OFF TARGET_IO('p', 0x06) +#define TARGET_RTC_WIE_ON TARGET_IO('p', 0x0f) +#define TARGET_RTC_WIE_OFF TARGET_IO('p', 0x10) +#define TARGET_RTC_ALM_READ TARGET_IOR('p', 0x08, struct rtc_time) +#define TARGET_RTC_ALM_SET TARGET_IOW('p', 0x07, struct rtc_time) +#define TARGET_RTC_RD_TIME TARGET_IOR('p', 0x09, struct rtc_time) +#define TARGET_RTC_SET_TIME TARGET_IOW('p', 0x0a, struct rtc_time) +#define TARGET_RTC_IRQP_READ TARGET_IOR('p', 0x0b, abi_ulong) +#define TARGET_RTC_IRQP_SET TARGET_IOW('p', 0x0c, abi_ulong) +#define TARGET_RTC_EPOCH_READ TARGET_IOR('p', 0x0d, abi_ulong) +#define TARGET_RTC_EPOCH_SET TARGET_IOW('p', 0x0e, abi_ulong) +#define TARGET_RTC_WKALM_RD TARGET_IOR('p', 0x10, struct rtc_wkalrm) +#define TARGET_RTC_WKALM_SET TARGET_IOW('p', 0x0f, struct rtc_wkalrm) +#define TARGET_RTC_PLL_GET TARGET_IOR('p', 0x11, \ + struct target_rtc_pll_info) +#define TARGET_RTC_PLL_SET TARGET_IOW('p', 0x12, \ + struct target_rtc_pll_info) +#define TARGET_RTC_VL_READ TARGET_IOR('p', 0x13, int) +#define TARGET_RTC_VL_CLR TARGET_IO('p', 0x14) + #if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SH4) || \ defined(TARGET_XTENSA) #define TARGET_FIOGETOWN TARGET_IOR('f', 123, int) @@ -899,7 +935,13 @@ struct target_pollfd { #define TARGET_FDMSGON TARGET_IO(2, 0x45) #define TARGET_FDMSGOFF TARGET_IO(2, 0x46) +#define TARGET_FDFMTBEG TARGET_IO(2, 0x47) +#define TARGET_FDFMTTRK TARGET_IOW(2, 0x48, struct format_descr) +#define TARGET_FDFMTEND TARGET_IO(2, 0x49) +#define TARGET_FDSETEMSGTRESH TARGET_IO(2, 0x4a) #define TARGET_FDFLUSH TARGET_IO(2, 0x4b) +#define TARGET_FDSETMAXERRS TARGET_IOW(2, 0x4c, struct floppy_max_errors) +#define TARGET_FDGETMAXERRS TARGET_IOR(2, 0x0e, struct floppy_max_errors) #define TARGET_FDRESET TARGET_IO(2, 0x54) #define TARGET_FDRAWCMD TARGET_IO(2, 0x58) #define TARGET_FDTWADDLE TARGET_IO(2, 0x59) @@ -911,13 +953,19 @@ struct target_pollfd { #define TARGET_FICLONE TARGET_IOW(0x94, 9, int) #define TARGET_FICLONERANGE TARGET_IOW(0x94, 13, struct file_clone_range) -/* Note that the ioctl numbers claim type "long" but the actual type - * used by the kernel is "int". +/* + * Note that the ioctl numbers for FS_IOC_<GET|SET><FLAGS|VERSION> + * claim type "long" but the actual type used by the kernel is "int". */ #define TARGET_FS_IOC_GETFLAGS TARGET_IOR('f', 1, abi_long) #define TARGET_FS_IOC_SETFLAGS TARGET_IOW('f', 2, abi_long) - +#define TARGET_FS_IOC_GETVERSION TARGET_IOR('v', 1, abi_long) +#define TARGET_FS_IOC_SETVERSION TARGET_IOW('v', 2, abi_long) #define TARGET_FS_IOC_FIEMAP TARGET_IOWR('f',11,struct fiemap) +#define TARGET_FS_IOC32_GETFLAGS TARGET_IOR('f', 1, int) +#define TARGET_FS_IOC32_SETFLAGS TARGET_IOW('f', 2, int) +#define TARGET_FS_IOC32_GETVERSION TARGET_IOR('v', 1, int) +#define TARGET_FS_IOC32_SETVERSION TARGET_IOW('v', 2, int) /* usb ioctls */ #define TARGET_USBDEVFS_CONTROL TARGET_IOWRU('U', 0) @@ -2422,6 +2470,11 @@ struct target_mtpos { #define TARGET_MTIOCGET TARGET_IOR('m', 2, struct target_mtget) #define TARGET_MTIOCPOS TARGET_IOR('m', 3, struct target_mtpos) +/* kcov ioctls */ +#define TARGET_KCOV_ENABLE TARGET_IO('c', 100) +#define TARGET_KCOV_DISABLE TARGET_IO('c', 101) +#define TARGET_KCOV_INIT_TRACE TARGET_IOR('c', 1, abi_ulong) + struct target_sysinfo { abi_long uptime; /* Seconds since boot */ abi_ulong loads[3]; /* 1, 5, and 15 minute load averages */ diff --git a/linux-user/syscall_types.h b/linux-user/syscall_types.h index 4e36983826..5ba4155047 100644 --- a/linux-user/syscall_types.h +++ b/linux-user/syscall_types.h @@ -255,12 +255,49 @@ STRUCT(blkpg_partition, MK_ARRAY(TYPE_CHAR, BLKPG_DEVNAMELTH), /* devname */ MK_ARRAY(TYPE_CHAR, BLKPG_VOLNAMELTH)) /* volname */ +STRUCT(rtc_time, + TYPE_INT, /* tm_sec */ + TYPE_INT, /* tm_min */ + TYPE_INT, /* tm_hour */ + TYPE_INT, /* tm_mday */ + TYPE_INT, /* tm_mon */ + TYPE_INT, /* tm_year */ + TYPE_INT, /* tm_wday */ + TYPE_INT, /* tm_yday */ + TYPE_INT) /* tm_isdst */ + +STRUCT(rtc_wkalrm, + TYPE_CHAR, /* enabled */ + TYPE_CHAR, /* pending */ + MK_STRUCT(STRUCT_rtc_time)) /* time */ + +STRUCT(rtc_pll_info, + TYPE_INT, /* pll_ctrl */ + TYPE_INT, /* pll_value */ + TYPE_INT, /* pll_max */ + TYPE_INT, /* pll_min */ + TYPE_INT, /* pll_posmult */ + TYPE_INT, /* pll_negmult */ + TYPE_LONG) /* pll_clock */ + STRUCT(blkpg_ioctl_arg, TYPE_INT, /* op */ TYPE_INT, /* flags */ TYPE_INT, /* datalen */ TYPE_PTRVOID) /* data */ +STRUCT(format_descr, + TYPE_INT, /* device */ + TYPE_INT, /* head */ + TYPE_INT) /* track */ + +STRUCT(floppy_max_errors, + TYPE_INT, /* abort */ + TYPE_INT, /* read_track */ + TYPE_INT, /* reset */ + TYPE_INT, /* recal */ + TYPE_INT) /* reporting */ + #if defined(CONFIG_USBFS) /* usb device ioctls */ STRUCT(usbdevfs_ctrltransfer, diff --git a/qemu-doc.texi b/qemu-doc.texi index 39f950471f..2328e7ea47 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -633,17 +633,6 @@ encrypted disk images. * disk_images_snapshot_mode:: Snapshot mode * vm_snapshots:: VM snapshots * qemu_img_invocation:: qemu-img Invocation -* qemu_nbd_invocation:: qemu-nbd Invocation -* disk_images_formats:: Disk image file formats -* host_drives:: Using host drives -* disk_images_fat_images:: Virtual FAT disk images -* disk_images_nbd:: NBD access -* disk_images_sheepdog:: Sheepdog disk images -* disk_images_iscsi:: iSCSI LUNs -* disk_images_gluster:: GlusterFS disk images -* disk_images_ssh:: Secure Shell (ssh) disk images -* disk_images_nvme:: NVMe userspace driver -* disk_image_locking:: Disk image file locking @end menu @node disk_images_quickstart @@ -724,13 +713,6 @@ state is not saved or restored properly (in particular USB). @include qemu-img.texi -@node qemu_nbd_invocation -@subsection @code{qemu-nbd} Invocation - -@include qemu-nbd.texi - -@include docs/qemu-block-drivers.texi - @node pcsys_network @section Network emulation diff --git a/qemu-nbd.texi b/qemu-nbd.texi deleted file mode 100644 index 7f55657722..0000000000 --- a/qemu-nbd.texi +++ /dev/null @@ -1,214 +0,0 @@ -@example -@c man begin SYNOPSIS -@command{qemu-nbd} [OPTION]... @var{filename} - -@command{qemu-nbd} @option{-L} [OPTION]... - -@command{qemu-nbd} @option{-d} @var{dev} -@c man end -@end example - -@c man begin DESCRIPTION - -Export a QEMU disk image using the NBD protocol. - -Other uses: -@itemize -@item -Bind a /dev/nbdX block device to a QEMU server (on Linux). -@item -As a client to query exports of a remote NBD server. -@end itemize - -@c man end - -@c man begin OPTIONS -@var{filename} is a disk image filename, or a set of block -driver options if @option{--image-opts} is specified. - -@var{dev} is an NBD device. - -@table @option -@item --object type,id=@var{id},...props... -Define a new instance of the @var{type} object class identified by @var{id}. -See the @code{qemu(1)} manual page for full details of the properties -supported. The common object types that it makes sense to define are the -@code{secret} object, which is used to supply passwords and/or encryption -keys, and the @code{tls-creds} object, which is used to supply TLS -credentials for the qemu-nbd server or client. -@item -p, --port=@var{port} -The TCP port to listen on as a server, or connect to as a client -(default @samp{10809}). -@item -o, --offset=@var{offset} -The offset into the image. -@item -b, --bind=@var{iface} -The interface to bind to as a server, or connect to as a client -(default @samp{0.0.0.0}). -@item -k, --socket=@var{path} -Use a unix socket with path @var{path}. -@item --image-opts -Treat @var{filename} as a set of image options, instead of a plain -filename. If this flag is specified, the @var{-f} flag should -not be used, instead the '@code{format=}' option should be set. -@item -f, --format=@var{fmt} -Force the use of the block driver for format @var{fmt} instead of -auto-detecting. -@item -r, --read-only -Export the disk as read-only. -@item -P, --partition=@var{num} -Deprecated: Only expose MBR partition @var{num}. Understands physical -partitions 1-4 and logical partition 5. New code should instead use -@option{--image-opts} with the raw driver wrapping a subset of the -original image. -@item -B, --bitmap=@var{name} -If @var{filename} has a qcow2 persistent bitmap @var{name}, expose -that bitmap via the ``qemu:dirty-bitmap:@var{name}'' context -accessible through NBD_OPT_SET_META_CONTEXT. -@item -s, --snapshot -Use @var{filename} as an external snapshot, create a temporary -file with backing_file=@var{filename}, redirect the write to -the temporary one. -@item -l, --load-snapshot=@var{snapshot_param} -Load an internal snapshot inside @var{filename} and export it -as an read-only device, @var{snapshot_param} format is -'snapshot.id=[ID],snapshot.name=[NAME]' or '[ID_OR_NAME]' -@item -n, --nocache -@itemx --cache=@var{cache} -The cache mode to be used with the file. See the documentation of -the emulator's @code{-drive cache=...} option for allowed values. -@item --aio=@var{aio} -Set the asynchronous I/O mode between @samp{threads} (the default) -and @samp{native} (Linux only). -@item --discard=@var{discard} -Control whether @dfn{discard} (also known as @dfn{trim} or @dfn{unmap}) -requests are ignored or passed to the filesystem. @var{discard} is one of -@samp{ignore} (or @samp{off}), @samp{unmap} (or @samp{on}). The default is -@samp{ignore}. -@item --detect-zeroes=@var{detect-zeroes} -Control the automatic conversion of plain zero writes by the OS to -driver-specific optimized zero write commands. @var{detect-zeroes} is one of -@samp{off}, @samp{on} or @samp{unmap}. @samp{unmap} -converts a zero write to an unmap operation and can only be used if -@var{discard} is set to @samp{unmap}. The default is @samp{off}. -@item -c, --connect=@var{dev} -Connect @var{filename} to NBD device @var{dev} (Linux only). -@item -d, --disconnect -Disconnect the device @var{dev} (Linux only). -@item -e, --shared=@var{num} -Allow up to @var{num} clients to share the device (default -@samp{1}). Safe for readers, but for now, consistency is not -guaranteed between multiple writers. -@item -t, --persistent -Don't exit on the last connection. -@item -x, --export-name=@var{name} -Set the NBD volume export name (default of a zero-length string). -@item -D, --description=@var{description} -Set the NBD volume export description, as a human-readable -string. -@item -L, --list -Connect as a client and list all details about the exports exposed by -a remote NBD server. This enables list mode, and is incompatible -with options that change behavior related to a specific export (such as -@option{--export-name}, @option{--offset}, ...). -@item --tls-creds=ID -Enable mandatory TLS encryption for the server by setting the ID -of the TLS credentials object previously created with the --object -option; or provide the credentials needed for connecting as a client -in list mode. -@item --fork -Fork off the server process and exit the parent once the server is running. -@item --pid-file=PATH -Store the server's process ID in the given file. -@item --tls-authz=ID -Specify the ID of a qauthz object previously created with the ---object option. This will be used to authorize connecting users -against their x509 distinguished name. -@item -v, --verbose -Display extra debugging information. -@item -h, --help -Display this help and exit. -@item -V, --version -Display version information and exit. -@item -T, --trace [[enable=]@var{pattern}][,events=@var{file}][,file=@var{file}] -@findex --trace -@include qemu-option-trace.texi -@end table - -@c man end - -@c man begin EXAMPLES -Start a server listening on port 10809 that exposes only the -guest-visible contents of a qcow2 file, with no TLS encryption, and -with the default export name (an empty string). The command is -one-shot, and will block until the first successful client -disconnects: - -@example -qemu-nbd -f qcow2 file.qcow2 -@end example - -Start a long-running server listening with encryption on port 10810, -and whitelist clients with a specific X.509 certificate to connect to -a 1 megabyte subset of a raw file, using the export name 'subset': - -@example -qemu-nbd \ - --object tls-creds-x509,id=tls0,endpoint=server,dir=/path/to/qemutls \ - --object 'authz-simple,id=auth0,identity=CN=laptop.example.com,,\ - O=Example Org,,L=London,,ST=London,,C=GB' \ - --tls-creds tls0 --tls-authz auth0 \ - -t -x subset -p 10810 \ - --image-opts driver=raw,offset=1M,size=1M,file.driver=file,file.filename=file.raw -@end example - -Serve a read-only copy of just the first MBR partition of a guest -image over a Unix socket with as many as 5 simultaneous readers, with -a persistent process forked as a daemon: - -@example -qemu-nbd --fork --persistent --shared=5 --socket=/path/to/sock \ - --partition=1 --read-only --format=qcow2 file.qcow2 -@end example - -Expose the guest-visible contents of a qcow2 file via a block device -/dev/nbd0 (and possibly creating /dev/nbd0p1 and friends for -partitions found within), then disconnect the device when done. -Access to bind qemu-nbd to an /dev/nbd device generally requires root -privileges, and may also require the execution of @code{modprobe nbd} -to enable the kernel NBD client module. @emph{CAUTION}: Do not use -this method to mount filesystems from an untrusted guest image - a -malicious guest may have prepared the image to attempt to trigger -kernel bugs in partition probing or file system mounting. - -@example -qemu-nbd -c /dev/nbd0 -f qcow2 file.qcow2 -qemu-nbd -d /dev/nbd0 -@end example - -Query a remote server to see details about what export(s) it is -serving on port 10809, and authenticating via PSK: - -@example -qemu-nbd \ - --object tls-creds-psk,id=tls0,dir=/tmp/keys,username=eblake,endpoint=client \ - --tls-creds tls0 -L -b remote.example.com -@end example - -@c man end - -@ignore - -@setfilename qemu-nbd -@settitle QEMU Disk Network Block Device Server - -@c man begin AUTHOR -Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>. -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -@c man end - -@c man begin SEEALSO -qemu(1), qemu-img(1) -@c man end - -@end ignore diff --git a/qemu-option-trace.texi b/qemu-option-trace.texi index 7d1b7f05c5..162f1528d2 100644 --- a/qemu-option-trace.texi +++ b/qemu-option-trace.texi @@ -1,3 +1,7 @@ +@c The contents of this file must be kept in sync with qemu-option-trace.rst.inc +@c until all the users of the texi file have been converted to rst and +@c the texi file can be removed. + Specify tracing options. @table @option diff --git a/qemu-options.hx b/qemu-options.hx index 709162c159..224a8e8712 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -953,7 +953,7 @@ STEXI @findex -cdrom Use @var{file} as CD-ROM image (you cannot use @option{-hdc} and @option{-cdrom} at the same time). You can use the host CD-ROM by -using @file{/dev/cdrom} as filename (@pxref{host_drives}). +using @file{/dev/cdrom} as filename. ETEXI DEF("blockdev", HAS_ARG, QEMU_OPTION_blockdev, diff --git a/roms/edk2-funcs.sh b/roms/edk2-funcs.sh index 3f4485b201..cd6e4f2c82 100644 --- a/roms/edk2-funcs.sh +++ b/roms/edk2-funcs.sh @@ -112,6 +112,9 @@ qemu_edk2_get_cross_prefix() ( [ "$gcc_arch" == i686 ] && [ "$host_arch" == x86_64 ] ); then # no cross-compiler needed : + elif ( [ -e /etc/debian_version ] && [ "$gcc_arch" == arm ] ); then + # force soft-float cross-compiler on Debian + printf 'arm-linux-gnueabi-' else printf '%s-linux-gnu-\n' "$gcc_arch" fi diff --git a/scripts/git.orderfile b/scripts/git.orderfile index e89790941c..1f747b583a 100644 --- a/scripts/git.orderfile +++ b/scripts/git.orderfile @@ -25,5 +25,8 @@ qga/*.json # headers *.h +# decoding tree specification +*.decode + # code *.c diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c index 26a2c09868..2345dec3c2 100644 --- a/target/arm/arch_dump.c +++ b/target/arm/arch_dump.c @@ -62,12 +62,23 @@ struct aarch64_user_vfp_state { QEMU_BUILD_BUG_ON(sizeof(struct aarch64_user_vfp_state) != 528); +/* struct user_sve_header from arch/arm64/include/uapi/asm/ptrace.h */ +struct aarch64_user_sve_header { + uint32_t size; + uint32_t max_size; + uint16_t vl; + uint16_t max_vl; + uint16_t flags; + uint16_t reserved; +} QEMU_PACKED; + struct aarch64_note { Elf64_Nhdr hdr; char name[8]; /* align_up(sizeof("CORE"), 4) */ union { struct aarch64_elf_prstatus prstatus; struct aarch64_user_vfp_state vfp; + struct aarch64_user_sve_header sve; }; } QEMU_PACKED; @@ -76,6 +87,8 @@ struct aarch64_note { (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_elf_prstatus)) #define AARCH64_PRFPREG_NOTE_SIZE \ (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_user_vfp_state)) +#define AARCH64_SVE_NOTE_SIZE(env) \ + (AARCH64_NOTE_HEADER_SIZE + sve_size(env)) static void aarch64_note_init(struct aarch64_note *note, DumpState *s, const char *name, Elf64_Word namesz, @@ -128,11 +141,102 @@ static int aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f, return 0; } +#ifdef TARGET_AARCH64 +static off_t sve_zreg_offset(uint32_t vq, int n) +{ + off_t off = sizeof(struct aarch64_user_sve_header); + return ROUND_UP(off, 16) + vq * 16 * n; +} + +static off_t sve_preg_offset(uint32_t vq, int n) +{ + return sve_zreg_offset(vq, 32) + vq * 16 / 8 * n; +} + +static off_t sve_fpsr_offset(uint32_t vq) +{ + off_t off = sve_preg_offset(vq, 17); + return ROUND_UP(off, 16); +} + +static off_t sve_fpcr_offset(uint32_t vq) +{ + return sve_fpsr_offset(vq) + sizeof(uint32_t); +} + +static uint32_t sve_current_vq(CPUARMState *env) +{ + return sve_zcr_len_for_el(env, arm_current_el(env)) + 1; +} + +static size_t sve_size_vq(uint32_t vq) +{ + off_t off = sve_fpcr_offset(vq) + sizeof(uint32_t); + return ROUND_UP(off, 16); +} + +static size_t sve_size(CPUARMState *env) +{ + return sve_size_vq(sve_current_vq(env)); +} + +static int aarch64_write_elf64_sve(WriteCoreDumpFunction f, + CPUARMState *env, int cpuid, + DumpState *s) +{ + struct aarch64_note *note; + ARMCPU *cpu = env_archcpu(env); + uint32_t vq = sve_current_vq(env); + uint64_t tmp[ARM_MAX_VQ * 2], *r; + uint32_t fpr; + uint8_t *buf; + int ret, i; + + note = g_malloc0(AARCH64_SVE_NOTE_SIZE(env)); + buf = (uint8_t *)¬e->sve; + + aarch64_note_init(note, s, "LINUX", 6, NT_ARM_SVE, sve_size_vq(vq)); + + note->sve.size = cpu_to_dump32(s, sve_size_vq(vq)); + note->sve.max_size = cpu_to_dump32(s, sve_size_vq(cpu->sve_max_vq)); + note->sve.vl = cpu_to_dump16(s, vq * 16); + note->sve.max_vl = cpu_to_dump16(s, cpu->sve_max_vq * 16); + note->sve.flags = cpu_to_dump16(s, 1); + + for (i = 0; i < 32; ++i) { + r = sve_bswap64(tmp, &env->vfp.zregs[i].d[0], vq * 2); + memcpy(&buf[sve_zreg_offset(vq, i)], r, vq * 16); + } + + for (i = 0; i < 17; ++i) { + r = sve_bswap64(tmp, r = &env->vfp.pregs[i].p[0], + DIV_ROUND_UP(vq * 2, 8)); + memcpy(&buf[sve_preg_offset(vq, i)], r, vq * 16 / 8); + } + + fpr = cpu_to_dump32(s, vfp_get_fpsr(env)); + memcpy(&buf[sve_fpsr_offset(vq)], &fpr, sizeof(uint32_t)); + + fpr = cpu_to_dump32(s, vfp_get_fpcr(env)); + memcpy(&buf[sve_fpcr_offset(vq)], &fpr, sizeof(uint32_t)); + + ret = f(note, AARCH64_SVE_NOTE_SIZE(env), s); + g_free(note); + + if (ret < 0) { + return -1; + } + + return 0; +} +#endif + int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, int cpuid, void *opaque) { struct aarch64_note note; - CPUARMState *env = &ARM_CPU(cs)->env; + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; DumpState *s = opaque; uint64_t pstate, sp; int ret, i; @@ -163,7 +267,18 @@ int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, return -1; } - return aarch64_write_elf64_prfpreg(f, env, cpuid, s); + ret = aarch64_write_elf64_prfpreg(f, env, cpuid, s); + if (ret) { + return ret; + } + +#ifdef TARGET_AARCH64 + if (cpu_isar_feature(aa64_sve, cpu)) { + ret = aarch64_write_elf64_sve(f, env, cpuid, s); + } +#endif + + return ret; } /* struct pt_regs from arch/arm/include/asm/ptrace.h */ @@ -335,6 +450,11 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) if (class == ELFCLASS64) { note_size = AARCH64_PRSTATUS_NOTE_SIZE; note_size += AARCH64_PRFPREG_NOTE_SIZE; +#ifdef TARGET_AARCH64 + if (cpu_isar_feature(aa64_sve, cpu)) { + note_size += AARCH64_SVE_NOTE_SIZE(env); + } +#endif } else { note_size = ARM_PRSTATUS_NOTE_SIZE; if (arm_feature(env, ARM_FEATURE_VFP)) { diff --git a/target/arm/cpu.c b/target/arm/cpu.c index d62fd5fdc6..64cd0a7d73 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -2121,6 +2121,7 @@ static void cortex_r5_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V7); set_feature(&cpu->env, ARM_FEATURE_V7MP); set_feature(&cpu->env, ARM_FEATURE_PMSA); + set_feature(&cpu->env, ARM_FEATURE_PMU); cpu->midr = 0x411fc153; /* r1p3 */ cpu->id_pfr0 = 0x0131; cpu->id_pfr1 = 0x001; diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 40f2c45e17..c1aedbeac0 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -980,6 +980,31 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq); void aarch64_sve_change_el(CPUARMState *env, int old_el, int new_el, bool el0_a64); void aarch64_add_sve_properties(Object *obj); + +/* + * SVE registers are encoded in KVM's memory in an endianness-invariant format. + * The byte at offset i from the start of the in-memory representation contains + * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the + * lowest offsets are stored in the lowest memory addresses, then that nearly + * matches QEMU's representation, which is to use an array of host-endian + * uint64_t's, where the lower offsets are at the lower indices. To complete + * the translation we just need to byte swap the uint64_t's on big-endian hosts. + */ +static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) +{ +#ifdef HOST_WORDS_BIGENDIAN + int i; + + for (i = 0; i < nr; ++i) { + dst[i] = bswap64(src[i]); + } + + return dst; +#else + return src; +#endif +} + #else static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { } static inline void aarch64_sve_change_el(CPUARMState *env, int o, diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index 876184b8fe..e2da756e65 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -877,30 +877,6 @@ static int kvm_arch_put_fpsimd(CPUState *cs) } /* - * SVE registers are encoded in KVM's memory in an endianness-invariant format. - * The byte at offset i from the start of the in-memory representation contains - * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the - * lowest offsets are stored in the lowest memory addresses, then that nearly - * matches QEMU's representation, which is to use an array of host-endian - * uint64_t's, where the lower offsets are at the lower indices. To complete - * the translation we just need to byte swap the uint64_t's on big-endian hosts. - */ -static uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) -{ -#ifdef HOST_WORDS_BIGENDIAN - int i; - - for (i = 0; i < nr; ++i) { - dst[i] = bswap64(src[i]); - } - - return dst; -#else - return src; -#endif -} - -/* * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits * and PREGS and the FFR have a slice size of 256 bits. However we simply hard * code the slice index to zero for now as it's unlikely we'll need more than diff --git a/target/arm/pauth_helper.c b/target/arm/pauth_helper.c index d3194f2043..0a5f41e10c 100644 --- a/target/arm/pauth_helper.c +++ b/target/arm/pauth_helper.c @@ -89,7 +89,7 @@ static uint64_t pac_sub(uint64_t i) uint64_t o = 0; int b; - for (b = 0; b < 64; b += 16) { + for (b = 0; b < 64; b += 4) { o |= (uint64_t)sub[(i >> b) & 0xf] << b; } return o; @@ -104,7 +104,7 @@ static uint64_t pac_inv_sub(uint64_t i) uint64_t o = 0; int b; - for (b = 0; b < 64; b += 16) { + for (b = 0; b < 64; b += 4) { o |= (uint64_t)inv_sub[(i >> b) & 0xf] << b; } return o; diff --git a/target/m68k/translate.c b/target/m68k/translate.c index 31b743717e..0f80888203 100644 --- a/target/m68k/translate.c +++ b/target/m68k/translate.c @@ -289,16 +289,21 @@ static void gen_jmp(DisasContext *s, TCGv dest) s->base.is_jmp = DISAS_JUMP; } -static void gen_exception(DisasContext *s, uint32_t dest, int nr) +static void gen_raise_exception(int nr) { TCGv_i32 tmp; - update_cc_op(s); - tcg_gen_movi_i32(QREG_PC, dest); - tmp = tcg_const_i32(nr); gen_helper_raise_exception(cpu_env, tmp); tcg_temp_free_i32(tmp); +} + +static void gen_exception(DisasContext *s, uint32_t dest, int nr) +{ + update_cc_op(s); + tcg_gen_movi_i32(QREG_PC, dest); + + gen_raise_exception(nr); s->base.is_jmp = DISAS_NORETURN; } @@ -6198,29 +6203,36 @@ static void m68k_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); - if (dc->base.is_jmp == DISAS_NORETURN) { - return; - } - if (dc->base.singlestep_enabled) { - gen_helper_raise_exception(cpu_env, tcg_const_i32(EXCP_DEBUG)); - return; - } - switch (dc->base.is_jmp) { + case DISAS_NORETURN: + break; case DISAS_TOO_MANY: update_cc_op(dc); - gen_jmp_tb(dc, 0, dc->pc); + if (dc->base.singlestep_enabled) { + tcg_gen_movi_i32(QREG_PC, dc->pc); + gen_raise_exception(EXCP_DEBUG); + } else { + gen_jmp_tb(dc, 0, dc->pc); + } break; case DISAS_JUMP: /* We updated CC_OP and PC in gen_jmp/gen_jmp_im. */ - tcg_gen_lookup_and_goto_ptr(); + if (dc->base.singlestep_enabled) { + gen_raise_exception(EXCP_DEBUG); + } else { + tcg_gen_lookup_and_goto_ptr(); + } break; case DISAS_EXIT: /* * We updated CC_OP and PC in gen_exit_tb, but also modified * other state that may require returning to the main loop. */ - tcg_gen_exit_tb(NULL, 0); + if (dc->base.singlestep_enabled) { + gen_raise_exception(EXCP_DEBUG); + } else { + tcg_gen_exit_tb(NULL, 0); + } break; default: g_assert_not_reached(); diff --git a/tests/data/acpi/q35/DSDT b/tests/data/acpi/q35/DSDT Binary files differindex 77ea60ffed..1f91888d7a 100644 --- a/tests/data/acpi/q35/DSDT +++ b/tests/data/acpi/q35/DSDT diff --git a/tests/data/acpi/q35/DSDT.acpihmat b/tests/data/acpi/q35/DSDT.acpihmat Binary files differindex 30e3717b5b..3586f6368a 100644 --- a/tests/data/acpi/q35/DSDT.acpihmat +++ b/tests/data/acpi/q35/DSDT.acpihmat diff --git a/tests/data/acpi/q35/DSDT.bridge b/tests/data/acpi/q35/DSDT.bridge Binary files differindex fbc2d40000..eae3a2a865 100644 --- a/tests/data/acpi/q35/DSDT.bridge +++ b/tests/data/acpi/q35/DSDT.bridge diff --git a/tests/data/acpi/q35/DSDT.cphp b/tests/data/acpi/q35/DSDT.cphp Binary files differindex 6a896cb214..53d735a4de 100644 --- a/tests/data/acpi/q35/DSDT.cphp +++ b/tests/data/acpi/q35/DSDT.cphp diff --git a/tests/data/acpi/q35/DSDT.dimmpxm b/tests/data/acpi/q35/DSDT.dimmpxm Binary files differindex 23fdf5e60a..02ccdd5f38 100644 --- a/tests/data/acpi/q35/DSDT.dimmpxm +++ b/tests/data/acpi/q35/DSDT.dimmpxm diff --git a/tests/data/acpi/q35/DSDT.ipmibt b/tests/data/acpi/q35/DSDT.ipmibt Binary files differindex c3fca0a71e..9e2d4f785c 100644 --- a/tests/data/acpi/q35/DSDT.ipmibt +++ b/tests/data/acpi/q35/DSDT.ipmibt diff --git a/tests/data/acpi/q35/DSDT.memhp b/tests/data/acpi/q35/DSDT.memhp Binary files differindex 2abd0e36cd..baefa611ac 100644 --- a/tests/data/acpi/q35/DSDT.memhp +++ b/tests/data/acpi/q35/DSDT.memhp diff --git a/tests/data/acpi/q35/DSDT.mmio64 b/tests/data/acpi/q35/DSDT.mmio64 Binary files differindex b32034a11c..aae0ea2110 100644 --- a/tests/data/acpi/q35/DSDT.mmio64 +++ b/tests/data/acpi/q35/DSDT.mmio64 diff --git a/tests/data/acpi/q35/DSDT.numamem b/tests/data/acpi/q35/DSDT.numamem Binary files differindex d8b2b47f8b..859a2e0871 100644 --- a/tests/data/acpi/q35/DSDT.numamem +++ b/tests/data/acpi/q35/DSDT.numamem diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index f1ac2d7e96..3ab4872bd7 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -16,7 +16,10 @@ * 1. add empty files for new tables, if any, under tests/data/acpi * 2. list any changed files in tests/bios-tables-test-allowed-diff.h * 3. commit the above *before* making changes that affect the tables - * Maintainer: + * + * Contributor or ACPI Maintainer (steps 4-7 need to be redone to resolve conflicts + * in binary commit created in step 6): + * * After 1-3 above tests will pass but ignore differences with the expected files. * You will also notice that tests/bios-tables-test-allowed-diff.h lists * a bunch of files. This is your hint that you need to do the below: @@ -28,13 +31,23 @@ * output. If not - disassemble them yourself in any way you like. * Look at the differences - make sure they make sense and match what the * changes you are merging are supposed to do. + * Save the changes, preferably in form of ASL diff for the commit log in + * step 6. * * 5. From build directory, run: * $(SRC_PATH)/tests/data/acpi/rebuild-expected-aml.sh - * 6. Now commit any changes. - * 7. Before doing a pull request, make sure tests/bios-tables-test-allowed-diff.h - * is empty - this will ensure following changes to ACPI tables will - * be noticed. + * 6. Now commit any changes to the expected binary, include diff from step 4 + * in commit log. + * 7. Before sending patches to the list (Contributor) + * or before doing a pull request (Maintainer), make sure + * tests/bios-tables-test-allowed-diff.h is empty - this will ensure + * following changes to ACPI tables will be noticed. + * + * The resulting patchset/pull request then looks like this: + * - patch 1: list changed files in tests/bios-tables-test-allowed-diff.h. + * - patches 2 - n: real changes, may contain multiple patches. + * - patch n + 1: update golden master binaries and empty + * tests/bios-tables-test-allowed-diff.h */ #include "qemu/osdep.h" diff --git a/tests/qtest/q35-test.c b/tests/qtest/q35-test.c index a68183d513..c922d81bc0 100644 --- a/tests/qtest/q35-test.c +++ b/tests/qtest/q35-test.c @@ -186,6 +186,109 @@ static void test_tseg_size(const void *data) qtest_quit(qts); } +#define SMBASE 0x30000 +#define SMRAM_TEST_PATTERN 0x32 +#define SMRAM_TEST_RESET_PATTERN 0x23 + +static void test_smram_smbase_lock(void) +{ + QPCIBus *pcibus; + QPCIDevice *pcidev; + QDict *response; + QTestState *qts; + int i; + + qts = qtest_init("-M q35"); + + pcibus = qpci_new_pc(qts, NULL); + g_assert(pcibus != NULL); + + pcidev = qpci_device_find(pcibus, 0); + g_assert(pcidev != NULL); + + /* check that SMRAM is not enabled by default */ + g_assert(qpci_config_readb(pcidev, MCH_HOST_BRIDGE_F_SMBASE) == 0); + qtest_writeb(qts, SMBASE, SMRAM_TEST_PATTERN); + g_assert_cmpint(qtest_readb(qts, SMBASE), ==, SMRAM_TEST_PATTERN); + + /* check that writing junk to 0x9c before before negotiating is ignored */ + for (i = 0; i < 0xff; i++) { + qpci_config_writeb(pcidev, MCH_HOST_BRIDGE_F_SMBASE, i); + g_assert(qpci_config_readb(pcidev, MCH_HOST_BRIDGE_F_SMBASE) == 0); + } + + /* enable SMRAM at SMBASE */ + qpci_config_writeb(pcidev, MCH_HOST_BRIDGE_F_SMBASE, 0xff); + g_assert(qpci_config_readb(pcidev, MCH_HOST_BRIDGE_F_SMBASE) == 0x01); + /* lock SMRAM at SMBASE */ + qpci_config_writeb(pcidev, MCH_HOST_BRIDGE_F_SMBASE, 0x02); + g_assert(qpci_config_readb(pcidev, MCH_HOST_BRIDGE_F_SMBASE) == 0x02); + + /* check that SMRAM at SMBASE is locked and can't be unlocked */ + g_assert_cmpint(qtest_readb(qts, SMBASE), ==, 0xff); + for (i = 0; i <= 0xff; i++) { + /* make sure register is immutable */ + qpci_config_writeb(pcidev, MCH_HOST_BRIDGE_F_SMBASE, i); + g_assert(qpci_config_readb(pcidev, MCH_HOST_BRIDGE_F_SMBASE) == 0x02); + + /* RAM access should go into black hole */ + qtest_writeb(qts, SMBASE, SMRAM_TEST_PATTERN); + g_assert_cmpint(qtest_readb(qts, SMBASE), ==, 0xff); + } + + /* reset */ + response = qtest_qmp(qts, "{'execute': 'system_reset', 'arguments': {} }"); + g_assert(response); + g_assert(!qdict_haskey(response, "error")); + qobject_unref(response); + + /* check RAM at SMBASE is available after reset */ + g_assert_cmpint(qtest_readb(qts, SMBASE), ==, SMRAM_TEST_PATTERN); + g_assert(qpci_config_readb(pcidev, MCH_HOST_BRIDGE_F_SMBASE) == 0); + qtest_writeb(qts, SMBASE, SMRAM_TEST_RESET_PATTERN); + g_assert_cmpint(qtest_readb(qts, SMBASE), ==, SMRAM_TEST_RESET_PATTERN); + + g_free(pcidev); + qpci_free_pc(pcibus); + + qtest_quit(qts); +} + +static void test_without_smram_base(void) +{ + QPCIBus *pcibus; + QPCIDevice *pcidev; + QTestState *qts; + int i; + + qts = qtest_init("-M pc-q35-4.1"); + + pcibus = qpci_new_pc(qts, NULL); + g_assert(pcibus != NULL); + + pcidev = qpci_device_find(pcibus, 0); + g_assert(pcidev != NULL); + + /* check that RAM is accessible */ + qtest_writeb(qts, SMBASE, SMRAM_TEST_PATTERN); + g_assert_cmpint(qtest_readb(qts, SMBASE), ==, SMRAM_TEST_PATTERN); + + /* check that writing to 0x9c succeeds */ + for (i = 0; i <= 0xff; i++) { + qpci_config_writeb(pcidev, MCH_HOST_BRIDGE_F_SMBASE, i); + g_assert(qpci_config_readb(pcidev, MCH_HOST_BRIDGE_F_SMBASE) == i); + } + + /* check that RAM is still accessible */ + qtest_writeb(qts, SMBASE, SMRAM_TEST_PATTERN + 1); + g_assert_cmpint(qtest_readb(qts, SMBASE), ==, (SMRAM_TEST_PATTERN + 1)); + + g_free(pcidev); + qpci_free_pc(pcibus); + + qtest_quit(qts); +} + int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); @@ -197,5 +300,7 @@ int main(int argc, char **argv) qtest_add_data_func("/q35/tseg-size/8mb", &tseg_8mb, test_tseg_size); qtest_add_data_func("/q35/tseg-size/ext/16mb", &tseg_ext_16mb, test_tseg_size); + qtest_add_func("/q35/smram/smbase_lock", test_smram_smbase_lock); + qtest_add_func("/q35/smram/legacy_smbase", test_without_smram_base); return g_test_run(); } diff --git a/tests/tcg/aarch64/Makefile.softmmu-target b/tests/tcg/aarch64/Makefile.softmmu-target index 7b4eede3f0..f6b5121f5c 100644 --- a/tests/tcg/aarch64/Makefile.softmmu-target +++ b/tests/tcg/aarch64/Makefile.softmmu-target @@ -61,4 +61,7 @@ run-memory-replay: memory-replay run-memory-record $(QEMU_OPTS) memory, \ "$< on $(TARGET_NAME)") -EXTRA_TESTS+=memory-record memory-replay +run-pauth-3: pauth-3 +pauth-3: CFLAGS += -march=armv8.3-a + +EXTRA_TESTS+=memory-record memory-replay pauth-3 diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target index df3fe8032c..efa67cf1e9 100644 --- a/tests/tcg/aarch64/Makefile.target +++ b/tests/tcg/aarch64/Makefile.target @@ -18,8 +18,9 @@ run-fcvt: fcvt $(call diff-out,$<,$(AARCH64_SRC)/fcvt.ref) # Pauth Tests -AARCH64_TESTS += pauth-1 pauth-2 +AARCH64_TESTS += pauth-1 pauth-2 pauth-4 run-pauth-%: QEMU_OPTS += -cpu max +pauth-%: CFLAGS += -march=armv8.3-a # Semihosting smoke test for linux-user AARCH64_TESTS += semihosting diff --git a/tests/tcg/aarch64/pauth-1.c b/tests/tcg/aarch64/pauth-1.c index a3c1443cd0..ea0984ea82 100644 --- a/tests/tcg/aarch64/pauth-1.c +++ b/tests/tcg/aarch64/pauth-1.c @@ -2,8 +2,6 @@ #include <sys/prctl.h> #include <stdio.h> -asm(".arch armv8.4-a"); - #ifndef PR_PAC_RESET_KEYS #define PR_PAC_RESET_KEYS 54 #define PR_PAC_APDAKEY (1 << 2) diff --git a/tests/tcg/aarch64/pauth-2.c b/tests/tcg/aarch64/pauth-2.c index 2fe030ba3d..9bba0beb63 100644 --- a/tests/tcg/aarch64/pauth-2.c +++ b/tests/tcg/aarch64/pauth-2.c @@ -1,8 +1,6 @@ #include <stdint.h> #include <assert.h> -asm(".arch armv8.4-a"); - void do_test(uint64_t value) { uint64_t salt1, salt2; diff --git a/tests/tcg/aarch64/pauth-4.c b/tests/tcg/aarch64/pauth-4.c new file mode 100644 index 0000000000..1040e92aec --- /dev/null +++ b/tests/tcg/aarch64/pauth-4.c @@ -0,0 +1,25 @@ +#include <stdint.h> +#include <assert.h> + +int main() +{ + uintptr_t x, y; + + asm("mov %0, lr\n\t" + "pacia %0, sp\n\t" /* sigill if pauth not supported */ + "eor %0, %0, #4\n\t" /* corrupt single bit */ + "mov %1, %0\n\t" + "autia %1, sp\n\t" /* validate corrupted pointer */ + "xpaci %0\n\t" /* strip pac from corrupted pointer */ + : "=r"(x), "=r"(y)); + + /* + * Once stripped, the corrupted pointer is of the form 0x0000...wxyz. + * We expect the autia to indicate failure, producing a pointer of the + * form 0x000e....wxyz. Use xpaci and != for the test, rather than + * extracting explicit bits from the top, because the location of the + * error code "e" depends on the configuration of virtual memory. + */ + assert(x != y); + return 0; +} diff --git a/tests/tcg/aarch64/system/pauth-3.c b/tests/tcg/aarch64/system/pauth-3.c new file mode 100644 index 0000000000..42eff4d5ea --- /dev/null +++ b/tests/tcg/aarch64/system/pauth-3.c @@ -0,0 +1,40 @@ +#include <inttypes.h> +#include <minilib.h> + +int main() +{ + /* + * Test vector from QARMA paper (https://eprint.iacr.org/2016/444.pdf) + * to verify one computation of the pauth_computepac() function, + * which uses sbox2. + * + * Use PACGA, because it returns the most bits from ComputePAC. + * We still only get the most significant 32-bits of the result. + */ + + static const uint64_t d[5] = { + 0xfb623599da6e8127ull, + 0x477d469dec0b8762ull, + 0x84be85ce9804e94bull, + 0xec2802d4e0a488e9ull, + 0xc003b93999b33765ull & 0xffffffff00000000ull + }; + uint64_t r; + + asm("msr apgakeyhi_el1, %[w0]\n\t" + "msr apgakeylo_el1, %[k0]\n\t" + "pacga %[r], %[P], %[T]" + : [r] "=r"(r) + : [P] "r" (d[0]), + [T] "r" (d[1]), + [w0] "r" (d[2]), + [k0] "r" (d[3])); + + if (r == d[4]) { + ml_printf("OK\n"); + return 0; + } else { + ml_printf("FAIL: %lx != %lx\n", r, d[4]); + return 1; + } +} diff --git a/util/cacheinfo.c b/util/cacheinfo.c index ea6f3e99bf..d94dc6adc8 100644 --- a/util/cacheinfo.c +++ b/util/cacheinfo.c @@ -93,10 +93,16 @@ static void sys_cache_info(int *isize, int *dsize) static void sys_cache_info(int *isize, int *dsize) { # ifdef _SC_LEVEL1_ICACHE_LINESIZE - *isize = sysconf(_SC_LEVEL1_ICACHE_LINESIZE); + int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE); + if (tmp_isize > 0) { + *isize = tmp_isize; + } # endif # ifdef _SC_LEVEL1_DCACHE_LINESIZE - *dsize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); + int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE); + if (tmp_dsize > 0) { + *dsize = tmp_dsize; + } # endif } #endif /* sys_cache_info */ @@ -2755,8 +2755,6 @@ static int do_configure_accelerator(void *opaque, QemuOpts *opts, Error **errp) static void configure_accelerators(const char *progname) { const char *accel; - char **accel_list, **tmp; - bool accel_initialised = false; bool init_failed = false; qemu_opts_foreach(qemu_find_opts("icount"), @@ -2764,26 +2762,33 @@ static void configure_accelerators(const char *progname) accel = qemu_opt_get(qemu_get_machine_opts(), "accel"); if (QTAILQ_EMPTY(&qemu_accel_opts.head)) { + char **accel_list, **tmp; + if (accel == NULL) { /* Select the default accelerator */ - if (!accel_find("tcg") && !accel_find("kvm")) { - error_report("No accelerator selected and" - " no default accelerator available"); - exit(1); - } else { - int pnlen = strlen(progname); - if (pnlen >= 3 && g_str_equal(&progname[pnlen - 3], "kvm")) { + bool have_tcg = accel_find("tcg"); + bool have_kvm = accel_find("kvm"); + + if (have_tcg && have_kvm) { + if (g_str_has_suffix(progname, "kvm")) { /* If the program name ends with "kvm", we prefer KVM */ accel = "kvm:tcg"; } else { accel = "tcg:kvm"; } + } else if (have_kvm) { + accel = "kvm"; + } else if (have_tcg) { + accel = "tcg"; + } else { + error_report("No accelerator selected and" + " no default accelerator available"); + exit(1); } } - accel_list = g_strsplit(accel, ":", 0); - for (tmp = accel_list; !accel_initialised && tmp && *tmp; tmp++) { + for (tmp = accel_list; *tmp; tmp++) { /* * Filter invalid accelerators here, to prevent obscenities * such as "-machine accel=tcg,,thread=single". |