aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore3
-rw-r--r--MAINTAINERS9
-rw-r--r--accel/tcg/cpu-exec.c24
-rw-r--r--accel/tcg/internal.h91
-rw-r--r--accel/tcg/meson.build1
-rw-r--r--accel/tcg/tb-maint.c704
-rw-r--r--accel/tcg/tcg-accel-ops-mttcg.c5
-rw-r--r--accel/tcg/tcg-accel-ops-rr.c9
-rw-r--r--accel/tcg/translate-all.c1017
-rw-r--r--accel/tcg/translator.c9
-rw-r--r--accel/tcg/user-exec.c42
-rw-r--r--block.c14
-rw-r--r--block/blkio.c1008
-rw-r--r--block/blkverify.c4
-rw-r--r--block/block-backend.c8
-rw-r--r--block/block-ram-registrar.c58
-rw-r--r--block/crypto.c4
-rw-r--r--block/file-posix.c1
-rw-r--r--block/gluster.c1
-rw-r--r--block/io.c101
-rw-r--r--block/meson.build2
-rw-r--r--block/mirror.c2
-rw-r--r--block/nbd.c1
-rw-r--r--block/nvme.c20
-rw-r--r--block/parallels.c1
-rw-r--r--block/qcow.c2
-rw-r--r--block/qed.c1
-rw-r--r--block/raw-format.c2
-rw-r--r--block/replication.c1
-rw-r--r--block/ssh.c1
-rw-r--r--block/vhdx.c1
-rw-r--r--bsd-user/mmap.c2
-rw-r--r--contrib/elf2dmp/main.c1
-rw-r--r--cpu.c4
-rw-r--r--docs/devel/reset.rst8
-rw-r--r--docs/system/arm/emulation.rst2
-rw-r--r--dump/dump.c288
-rw-r--r--dump/win_dump.c7
-rw-r--r--hw/9pfs/9p.c2
-rw-r--r--hw/arm/aspeed.c19
-rw-r--r--hw/arm/aspeed_ast2600.c2
-rw-r--r--hw/arm/boot.c2
-rw-r--r--hw/arm/mps2-tz.c4
-rw-r--r--hw/arm/nseries.c4
-rw-r--r--hw/arm/virt.c5
-rw-r--r--hw/block/m25p80.c52
-rw-r--r--hw/block/m25p80_sfdp.c332
-rw-r--r--hw/block/m25p80_sfdp.h29
-rw-r--r--hw/block/meson.build1
-rw-r--r--hw/block/trace-events1
-rw-r--r--hw/block/virtio-blk.c39
-rw-r--r--hw/char/exynos4210_uart.c2
-rw-r--r--hw/core/numa.c26
-rw-r--r--hw/core/reset.c17
-rw-r--r--hw/core/resettable.c3
-rw-r--r--hw/core/sysbus-fdt.c5
-rw-r--r--hw/display/blizzard.c2
-rw-r--r--hw/dma/pl330.c2
-rw-r--r--hw/hppa/machine.c4
-rw-r--r--hw/hyperv/hyperv.c2
-rw-r--r--hw/i2c/aspeed_i2c.c8
-rw-r--r--hw/i386/microvm.c4
-rw-r--r--hw/i386/pc.c6
-rw-r--r--hw/i386/x86.c2
-rw-r--r--hw/m68k/q800.c33
-rw-r--r--hw/m68k/virt.c20
-rw-r--r--hw/mips/boston.c3
-rw-r--r--hw/mips/malta.c27
-rw-r--r--hw/misc/cbus.c6
-rw-r--r--hw/net/can/can_sja1000.c2
-rw-r--r--hw/nvram/eeprom93xx.c2
-rw-r--r--hw/openrisc/boot.c3
-rw-r--r--hw/ppc/pegasos2.c4
-rw-r--r--hw/ppc/pnv.c4
-rw-r--r--hw/ppc/spapr.c4
-rw-r--r--hw/riscv/boot.c3
-rw-r--r--hw/rx/rx-gdbsim.c3
-rw-r--r--hw/s390x/pv.c112
-rw-r--r--hw/s390x/s390-virtio-ccw.c10
-rw-r--r--hw/ssi/aspeed_smc.c9
-rw-r--r--hw/ssi/ssi.c18
-rw-r--r--hw/timer/imx_epit.c9
-rw-r--r--hw/timer/renesas_cmt.c2
-rw-r--r--hw/timer/renesas_tmr.c8
-rw-r--r--hw/usb/ccid-card-emulated.c2
-rw-r--r--hw/virtio/virtio-pci.c2
-rw-r--r--include/block/block-common.h9
-rw-r--r--include/block/block-global-state.h10
-rw-r--r--include/block/block_int-common.h15
-rw-r--r--include/elf.h2
-rw-r--r--include/exec/cpu-all.h22
-rw-r--r--include/exec/cpu-common.h1
-rw-r--r--include/exec/exec-all.h35
-rw-r--r--include/exec/ram_addr.h2
-rw-r--r--include/exec/translate-all.h2
-rw-r--r--include/hw/boards.h2
-rw-r--r--include/hw/core/tcg-cpu-ops.h11
-rw-r--r--include/hw/elf_ops.h2
-rw-r--r--include/hw/i2c/aspeed_i2c.h1
-rw-r--r--include/hw/s390x/pv.h19
-rw-r--r--include/hw/scsi/scsi.h1
-rw-r--r--include/hw/ssi/aspeed_smc.h2
-rw-r--r--include/hw/ssi/ssi.h3
-rw-r--r--include/hw/virtio/virtio-blk.h2
-rw-r--r--include/qemu/atomic.h16
-rw-r--r--include/qemu/coroutine.h15
-rw-r--r--include/qemu/osdep.h8
-rw-r--r--include/qemu/thread.h8
-rw-r--r--include/sysemu/block-backend-global-state.h4
-rw-r--r--include/sysemu/block-ram-registrar.h37
-rw-r--r--include/sysemu/device_tree.h9
-rw-r--r--include/sysemu/dump-arch.h3
-rw-r--r--include/sysemu/dump.h26
-rw-r--r--include/sysemu/reset.h5
-rw-r--r--linux-user/cpu_loop-common.h15
-rw-r--r--linux-user/i386/cpu_loop.c6
-rw-r--r--linux-user/ioctls.h24
-rw-r--r--linux-user/mmap.c4
-rw-r--r--linux-user/strace.c6
-rw-r--r--linux-user/strace.list3
-rw-r--r--linux-user/syscall.c100
-rw-r--r--meson.build11
-rw-r--r--meson_options.txt2
-rw-r--r--migration/savevm.c2
-rw-r--r--qapi/block-core.json77
-rw-r--r--qapi/run-state.json6
-rw-r--r--qemu-img.c6
-rw-r--r--qga/channel-posix.c23
-rw-r--r--qga/commands-bsd.c200
-rw-r--r--qga/commands-common.h51
-rw-r--r--qga/commands-linux.c286
-rw-r--r--qga/commands-posix.c639
-rw-r--r--qga/main.c13
-rw-r--r--qga/meson.build6
-rw-r--r--scripts/meson-buildoptions.sh3
-rwxr-xr-xscripts/vmstate-static-checker.py1
-rw-r--r--softmmu/device_tree.c21
-rw-r--r--softmmu/physmem.c5
-rw-r--r--softmmu/runstate.c11
-rw-r--r--stubs/meson.build1
-rw-r--r--stubs/physmem.c13
-rw-r--r--target/alpha/cpu.c9
-rw-r--r--target/alpha/translate.c6
-rw-r--r--target/arm/cpu.c50
-rw-r--r--target/arm/cpu.h23
-rw-r--r--target/arm/cpu64.c2
-rw-r--r--target/arm/helper.c31
-rw-r--r--target/arm/internals.h34
-rw-r--r--target/arm/mte_helper.c5
-rw-r--r--target/arm/ptw.c509
-rw-r--r--target/arm/translate.c22
-rw-r--r--target/avr/cpu.c11
-rw-r--r--target/avr/translate.c6
-rw-r--r--target/cris/cpu.c11
-rw-r--r--target/cris/translate.c6
-rw-r--r--target/hexagon/cpu.c9
-rw-r--r--target/hppa/cpu.c19
-rw-r--r--target/hppa/translate.c13
-rw-r--r--target/i386/kvm/kvm.c3
-rw-r--r--target/i386/tcg/tcg-cpu.c19
-rw-r--r--target/i386/tcg/translate.c15
-rw-r--r--target/i386/whpx/whpx-all.c5
-rw-r--r--target/loongarch/cpu.c11
-rw-r--r--target/loongarch/translate.c6
-rw-r--r--target/m68k/cpu.c14
-rw-r--r--target/m68k/translate.c10
-rw-r--r--target/microblaze/cpu.c11
-rw-r--r--target/microblaze/translate.c7
-rw-r--r--target/mips/cpu.c1
-rw-r--r--target/mips/tcg/tcg-internal.h3
-rw-r--r--target/mips/tcg/translate.c8
-rw-r--r--target/nios2/cpu.c11
-rw-r--r--target/nios2/translate.c6
-rw-r--r--target/openrisc/cpu.c13
-rw-r--r--target/openrisc/translate.c10
-rw-r--r--target/ppc/cpu_init.c10
-rw-r--r--target/ppc/translate.c6
-rw-r--r--target/riscv/cpu.c9
-rw-r--r--target/riscv/vector_helper.c2
-rw-r--r--target/rx/cpu.c10
-rw-r--r--target/rx/op_helper.c4
-rw-r--r--target/rx/translate.c6
-rw-r--r--target/s390x/arch_dump.c262
-rw-r--r--target/s390x/cpu.c1
-rw-r--r--target/s390x/kvm/kvm.c9
-rw-r--r--target/s390x/kvm/kvm_s390x.h1
-rw-r--r--target/s390x/kvm/meson.build2
-rw-r--r--target/s390x/kvm/stubs.c12
-rw-r--r--target/s390x/s390x-internal.h4
-rw-r--r--target/s390x/tcg/translate.c7
-rw-r--r--target/sh4/cpu.c16
-rw-r--r--target/sh4/translate.c10
-rw-r--r--target/sparc/cpu.c1
-rw-r--r--target/sparc/cpu.h3
-rw-r--r--target/sparc/translate.c7
-rw-r--r--target/tricore/cpu.c11
-rw-r--r--target/tricore/translate.c6
-rw-r--r--target/xtensa/cpu.c10
-rw-r--r--target/xtensa/translate.c6
-rw-r--r--tcg/aarch64/tcg-target.c.inc31
-rw-r--r--tcg/loongarch64/tcg-target.c.inc48
-rw-r--r--tcg/loongarch64/tcg-target.h9
-rw-r--r--tests/avocado/machine_aspeed.py16
-rw-r--r--tests/qtest/migration-test.c4
-rw-r--r--tests/qtest/modules-test.c3
-rw-r--r--tests/qtest/vhost-user-test.c4
-rw-r--r--ui/console.c2
-rw-r--r--ui/gtk.c2
-rw-r--r--ui/vnc-enc-hextile.c4
-rw-r--r--ui/vnc-jobs.c2
-rw-r--r--ui/vnc.c2
-rw-r--r--util/qemu-coroutine-lock.c9
-rw-r--r--util/vfio-helpers.c5
213 files changed, 5352 insertions, 2250 deletions
diff --git a/.gitignore b/.gitignore
index 8aab671265..61fa39967b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,13 @@
/GNUmakefile
/build/
/.cache/
+/.vscode/
*.pyc
.sdk
.stgit-*
.git-submodule-status
+.clang-format
+.gdb_history
cscope.*
tags
TAGS
diff --git a/MAINTAINERS b/MAINTAINERS
index e3d5b7e09c..64893e36bc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1915,7 +1915,7 @@ SSI
M: Alistair Francis <alistair@alistair23.me>
S: Maintained
F: hw/ssi/*
-F: hw/block/m25p80.c
+F: hw/block/m25p80*
F: include/hw/ssi/ssi.h
X: hw/ssi/xilinx_*
F: tests/qtest/m25p80-test.c
@@ -2510,6 +2510,7 @@ F: block*
F: block/
F: hw/block/
F: include/block/
+F: include/sysemu/block-*.h
F: qemu-img*
F: docs/tools/qemu-img.rst
F: qemu-io*
@@ -3415,6 +3416,12 @@ L: qemu-block@nongnu.org
S: Maintained
F: block/vdi.c
+blkio
+M: Stefan Hajnoczi <stefanha@redhat.com>
+L: qemu-block@nongnu.org
+S: Maintained
+F: block/blkio.c
+
iSCSI
M: Ronnie Sahlberg <ronniesahlberg@gmail.com>
M: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index f9e5cc9ba0..82b06c1824 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -187,13 +187,14 @@ static bool tb_lookup_cmp(const void *p, const void *d)
const struct tb_desc *desc = d;
if ((TARGET_TB_PCREL || tb_pc(tb) == desc->pc) &&
- tb->page_addr[0] == desc->page_addr0 &&
+ tb_page_addr0(tb) == desc->page_addr0 &&
tb->cs_base == desc->cs_base &&
tb->flags == desc->flags &&
tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
tb_cflags(tb) == desc->cflags) {
/* check next page if needed */
- if (tb->page_addr[1] == -1) {
+ tb_page_addr_t tb_phys_page1 = tb_page_addr1(tb);
+ if (tb_phys_page1 == -1) {
return true;
} else {
tb_page_addr_t phys_page1;
@@ -210,7 +211,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
*/
virt_page1 = TARGET_PAGE_ALIGN(desc->pc);
phys_page1 = get_page_addr_code(desc->env, virt_page1);
- if (tb->page_addr[1] == phys_page1) {
+ if (tb_phys_page1 == phys_page1) {
return true;
}
}
@@ -304,16 +305,12 @@ static void log_cpu_exec(target_ulong pc, CPUState *cpu,
}
}
-static bool check_for_breakpoints(CPUState *cpu, target_ulong pc,
- uint32_t *cflags)
+static bool check_for_breakpoints_slow(CPUState *cpu, target_ulong pc,
+ uint32_t *cflags)
{
CPUBreakpoint *bp;
bool match_page = false;
- if (likely(QTAILQ_EMPTY(&cpu->breakpoints))) {
- return false;
- }
-
/*
* Singlestep overrides breakpoints.
* This requirement is visible in the record-replay tests, where
@@ -374,6 +371,13 @@ static bool check_for_breakpoints(CPUState *cpu, target_ulong pc,
return false;
}
+static inline bool check_for_breakpoints(CPUState *cpu, target_ulong pc,
+ uint32_t *cflags)
+{
+ return unlikely(!QTAILQ_EMPTY(&cpu->breakpoints)) &&
+ check_for_breakpoints_slow(cpu, pc, cflags);
+}
+
/**
* helper_lookup_tb_ptr: quick check for next tb
* @env: current cpu state
@@ -1016,7 +1020,7 @@ int cpu_exec(CPUState *cpu)
* direct jump to a TB spanning two pages because the mapping
* for the second page can change.
*/
- if (tb->page_addr[1] != -1) {
+ if (tb_page_addr1(tb) != -1) {
last_tb = NULL;
}
#endif
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
index dc800fd485..1227bb69bd 100644
--- a/accel/tcg/internal.h
+++ b/accel/tcg/internal.h
@@ -11,12 +11,103 @@
#include "exec/exec-all.h"
+/*
+ * Access to the various translations structures need to be serialised
+ * via locks for consistency. In user-mode emulation access to the
+ * memory related structures are protected with mmap_lock.
+ * In !user-mode we use per-page locks.
+ */
+#ifdef CONFIG_SOFTMMU
+#define assert_memory_lock()
+#else
+#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
+#endif
+
+typedef struct PageDesc {
+ /* list of TBs intersecting this ram page */
+ uintptr_t first_tb;
+#ifdef CONFIG_USER_ONLY
+ unsigned long flags;
+ void *target_data;
+#endif
+#ifdef CONFIG_SOFTMMU
+ QemuSpin lock;
+#endif
+} PageDesc;
+
+/* Size of the L2 (and L3, etc) page tables. */
+#define V_L2_BITS 10
+#define V_L2_SIZE (1 << V_L2_BITS)
+
+/*
+ * L1 Mapping properties
+ */
+extern int v_l1_size;
+extern int v_l1_shift;
+extern int v_l2_levels;
+
+/*
+ * The bottom level has pointers to PageDesc, and is indexed by
+ * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
+ */
+#define V_L1_MIN_BITS 4
+#define V_L1_MAX_BITS (V_L2_BITS + 3)
+#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
+
+extern void *l1_map[V_L1_MAX_SIZE];
+
+PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc);
+
+static inline PageDesc *page_find(tb_page_addr_t index)
+{
+ return page_find_alloc(index, false);
+}
+
+/* list iterators for lists of tagged pointers in TranslationBlock */
+#define TB_FOR_EACH_TAGGED(head, tb, n, field) \
+ for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1); \
+ tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
+ tb = (TranslationBlock *)((uintptr_t)tb & ~1))
+
+#define PAGE_FOR_EACH_TB(pagedesc, tb, n) \
+ TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
+
+#define TB_FOR_EACH_JMP(head_tb, tb, n) \
+ TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
+
+/* In user-mode page locks aren't used; mmap_lock is enough */
+#ifdef CONFIG_USER_ONLY
+#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
+static inline void page_lock(PageDesc *pd) { }
+static inline void page_unlock(PageDesc *pd) { }
+#else
+#ifdef CONFIG_DEBUG_TCG
+void do_assert_page_locked(const PageDesc *pd, const char *file, int line);
+#define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
+#else
+#define assert_page_locked(pd)
+#endif
+void page_lock(PageDesc *pd);
+void page_unlock(PageDesc *pd);
+#endif
+#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_DEBUG_TCG)
+void assert_no_pages_locked(void);
+#else
+static inline void assert_no_pages_locked(void) { }
+#endif
+
TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc,
target_ulong cs_base, uint32_t flags,
int cflags);
G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
void page_init(void);
void tb_htable_init(void);
+void tb_reset_jump(TranslationBlock *tb, int n);
+TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
+ tb_page_addr_t phys_page2);
+bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc);
+int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
+ uintptr_t searched_pc, bool reset_icount);
/* Return the current PC from CPU, which may be cached in TB. */
static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
index 7a0a79d731..75e1dffb4d 100644
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@@ -3,6 +3,7 @@ tcg_ss.add(files(
'tcg-all.c',
'cpu-exec-common.c',
'cpu-exec.c',
+ 'tb-maint.c',
'tcg-runtime-gvec.c',
'tcg-runtime.c',
'translate-all.c',
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
new file mode 100644
index 0000000000..c8e921089d
--- /dev/null
+++ b/accel/tcg/tb-maint.c
@@ -0,0 +1,704 @@
+/*
+ * Translation Block Maintaince
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "exec/cputlb.h"
+#include "exec/log.h"
+#include "exec/exec-all.h"
+#include "exec/translate-all.h"
+#include "sysemu/tcg.h"
+#include "tcg/tcg.h"
+#include "tb-hash.h"
+#include "tb-context.h"
+#include "internal.h"
+
+
+static bool tb_cmp(const void *ap, const void *bp)
+{
+ const TranslationBlock *a = ap;
+ const TranslationBlock *b = bp;
+
+ return ((TARGET_TB_PCREL || tb_pc(a) == tb_pc(b)) &&
+ a->cs_base == b->cs_base &&
+ a->flags == b->flags &&
+ (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
+ a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
+ tb_page_addr0(a) == tb_page_addr0(b) &&
+ tb_page_addr1(a) == tb_page_addr1(b));
+}
+
+void tb_htable_init(void)
+{
+ unsigned int mode = QHT_MODE_AUTO_RESIZE;
+
+ qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
+}
+
+/* Set to NULL all the 'first_tb' fields in all PageDescs. */
+static void page_flush_tb_1(int level, void **lp)
+{
+ int i;
+
+ if (*lp == NULL) {
+ return;
+ }
+ if (level == 0) {
+ PageDesc *pd = *lp;
+
+ for (i = 0; i < V_L2_SIZE; ++i) {
+ page_lock(&pd[i]);
+ pd[i].first_tb = (uintptr_t)NULL;
+ page_unlock(&pd[i]);
+ }
+ } else {
+ void **pp = *lp;
+
+ for (i = 0; i < V_L2_SIZE; ++i) {
+ page_flush_tb_1(level - 1, pp + i);
+ }
+ }
+}
+
+static void page_flush_tb(void)
+{
+ int i, l1_sz = v_l1_size;
+
+ for (i = 0; i < l1_sz; i++) {
+ page_flush_tb_1(v_l2_levels, l1_map + i);
+ }
+}
+
+/* flush all the translation blocks */
+static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
+{
+ bool did_flush = false;
+
+ mmap_lock();
+ /* If it is already been done on request of another CPU, just retry. */
+ if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
+ goto done;
+ }
+ did_flush = true;
+
+ CPU_FOREACH(cpu) {
+ tcg_flush_jmp_cache(cpu);
+ }
+
+ qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
+ page_flush_tb();
+
+ tcg_region_reset_all();
+ /* XXX: flush processor icache at this point if cache flush is expensive */
+ qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
+
+done:
+ mmap_unlock();
+ if (did_flush) {
+ qemu_plugin_flush_cb();
+ }
+}
+
+void tb_flush(CPUState *cpu)
+{
+ if (tcg_enabled()) {
+ unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count);
+
+ if (cpu_in_exclusive_context(cpu)) {
+ do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
+ } else {
+ async_safe_run_on_cpu(cpu, do_tb_flush,
+ RUN_ON_CPU_HOST_INT(tb_flush_count));
+ }
+ }
+}
+
+/*
+ * user-mode: call with mmap_lock held
+ * !user-mode: call with @pd->lock held
+ */
+static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
+{
+ TranslationBlock *tb1;
+ uintptr_t *pprev;
+ unsigned int n1;
+
+ assert_page_locked(pd);
+ pprev = &pd->first_tb;
+ PAGE_FOR_EACH_TB(pd, tb1, n1) {
+ if (tb1 == tb) {
+ *pprev = tb1->page_next[n1];
+ return;
+ }
+ pprev = &tb1->page_next[n1];
+ }
+ g_assert_not_reached();
+}
+
+/* remove @orig from its @n_orig-th jump list */
+static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
+{
+ uintptr_t ptr, ptr_locked;
+ TranslationBlock *dest;
+ TranslationBlock *tb;
+ uintptr_t *pprev;
+ int n;
+
+ /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
+ ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
+ dest = (TranslationBlock *)(ptr & ~1);
+ if (dest == NULL) {
+ return;
+ }
+
+ qemu_spin_lock(&dest->jmp_lock);
+ /*
+ * While acquiring the lock, the jump might have been removed if the
+ * destination TB was invalidated; check again.
+ */
+ ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
+ if (ptr_locked != ptr) {
+ qemu_spin_unlock(&dest->jmp_lock);
+ /*
+ * The only possibility is that the jump was unlinked via
+ * tb_jump_unlink(dest). Seeing here another destination would be a bug,
+ * because we set the LSB above.
+ */
+ g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
+ return;
+ }
+ /*
+ * We first acquired the lock, and since the destination pointer matches,
+ * we know for sure that @orig is in the jmp list.
+ */
+ pprev = &dest->jmp_list_head;
+ TB_FOR_EACH_JMP(dest, tb, n) {
+ if (tb == orig && n == n_orig) {
+ *pprev = tb->jmp_list_next[n];
+ /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
+ qemu_spin_unlock(&dest->jmp_lock);
+ return;
+ }
+ pprev = &tb->jmp_list_next[n];
+ }
+ g_assert_not_reached();
+}
+
+/*
+ * Reset the jump entry 'n' of a TB so that it is not chained to another TB.
+ */
+void tb_reset_jump(TranslationBlock *tb, int n)
+{
+ uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
+ tb_set_jmp_target(tb, n, addr);
+}
+
+/* remove any jumps to the TB */
+static inline void tb_jmp_unlink(TranslationBlock *dest)
+{
+ TranslationBlock *tb;
+ int n;
+
+ qemu_spin_lock(&dest->jmp_lock);
+
+ TB_FOR_EACH_JMP(dest, tb, n) {
+ tb_reset_jump(tb, n);
+ qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
+ /* No need to clear the list entry; setting the dest ptr is enough */
+ }
+ dest->jmp_list_head = (uintptr_t)NULL;
+
+ qemu_spin_unlock(&dest->jmp_lock);
+}
+
+static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
+{
+ CPUState *cpu;
+
+ if (TARGET_TB_PCREL) {
+ /* A TB may be at any virtual address */
+ CPU_FOREACH(cpu) {
+ tcg_flush_jmp_cache(cpu);
+ }
+ } else {
+ uint32_t h = tb_jmp_cache_hash_func(tb_pc(tb));
+
+ CPU_FOREACH(cpu) {
+ CPUJumpCache *jc = cpu->tb_jmp_cache;
+
+ if (qatomic_read(&jc->array[h].tb) == tb) {
+ qatomic_set(&jc->array[h].tb, NULL);
+ }
+ }
+ }
+}
+
+/*
+ * In user-mode, call with mmap_lock held.
+ * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
+ * locks held.
+ */
+static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
+{
+ PageDesc *p;
+ uint32_t h;
+ tb_page_addr_t phys_pc;
+ uint32_t orig_cflags = tb_cflags(tb);
+
+ assert_memory_lock();
+
+ /* make sure no further incoming jumps will be chained to this TB */
+ qemu_spin_lock(&tb->jmp_lock);
+ qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
+ qemu_spin_unlock(&tb->jmp_lock);
+
+ /* remove the TB from the hash list */
+ phys_pc = tb_page_addr0(tb);
+ h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
+ tb->flags, orig_cflags, tb->trace_vcpu_dstate);
+ if (!qht_remove(&tb_ctx.htable, tb, h)) {
+ return;
+ }
+
+ /* remove the TB from the page list */
+ if (rm_from_page_list) {
+ p = page_find(phys_pc >> TARGET_PAGE_BITS);
+ tb_page_remove(p, tb);
+ phys_pc = tb_page_addr1(tb);
+ if (phys_pc != -1) {
+ p = page_find(phys_pc >> TARGET_PAGE_BITS);
+ tb_page_remove(p, tb);
+ }
+ }
+
+ /* remove the TB from the hash list */
+ tb_jmp_cache_inval_tb(tb);
+
+ /* suppress this TB from the two jump lists */
+ tb_remove_from_jmp_list(tb, 0);
+ tb_remove_from_jmp_list(tb, 1);
+
+ /* suppress any remaining jumps to this TB */
+ tb_jmp_unlink(tb);
+
+ qatomic_set(&tb_ctx.tb_phys_invalidate_count,
+ tb_ctx.tb_phys_invalidate_count + 1);
+}
+
+static void tb_phys_invalidate__locked(TranslationBlock *tb)
+{
+ qemu_thread_jit_write();
+ do_tb_phys_invalidate(tb, true);
+ qemu_thread_jit_execute();
+}
+
+static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
+ PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc)
+{
+ PageDesc *p1, *p2;
+ tb_page_addr_t page1;
+ tb_page_addr_t page2;
+
+ assert_memory_lock();
+ g_assert(phys1 != -1);
+
+ page1 = phys1 >> TARGET_PAGE_BITS;
+ page2 = phys2 >> TARGET_PAGE_BITS;
+
+ p1 = page_find_alloc(page1, alloc);
+ if (ret_p1) {
+ *ret_p1 = p1;
+ }
+ if (likely(phys2 == -1)) {
+ page_lock(p1);
+ return;
+ } else if (page1 == page2) {
+ page_lock(p1);
+ if (ret_p2) {
+ *ret_p2 = p1;
+ }
+ return;
+ }
+ p2 = page_find_alloc(page2, alloc);
+ if (ret_p2) {
+ *ret_p2 = p2;
+ }
+ if (page1 < page2) {
+ page_lock(p1);
+ page_lock(p2);
+ } else {
+ page_lock(p2);
+ page_lock(p1);
+ }
+}
+
+#ifdef CONFIG_USER_ONLY
+static inline void page_lock_tb(const TranslationBlock *tb) { }
+static inline void page_unlock_tb(const TranslationBlock *tb) { }
+#else
+/* lock the page(s) of a TB in the correct acquisition order */
+static void page_lock_tb(const TranslationBlock *tb)
+{
+ page_lock_pair(NULL, tb_page_addr0(tb), NULL, tb_page_addr1(tb), false);
+}
+
+static void page_unlock_tb(const TranslationBlock *tb)
+{
+ PageDesc *p1 = page_find(tb_page_addr0(tb) >> TARGET_PAGE_BITS);
+
+ page_unlock(p1);
+ if (unlikely(tb_page_addr1(tb) != -1)) {
+ PageDesc *p2 = page_find(tb_page_addr1(tb) >> TARGET_PAGE_BITS);
+
+ if (p2 != p1) {
+ page_unlock(p2);
+ }
+ }
+}
+#endif
+
+/*
+ * Invalidate one TB.
+ * Called with mmap_lock held in user-mode.
+ */
+void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
+{
+ if (page_addr == -1 && tb_page_addr0(tb) != -1) {
+ page_lock_tb(tb);
+ do_tb_phys_invalidate(tb, true);
+ page_unlock_tb(tb);
+ } else {
+ do_tb_phys_invalidate(tb, false);
+ }
+}
+
+/*
+ * Add the tb in the target page and protect it if necessary.
+ * Called with mmap_lock held for user-mode emulation.
+ * Called with @p->lock held in !user-mode.
+ */
+static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
+ unsigned int n, tb_page_addr_t page_addr)
+{
+#ifndef CONFIG_USER_ONLY
+ bool page_already_protected;
+#endif
+
+ assert_page_locked(p);
+
+ tb->page_next[n] = p->first_tb;
+#ifndef CONFIG_USER_ONLY
+ page_already_protected = p->first_tb != (uintptr_t)NULL;
+#endif
+ p->first_tb = (uintptr_t)tb | n;
+
+#if defined(CONFIG_USER_ONLY)
+ /* translator_loop() must have made all TB pages non-writable */
+ assert(!(p->flags & PAGE_WRITE));
+#else
+ /*
+ * If some code is already present, then the pages are already
+ * protected. So we handle the case where only the first TB is
+ * allocated in a physical page.
+ */
+ if (!page_already_protected) {
+ tlb_protect_code(page_addr);
+ }
+#endif
+}
+
+/*
+ * Add a new TB and link it to the physical page tables. phys_page2 is
+ * (-1) to indicate that only one page contains the TB.
+ *
+ * Called with mmap_lock held for user-mode emulation.
+ *
+ * Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
+ * Note that in !user-mode, another thread might have already added a TB
+ * for the same block of guest code that @tb corresponds to. In that case,
+ * the caller should discard the original @tb, and use instead the returned TB.
+ */
+TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
+ tb_page_addr_t phys_page2)
+{
+ PageDesc *p;
+ PageDesc *p2 = NULL;
+ void *existing_tb = NULL;
+ uint32_t h;
+
+ assert_memory_lock();
+ tcg_debug_assert(!(tb->cflags & CF_INVALID));
+
+ /*
+ * Add the TB to the page list, acquiring first the pages's locks.
+ * We keep the locks held until after inserting the TB in the hash table,
+ * so that if the insertion fails we know for sure that the TBs are still
+ * in the page descriptors.
+ * Note that inserting into the hash table first isn't an option, since
+ * we can only insert TBs that are fully initialized.
+ */
+ page_lock_pair(&p, phys_pc, &p2, phys_page2, true);
+ tb_page_add(p, tb, 0, phys_pc);
+ if (p2) {
+ tb_page_add(p2, tb, 1, phys_page2);
+ }
+
+ /* add in the hash table */
+ h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
+ tb->flags, tb->cflags, tb->trace_vcpu_dstate);
+ qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
+
+ /* remove TB from the page(s) if we couldn't insert it */
+ if (unlikely(existing_tb)) {
+ tb_page_remove(p, tb);
+ if (p2) {
+ tb_page_remove(p2, tb);
+ }
+ tb = existing_tb;
+ }
+
+ if (p2 && p2 != p) {
+ page_unlock(p2);
+ }
+ page_unlock(p);
+ return tb;
+}
+
+/*
+ * @p must be non-NULL.
+ * user-mode: call with mmap_lock held.
+ * !user-mode: call with all @pages locked.
+ */
+static void
+tb_invalidate_phys_page_range__locked(struct page_collection *pages,
+ PageDesc *p, tb_page_addr_t start,
+ tb_page_addr_t end,
+ uintptr_t retaddr)
+{
+ TranslationBlock *tb;
+ tb_page_addr_t tb_start, tb_end;
+ int n;
+#ifdef TARGET_HAS_PRECISE_SMC
+ CPUState *cpu = current_cpu;
+ bool current_tb_not_found = retaddr != 0;
+ bool current_tb_modified = false;
+ TranslationBlock *current_tb = NULL;
+#endif /* TARGET_HAS_PRECISE_SMC */
+
+ assert_page_locked(p);
+
+ /*
+ * We remove all the TBs in the range [start, end[.
+ * XXX: see if in some cases it could be faster to invalidate all the code
+ */
+ PAGE_FOR_EACH_TB(p, tb, n) {
+ assert_page_locked(p);
+ /* NOTE: this is subtle as a TB may span two physical pages */
+ if (n == 0) {
+ /* NOTE: tb_end may be after the end of the page, but
+ it is not a problem */
+ tb_start = tb_page_addr0(tb);
+ tb_end = tb_start + tb->size;
+ } else {
+ tb_start = tb_page_addr1(tb);
+ tb_end = tb_start + ((tb_page_addr0(tb) + tb->size)
+ & ~TARGET_PAGE_MASK);
+ }
+ if (!(tb_end <= start || tb_start >= end)) {
+#ifdef TARGET_HAS_PRECISE_SMC
+ if (current_tb_not_found) {
+ current_tb_not_found = false;
+ /* now we have a real cpu fault */
+ current_tb = tcg_tb_lookup(retaddr);
+ }
+ if (current_tb == tb &&
+ (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
+ /*
+ * If we are modifying the current TB, we must stop
+ * its execution. We could be more precise by checking
+ * that the modification is after the current PC, but it
+ * would require a specialized function to partially
+ * restore the CPU state.
+ */
+ current_tb_modified = true;
+ cpu_restore_state_from_tb(cpu, current_tb, retaddr, true);
+ }
+#endif /* TARGET_HAS_PRECISE_SMC */
+ tb_phys_invalidate__locked(tb);
+ }
+ }
+#if !defined(CONFIG_USER_ONLY)
+ /* if no code remaining, no need to continue to use slow writes */
+ if (!p->first_tb) {
+ tlb_unprotect_code(start);
+ }
+#endif
+#ifdef TARGET_HAS_PRECISE_SMC
+ if (current_tb_modified) {
+ page_collection_unlock(pages);
+ /* Force execution of one insn next time. */
+ cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
+ mmap_unlock();
+ cpu_loop_exit_noexc(cpu);
+ }
+#endif
+}
+
+/*
+ * Invalidate all TBs which intersect with the target physical
+ * address page @addr.
+ *
+ * Called with mmap_lock held for user-mode emulation
+ */
+void tb_invalidate_phys_page(tb_page_addr_t addr)
+{
+ struct page_collection *pages;
+ tb_page_addr_t start, end;
+ PageDesc *p;
+
+ assert_memory_lock();
+
+ p = page_find(addr >> TARGET_PAGE_BITS);
+ if (p == NULL) {
+ return;
+ }
+
+ start = addr & TARGET_PAGE_MASK;
+ end = start + TARGET_PAGE_SIZE;
+ pages = page_collection_lock(start, end);
+ tb_invalidate_phys_page_range__locked(pages, p, start, end, 0);
+ page_collection_unlock(pages);
+}
+
+/*
+ * Invalidate all TBs which intersect with the target physical address range
+ * [start;end[. NOTE: start and end may refer to *different* physical pages.
+ * 'is_cpu_write_access' should be true if called from a real cpu write
+ * access: the virtual CPU will exit the current TB if code is modified inside
+ * this TB.
+ *
+ * Called with mmap_lock held for user-mode emulation.
+ */
+void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
+{
+ struct page_collection *pages;
+ tb_page_addr_t next;
+
+ assert_memory_lock();
+
+ pages = page_collection_lock(start, end);
+ for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
+ start < end;
+ start = next, next += TARGET_PAGE_SIZE) {
+ PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
+ tb_page_addr_t bound = MIN(next, end);
+
+ if (pd == NULL) {
+ continue;
+ }
+ tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
+ }
+ page_collection_unlock(pages);
+}
+
+#ifdef CONFIG_SOFTMMU
+/*
+ * len must be <= 8 and start must be a multiple of len.
+ * Called via softmmu_template.h when code areas are written to with
+ * iothread mutex not held.
+ *
+ * Call with all @pages in the range [@start, @start + len[ locked.
+ */
+void tb_invalidate_phys_page_fast(struct page_collection *pages,
+ tb_page_addr_t start, int len,
+ uintptr_t retaddr)
+{
+ PageDesc *p;
+
+ assert_memory_lock();
+
+ p = page_find(start >> TARGET_PAGE_BITS);
+ if (!p) {
+ return;
+ }
+
+ assert_page_locked(p);
+ tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
+ retaddr);
+}
+#else
+/*
+ * Called with mmap_lock held. If pc is not 0 then it indicates the
+ * host PC of the faulting store instruction that caused this invalidate.
+ * Returns true if the caller needs to abort execution of the current
+ * TB (because it was modified by this store and the guest CPU has
+ * precise-SMC semantics).
+ */
+bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
+{
+ TranslationBlock *tb;
+ PageDesc *p;
+ int n;
+#ifdef TARGET_HAS_PRECISE_SMC
+ TranslationBlock *current_tb = NULL;
+ CPUState *cpu = current_cpu;
+ bool current_tb_modified = false;
+#endif
+
+ assert_memory_lock();
+
+ addr &= TARGET_PAGE_MASK;
+ p = page_find(addr >> TARGET_PAGE_BITS);
+ if (!p) {
+ return false;
+ }
+
+#ifdef TARGET_HAS_PRECISE_SMC
+ if (p->first_tb && pc != 0) {
+ current_tb = tcg_tb_lookup(pc);
+ }
+#endif
+ assert_page_locked(p);
+ PAGE_FOR_EACH_TB(p, tb, n) {
+#ifdef TARGET_HAS_PRECISE_SMC
+ if (current_tb == tb &&
+ (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
+ /*
+ * If we are modifying the current TB, we must stop its execution.
+ * We could be more precise by checking that the modification is
+ * after the current PC, but it would require a specialized
+ * function to partially restore the CPU state.
+ */
+ current_tb_modified = true;
+ cpu_restore_state_from_tb(cpu, current_tb, pc, true);
+ }
+#endif /* TARGET_HAS_PRECISE_SMC */
+ tb_phys_invalidate(tb, addr);
+ }
+ p->first_tb = (uintptr_t)NULL;
+#ifdef TARGET_HAS_PRECISE_SMC
+ if (current_tb_modified) {
+ /* Force execution of one insn next time. */
+ cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
+ return true;
+ }
+#endif
+
+ return false;
+}
+#endif
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index ba997f6cfe..d50239e0e2 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -70,8 +70,6 @@ static void *mttcg_cpu_thread_fn(void *arg)
assert(tcg_enabled());
g_assert(!icount_enabled());
- tcg_cpu_init_cflags(cpu, current_machine->smp.max_cpus > 1);
-
rcu_register_thread();
force_rcu.notifier.notify = mttcg_force_rcu;
force_rcu.cpu = cpu;
@@ -141,6 +139,9 @@ void mttcg_start_vcpu_thread(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
+ g_assert(tcg_enabled());
+ tcg_cpu_init_cflags(cpu, current_machine->smp.max_cpus > 1);
+
cpu->thread = g_new0(QemuThread, 1);
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index cc8adc2380..290833a37f 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -51,7 +51,7 @@ void rr_kick_vcpu_thread(CPUState *unused)
*
* The kick timer is responsible for moving single threaded vCPU
* emulation on to the next vCPU. If more than one vCPU is running a
- * timer event with force a cpu->exit so the next vCPU can get
+ * timer event we force a cpu->exit so the next vCPU can get
* scheduled.
*
* The timer is removed if all vCPUs are idle and restarted again once
@@ -152,9 +152,7 @@ static void *rr_cpu_thread_fn(void *arg)
Notifier force_rcu;
CPUState *cpu = arg;
- g_assert(tcg_enabled());
- tcg_cpu_init_cflags(cpu, false);
-
+ assert(tcg_enabled());
rcu_register_thread();
force_rcu.notify = rr_force_rcu;
rcu_add_force_rcu_notifier(&force_rcu);
@@ -277,6 +275,9 @@ void rr_start_vcpu_thread(CPUState *cpu)
static QemuCond *single_tcg_halt_cond;
static QemuThread *single_tcg_cpu_thread;
+ g_assert(tcg_enabled());
+ tcg_cpu_init_cflags(cpu, false);
+
if (!single_tcg_cpu_thread) {
cpu->thread = g_new0(QemuThread, 1);
cpu->halt_cond = g_new0(QemuCond, 1);
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 4ed75a13e1..f185356a36 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -63,57 +63,7 @@
#include "tb-context.h"
#include "internal.h"
-/* #define DEBUG_TB_INVALIDATE */
-/* #define DEBUG_TB_FLUSH */
/* make various TB consistency checks */
-/* #define DEBUG_TB_CHECK */
-
-#ifdef DEBUG_TB_INVALIDATE
-#define DEBUG_TB_INVALIDATE_GATE 1
-#else
-#define DEBUG_TB_INVALIDATE_GATE 0
-#endif
-
-#ifdef DEBUG_TB_FLUSH
-#define DEBUG_TB_FLUSH_GATE 1
-#else
-#define DEBUG_TB_FLUSH_GATE 0
-#endif
-
-#if !defined(CONFIG_USER_ONLY)
-/* TB consistency checks only implemented for usermode emulation. */
-#undef DEBUG_TB_CHECK
-#endif
-
-#ifdef DEBUG_TB_CHECK
-#define DEBUG_TB_CHECK_GATE 1
-#else
-#define DEBUG_TB_CHECK_GATE 0
-#endif
-
-/* Access to the various translations structures need to be serialised via locks
- * for consistency.
- * In user-mode emulation access to the memory related structures are protected
- * with mmap_lock.
- * In !user-mode we use per-page locks.
- */
-#ifdef CONFIG_SOFTMMU
-#define assert_memory_lock()
-#else
-#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
-#endif
-
-typedef struct PageDesc {
- /* list of TBs intersecting this ram page */
- uintptr_t first_tb;
-#ifdef CONFIG_USER_ONLY
- unsigned long flags;
- void *target_data;
-#endif
-#ifdef CONFIG_SOFTMMU
- QemuSpin lock;
-#endif
-} PageDesc;
/**
* struct page_entry - page descriptor entry
@@ -159,18 +109,6 @@ struct page_collection {
struct page_entry *max;
};
-/* list iterators for lists of tagged pointers in TranslationBlock */
-#define TB_FOR_EACH_TAGGED(head, tb, n, field) \
- for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1); \
- tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
- tb = (TranslationBlock *)((uintptr_t)tb & ~1))
-
-#define PAGE_FOR_EACH_TB(pagedesc, tb, n) \
- TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
-
-#define TB_FOR_EACH_JMP(head_tb, tb, n) \
- TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
-
/*
* In system mode we want L1_MAP to be based on ram offsets,
* while in user mode we want it to be based on virtual addresses.
@@ -188,10 +126,6 @@ struct page_collection {
# define L1_MAP_ADDR_SPACE_BITS MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
#endif
-/* Size of the L2 (and L3, etc) page tables. */
-#define V_L2_BITS 10
-#define V_L2_SIZE (1 << V_L2_BITS)
-
/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
sizeof_field(TranslationBlock, trace_vcpu_dstate)
@@ -200,18 +134,11 @@ QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
/*
* L1 Mapping properties
*/
-static int v_l1_size;
-static int v_l1_shift;
-static int v_l2_levels;
+int v_l1_size;
+int v_l1_shift;
+int v_l2_levels;
-/* The bottom level has pointers to PageDesc, and is indexed by
- * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
- */
-#define V_L1_MIN_BITS 4
-#define V_L1_MAX_BITS (V_L2_BITS + 3)
-#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
-
-static void *l1_map[V_L1_MAX_SIZE];
+void *l1_map[V_L1_MAX_SIZE];
TBContext tb_ctx;
@@ -324,12 +251,11 @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
* When reset_icount is true, current TB will be interrupted and
* icount should be recalculated.
*/
-static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
- uintptr_t searched_pc, bool reset_icount)
+int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
+ uintptr_t searched_pc, bool reset_icount)
{
- target_ulong data[TARGET_INSN_START_WORDS];
+ uint64_t data[TARGET_INSN_START_WORDS];
uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
- CPUArchState *env = cpu->env_ptr;
const uint8_t *p = tb->tc.ptr + tb->tc.size;
int i, j, num_insns = tb->icount;
#ifdef CONFIG_PROFILER
@@ -368,7 +294,8 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
and shift if to the number of actually executed instructions */
cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
}
- restore_state_to_opc(env, tb, data);
+
+ cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
#ifdef CONFIG_PROFILER
qatomic_set(&prof->restore_time,
@@ -381,6 +308,14 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
{
/*
+ * The pc update associated with restore without exit will
+ * break the relative pc adjustments performed by TARGET_TB_PCREL.
+ */
+ if (TARGET_TB_PCREL) {
+ assert(will_exit);
+ }
+
+ /*
* The host_pc has to be in the rx region of the code buffer.
* If it is not we will not be able to resolve it here.
* The two cases where host_pc will not be correct are:
@@ -471,7 +406,7 @@ void page_init(void)
#endif
}
-static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
+PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
{
PageDesc *pd;
void **lp;
@@ -537,31 +472,8 @@ static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
return pd + (index & (V_L2_SIZE - 1));
}
-static inline PageDesc *page_find(tb_page_addr_t index)
-{
- return page_find_alloc(index, false);
-}
-
-static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
- PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc);
-
/* In user-mode page locks aren't used; mmap_lock is enough */
#ifdef CONFIG_USER_ONLY
-
-#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
-
-static inline void page_lock(PageDesc *pd)
-{ }
-
-static inline void page_unlock(PageDesc *pd)
-{ }
-
-static inline void page_lock_tb(const TranslationBlock *tb)
-{ }
-
-static inline void page_unlock_tb(const TranslationBlock *tb)
-{ }
-
struct page_collection *
page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
{
@@ -610,8 +522,7 @@ static void page_unlock__debug(const PageDesc *pd)
g_assert(removed);
}
-static void
-do_assert_page_locked(const PageDesc *pd, const char *file, int line)
+void do_assert_page_locked(const PageDesc *pd, const char *file, int line)
{
if (unlikely(!page_is_locked(pd))) {
error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
@@ -620,8 +531,6 @@ do_assert_page_locked(const PageDesc *pd, const char *file, int line)
}
}
-#define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
-
void assert_no_pages_locked(void)
{
ht_pages_locked_debug_init();
@@ -630,50 +539,23 @@ void assert_no_pages_locked(void)
#else /* !CONFIG_DEBUG_TCG */
-#define assert_page_locked(pd)
-
-static inline void page_lock__debug(const PageDesc *pd)
-{
-}
-
-static inline void page_unlock__debug(const PageDesc *pd)
-{
-}
+static inline void page_lock__debug(const PageDesc *pd) { }
+static inline void page_unlock__debug(const PageDesc *pd) { }
#endif /* CONFIG_DEBUG_TCG */
-static inline void page_lock(PageDesc *pd)
+void page_lock(PageDesc *pd)
{
page_lock__debug(pd);
qemu_spin_lock(&pd->lock);
}
-static inline void page_unlock(PageDesc *pd)
+void page_unlock(PageDesc *pd)
{
qemu_spin_unlock(&pd->lock);
page_unlock__debug(pd);
}
-/* lock the page(s) of a TB in the correct acquisition order */
-static inline void page_lock_tb(const TranslationBlock *tb)
-{
- page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], false);
-}
-
-static inline void page_unlock_tb(const TranslationBlock *tb)
-{
- PageDesc *p1 = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
-
- page_unlock(p1);
- if (unlikely(tb->page_addr[1] != -1)) {
- PageDesc *p2 = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
-
- if (p2 != p1) {
- page_unlock(p2);
- }
- }
-}
-
static inline struct page_entry *
page_entry_new(PageDesc *pd, tb_page_addr_t index)
{
@@ -824,9 +706,9 @@ page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
}
assert_page_locked(pd);
PAGE_FOR_EACH_TB(pd, tb, n) {
- if (page_trylock_add(set, tb->page_addr[0]) ||
- (tb->page_addr[1] != -1 &&
- page_trylock_add(set, tb->page_addr[1]))) {
+ if (page_trylock_add(set, tb_page_addr0(tb)) ||
+ (tb_page_addr1(tb) != -1 &&
+ page_trylock_add(set, tb_page_addr1(tb)))) {
/* drop all locks, and reacquire in order */
g_tree_foreach(set->tree, page_entry_unlock, NULL);
goto retry;
@@ -845,509 +727,6 @@ void page_collection_unlock(struct page_collection *set)
#endif /* !CONFIG_USER_ONLY */
-static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
- PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc)
-{
- PageDesc *p1, *p2;
- tb_page_addr_t page1;
- tb_page_addr_t page2;
-
- assert_memory_lock();
- g_assert(phys1 != -1);
-
- page1 = phys1 >> TARGET_PAGE_BITS;
- page2 = phys2 >> TARGET_PAGE_BITS;
-
- p1 = page_find_alloc(page1, alloc);
- if (ret_p1) {
- *ret_p1 = p1;
- }
- if (likely(phys2 == -1)) {
- page_lock(p1);
- return;
- } else if (page1 == page2) {
- page_lock(p1);
- if (ret_p2) {
- *ret_p2 = p1;
- }
- return;
- }
- p2 = page_find_alloc(page2, alloc);
- if (ret_p2) {
- *ret_p2 = p2;
- }
- if (page1 < page2) {
- page_lock(p1);
- page_lock(p2);
- } else {
- page_lock(p2);
- page_lock(p1);
- }
-}
-
-static bool tb_cmp(const void *ap, const void *bp)
-{
- const TranslationBlock *a = ap;
- const TranslationBlock *b = bp;
-
- return ((TARGET_TB_PCREL || tb_pc(a) == tb_pc(b)) &&
- a->cs_base == b->cs_base &&
- a->flags == b->flags &&
- (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
- a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
- a->page_addr[0] == b->page_addr[0] &&
- a->page_addr[1] == b->page_addr[1]);
-}
-
-void tb_htable_init(void)
-{
- unsigned int mode = QHT_MODE_AUTO_RESIZE;
-
- qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
-}
-
-/* Set to NULL all the 'first_tb' fields in all PageDescs. */
-static void page_flush_tb_1(int level, void **lp)
-{
- int i;
-
- if (*lp == NULL) {
- return;
- }
- if (level == 0) {
- PageDesc *pd = *lp;
-
- for (i = 0; i < V_L2_SIZE; ++i) {
- page_lock(&pd[i]);
- pd[i].first_tb = (uintptr_t)NULL;
- page_unlock(&pd[i]);
- }
- } else {
- void **pp = *lp;
-
- for (i = 0; i < V_L2_SIZE; ++i) {
- page_flush_tb_1(level - 1, pp + i);
- }
- }
-}
-
-static void page_flush_tb(void)
-{
- int i, l1_sz = v_l1_size;
-
- for (i = 0; i < l1_sz; i++) {
- page_flush_tb_1(v_l2_levels, l1_map + i);
- }
-}
-
-static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
-{
- const TranslationBlock *tb = value;
- size_t *size = data;
-
- *size += tb->tc.size;
- return false;
-}
-
-/* flush all the translation blocks */
-static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
-{
- bool did_flush = false;
-
- mmap_lock();
- /* If it is already been done on request of another CPU,
- * just retry.
- */
- if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
- goto done;
- }
- did_flush = true;
-
- if (DEBUG_TB_FLUSH_GATE) {
- size_t nb_tbs = tcg_nb_tbs();
- size_t host_size = 0;
-
- tcg_tb_foreach(tb_host_size_iter, &host_size);
- printf("qemu: flush code_size=%zu nb_tbs=%zu avg_tb_size=%zu\n",
- tcg_code_size(), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
- }
-
- CPU_FOREACH(cpu) {
- tcg_flush_jmp_cache(cpu);
- }
-
- qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
- page_flush_tb();
-
- tcg_region_reset_all();
- /* XXX: flush processor icache at this point if cache flush is
- expensive */
- qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
-
-done:
- mmap_unlock();
- if (did_flush) {
- qemu_plugin_flush_cb();
- }
-}
-
-void tb_flush(CPUState *cpu)
-{
- if (tcg_enabled()) {
- unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count);
-
- if (cpu_in_exclusive_context(cpu)) {
- do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
- } else {
- async_safe_run_on_cpu(cpu, do_tb_flush,
- RUN_ON_CPU_HOST_INT(tb_flush_count));
- }
- }
-}
-
-/*
- * Formerly ifdef DEBUG_TB_CHECK. These debug functions are user-mode-only,
- * so in order to prevent bit rot we compile them unconditionally in user-mode,
- * and let the optimizer get rid of them by wrapping their user-only callers
- * with if (DEBUG_TB_CHECK_GATE).
- */
-#ifdef CONFIG_USER_ONLY
-
-static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp)
-{
- TranslationBlock *tb = p;
- target_ulong addr = *(target_ulong *)userp;
-
- if (!(addr + TARGET_PAGE_SIZE <= tb_pc(tb) ||
- addr >= tb_pc(tb) + tb->size)) {
- printf("ERROR invalidate: address=" TARGET_FMT_lx
- " PC=%08lx size=%04x\n", addr, (long)tb_pc(tb), tb->size);
- }
-}
-
-/* verify that all the pages have correct rights for code
- *
- * Called with mmap_lock held.
- */
-static void tb_invalidate_check(target_ulong address)
-{
- address &= TARGET_PAGE_MASK;
- qht_iter(&tb_ctx.htable, do_tb_invalidate_check, &address);
-}
-
-static void do_tb_page_check(void *p, uint32_t hash, void *userp)
-{
- TranslationBlock *tb = p;
- int flags1, flags2;
-
- flags1 = page_get_flags(tb_pc(tb));
- flags2 = page_get_flags(tb_pc(tb) + tb->size - 1);
- if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
- printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
- (long)tb_pc(tb), tb->size, flags1, flags2);
- }
-}
-
-/* verify that all the pages have correct rights for code */
-static void tb_page_check(void)
-{
- qht_iter(&tb_ctx.htable, do_tb_page_check, NULL);
-}
-
-#endif /* CONFIG_USER_ONLY */
-
-/*
- * user-mode: call with mmap_lock held
- * !user-mode: call with @pd->lock held
- */
-static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
-{
- TranslationBlock *tb1;
- uintptr_t *pprev;
- unsigned int n1;
-
- assert_page_locked(pd);
- pprev = &pd->first_tb;
- PAGE_FOR_EACH_TB(pd, tb1, n1) {
- if (tb1 == tb) {
- *pprev = tb1->page_next[n1];
- return;
- }
- pprev = &tb1->page_next[n1];
- }
- g_assert_not_reached();
-}
-
-/* remove @orig from its @n_orig-th jump list */
-static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
-{
- uintptr_t ptr, ptr_locked;
- TranslationBlock *dest;
- TranslationBlock *tb;
- uintptr_t *pprev;
- int n;
-
- /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
- ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
- dest = (TranslationBlock *)(ptr & ~1);
- if (dest == NULL) {
- return;
- }
-
- qemu_spin_lock(&dest->jmp_lock);
- /*
- * While acquiring the lock, the jump might have been removed if the
- * destination TB was invalidated; check again.
- */
- ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
- if (ptr_locked != ptr) {
- qemu_spin_unlock(&dest->jmp_lock);
- /*
- * The only possibility is that the jump was unlinked via
- * tb_jump_unlink(dest). Seeing here another destination would be a bug,
- * because we set the LSB above.
- */
- g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
- return;
- }
- /*
- * We first acquired the lock, and since the destination pointer matches,
- * we know for sure that @orig is in the jmp list.
- */
- pprev = &dest->jmp_list_head;
- TB_FOR_EACH_JMP(dest, tb, n) {
- if (tb == orig && n == n_orig) {
- *pprev = tb->jmp_list_next[n];
- /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
- qemu_spin_unlock(&dest->jmp_lock);
- return;
- }
- pprev = &tb->jmp_list_next[n];
- }
- g_assert_not_reached();
-}
-
-/* reset the jump entry 'n' of a TB so that it is not chained to
- another TB */
-static inline void tb_reset_jump(TranslationBlock *tb, int n)
-{
- uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
- tb_set_jmp_target(tb, n, addr);
-}
-
-/* remove any jumps to the TB */
-static inline void tb_jmp_unlink(TranslationBlock *dest)
-{
- TranslationBlock *tb;
- int n;
-
- qemu_spin_lock(&dest->jmp_lock);
-
- TB_FOR_EACH_JMP(dest, tb, n) {
- tb_reset_jump(tb, n);
- qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
- /* No need to clear the list entry; setting the dest ptr is enough */
- }
- dest->jmp_list_head = (uintptr_t)NULL;
-
- qemu_spin_unlock(&dest->jmp_lock);
-}
-
-static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
-{
- CPUState *cpu;
-
- if (TARGET_TB_PCREL) {
- /* A TB may be at any virtual address */
- CPU_FOREACH(cpu) {
- tcg_flush_jmp_cache(cpu);
- }
- } else {
- uint32_t h = tb_jmp_cache_hash_func(tb_pc(tb));
-
- CPU_FOREACH(cpu) {
- CPUJumpCache *jc = cpu->tb_jmp_cache;
-
- if (qatomic_read(&jc->array[h].tb) == tb) {
- qatomic_set(&jc->array[h].tb, NULL);
- }
- }
- }
-}
-
-/*
- * In user-mode, call with mmap_lock held.
- * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
- * locks held.
- */
-static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
-{
- PageDesc *p;
- uint32_t h;
- tb_page_addr_t phys_pc;
- uint32_t orig_cflags = tb_cflags(tb);
-
- assert_memory_lock();
-
- /* make sure no further incoming jumps will be chained to this TB */
- qemu_spin_lock(&tb->jmp_lock);
- qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
- qemu_spin_unlock(&tb->jmp_lock);
-
- /* remove the TB from the hash list */
- phys_pc = tb->page_addr[0];
- h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
- tb->flags, orig_cflags, tb->trace_vcpu_dstate);
- if (!qht_remove(&tb_ctx.htable, tb, h)) {
- return;
- }
-
- /* remove the TB from the page list */
- if (rm_from_page_list) {
- p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
- tb_page_remove(p, tb);
- if (tb->page_addr[1] != -1) {
- p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
- tb_page_remove(p, tb);
- }
- }
-
- /* remove the TB from the hash list */
- tb_jmp_cache_inval_tb(tb);
-
- /* suppress this TB from the two jump lists */
- tb_remove_from_jmp_list(tb, 0);
- tb_remove_from_jmp_list(tb, 1);
-
- /* suppress any remaining jumps to this TB */
- tb_jmp_unlink(tb);
-
- qatomic_set(&tb_ctx.tb_phys_invalidate_count,
- tb_ctx.tb_phys_invalidate_count + 1);
-}
-
-static void tb_phys_invalidate__locked(TranslationBlock *tb)
-{
- qemu_thread_jit_write();
- do_tb_phys_invalidate(tb, true);
- qemu_thread_jit_execute();
-}
-
-/* invalidate one TB
- *
- * Called with mmap_lock held in user-mode.
- */
-void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
-{
- if (page_addr == -1 && tb->page_addr[0] != -1) {
- page_lock_tb(tb);
- do_tb_phys_invalidate(tb, true);
- page_unlock_tb(tb);
- } else {
- do_tb_phys_invalidate(tb, false);
- }
-}
-
-/* add the tb in the target page and protect it if necessary
- *
- * Called with mmap_lock held for user-mode emulation.
- * Called with @p->lock held in !user-mode.
- */
-static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
- unsigned int n, tb_page_addr_t page_addr)
-{
-#ifndef CONFIG_USER_ONLY
- bool page_already_protected;
-#endif
-
- assert_page_locked(p);
-
- tb->page_addr[n] = page_addr;
- tb->page_next[n] = p->first_tb;
-#ifndef CONFIG_USER_ONLY
- page_already_protected = p->first_tb != (uintptr_t)NULL;
-#endif
- p->first_tb = (uintptr_t)tb | n;
-
-#if defined(CONFIG_USER_ONLY)
- /* translator_loop() must have made all TB pages non-writable */
- assert(!(p->flags & PAGE_WRITE));
-#else
- /* if some code is already present, then the pages are already
- protected. So we handle the case where only the first TB is
- allocated in a physical page */
- if (!page_already_protected) {
- tlb_protect_code(page_addr);
- }
-#endif
-}
-
-/*
- * Add a new TB and link it to the physical page tables. phys_page2 is
- * (-1) to indicate that only one page contains the TB.
- *
- * Called with mmap_lock held for user-mode emulation.
- *
- * Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
- * Note that in !user-mode, another thread might have already added a TB
- * for the same block of guest code that @tb corresponds to. In that case,
- * the caller should discard the original @tb, and use instead the returned TB.
- */
-static TranslationBlock *
-tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
- tb_page_addr_t phys_page2)
-{
- PageDesc *p;
- PageDesc *p2 = NULL;
- void *existing_tb = NULL;
- uint32_t h;
-
- assert_memory_lock();
- tcg_debug_assert(!(tb->cflags & CF_INVALID));
-
- /*
- * Add the TB to the page list, acquiring first the pages's locks.
- * We keep the locks held until after inserting the TB in the hash table,
- * so that if the insertion fails we know for sure that the TBs are still
- * in the page descriptors.
- * Note that inserting into the hash table first isn't an option, since
- * we can only insert TBs that are fully initialized.
- */
- page_lock_pair(&p, phys_pc, &p2, phys_page2, true);
- tb_page_add(p, tb, 0, phys_pc);
- if (p2) {
- tb_page_add(p2, tb, 1, phys_page2);
- } else {
- tb->page_addr[1] = -1;
- }
-
- /* add in the hash table */
- h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
- tb->flags, tb->cflags, tb->trace_vcpu_dstate);
- qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
-
- /* remove TB from the page(s) if we couldn't insert it */
- if (unlikely(existing_tb)) {
- tb_page_remove(p, tb);
- if (p2) {
- tb_page_remove(p2, tb);
- }
- tb = existing_tb;
- }
-
- if (p2 && p2 != p) {
- page_unlock(p2);
- }
- page_unlock(p);
-
-#ifdef CONFIG_USER_ONLY
- if (DEBUG_TB_CHECK_GATE) {
- tb_page_check();
- }
-#endif
- return tb;
-}
-
/* Called with mmap_lock held for user mode emulation. */
TranslationBlock *tb_gen_code(CPUState *cpu,
target_ulong pc, target_ulong cs_base,
@@ -1400,8 +779,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb->flags = flags;
tb->cflags = cflags;
tb->trace_vcpu_dstate = *cpu->trace_dstate;
- tb->page_addr[0] = phys_pc;
- tb->page_addr[1] = -1;
+ tb_set_page_addr0(tb, phys_pc);
+ tb_set_page_addr1(tb, -1);
tcg_ctx->tb_cflags = cflags;
tb_overflow:
@@ -1599,7 +978,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
* a temporary one-insn TB, and we have nothing left to do. Return early
* before attempting to link to other TBs or add to the lookup table.
*/
- if (tb->page_addr[0] == -1) {
+ if (tb_page_addr0(tb) == -1) {
return tb;
}
@@ -1614,7 +993,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
* No explicit memory barrier is required -- tb_link_page() makes the
* TB visible in a consistent state.
*/
- existing_tb = tb_link_page(tb, tb->page_addr[0], tb->page_addr[1]);
+ existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
/* if the TB already exists, discard what we just translated */
if (unlikely(existing_tb != tb)) {
uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
@@ -1627,251 +1006,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
return tb;
}
-/*
- * @p must be non-NULL.
- * user-mode: call with mmap_lock held.
- * !user-mode: call with all @pages locked.
- */
-static void
-tb_invalidate_phys_page_range__locked(struct page_collection *pages,
- PageDesc *p, tb_page_addr_t start,
- tb_page_addr_t end,
- uintptr_t retaddr)
-{
- TranslationBlock *tb;
- tb_page_addr_t tb_start, tb_end;
- int n;
-#ifdef TARGET_HAS_PRECISE_SMC
- CPUState *cpu = current_cpu;
- CPUArchState *env = NULL;
- bool current_tb_not_found = retaddr != 0;
- bool current_tb_modified = false;
- TranslationBlock *current_tb = NULL;
- target_ulong current_pc = 0;
- target_ulong current_cs_base = 0;
- uint32_t current_flags = 0;
-#endif /* TARGET_HAS_PRECISE_SMC */
-
- assert_page_locked(p);
-
-#if defined(TARGET_HAS_PRECISE_SMC)
- if (cpu != NULL) {
- env = cpu->env_ptr;
- }
-#endif
-
- /* we remove all the TBs in the range [start, end[ */
- /* XXX: see if in some cases it could be faster to invalidate all
- the code */
- PAGE_FOR_EACH_TB(p, tb, n) {
- assert_page_locked(p);
- /* NOTE: this is subtle as a TB may span two physical pages */
- if (n == 0) {
- /* NOTE: tb_end may be after the end of the page, but
- it is not a problem */
- tb_start = tb->page_addr[0];
- tb_end = tb_start + tb->size;
- } else {
- tb_start = tb->page_addr[1];
- tb_end = tb_start + ((tb->page_addr[0] + tb->size)
- & ~TARGET_PAGE_MASK);
- }
- if (!(tb_end <= start || tb_start >= end)) {
-#ifdef TARGET_HAS_PRECISE_SMC
- if (current_tb_not_found) {
- current_tb_not_found = false;
- /* now we have a real cpu fault */
- current_tb = tcg_tb_lookup(retaddr);
- }
- if (current_tb == tb &&
- (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
- /*
- * If we are modifying the current TB, we must stop
- * its execution. We could be more precise by checking
- * that the modification is after the current PC, but it
- * would require a specialized function to partially
- * restore the CPU state.
- */
- current_tb_modified = true;
- cpu_restore_state_from_tb(cpu, current_tb, retaddr, true);
- cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
- &current_flags);
- }
-#endif /* TARGET_HAS_PRECISE_SMC */
- tb_phys_invalidate__locked(tb);
- }
- }
-#if !defined(CONFIG_USER_ONLY)
- /* if no code remaining, no need to continue to use slow writes */
- if (!p->first_tb) {
- tlb_unprotect_code(start);
- }
-#endif
-#ifdef TARGET_HAS_PRECISE_SMC
- if (current_tb_modified) {
- page_collection_unlock(pages);
- /* Force execution of one insn next time. */
- cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
- mmap_unlock();
- cpu_loop_exit_noexc(cpu);
- }
-#endif
-}
-
-/*
- * Invalidate all TBs which intersect with the target physical address range
- * [start;end[. NOTE: start and end must refer to the *same* physical page.
- * 'is_cpu_write_access' should be true if called from a real cpu write
- * access: the virtual CPU will exit the current TB if code is modified inside
- * this TB.
- *
- * Called with mmap_lock held for user-mode emulation
- */
-void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end)
-{
- struct page_collection *pages;
- PageDesc *p;
-
- assert_memory_lock();
-
- p = page_find(start >> TARGET_PAGE_BITS);
- if (p == NULL) {
- return;
- }
- pages = page_collection_lock(start, end);
- tb_invalidate_phys_page_range__locked(pages, p, start, end, 0);
- page_collection_unlock(pages);
-}
-
-/*
- * Invalidate all TBs which intersect with the target physical address range
- * [start;end[. NOTE: start and end may refer to *different* physical pages.
- * 'is_cpu_write_access' should be true if called from a real cpu write
- * access: the virtual CPU will exit the current TB if code is modified inside
- * this TB.
- *
- * Called with mmap_lock held for user-mode emulation.
- */
-#ifdef CONFIG_SOFTMMU
-void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end)
-#else
-void tb_invalidate_phys_range(target_ulong start, target_ulong end)
-#endif
-{
- struct page_collection *pages;
- tb_page_addr_t next;
-
- assert_memory_lock();
-
- pages = page_collection_lock(start, end);
- for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
- start < end;
- start = next, next += TARGET_PAGE_SIZE) {
- PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
- tb_page_addr_t bound = MIN(next, end);
-
- if (pd == NULL) {
- continue;
- }
- tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
- }
- page_collection_unlock(pages);
-}
-
-#ifdef CONFIG_SOFTMMU
-/* len must be <= 8 and start must be a multiple of len.
- * Called via softmmu_template.h when code areas are written to with
- * iothread mutex not held.
- *
- * Call with all @pages in the range [@start, @start + len[ locked.
- */
-void tb_invalidate_phys_page_fast(struct page_collection *pages,
- tb_page_addr_t start, int len,
- uintptr_t retaddr)
-{
- PageDesc *p;
-
- assert_memory_lock();
-
- p = page_find(start >> TARGET_PAGE_BITS);
- if (!p) {
- return;
- }
-
- assert_page_locked(p);
- tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
- retaddr);
-}
-#else
-/* Called with mmap_lock held. If pc is not 0 then it indicates the
- * host PC of the faulting store instruction that caused this invalidate.
- * Returns true if the caller needs to abort execution of the current
- * TB (because it was modified by this store and the guest CPU has
- * precise-SMC semantics).
- */
-static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
-{
- TranslationBlock *tb;
- PageDesc *p;
- int n;
-#ifdef TARGET_HAS_PRECISE_SMC
- TranslationBlock *current_tb = NULL;
- CPUState *cpu = current_cpu;
- CPUArchState *env = NULL;
- int current_tb_modified = 0;
- target_ulong current_pc = 0;
- target_ulong current_cs_base = 0;
- uint32_t current_flags = 0;
-#endif
-
- assert_memory_lock();
-
- addr &= TARGET_PAGE_MASK;
- p = page_find(addr >> TARGET_PAGE_BITS);
- if (!p) {
- return false;
- }
-
-#ifdef TARGET_HAS_PRECISE_SMC
- if (p->first_tb && pc != 0) {
- current_tb = tcg_tb_lookup(pc);
- }
- if (cpu != NULL) {
- env = cpu->env_ptr;
- }
-#endif
- assert_page_locked(p);
- PAGE_FOR_EACH_TB(p, tb, n) {
-#ifdef TARGET_HAS_PRECISE_SMC
- if (current_tb == tb &&
- (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
- /* If we are modifying the current TB, we must stop
- its execution. We could be more precise by checking
- that the modification is after the current PC, but it
- would require a specialized function to partially
- restore the CPU state */
-
- current_tb_modified = 1;
- cpu_restore_state_from_tb(cpu, current_tb, pc, true);
- cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
- &current_flags);
- }
-#endif /* TARGET_HAS_PRECISE_SMC */
- tb_phys_invalidate(tb, addr);
- }
- p->first_tb = (uintptr_t)NULL;
-#ifdef TARGET_HAS_PRECISE_SMC
- if (current_tb_modified) {
- /* Force execution of one insn next time. */
- cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
- return true;
- }
-#endif
-
- return false;
-}
-#endif
-
/* user-mode: call with mmap_lock held */
void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
{
@@ -2014,7 +1148,7 @@ static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
if (tb->size > tst->max_target_size) {
tst->max_target_size = tb->size;
}
- if (tb->page_addr[1] != -1) {
+ if (tb_page_addr1(tb) != -1) {
tst->cross_page++;
}
if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
@@ -2226,7 +1360,7 @@ int page_get_flags(target_ulong address)
void page_set_flags(target_ulong start, target_ulong end, int flags)
{
target_ulong addr, len;
- bool reset_target_data;
+ bool reset, inval_tb = false;
/* This function should never be called with addresses outside the
guest address space. If this assert fires, it probably indicates
@@ -2243,7 +1377,10 @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
if (flags & PAGE_WRITE) {
flags |= PAGE_WRITE_ORG;
}
- reset_target_data = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
+ reset = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
+ if (reset) {
+ page_reset_target_data(start, end);
+ }
flags &= ~PAGE_RESET;
for (addr = start, len = end - start;
@@ -2251,68 +1388,23 @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true);
- /* If the write protection bit is set, then we invalidate
- the code inside. */
- if (!(p->flags & PAGE_WRITE) &&
- (flags & PAGE_WRITE) &&
- p->first_tb) {
- tb_invalidate_phys_page(addr, 0);
- }
- if (reset_target_data) {
- g_free(p->target_data);
- p->target_data = NULL;
- p->flags = flags;
- } else {
- /* Using mprotect on a page does not change sticky bits. */
- p->flags = (p->flags & PAGE_STICKY) | flags;
+ /*
+ * If the page was executable, but is reset, or is no longer
+ * executable, or has become writable, then invalidate any code.
+ */
+ if ((p->flags & PAGE_EXEC)
+ && (reset ||
+ !(flags & PAGE_EXEC) ||
+ (flags & ~p->flags & PAGE_WRITE))) {
+ inval_tb = true;
}
+ /* Using mprotect on a page does not change sticky bits. */
+ p->flags = (reset ? 0 : p->flags & PAGE_STICKY) | flags;
}
-}
-
-void page_reset_target_data(target_ulong start, target_ulong end)
-{
- target_ulong addr, len;
-
- /*
- * This function should never be called with addresses outside the
- * guest address space. If this assert fires, it probably indicates
- * a missing call to h2g_valid.
- */
- assert(end - 1 <= GUEST_ADDR_MAX);
- assert(start < end);
- assert_memory_lock();
-
- start = start & TARGET_PAGE_MASK;
- end = TARGET_PAGE_ALIGN(end);
-
- for (addr = start, len = end - start;
- len != 0;
- len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
- PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
-
- g_free(p->target_data);
- p->target_data = NULL;
- }
-}
-void *page_get_target_data(target_ulong address)
-{
- PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
- return p ? p->target_data : NULL;
-}
-
-void *page_alloc_target_data(target_ulong address, size_t size)
-{
- PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
- void *ret = NULL;
-
- if (p->flags & PAGE_VALID) {
- ret = p->target_data;
- if (!ret) {
- p->target_data = ret = g_malloc0(size);
- }
+ if (inval_tb) {
+ tb_invalidate_phys_range(start, end);
}
- return ret;
}
int page_check_range(target_ulong start, target_ulong len, int flags)
@@ -2396,9 +1488,6 @@ void page_protect(tb_page_addr_t page_addr)
}
mprotect(g2h_untagged(page_addr), qemu_host_page_size,
(prot & PAGE_BITS) & ~PAGE_WRITE);
- if (DEBUG_TB_INVALIDATE_GATE) {
- printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
- }
}
}
@@ -2453,12 +1542,8 @@ int page_unprotect(target_ulong address, uintptr_t pc)
/* and since the content will be modified, we must invalidate
the corresponding translated code. */
- current_tb_invalidated |= tb_invalidate_phys_page(addr, pc);
-#ifdef CONFIG_USER_ONLY
- if (DEBUG_TB_CHECK_GATE) {
- tb_invalidate_check(addr);
- }
-#endif
+ current_tb_invalidated |=
+ tb_invalidate_phys_page_unwind(addr, pc);
}
mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
prot & PAGE_BITS);
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 8e78fd7a9c..061519691f 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -157,7 +157,7 @@ static void *translator_access(CPUArchState *env, DisasContextBase *db,
tb = db->tb;
/* Use slow path if first page is MMIO. */
- if (unlikely(tb->page_addr[0] == -1)) {
+ if (unlikely(tb_page_addr0(tb) == -1)) {
return NULL;
}
@@ -169,13 +169,14 @@ static void *translator_access(CPUArchState *env, DisasContextBase *db,
host = db->host_addr[1];
base = TARGET_PAGE_ALIGN(db->pc_first);
if (host == NULL) {
- tb->page_addr[1] =
+ tb_page_addr_t phys_page =
get_page_addr_code_hostp(env, base, &db->host_addr[1]);
+ /* We cannot handle MMIO as second page. */
+ assert(phys_page != -1);
+ tb_set_page_addr1(tb, phys_page);
#ifdef CONFIG_USER_ONLY
page_protect(end);
#endif
- /* We cannot handle MMIO as second page. */
- assert(tb->page_addr[1] != -1);
host = db->host_addr[1];
}
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 521aa8b61e..fb7d6ee9e9 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -210,6 +210,48 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
return addr;
}
+void page_reset_target_data(target_ulong start, target_ulong end)
+{
+#ifdef TARGET_PAGE_DATA_SIZE
+ target_ulong addr, len;
+
+ /*
+ * This function should never be called with addresses outside the
+ * guest address space. If this assert fires, it probably indicates
+ * a missing call to h2g_valid.
+ */
+ assert(end - 1 <= GUEST_ADDR_MAX);
+ assert(start < end);
+ assert_memory_lock();
+
+ start = start & TARGET_PAGE_MASK;
+ end = TARGET_PAGE_ALIGN(end);
+
+ for (addr = start, len = end - start;
+ len != 0;
+ len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
+ PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
+
+ g_free(p->target_data);
+ p->target_data = NULL;
+ }
+#endif
+}
+
+#ifdef TARGET_PAGE_DATA_SIZE
+void *page_get_target_data(target_ulong address)
+{
+ PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
+ void *ret = p->target_data;
+
+ if (!ret) {
+ ret = g_malloc0(TARGET_PAGE_DATA_SIZE);
+ p->target_data = ret;
+ }
+ return ret;
+}
+#endif
+
/* The softmmu versions of these helpers are in cputlb.c. */
/*
diff --git a/block.c b/block.c
index 1fbf6b9e69..c69be2cfe3 100644
--- a/block.c
+++ b/block.c
@@ -1641,6 +1641,20 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
goto open_failed;
}
+ assert(!(bs->supported_read_flags & ~BDRV_REQ_MASK));
+ assert(!(bs->supported_write_flags & ~BDRV_REQ_MASK));
+
+ /*
+ * Always allow the BDRV_REQ_REGISTERED_BUF optimization hint. This saves
+ * drivers that pass read/write requests through to a child the trouble of
+ * declaring support explicitly.
+ *
+ * Drivers must not propagate this flag accidentally when they initiate I/O
+ * to a bounce buffer. That case should be rare though.
+ */
+ bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF;
+ bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF;
+
ret = refresh_total_sectors(bs, bs->total_sectors);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not refresh total sector count");
diff --git a/block/blkio.c b/block/blkio.c
new file mode 100644
index 0000000000..82f26eedd2
--- /dev/null
+++ b/block/blkio.c
@@ -0,0 +1,1008 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * libblkio BlockDriver
+ *
+ * Copyright Red Hat, Inc.
+ *
+ * Author:
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ */
+
+#include "qemu/osdep.h"
+#include <blkio.h>
+#include "block/block_int.h"
+#include "exec/memory.h"
+#include "exec/cpu-common.h" /* for qemu_ram_get_fd() */
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qapi/qmp/qdict.h"
+#include "qemu/module.h"
+#include "exec/memory.h" /* for ram_block_discard_disable() */
+
+/*
+ * Keep the QEMU BlockDriver names identical to the libblkio driver names.
+ * Using macros instead of typing out the string literals avoids typos.
+ */
+#define DRIVER_IO_URING "io_uring"
+#define DRIVER_NVME_IO_URING "nvme-io_uring"
+#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user"
+#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa"
+
+/*
+ * Allocated bounce buffers are kept in a list sorted by buffer address.
+ */
+typedef struct BlkioBounceBuf {
+ QLIST_ENTRY(BlkioBounceBuf) next;
+
+ /* The bounce buffer */
+ struct iovec buf;
+} BlkioBounceBuf;
+
+typedef struct {
+ /*
+ * libblkio is not thread-safe so this lock protects ->blkio and
+ * ->blkioq.
+ */
+ QemuMutex blkio_lock;
+ struct blkio *blkio;
+ struct blkioq *blkioq; /* make this multi-queue in the future... */
+ int completion_fd;
+
+ /*
+ * Polling fetches the next completion into this field.
+ *
+ * No lock is necessary since only one thread calls aio_poll() and invokes
+ * fd and poll handlers.
+ */
+ struct blkio_completion poll_completion;
+
+ /*
+ * Protects ->bounce_pool, ->bounce_bufs, ->bounce_available.
+ *
+ * Lock ordering: ->bounce_lock before ->blkio_lock.
+ */
+ CoMutex bounce_lock;
+
+ /* Bounce buffer pool */
+ struct blkio_mem_region bounce_pool;
+
+ /* Sorted list of allocated bounce buffers */
+ QLIST_HEAD(, BlkioBounceBuf) bounce_bufs;
+
+ /* Queue for coroutines waiting for bounce buffer space */
+ CoQueue bounce_available;
+
+ /* The value of the "mem-region-alignment" property */
+ size_t mem_region_alignment;
+
+ /* Can we skip adding/deleting blkio_mem_regions? */
+ bool needs_mem_regions;
+
+ /* Are file descriptors necessary for blkio_mem_regions? */
+ bool needs_mem_region_fd;
+
+ /* Are madvise(MADV_DONTNEED)-style operations unavailable? */
+ bool may_pin_mem_regions;
+} BDRVBlkioState;
+
+/* Called with s->bounce_lock held */
+static int blkio_resize_bounce_pool(BDRVBlkioState *s, int64_t bytes)
+{
+ /* There can be no allocated bounce buffers during resize */
+ assert(QLIST_EMPTY(&s->bounce_bufs));
+
+ /* Pad size to reduce frequency of resize calls */
+ bytes += 128 * 1024;
+
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+ int ret;
+
+ if (s->bounce_pool.addr) {
+ blkio_unmap_mem_region(s->blkio, &s->bounce_pool);
+ blkio_free_mem_region(s->blkio, &s->bounce_pool);
+ memset(&s->bounce_pool, 0, sizeof(s->bounce_pool));
+ }
+
+ /* Automatically freed when s->blkio is destroyed */
+ ret = blkio_alloc_mem_region(s->blkio, &s->bounce_pool, bytes);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = blkio_map_mem_region(s->blkio, &s->bounce_pool);
+ if (ret < 0) {
+ blkio_free_mem_region(s->blkio, &s->bounce_pool);
+ memset(&s->bounce_pool, 0, sizeof(s->bounce_pool));
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/* Called with s->bounce_lock held */
+static bool
+blkio_do_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce,
+ int64_t bytes)
+{
+ void *addr = s->bounce_pool.addr;
+ BlkioBounceBuf *cur = NULL;
+ BlkioBounceBuf *prev = NULL;
+ ptrdiff_t space;
+
+ /*
+ * This is just a linear search over the holes between requests. An
+ * efficient allocator would be nice.
+ */
+ QLIST_FOREACH(cur, &s->bounce_bufs, next) {
+ space = cur->buf.iov_base - addr;
+ if (bytes <= space) {
+ QLIST_INSERT_BEFORE(cur, bounce, next);
+ bounce->buf.iov_base = addr;
+ bounce->buf.iov_len = bytes;
+ return true;
+ }
+
+ addr = cur->buf.iov_base + cur->buf.iov_len;
+ prev = cur;
+ }
+
+ /* Is there space after the last request? */
+ space = s->bounce_pool.addr + s->bounce_pool.len - addr;
+ if (bytes > space) {
+ return false;
+ }
+ if (prev) {
+ QLIST_INSERT_AFTER(prev, bounce, next);
+ } else {
+ QLIST_INSERT_HEAD(&s->bounce_bufs, bounce, next);
+ }
+ bounce->buf.iov_base = addr;
+ bounce->buf.iov_len = bytes;
+ return true;
+}
+
+static int coroutine_fn
+blkio_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce,
+ int64_t bytes)
+{
+ /*
+ * Ensure fairness: first time around we join the back of the queue,
+ * subsequently we join the front so we don't lose our place.
+ */
+ CoQueueWaitFlags wait_flags = 0;
+
+ QEMU_LOCK_GUARD(&s->bounce_lock);
+
+ /* Ensure fairness: don't even try if other requests are already waiting */
+ if (!qemu_co_queue_empty(&s->bounce_available)) {
+ qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock,
+ wait_flags);
+ wait_flags = CO_QUEUE_WAIT_FRONT;
+ }
+
+ while (true) {
+ if (blkio_do_alloc_bounce_buffer(s, bounce, bytes)) {
+ /* Kick the next queued request since there may be space */
+ qemu_co_queue_next(&s->bounce_available);
+ return 0;
+ }
+
+ /*
+ * If there are no in-flight requests then the pool was simply too
+ * small.
+ */
+ if (QLIST_EMPTY(&s->bounce_bufs)) {
+ bool ok;
+ int ret;
+
+ ret = blkio_resize_bounce_pool(s, bytes);
+ if (ret < 0) {
+ /* Kick the next queued request since that may fail too */
+ qemu_co_queue_next(&s->bounce_available);
+ return ret;
+ }
+
+ ok = blkio_do_alloc_bounce_buffer(s, bounce, bytes);
+ assert(ok); /* must have space this time */
+ return 0;
+ }
+
+ qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock,
+ wait_flags);
+ wait_flags = CO_QUEUE_WAIT_FRONT;
+ }
+}
+
+static void coroutine_fn blkio_free_bounce_buffer(BDRVBlkioState *s,
+ BlkioBounceBuf *bounce)
+{
+ QEMU_LOCK_GUARD(&s->bounce_lock);
+
+ QLIST_REMOVE(bounce, next);
+
+ /* Wake up waiting coroutines since space may now be available */
+ qemu_co_queue_next(&s->bounce_available);
+}
+
+/* For async to .bdrv_co_*() conversion */
+typedef struct {
+ Coroutine *coroutine;
+ int ret;
+} BlkioCoData;
+
+static void blkio_completion_fd_read(void *opaque)
+{
+ BlockDriverState *bs = opaque;
+ BDRVBlkioState *s = bs->opaque;
+ uint64_t val;
+ int ret;
+
+ /* Polling may have already fetched a completion */
+ if (s->poll_completion.user_data != NULL) {
+ BlkioCoData *cod = s->poll_completion.user_data;
+ cod->ret = s->poll_completion.ret;
+
+ /* Clear it in case aio_co_wake() enters a nested event loop */
+ s->poll_completion.user_data = NULL;
+
+ aio_co_wake(cod->coroutine);
+ }
+
+ /* Reset completion fd status */
+ ret = read(s->completion_fd, &val, sizeof(val));
+
+ /* Ignore errors, there's nothing we can do */
+ (void)ret;
+
+ /*
+ * Reading one completion at a time makes nested event loop re-entrancy
+ * simple. Change this loop to get multiple completions in one go if it
+ * becomes a performance bottleneck.
+ */
+ while (true) {
+ struct blkio_completion completion;
+
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+ ret = blkioq_do_io(s->blkioq, &completion, 0, 1, NULL);
+ }
+ if (ret != 1) {
+ break;
+ }
+
+ BlkioCoData *cod = completion.user_data;
+ cod->ret = completion.ret;
+ aio_co_wake(cod->coroutine);
+ }
+}
+
+static bool blkio_completion_fd_poll(void *opaque)
+{
+ BlockDriverState *bs = opaque;
+ BDRVBlkioState *s = bs->opaque;
+ int ret;
+
+ /* Just in case we already fetched a completion */
+ if (s->poll_completion.user_data != NULL) {
+ return true;
+ }
+
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+ ret = blkioq_do_io(s->blkioq, &s->poll_completion, 0, 1, NULL);
+ }
+ return ret == 1;
+}
+
+static void blkio_completion_fd_poll_ready(void *opaque)
+{
+ blkio_completion_fd_read(opaque);
+}
+
+static void blkio_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVBlkioState *s = bs->opaque;
+
+ aio_set_fd_handler(new_context,
+ s->completion_fd,
+ false,
+ blkio_completion_fd_read,
+ NULL,
+ blkio_completion_fd_poll,
+ blkio_completion_fd_poll_ready,
+ bs);
+}
+
+static void blkio_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVBlkioState *s = bs->opaque;
+
+ aio_set_fd_handler(bdrv_get_aio_context(bs),
+ s->completion_fd,
+ false, NULL, NULL, NULL, NULL, NULL);
+}
+
+/* Call with s->blkio_lock held to submit I/O after enqueuing a new request */
+static void blkio_submit_io(BlockDriverState *bs)
+{
+ if (qatomic_read(&bs->io_plugged) == 0) {
+ BDRVBlkioState *s = bs->opaque;
+
+ blkioq_do_io(s->blkioq, NULL, 0, 0, NULL);
+ }
+}
+
+static int coroutine_fn
+blkio_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
+{
+ BDRVBlkioState *s = bs->opaque;
+ BlkioCoData cod = {
+ .coroutine = qemu_coroutine_self(),
+ };
+
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+ blkioq_discard(s->blkioq, offset, bytes, &cod, 0);
+ blkio_submit_io(bs);
+ }
+
+ qemu_coroutine_yield();
+ return cod.ret;
+}
+
+static int coroutine_fn
+blkio_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+{
+ BlkioCoData cod = {
+ .coroutine = qemu_coroutine_self(),
+ };
+ BDRVBlkioState *s = bs->opaque;
+ bool use_bounce_buffer =
+ s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
+ BlkioBounceBuf bounce;
+ struct iovec *iov = qiov->iov;
+ int iovcnt = qiov->niov;
+
+ if (use_bounce_buffer) {
+ int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes);
+ if (ret < 0) {
+ return ret;
+ }
+
+ iov = &bounce.buf;
+ iovcnt = 1;
+ }
+
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+ blkioq_readv(s->blkioq, offset, iov, iovcnt, &cod, 0);
+ blkio_submit_io(bs);
+ }
+
+ qemu_coroutine_yield();
+
+ if (use_bounce_buffer) {
+ if (cod.ret == 0) {
+ qemu_iovec_from_buf(qiov, 0,
+ bounce.buf.iov_base,
+ bounce.buf.iov_len);
+ }
+
+ blkio_free_bounce_buffer(s, &bounce);
+ }
+
+ return cod.ret;
+}
+
+static int coroutine_fn blkio_co_pwritev(BlockDriverState *bs, int64_t offset,
+ int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+{
+ uint32_t blkio_flags = (flags & BDRV_REQ_FUA) ? BLKIO_REQ_FUA : 0;
+ BlkioCoData cod = {
+ .coroutine = qemu_coroutine_self(),
+ };
+ BDRVBlkioState *s = bs->opaque;
+ bool use_bounce_buffer =
+ s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
+ BlkioBounceBuf bounce;
+ struct iovec *iov = qiov->iov;
+ int iovcnt = qiov->niov;
+
+ if (use_bounce_buffer) {
+ int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes);
+ if (ret < 0) {
+ return ret;
+ }
+
+ qemu_iovec_to_buf(qiov, 0, bounce.buf.iov_base, bytes);
+ iov = &bounce.buf;
+ iovcnt = 1;
+ }
+
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+ blkioq_writev(s->blkioq, offset, iov, iovcnt, &cod, blkio_flags);
+ blkio_submit_io(bs);
+ }
+
+ qemu_coroutine_yield();
+
+ if (use_bounce_buffer) {
+ blkio_free_bounce_buffer(s, &bounce);
+ }
+
+ return cod.ret;
+}
+
+static int coroutine_fn blkio_co_flush(BlockDriverState *bs)
+{
+ BDRVBlkioState *s = bs->opaque;
+ BlkioCoData cod = {
+ .coroutine = qemu_coroutine_self(),
+ };
+
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+ blkioq_flush(s->blkioq, &cod, 0);
+ blkio_submit_io(bs);
+ }
+
+ qemu_coroutine_yield();
+ return cod.ret;
+}
+
+static int coroutine_fn blkio_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+{
+ BDRVBlkioState *s = bs->opaque;
+ BlkioCoData cod = {
+ .coroutine = qemu_coroutine_self(),
+ };
+ uint32_t blkio_flags = 0;
+
+ if (flags & BDRV_REQ_FUA) {
+ blkio_flags |= BLKIO_REQ_FUA;
+ }
+ if (!(flags & BDRV_REQ_MAY_UNMAP)) {
+ blkio_flags |= BLKIO_REQ_NO_UNMAP;
+ }
+ if (flags & BDRV_REQ_NO_FALLBACK) {
+ blkio_flags |= BLKIO_REQ_NO_FALLBACK;
+ }
+
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+ blkioq_write_zeroes(s->blkioq, offset, bytes, &cod, blkio_flags);
+ blkio_submit_io(bs);
+ }
+
+ qemu_coroutine_yield();
+ return cod.ret;
+}
+
+static void blkio_io_unplug(BlockDriverState *bs)
+{
+ BDRVBlkioState *s = bs->opaque;
+
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+ blkio_submit_io(bs);
+ }
+}
+
+typedef enum {
+ BMRR_OK,
+ BMRR_SKIP,
+ BMRR_FAIL,
+} BlkioMemRegionResult;
+
+/*
+ * Produce a struct blkio_mem_region for a given address and size.
+ *
+ * This function produces identical results when called multiple times with the
+ * same arguments. This property is necessary because blkio_unmap_mem_region()
+ * must receive the same struct blkio_mem_region field values that were passed
+ * to blkio_map_mem_region().
+ */
+static BlkioMemRegionResult
+blkio_mem_region_from_host(BlockDriverState *bs,
+ void *host, size_t size,
+ struct blkio_mem_region *region,
+ Error **errp)
+{
+ BDRVBlkioState *s = bs->opaque;
+ int fd = -1;
+ ram_addr_t fd_offset = 0;
+
+ if (((uintptr_t)host | size) % s->mem_region_alignment) {
+ error_setg(errp, "unaligned buf %p with size %zu", host, size);
+ return BMRR_FAIL;
+ }
+
+ /* Attempt to find the fd for the underlying memory */
+ if (s->needs_mem_region_fd) {
+ RAMBlock *ram_block;
+ RAMBlock *end_block;
+ ram_addr_t offset;
+
+ /*
+ * bdrv_register_buf() is called with the BQL held so mr lives at least
+ * until this function returns.
+ */
+ ram_block = qemu_ram_block_from_host(host, false, &fd_offset);
+ if (ram_block) {
+ fd = qemu_ram_get_fd(ram_block);
+ }
+ if (fd == -1) {
+ /*
+ * Ideally every RAMBlock would have an fd. pc-bios and other
+ * things don't. Luckily they are usually not I/O buffers and we
+ * can just ignore them.
+ */
+ return BMRR_SKIP;
+ }
+
+ /* Make sure the fd covers the entire range */
+ end_block = qemu_ram_block_from_host(host + size - 1, false, &offset);
+ if (ram_block != end_block) {
+ error_setg(errp, "registered buffer at %p with size %zu extends "
+ "beyond RAMBlock", host, size);
+ return BMRR_FAIL;
+ }
+ }
+
+ *region = (struct blkio_mem_region){
+ .addr = host,
+ .len = size,
+ .fd = fd,
+ .fd_offset = fd_offset,
+ };
+ return BMRR_OK;
+}
+
+static bool blkio_register_buf(BlockDriverState *bs, void *host, size_t size,
+ Error **errp)
+{
+ BDRVBlkioState *s = bs->opaque;
+ struct blkio_mem_region region;
+ BlkioMemRegionResult region_result;
+ int ret;
+
+ /*
+ * Mapping memory regions conflicts with RAM discard (virtio-mem) when
+ * there is pinning, so only do it when necessary.
+ */
+ if (!s->needs_mem_regions && s->may_pin_mem_regions) {
+ return true;
+ }
+
+ region_result = blkio_mem_region_from_host(bs, host, size, &region, errp);
+ if (region_result == BMRR_SKIP) {
+ return true;
+ } else if (region_result != BMRR_OK) {
+ return false;
+ }
+
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+ ret = blkio_map_mem_region(s->blkio, &region);
+ }
+
+ if (ret < 0) {
+ error_setg(errp, "Failed to add blkio mem region %p with size %zu: %s",
+ host, size, blkio_get_error_msg());
+ return false;
+ }
+ return true;
+}
+
+static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size)
+{
+ BDRVBlkioState *s = bs->opaque;
+ struct blkio_mem_region region;
+
+ /* See blkio_register_buf() */
+ if (!s->needs_mem_regions && s->may_pin_mem_regions) {
+ return;
+ }
+
+ if (blkio_mem_region_from_host(bs, host, size, &region, NULL) != BMRR_OK) {
+ return;
+ }
+
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+ blkio_unmap_mem_region(s->blkio, &region);
+ }
+}
+
+static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ const char *filename = qdict_get_str(options, "filename");
+ BDRVBlkioState *s = bs->opaque;
+ int ret;
+
+ ret = blkio_set_str(s->blkio, "path", filename);
+ qdict_del(options, "filename");
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to set path: %s",
+ blkio_get_error_msg());
+ return ret;
+ }
+
+ if (flags & BDRV_O_NOCACHE) {
+ ret = blkio_set_bool(s->blkio, "direct", true);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to set direct: %s",
+ blkio_get_error_msg());
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ const char *filename = qdict_get_str(options, "filename");
+ BDRVBlkioState *s = bs->opaque;
+ int ret;
+
+ ret = blkio_set_str(s->blkio, "path", filename);
+ qdict_del(options, "filename");
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to set path: %s",
+ blkio_get_error_msg());
+ return ret;
+ }
+
+ if (!(flags & BDRV_O_NOCACHE)) {
+ error_setg(errp, "cache.direct=off is not supported");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int blkio_virtio_blk_common_open(BlockDriverState *bs,
+ QDict *options, int flags, Error **errp)
+{
+ const char *path = qdict_get_try_str(options, "path");
+ BDRVBlkioState *s = bs->opaque;
+ int ret;
+
+ if (!path) {
+ error_setg(errp, "missing 'path' option");
+ return -EINVAL;
+ }
+
+ ret = blkio_set_str(s->blkio, "path", path);
+ qdict_del(options, "path");
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to set path: %s",
+ blkio_get_error_msg());
+ return ret;
+ }
+
+ if (!(flags & BDRV_O_NOCACHE)) {
+ error_setg(errp, "cache.direct=off is not supported");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ const char *blkio_driver = bs->drv->protocol_name;
+ BDRVBlkioState *s = bs->opaque;
+ int ret;
+
+ ret = blkio_create(blkio_driver, &s->blkio);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "blkio_create failed: %s",
+ blkio_get_error_msg());
+ return ret;
+ }
+
+ if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) {
+ ret = blkio_io_uring_open(bs, options, flags, errp);
+ } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) {
+ ret = blkio_nvme_io_uring(bs, options, flags, errp);
+ } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) {
+ ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
+ } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) {
+ ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
+ } else {
+ g_assert_not_reached();
+ }
+ if (ret < 0) {
+ blkio_destroy(&s->blkio);
+ return ret;
+ }
+
+ if (!(flags & BDRV_O_RDWR)) {
+ ret = blkio_set_bool(s->blkio, "read-only", true);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to set read-only: %s",
+ blkio_get_error_msg());
+ blkio_destroy(&s->blkio);
+ return ret;
+ }
+ }
+
+ ret = blkio_connect(s->blkio);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
+ blkio_get_error_msg());
+ blkio_destroy(&s->blkio);
+ return ret;
+ }
+
+ ret = blkio_get_bool(s->blkio,
+ "needs-mem-regions",
+ &s->needs_mem_regions);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "failed to get needs-mem-regions: %s",
+ blkio_get_error_msg());
+ blkio_destroy(&s->blkio);
+ return ret;
+ }
+
+ ret = blkio_get_bool(s->blkio,
+ "needs-mem-region-fd",
+ &s->needs_mem_region_fd);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "failed to get needs-mem-region-fd: %s",
+ blkio_get_error_msg());
+ blkio_destroy(&s->blkio);
+ return ret;
+ }
+
+ ret = blkio_get_uint64(s->blkio,
+ "mem-region-alignment",
+ &s->mem_region_alignment);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "failed to get mem-region-alignment: %s",
+ blkio_get_error_msg());
+ blkio_destroy(&s->blkio);
+ return ret;
+ }
+
+ ret = blkio_get_bool(s->blkio,
+ "may-pin-mem-regions",
+ &s->may_pin_mem_regions);
+ if (ret < 0) {
+ /* Be conservative (assume pinning) if the property is not supported */
+ s->may_pin_mem_regions = s->needs_mem_regions;
+ }
+
+ /*
+ * Notify if libblkio drivers pin memory and prevent features like
+ * virtio-mem from working.
+ */
+ if (s->may_pin_mem_regions) {
+ ret = ram_block_discard_disable(true);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "ram_block_discard_disable() failed");
+ blkio_destroy(&s->blkio);
+ return ret;
+ }
+ }
+
+ ret = blkio_start(s->blkio);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "blkio_start failed: %s",
+ blkio_get_error_msg());
+ blkio_destroy(&s->blkio);
+ if (s->may_pin_mem_regions) {
+ ram_block_discard_disable(false);
+ }
+ return ret;
+ }
+
+ bs->supported_write_flags = BDRV_REQ_FUA | BDRV_REQ_REGISTERED_BUF;
+ bs->supported_zero_flags = BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP |
+ BDRV_REQ_NO_FALLBACK;
+
+ qemu_mutex_init(&s->blkio_lock);
+ qemu_co_mutex_init(&s->bounce_lock);
+ qemu_co_queue_init(&s->bounce_available);
+ QLIST_INIT(&s->bounce_bufs);
+ s->blkioq = blkio_get_queue(s->blkio, 0);
+ s->completion_fd = blkioq_get_completion_fd(s->blkioq);
+
+ blkio_attach_aio_context(bs, bdrv_get_aio_context(bs));
+ return 0;
+}
+
+static void blkio_close(BlockDriverState *bs)
+{
+ BDRVBlkioState *s = bs->opaque;
+
+ /* There is no destroy() API for s->bounce_lock */
+
+ qemu_mutex_destroy(&s->blkio_lock);
+ blkio_detach_aio_context(bs);
+ blkio_destroy(&s->blkio);
+
+ if (s->may_pin_mem_regions) {
+ ram_block_discard_disable(false);
+ }
+}
+
+static int64_t blkio_getlength(BlockDriverState *bs)
+{
+ BDRVBlkioState *s = bs->opaque;
+ uint64_t capacity;
+ int ret;
+
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+ ret = blkio_get_uint64(s->blkio, "capacity", &capacity);
+ }
+ if (ret < 0) {
+ return -ret;
+ }
+
+ return capacity;
+}
+
+static int blkio_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ return 0;
+}
+
+static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+ BDRVBlkioState *s = bs->opaque;
+ QEMU_LOCK_GUARD(&s->blkio_lock);
+ int value;
+ int ret;
+
+ ret = blkio_get_int(s->blkio, "request-alignment", &value);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to get \"request-alignment\": %s",
+ blkio_get_error_msg());
+ return;
+ }
+ bs->bl.request_alignment = value;
+ if (bs->bl.request_alignment < 1 ||
+ bs->bl.request_alignment >= INT_MAX ||
+ !is_power_of_2(bs->bl.request_alignment)) {
+ error_setg(errp, "invalid \"request-alignment\" value %" PRIu32 ", "
+ "must be a power of 2 less than INT_MAX",
+ bs->bl.request_alignment);
+ return;
+ }
+
+ ret = blkio_get_int(s->blkio, "optimal-io-size", &value);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to get \"optimal-io-size\": %s",
+ blkio_get_error_msg());
+ return;
+ }
+ bs->bl.opt_transfer = value;
+ if (bs->bl.opt_transfer > INT_MAX ||
+ (bs->bl.opt_transfer % bs->bl.request_alignment)) {
+ error_setg(errp, "invalid \"optimal-io-size\" value %" PRIu32 ", must "
+ "be a multiple of %" PRIu32, bs->bl.opt_transfer,
+ bs->bl.request_alignment);
+ return;
+ }
+
+ ret = blkio_get_int(s->blkio, "max-transfer", &value);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to get \"max-transfer\": %s",
+ blkio_get_error_msg());
+ return;
+ }
+ bs->bl.max_transfer = value;
+ if ((bs->bl.max_transfer % bs->bl.request_alignment) ||
+ (bs->bl.opt_transfer && (bs->bl.max_transfer % bs->bl.opt_transfer))) {
+ error_setg(errp, "invalid \"max-transfer\" value %" PRIu32 ", must be "
+ "a multiple of %" PRIu32 " and %" PRIu32 " (if non-zero)",
+ bs->bl.max_transfer, bs->bl.request_alignment,
+ bs->bl.opt_transfer);
+ return;
+ }
+
+ ret = blkio_get_int(s->blkio, "buf-alignment", &value);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to get \"buf-alignment\": %s",
+ blkio_get_error_msg());
+ return;
+ }
+ if (value < 1) {
+ error_setg(errp, "invalid \"buf-alignment\" value %d, must be "
+ "positive", value);
+ return;
+ }
+ bs->bl.min_mem_alignment = value;
+
+ ret = blkio_get_int(s->blkio, "optimal-buf-alignment", &value);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "failed to get \"optimal-buf-alignment\": %s",
+ blkio_get_error_msg());
+ return;
+ }
+ if (value < 1) {
+ error_setg(errp, "invalid \"optimal-buf-alignment\" value %d, "
+ "must be positive", value);
+ return;
+ }
+ bs->bl.opt_mem_alignment = value;
+
+ ret = blkio_get_int(s->blkio, "max-segments", &value);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to get \"max-segments\": %s",
+ blkio_get_error_msg());
+ return;
+ }
+ if (value < 1) {
+ error_setg(errp, "invalid \"max-segments\" value %d, must be positive",
+ value);
+ return;
+ }
+ bs->bl.max_iov = value;
+}
+
+/*
+ * TODO
+ * Missing libblkio APIs:
+ * - block_status
+ * - co_invalidate_cache
+ *
+ * Out of scope?
+ * - create
+ * - truncate
+ */
+
+#define BLKIO_DRIVER(name, ...) \
+ { \
+ .format_name = name, \
+ .protocol_name = name, \
+ .instance_size = sizeof(BDRVBlkioState), \
+ .bdrv_file_open = blkio_file_open, \
+ .bdrv_close = blkio_close, \
+ .bdrv_getlength = blkio_getlength, \
+ .bdrv_get_info = blkio_get_info, \
+ .bdrv_attach_aio_context = blkio_attach_aio_context, \
+ .bdrv_detach_aio_context = blkio_detach_aio_context, \
+ .bdrv_co_pdiscard = blkio_co_pdiscard, \
+ .bdrv_co_preadv = blkio_co_preadv, \
+ .bdrv_co_pwritev = blkio_co_pwritev, \
+ .bdrv_co_flush_to_disk = blkio_co_flush, \
+ .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
+ .bdrv_io_unplug = blkio_io_unplug, \
+ .bdrv_refresh_limits = blkio_refresh_limits, \
+ .bdrv_register_buf = blkio_register_buf, \
+ .bdrv_unregister_buf = blkio_unregister_buf, \
+ __VA_ARGS__ \
+ }
+
+static BlockDriver bdrv_io_uring = BLKIO_DRIVER(
+ DRIVER_IO_URING,
+ .bdrv_needs_filename = true,
+);
+
+static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER(
+ DRIVER_NVME_IO_URING,
+ .bdrv_needs_filename = true,
+);
+
+static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER(
+ DRIVER_VIRTIO_BLK_VHOST_USER
+);
+
+static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER(
+ DRIVER_VIRTIO_BLK_VHOST_VDPA
+);
+
+static void bdrv_blkio_init(void)
+{
+ bdrv_register(&bdrv_io_uring);
+ bdrv_register(&bdrv_nvme_io_uring);
+ bdrv_register(&bdrv_virtio_blk_vhost_user);
+ bdrv_register(&bdrv_virtio_blk_vhost_vdpa);
+}
+
+block_init(bdrv_blkio_init);
diff --git a/block/blkverify.c b/block/blkverify.c
index 020b1ae7b6..f36fd6aeb2 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -235,8 +235,8 @@ blkverify_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
qemu_iovec_init(&raw_qiov, qiov->niov);
qemu_iovec_clone(&raw_qiov, qiov, buf);
- ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, flags,
- false);
+ ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov,
+ flags & ~BDRV_REQ_REGISTERED_BUF, false);
cmp_offset = qemu_iovec_compare(qiov, &raw_qiov);
if (cmp_offset != -1) {
diff --git a/block/block-backend.c b/block/block-backend.c
index aa4adf06ae..4f59664397 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2545,16 +2545,16 @@ static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
}
}
-void blk_register_buf(BlockBackend *blk, void *host, size_t size)
+bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp)
{
GLOBAL_STATE_CODE();
- bdrv_register_buf(blk_bs(blk), host, size);
+ return bdrv_register_buf(blk_bs(blk), host, size, errp);
}
-void blk_unregister_buf(BlockBackend *blk, void *host)
+void blk_unregister_buf(BlockBackend *blk, void *host, size_t size)
{
GLOBAL_STATE_CODE();
- bdrv_unregister_buf(blk_bs(blk), host);
+ bdrv_unregister_buf(blk_bs(blk), host, size);
}
int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
diff --git a/block/block-ram-registrar.c b/block/block-ram-registrar.c
new file mode 100644
index 0000000000..25dbafa789
--- /dev/null
+++ b/block/block-ram-registrar.c
@@ -0,0 +1,58 @@
+/*
+ * BlockBackend RAM Registrar
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/block-backend.h"
+#include "sysemu/block-ram-registrar.h"
+#include "qapi/error.h"
+
+static void ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
+ size_t max_size)
+{
+ BlockRAMRegistrar *r = container_of(n, BlockRAMRegistrar, notifier);
+ Error *err = NULL;
+
+ if (!r->ok) {
+ return; /* don't try again if we've already failed */
+ }
+
+ if (!blk_register_buf(r->blk, host, max_size, &err)) {
+ error_report_err(err);
+ ram_block_notifier_remove(&r->notifier);
+ r->ok = false;
+ }
+}
+
+static void ram_block_removed(RAMBlockNotifier *n, void *host, size_t size,
+ size_t max_size)
+{
+ BlockRAMRegistrar *r = container_of(n, BlockRAMRegistrar, notifier);
+ blk_unregister_buf(r->blk, host, max_size);
+}
+
+void blk_ram_registrar_init(BlockRAMRegistrar *r, BlockBackend *blk)
+{
+ r->blk = blk;
+ r->notifier = (RAMBlockNotifier){
+ .ram_block_added = ram_block_added,
+ .ram_block_removed = ram_block_removed,
+
+ /*
+ * .ram_block_resized() is not necessary because we use the max_size
+ * value that does not change across resize.
+ */
+ };
+ r->ok = true;
+
+ ram_block_notifier_add(&r->notifier);
+}
+
+void blk_ram_registrar_destroy(BlockRAMRegistrar *r)
+{
+ if (r->ok) {
+ ram_block_notifier_remove(&r->notifier);
+ }
+}
diff --git a/block/crypto.c b/block/crypto.c
index 7a57774b76..c7365598a7 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -410,7 +410,6 @@ block_crypto_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
- assert(!flags);
assert(payload_offset < INT64_MAX);
assert(QEMU_IS_ALIGNED(offset, sector_size));
assert(QEMU_IS_ALIGNED(bytes, sector_size));
@@ -473,7 +472,8 @@ block_crypto_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
- assert(!(flags & ~BDRV_REQ_FUA));
+ flags &= ~BDRV_REQ_REGISTERED_BUF;
+
assert(payload_offset < INT64_MAX);
assert(QEMU_IS_ALIGNED(offset, sector_size));
assert(QEMU_IS_ALIGNED(bytes, sector_size));
diff --git a/block/file-posix.c b/block/file-posix.c
index 23acffb9a4..b9647c5ffc 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2133,7 +2133,6 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
- assert(flags == 0);
return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
}
diff --git a/block/gluster.c b/block/gluster.c
index bb1144cf6a..7c90f7ba4b 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -1236,7 +1236,6 @@ static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
QEMUIOVector *qiov,
int flags)
{
- assert(!flags);
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
}
diff --git a/block/io.c b/block/io.c
index d30073036e..a9673465dd 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1130,8 +1130,7 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
int ret;
bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
- assert(!(flags & ~BDRV_REQ_MASK));
- assert(!(flags & BDRV_REQ_NO_FALLBACK));
+ assert(!(flags & ~bs->supported_read_flags));
if (!drv) {
return -ENOMEDIUM;
@@ -1195,23 +1194,29 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
BdrvRequestFlags flags)
{
BlockDriver *drv = bs->drv;
+ bool emulate_fua = false;
int64_t sector_num;
unsigned int nb_sectors;
QEMUIOVector local_qiov;
int ret;
bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
- assert(!(flags & ~BDRV_REQ_MASK));
- assert(!(flags & BDRV_REQ_NO_FALLBACK));
if (!drv) {
return -ENOMEDIUM;
}
+ if ((flags & BDRV_REQ_FUA) &&
+ (~bs->supported_write_flags & BDRV_REQ_FUA)) {
+ flags &= ~BDRV_REQ_FUA;
+ emulate_fua = true;
+ }
+
+ flags &= bs->supported_write_flags;
+
if (drv->bdrv_co_pwritev_part) {
ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset,
- flags & bs->supported_write_flags);
- flags &= ~bs->supported_write_flags;
+ flags);
goto emulate_flags;
}
@@ -1221,9 +1226,7 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
}
if (drv->bdrv_co_pwritev) {
- ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
- flags & bs->supported_write_flags);
- flags &= ~bs->supported_write_flags;
+ ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, flags);
goto emulate_flags;
}
@@ -1233,10 +1236,8 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
.coroutine = qemu_coroutine_self(),
};
- acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov,
- flags & bs->supported_write_flags,
+ acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov, flags,
bdrv_co_io_em_complete, &co);
- flags &= ~bs->supported_write_flags;
if (acb == NULL) {
ret = -EIO;
} else {
@@ -1254,12 +1255,10 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
assert(bytes <= BDRV_REQUEST_MAX_BYTES);
assert(drv->bdrv_co_writev);
- ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov,
- flags & bs->supported_write_flags);
- flags &= ~bs->supported_write_flags;
+ ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov, flags);
emulate_flags:
- if (ret == 0 && (flags & BDRV_REQ_FUA)) {
+ if (ret == 0 && emulate_fua) {
ret = bdrv_co_flush(bs);
}
@@ -1487,11 +1486,14 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
align);
- /* TODO: We would need a per-BDS .supported_read_flags and
+ /*
+ * TODO: We would need a per-BDS .supported_read_flags and
* potential fallback support, if we ever implement any read flags
* to pass through to drivers. For now, there aren't any
- * passthrough flags. */
- assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH)));
+ * passthrough flags except the BDRV_REQ_REGISTERED_BUF optimization hint.
+ */
+ assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH |
+ BDRV_REQ_REGISTERED_BUF)));
/* Handle Copy on Read and associated serialisation */
if (flags & BDRV_REQ_COPY_ON_READ) {
@@ -1532,7 +1534,7 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
goto out;
}
- assert(!(flags & ~bs->supported_read_flags));
+ assert(!(flags & ~(bs->supported_read_flags | BDRV_REQ_REGISTERED_BUF)));
max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
if (bytes <= max_bytes && bytes <= max_transfer) {
@@ -1721,7 +1723,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
static int bdrv_pad_request(BlockDriverState *bs,
QEMUIOVector **qiov, size_t *qiov_offset,
int64_t *offset, int64_t *bytes,
- BdrvRequestPadding *pad, bool *padded)
+ BdrvRequestPadding *pad, bool *padded,
+ BdrvRequestFlags *flags)
{
int ret;
@@ -1749,6 +1752,10 @@ static int bdrv_pad_request(BlockDriverState *bs,
if (padded) {
*padded = true;
}
+ if (flags) {
+ /* Can't use optimization hint with bounce buffer */
+ *flags &= ~BDRV_REQ_REGISTERED_BUF;
+ }
return 0;
}
@@ -1803,7 +1810,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
}
ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
- NULL);
+ NULL, &flags);
if (ret < 0) {
goto fail;
}
@@ -1848,6 +1855,11 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
return -ENOTSUP;
}
+ /* By definition there is no user buffer so this flag doesn't make sense */
+ if (flags & BDRV_REQ_REGISTERED_BUF) {
+ return -EINVAL;
+ }
+
/* Invalidate the cached block-status data range if this write overlaps */
bdrv_bsc_invalidate_range(bs, offset, bytes);
@@ -2133,6 +2145,9 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
bool padding;
BdrvRequestPadding pad;
+ /* This flag doesn't make sense for padding or zero writes */
+ flags &= ~BDRV_REQ_REGISTERED_BUF;
+
padding = bdrv_init_padding(bs, offset, bytes, &pad);
if (padding) {
assert(!(flags & BDRV_REQ_NO_WAIT));
@@ -2250,7 +2265,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
* alignment only if there is no ZERO flag.
*/
ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
- &padded);
+ &padded, &flags);
if (ret < 0) {
return ret;
}
@@ -3262,29 +3277,57 @@ void bdrv_io_unplug(BlockDriverState *bs)
}
}
-void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
+/* Helper that undoes bdrv_register_buf() when it fails partway through */
+static void bdrv_register_buf_rollback(BlockDriverState *bs,
+ void *host,
+ size_t size,
+ BdrvChild *final_child)
+{
+ BdrvChild *child;
+
+ QLIST_FOREACH(child, &bs->children, next) {
+ if (child == final_child) {
+ break;
+ }
+
+ bdrv_unregister_buf(child->bs, host, size);
+ }
+
+ if (bs->drv && bs->drv->bdrv_unregister_buf) {
+ bs->drv->bdrv_unregister_buf(bs, host, size);
+ }
+}
+
+bool bdrv_register_buf(BlockDriverState *bs, void *host, size_t size,
+ Error **errp)
{
BdrvChild *child;
GLOBAL_STATE_CODE();
if (bs->drv && bs->drv->bdrv_register_buf) {
- bs->drv->bdrv_register_buf(bs, host, size);
+ if (!bs->drv->bdrv_register_buf(bs, host, size, errp)) {
+ return false;
+ }
}
QLIST_FOREACH(child, &bs->children, next) {
- bdrv_register_buf(child->bs, host, size);
+ if (!bdrv_register_buf(child->bs, host, size, errp)) {
+ bdrv_register_buf_rollback(bs, host, size, child);
+ return false;
+ }
}
+ return true;
}
-void bdrv_unregister_buf(BlockDriverState *bs, void *host)
+void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size)
{
BdrvChild *child;
GLOBAL_STATE_CODE();
if (bs->drv && bs->drv->bdrv_unregister_buf) {
- bs->drv->bdrv_unregister_buf(bs, host);
+ bs->drv->bdrv_unregister_buf(bs, host, size);
}
QLIST_FOREACH(child, &bs->children, next) {
- bdrv_unregister_buf(child->bs, host);
+ bdrv_unregister_buf(child->bs, host, size);
}
}
diff --git a/block/meson.build b/block/meson.build
index 60bc305597..b7c68b83a3 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -46,6 +46,7 @@ block_ss.add(files(
), zstd, zlib, gnutls)
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
+softmmu_ss.add(files('block-ram-registrar.c'))
if get_option('qcow1').allowed()
block_ss.add(files('qcow.c'))
@@ -92,6 +93,7 @@ block_modules = {}
modsrc = []
foreach m : [
+ [blkio, 'blkio', files('blkio.c')],
[curl, 'curl', files('curl.c')],
[glusterfs, 'gluster', files('gluster.c')],
[libiscsi, 'iscsi', [files('iscsi.c'), libm]],
diff --git a/block/mirror.c b/block/mirror.c
index 80c0109d39..bed089d2e0 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1486,6 +1486,8 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
qemu_iovec_init(&bounce_qiov, 1);
qemu_iovec_add(&bounce_qiov, bounce_buf, bytes);
qiov = &bounce_qiov;
+
+ flags &= ~BDRV_REQ_REGISTERED_BUF;
}
ret = bdrv_mirror_top_do_write(bs, MIRROR_METHOD_COPY, offset, bytes, qiov,
diff --git a/block/nbd.c b/block/nbd.c
index 494b9d683e..7d485c86d2 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -1222,7 +1222,6 @@ static int coroutine_fn nbd_client_co_preadv(BlockDriverState *bs, int64_t offse
};
assert(bytes <= NBD_MAX_BUFFER_SIZE);
- assert(!flags);
if (!bytes) {
return 0;
diff --git a/block/nvme.c b/block/nvme.c
index 2b24f95164..656624c585 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -1587,22 +1587,22 @@ static void nvme_aio_unplug(BlockDriverState *bs)
}
}
-static void nvme_register_buf(BlockDriverState *bs, void *host, size_t size)
+static bool nvme_register_buf(BlockDriverState *bs, void *host, size_t size,
+ Error **errp)
{
int ret;
- Error *local_err = NULL;
BDRVNVMeState *s = bs->opaque;
- ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL, &local_err);
- if (ret) {
- /* FIXME: we may run out of IOVA addresses after repeated
- * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap
- * doesn't reclaim addresses for fixed mappings. */
- error_reportf_err(local_err, "nvme_register_buf failed: ");
- }
+ /*
+ * FIXME: we may run out of IOVA addresses after repeated
+ * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap
+ * doesn't reclaim addresses for fixed mappings.
+ */
+ ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL, errp);
+ return ret == 0;
}
-static void nvme_unregister_buf(BlockDriverState *bs, void *host)
+static void nvme_unregister_buf(BlockDriverState *bs, void *host, size_t size)
{
BDRVNVMeState *s = bs->opaque;
diff --git a/block/parallels.c b/block/parallels.c
index c1523e7dab..dd15a44100 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -329,7 +329,6 @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
QEMUIOVector hd_qiov;
int ret = 0;
- assert(!flags);
qemu_iovec_init(&hd_qiov, qiov->niov);
while (nb_sectors > 0) {
diff --git a/block/qcow.c b/block/qcow.c
index 311aaa8705..e9180c7b61 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -628,7 +628,6 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, int64_t offset,
uint8_t *buf;
void *orig_buf;
- assert(!flags);
if (qiov->niov > 1) {
buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
if (buf == NULL) {
@@ -725,7 +724,6 @@ static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, int64_t offset,
uint8_t *buf;
void *orig_buf;
- assert(!flags);
s->cluster_cache_offset = -1; /* disable compressed cache */
/* We must always copy the iov when encrypting, so we
diff --git a/block/qed.c b/block/qed.c
index bda00e6257..99a9ec9b57 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -1395,7 +1395,6 @@ static int coroutine_fn bdrv_qed_co_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *qiov, int flags)
{
- assert(!flags);
return qed_co_request(bs, sector_num, qiov, nb_sectors, QED_AIOCB_WRITE);
}
diff --git a/block/raw-format.c b/block/raw-format.c
index f337ac7569..c8dc9bc850 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -258,6 +258,8 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
qemu_iovec_add(&local_qiov, buf, 512);
qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
qiov = &local_qiov;
+
+ flags &= ~BDRV_REQ_REGISTERED_BUF;
}
ret = raw_adjust_offset(bs, &offset, bytes, true);
diff --git a/block/replication.c b/block/replication.c
index c67f931f37..13f1d39571 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -261,7 +261,6 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,
int ret;
int64_t n;
- assert(!flags);
ret = replication_get_io_status(s);
if (ret < 0) {
goto out;
diff --git a/block/ssh.c b/block/ssh.c
index a2dc646536..a3cddc392c 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -1196,7 +1196,6 @@ static coroutine_fn int ssh_co_writev(BlockDriverState *bs,
BDRVSSHState *s = bs->opaque;
int ret;
- assert(!flags);
qemu_co_mutex_lock(&s->lock);
ret = ssh_write(s, bs, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE, qiov);
diff --git a/block/vhdx.c b/block/vhdx.c
index e10e78ebfd..e2344ee0b7 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1342,7 +1342,6 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
uint64_t bat_prior_offset = 0;
bool bat_update = false;
- assert(!flags);
qemu_iovec_init(&hd_qiov, qiov->niov);
qemu_co_mutex_lock(&s->lock);
diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index e54e26de17..d6c5a344c9 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -663,7 +663,6 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
page_dump(stdout);
printf("\n");
#endif
- tb_invalidate_phys_range(start, start + len);
mmap_unlock();
return start;
fail:
@@ -769,7 +768,6 @@ int target_munmap(abi_ulong start, abi_ulong len)
if (ret == 0) {
page_set_flags(start, start + len, 0);
- tb_invalidate_phys_range(start, start + len);
}
mmap_unlock();
return ret;
diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c
index b9fc6d230c..d77b8f98f7 100644
--- a/contrib/elf2dmp/main.c
+++ b/contrib/elf2dmp/main.c
@@ -125,6 +125,7 @@ static KDDEBUGGER_DATA64 *get_kdbg(uint64_t KernBase, struct pdb_reader *pdb,
if (va_space_rw(vs, KdDebuggerDataBlock, kdbg, kdbg_hdr.Size, 0)) {
eprintf("Failed to extract entire KDBG\n");
+ free(kdbg);
return NULL;
}
diff --git a/cpu.c b/cpu.c
index 14365e36f3..2a09b05205 100644
--- a/cpu.c
+++ b/cpu.c
@@ -277,7 +277,7 @@ void list_cpus(const char *optarg)
void tb_invalidate_phys_addr(target_ulong addr)
{
mmap_lock();
- tb_invalidate_phys_page_range(addr, addr + 1);
+ tb_invalidate_phys_page(addr);
mmap_unlock();
}
#else
@@ -298,7 +298,7 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs)
return;
}
ram_addr = memory_region_get_ram_addr(mr) + addr;
- tb_invalidate_phys_page_range(ram_addr, ram_addr + 1);
+ tb_invalidate_phys_page(ram_addr);
}
#endif
diff --git a/docs/devel/reset.rst b/docs/devel/reset.rst
index abea1102dc..7cc6a6b314 100644
--- a/docs/devel/reset.rst
+++ b/docs/devel/reset.rst
@@ -210,9 +210,11 @@ Polling the reset state
Resettable interface provides the ``resettable_is_in_reset()`` function.
This function returns true if the object parameter is currently under reset.
-An object is under reset from the beginning of the *init* phase to the end of
-the *exit* phase. During all three phases, the function will return that the
-object is in reset.
+An object is under reset from the beginning of the *enter* phase (before
+either its children or its own enter method is called) to the *exit*
+phase. During *enter* and *hold* phase only, the function will return that the
+object is in reset. The state is changed after the *exit* is propagated to
+its children and just before calling the object's own *exit* method.
This function may be used if the object behavior has to be adapted
while in reset state. For example if a device has an irq input,
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
index cfb4b0768b..e3af79bb8c 100644
--- a/docs/system/arm/emulation.rst
+++ b/docs/system/arm/emulation.rst
@@ -24,6 +24,7 @@ the following architecture extensions:
- FEAT_Debugv8p4 (Debug changes for v8.4)
- FEAT_DotProd (Advanced SIMD dot product instructions)
- FEAT_DoubleFault (Double Fault Extension)
+- FEAT_E0PD (Preventing EL0 access to halves of address maps)
- FEAT_ETS (Enhanced Translation Synchronization)
- FEAT_FCMA (Floating-point complex number instructions)
- FEAT_FHM (Floating-point half-precision multiplication instructions)
@@ -32,6 +33,7 @@ the following architecture extensions:
- FEAT_FlagM (Flag manipulation instructions v2)
- FEAT_FlagM2 (Enhancements to flag manipulation instructions)
- FEAT_GTG (Guest translation granule size)
+- FEAT_HAFDBS (Hardware management of the access flag and dirty bit state)
- FEAT_HCX (Support for the HCRX_EL2 register)
- FEAT_HPDS (Hierarchical permission disables)
- FEAT_I8MM (AArch64 Int8 matrix multiplication instructions)
diff --git a/dump/dump.c b/dump/dump.c
index 236559b03a..df117c847f 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -103,6 +103,7 @@ static int dump_cleanup(DumpState *s)
memory_mapping_list_free(&s->list);
close(s->fd);
g_free(s->guest_note);
+ g_array_unref(s->string_table_buf);
s->guest_note = NULL;
if (s->resume) {
if (s->detached) {
@@ -152,11 +153,10 @@ static void prepare_elf64_header(DumpState *s, Elf64_Ehdr *elf_header)
elf_header->e_phoff = cpu_to_dump64(s, s->phdr_offset);
elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
elf_header->e_phnum = cpu_to_dump16(s, phnum);
- if (s->shdr_num) {
- elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset);
- elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
- elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
- }
+ elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset);
+ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
+ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
+ elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1);
}
static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header)
@@ -180,11 +180,10 @@ static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header)
elf_header->e_phoff = cpu_to_dump32(s, s->phdr_offset);
elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
elf_header->e_phnum = cpu_to_dump16(s, phnum);
- if (s->shdr_num) {
- elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset);
- elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
- elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
- }
+ elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset);
+ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
+ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
+ elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1);
}
static void write_elf_header(DumpState *s, Error **errp)
@@ -195,6 +194,8 @@ static void write_elf_header(DumpState *s, Error **errp)
void *header_ptr;
int ret;
+ /* The NULL header and the shstrtab are always defined */
+ assert(s->shdr_num >= 2);
if (dump_is_64bit(s)) {
prepare_elf64_header(s, &elf64_header);
header_size = sizeof(elf64_header);
@@ -380,30 +381,136 @@ static void write_elf_phdr_note(DumpState *s, Error **errp)
}
}
-static void write_elf_section(DumpState *s, int type, Error **errp)
+static void prepare_elf_section_hdr_zero(DumpState *s)
{
- Elf32_Shdr shdr32;
- Elf64_Shdr shdr64;
+ if (dump_is_64bit(s)) {
+ Elf64_Shdr *shdr64 = s->elf_section_hdrs;
+
+ shdr64->sh_info = cpu_to_dump32(s, s->phdr_num);
+ } else {
+ Elf32_Shdr *shdr32 = s->elf_section_hdrs;
+
+ shdr32->sh_info = cpu_to_dump32(s, s->phdr_num);
+ }
+}
+
+static void prepare_elf_section_hdr_string(DumpState *s, void *buff)
+{
+ uint64_t index = s->string_table_buf->len;
+ const char strtab[] = ".shstrtab";
+ Elf32_Shdr shdr32 = {};
+ Elf64_Shdr shdr64 = {};
int shdr_size;
void *shdr;
- int ret;
- if (type == 0) {
- shdr_size = sizeof(Elf32_Shdr);
- memset(&shdr32, 0, shdr_size);
- shdr32.sh_info = cpu_to_dump32(s, s->phdr_num);
- shdr = &shdr32;
- } else {
+ g_array_append_vals(s->string_table_buf, strtab, sizeof(strtab));
+ if (dump_is_64bit(s)) {
shdr_size = sizeof(Elf64_Shdr);
- memset(&shdr64, 0, shdr_size);
- shdr64.sh_info = cpu_to_dump32(s, s->phdr_num);
+ shdr64.sh_type = SHT_STRTAB;
+ shdr64.sh_offset = s->section_offset + s->elf_section_data_size;
+ shdr64.sh_name = index;
+ shdr64.sh_size = s->string_table_buf->len;
shdr = &shdr64;
+ } else {
+ shdr_size = sizeof(Elf32_Shdr);
+ shdr32.sh_type = SHT_STRTAB;
+ shdr32.sh_offset = s->section_offset + s->elf_section_data_size;
+ shdr32.sh_name = index;
+ shdr32.sh_size = s->string_table_buf->len;
+ shdr = &shdr32;
+ }
+ memcpy(buff, shdr, shdr_size);
+}
+
+static bool prepare_elf_section_hdrs(DumpState *s, Error **errp)
+{
+ size_t len, sizeof_shdr;
+ void *buff_hdr;
+
+ /*
+ * Section ordering:
+ * - HDR zero
+ * - Arch section hdrs
+ * - String table hdr
+ */
+ sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
+ len = sizeof_shdr * s->shdr_num;
+ s->elf_section_hdrs = g_malloc0(len);
+ buff_hdr = s->elf_section_hdrs;
+
+ /*
+ * The first section header is ALWAYS a special initial section
+ * header.
+ *
+ * The header should be 0 with one exception being that if
+ * phdr_num is PN_XNUM then the sh_info field contains the real
+ * number of segment entries.
+ *
+ * As we zero allocate the buffer we will only need to modify
+ * sh_info for the PN_XNUM case.
+ */
+ if (s->phdr_num >= PN_XNUM) {
+ prepare_elf_section_hdr_zero(s);
}
+ buff_hdr += sizeof_shdr;
+
+ /* Add architecture defined section headers */
+ if (s->dump_info.arch_sections_write_hdr_fn
+ && s->shdr_num > 2) {
+ buff_hdr += s->dump_info.arch_sections_write_hdr_fn(s, buff_hdr);
+
+ if (s->shdr_num >= SHN_LORESERVE) {
+ error_setg_errno(errp, EINVAL,
+ "dump: too many architecture defined sections");
+ return false;
+ }
+ }
+
+ /*
+ * String table is the last section since strings are added via
+ * arch_sections_write_hdr().
+ */
+ prepare_elf_section_hdr_string(s, buff_hdr);
+ return true;
+}
- ret = fd_write_vmcore(shdr, shdr_size, s);
+static void write_elf_section_headers(DumpState *s, Error **errp)
+{
+ size_t sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
+ int ret;
+
+ if (!prepare_elf_section_hdrs(s, errp)) {
+ return;
+ }
+
+ ret = fd_write_vmcore(s->elf_section_hdrs, s->shdr_num * sizeof_shdr, s);
if (ret < 0) {
- error_setg_errno(errp, -ret,
- "dump: failed to write section header table");
+ error_setg_errno(errp, -ret, "dump: failed to write section headers");
+ }
+
+ g_free(s->elf_section_hdrs);
+}
+
+static void write_elf_sections(DumpState *s, Error **errp)
+{
+ int ret;
+
+ if (s->elf_section_data_size) {
+ /* Write architecture section data */
+ ret = fd_write_vmcore(s->elf_section_data,
+ s->elf_section_data_size, s);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "dump: failed to write architecture section data");
+ return;
+ }
+ }
+
+ /* Write string table */
+ ret = fd_write_vmcore(s->string_table_buf->data,
+ s->string_table_buf->len, s);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "dump: failed to write string table data");
}
}
@@ -554,6 +661,8 @@ static void dump_begin(DumpState *s, Error **errp)
* --------------
* | elf header |
* --------------
+ * | sctn_hdr |
+ * --------------
* | PT_NOTE |
* --------------
* | PT_LOAD |
@@ -562,8 +671,6 @@ static void dump_begin(DumpState *s, Error **errp)
* --------------
* | PT_LOAD |
* --------------
- * | sec_hdr |
- * --------------
* | elf note |
* --------------
* | memory |
@@ -579,6 +686,12 @@ static void dump_begin(DumpState *s, Error **errp)
return;
}
+ /* write section headers to vmcore */
+ write_elf_section_headers(s, errp);
+ if (*errp) {
+ return;
+ }
+
/* write PT_NOTE to vmcore */
write_elf_phdr_note(s, errp);
if (*errp) {
@@ -591,21 +704,13 @@ static void dump_begin(DumpState *s, Error **errp)
return;
}
- /* write section to vmcore */
- if (s->shdr_num) {
- write_elf_section(s, 1, errp);
- if (*errp) {
- return;
- }
- }
-
/* write notes to vmcore */
write_elf_notes(s, errp);
}
-static int64_t dump_filtered_memblock_size(GuestPhysBlock *block,
- int64_t filter_area_start,
- int64_t filter_area_length)
+int64_t dump_filtered_memblock_size(GuestPhysBlock *block,
+ int64_t filter_area_start,
+ int64_t filter_area_length)
{
int64_t size, left, right;
@@ -623,9 +728,9 @@ static int64_t dump_filtered_memblock_size(GuestPhysBlock *block,
return size;
}
-static int64_t dump_filtered_memblock_start(GuestPhysBlock *block,
- int64_t filter_area_start,
- int64_t filter_area_length)
+int64_t dump_filtered_memblock_start(GuestPhysBlock *block,
+ int64_t filter_area_start,
+ int64_t filter_area_length)
{
if (filter_area_length) {
/* return -1 if the block is not within filter area */
@@ -665,6 +770,31 @@ static void dump_iterate(DumpState *s, Error **errp)
}
}
+static void dump_end(DumpState *s, Error **errp)
+{
+ int rc;
+ ERRP_GUARD();
+
+ if (s->elf_section_data_size) {
+ s->elf_section_data = g_malloc0(s->elf_section_data_size);
+ }
+
+ /* Adds the architecture defined section data to s->elf_section_data */
+ if (s->dump_info.arch_sections_write_fn &&
+ s->elf_section_data_size) {
+ rc = s->dump_info.arch_sections_write_fn(s, s->elf_section_data);
+ if (rc) {
+ error_setg_errno(errp, rc,
+ "dump: failed to get arch section data");
+ g_free(s->elf_section_data);
+ return;
+ }
+ }
+
+ /* write sections to vmcore */
+ write_elf_sections(s, errp);
+}
+
static void create_vmcore(DumpState *s, Error **errp)
{
ERRP_GUARD();
@@ -674,7 +804,14 @@ static void create_vmcore(DumpState *s, Error **errp)
return;
}
+ /* Iterate over memory and dump it to file */
dump_iterate(s, errp);
+ if (*errp) {
+ return;
+ }
+
+ /* Write the section data */
+ dump_end(s, errp);
}
static int write_start_flat_header(int fd)
@@ -1684,6 +1821,14 @@ static void dump_init(DumpState *s, int fd, bool has_format,
s->filter_area_begin = begin;
s->filter_area_length = length;
+ /* First index is 0, it's the special null name */
+ s->string_table_buf = g_array_new(FALSE, TRUE, 1);
+ /*
+ * Allocate the null name, due to the clearing option set to true
+ * it will be 0.
+ */
+ g_array_set_size(s->string_table_buf, 1);
+
memory_mapping_list_init(&s->list);
guest_phys_blocks_init(&s->guest_phys_blocks);
@@ -1820,38 +1965,53 @@ static void dump_init(DumpState *s, int fd, bool has_format,
}
/*
- * calculate phdr_num
+ * The first section header is always a special one in which most
+ * fields are 0. The section header string table is also always
+ * set.
+ */
+ s->shdr_num = 2;
+
+ /*
+ * Adds the number of architecture sections to shdr_num and sets
+ * elf_section_data_size so we know the offsets and sizes of all
+ * parts.
+ */
+ if (s->dump_info.arch_sections_add_fn) {
+ s->dump_info.arch_sections_add_fn(s);
+ }
+
+ /*
+ * calculate shdr_num so we know the offsets and sizes of all
+ * parts.
+ * Calculate phdr_num
*
- * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
+ * The absolute maximum amount of phdrs is UINT32_MAX - 1 as
+ * sh_info is 32 bit. There's special handling once we go over
+ * UINT16_MAX - 1 but that is handled in the ehdr and section
+ * code.
*/
- s->phdr_num = 1; /* PT_NOTE */
- if (s->list.num < UINT16_MAX - 2) {
- s->shdr_num = 0;
+ s->phdr_num = 1; /* Reserve PT_NOTE */
+ if (s->list.num <= UINT32_MAX - 1) {
s->phdr_num += s->list.num;
} else {
- /* sh_info of section 0 holds the real number of phdrs */
- s->shdr_num = 1;
-
- /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
- if (s->list.num <= UINT32_MAX - 1) {
- s->phdr_num += s->list.num;
- } else {
- s->phdr_num = UINT32_MAX;
- }
+ s->phdr_num = UINT32_MAX;
}
+ /*
+ * Now that the number of section and program headers is known we
+ * can calculate the offsets of the headers and data.
+ */
if (dump_is_64bit(s)) {
- s->phdr_offset = sizeof(Elf64_Ehdr);
- s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num;
- s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
- s->memory_offset = s->note_offset + s->note_size;
+ s->shdr_offset = sizeof(Elf64_Ehdr);
+ s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
+ s->note_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num;
} else {
-
- s->phdr_offset = sizeof(Elf32_Ehdr);
- s->shdr_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num;
- s->note_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
- s->memory_offset = s->note_offset + s->note_size;
+ s->shdr_offset = sizeof(Elf32_Ehdr);
+ s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
+ s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num;
}
+ s->memory_offset = s->note_offset + s->note_size;
+ s->section_offset = s->memory_offset + s->total_size;
return;
diff --git a/dump/win_dump.c b/dump/win_dump.c
index fd91350fbb..f20b6051b6 100644
--- a/dump/win_dump.c
+++ b/dump/win_dump.c
@@ -273,6 +273,13 @@ static void patch_and_save_context(WinDumpHeader *h, bool x64,
uint64_t Context;
WinContext ctx;
+ if (i >= WIN_DUMP_FIELD(NumberProcessors)) {
+ warn_report("win-dump: number of QEMU CPUs is bigger than"
+ " NumberProcessors (%u) in guest Windows",
+ WIN_DUMP_FIELD(NumberProcessors));
+ return;
+ }
+
if (cpu_read_ptr(x64, first_cpu,
KiProcessorBlock + i * win_dump_ptr_size(x64),
&Prcb)) {
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 9bf13133e5..072cf67956 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -1803,7 +1803,7 @@ static void coroutine_fn v9fs_walk(void *opaque)
err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
if (err < 0) {
pdu_complete(pdu, err);
- return ;
+ return;
}
offset += err;
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index bc3ecdb619..55f114ef72 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -1099,7 +1099,7 @@ static void aspeed_machine_palmetto_class_init(ObjectClass *oc, void *data)
amc->soc_name = "ast2400-a1";
amc->hw_strap1 = PALMETTO_BMC_HW_STRAP1;
amc->fmc_model = "n25q256a";
- amc->spi_model = "mx25l25635e";
+ amc->spi_model = "mx25l25635f";
amc->num_cs = 1;
amc->i2c_init = palmetto_bmc_i2c_init;
mc->default_ram_size = 256 * MiB;
@@ -1150,7 +1150,7 @@ static void aspeed_machine_ast2500_evb_class_init(ObjectClass *oc, void *data)
amc->soc_name = "ast2500-a1";
amc->hw_strap1 = AST2500_EVB_HW_STRAP1;
amc->fmc_model = "mx25l25635e";
- amc->spi_model = "mx25l25635e";
+ amc->spi_model = "mx25l25635f";
amc->num_cs = 1;
amc->i2c_init = ast2500_evb_i2c_init;
mc->default_ram_size = 512 * MiB;
@@ -1200,7 +1200,7 @@ static void aspeed_machine_witherspoon_class_init(ObjectClass *oc, void *data)
mc->desc = "OpenPOWER Witherspoon BMC (ARM1176)";
amc->soc_name = "ast2500-a1";
amc->hw_strap1 = WITHERSPOON_BMC_HW_STRAP1;
- amc->fmc_model = "mx25l25635e";
+ amc->fmc_model = "mx25l25635f";
amc->spi_model = "mx66l1g45g";
amc->num_cs = 2;
amc->i2c_init = witherspoon_bmc_i2c_init;
@@ -1330,6 +1330,13 @@ static void aspeed_machine_fuji_class_init(ObjectClass *oc, void *data)
aspeed_soc_num_cpus(amc->soc_name);
};
+/* On 32-bit hosts, lower RAM to 1G because of the 2047 MB limit */
+#if HOST_LONG_BITS == 32
+#define BLETCHLEY_BMC_RAM_SIZE (1 * GiB)
+#else
+#define BLETCHLEY_BMC_RAM_SIZE (2 * GiB)
+#endif
+
static void aspeed_machine_bletchley_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -1344,17 +1351,17 @@ static void aspeed_machine_bletchley_class_init(ObjectClass *oc, void *data)
amc->num_cs = 2;
amc->macs_mask = ASPEED_MAC2_ON;
amc->i2c_init = bletchley_bmc_i2c_init;
- mc->default_ram_size = 512 * MiB;
+ mc->default_ram_size = BLETCHLEY_BMC_RAM_SIZE;
mc->default_cpus = mc->min_cpus = mc->max_cpus =
aspeed_soc_num_cpus(amc->soc_name);
}
-static void fby35_reset(MachineState *state)
+static void fby35_reset(MachineState *state, ShutdownCause reason)
{
AspeedMachineState *bmc = ASPEED_MACHINE(state);
AspeedGPIOState *gpio = &bmc->soc.gpio;
- qemu_devices_reset();
+ qemu_devices_reset(reason);
/* Board ID: 7 (Class-1, 4 slots) */
object_property_set_bool(OBJECT(gpio), "gpioV4", true, &error_fatal);
diff --git a/hw/arm/aspeed_ast2600.c b/hw/arm/aspeed_ast2600.c
index aa2cd90bec..cd75465c2b 100644
--- a/hw/arm/aspeed_ast2600.c
+++ b/hw/arm/aspeed_ast2600.c
@@ -307,6 +307,8 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
object_property_set_int(OBJECT(&s->cpu[i]), "cntfrq", 1125000000,
&error_abort);
+ object_property_set_bool(OBJECT(&s->cpu[i]), "neon", false,
+ &error_abort);
object_property_set_link(OBJECT(&s->cpu[i]), "memory",
OBJECT(s->memory), &error_abort);
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index b0b92af188..b106f31468 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -683,6 +683,8 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
* the DTB is copied again upon reset, even if addr points into RAM.
*/
rom_add_blob_fixed_as("dtb", fdt, size, addr, as);
+ qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds,
+ rom_ptr_for_as(as, addr, size));
g_free(fdt);
diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c
index 394192b9b2..284c09c91d 100644
--- a/hw/arm/mps2-tz.c
+++ b/hw/arm/mps2-tz.c
@@ -1239,7 +1239,7 @@ static void mps2_set_remap(Object *obj, const char *value, Error **errp)
}
}
-static void mps2_machine_reset(MachineState *machine)
+static void mps2_machine_reset(MachineState *machine, ShutdownCause reason)
{
MPS2TZMachineState *mms = MPS2TZ_MACHINE(machine);
@@ -1249,7 +1249,7 @@ static void mps2_machine_reset(MachineState *machine)
* reset see the correct mapping.
*/
remap_memory(mms, mms->remap);
- qemu_devices_reset();
+ qemu_devices_reset(reason);
}
static void mps2tz_class_init(ObjectClass *oc, void *data)
diff --git a/hw/arm/nseries.c b/hw/arm/nseries.c
index 692c94ceb4..b151113c27 100644
--- a/hw/arm/nseries.c
+++ b/hw/arm/nseries.c
@@ -702,7 +702,7 @@ static uint32_t mipid_txrx(void *opaque, uint32_t cmd, int len)
static void *mipid_init(void)
{
- struct mipid_s *s = (struct mipid_s *) g_malloc0(sizeof(*s));
+ struct mipid_s *s = g_malloc0(sizeof(*s));
s->id = 0x838f03;
mipid_reset(s);
@@ -1300,7 +1300,7 @@ static int n810_atag_setup(const struct arm_boot_info *info, void *p)
static void n8x0_init(MachineState *machine,
struct arm_boot_info *binfo, int model)
{
- struct n800_s *s = (struct n800_s *) g_malloc0(sizeof(*s));
+ struct n800_s *s = g_malloc0(sizeof(*s));
MachineClass *mc = MACHINE_GET_CLASS(machine);
if (machine->ram_size != mc->default_ram_size) {
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index cda9defe8f..b871350856 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1371,14 +1371,15 @@ static void create_smmu(const VirtMachineState *vms,
static void create_virtio_iommu_dt_bindings(VirtMachineState *vms)
{
- const char compat[] = "virtio,pci-iommu";
+ const char compat[] = "virtio,pci-iommu\0pci1af4,1057";
uint16_t bdf = vms->virtio_iommu_bdf;
MachineState *ms = MACHINE(vms);
char *node;
vms->iommu_phandle = qemu_fdt_alloc_phandle(ms->fdt);
- node = g_strdup_printf("%s/virtio_iommu@%d", vms->pciehb_nodename, bdf);
+ node = g_strdup_printf("%s/virtio_iommu@%x,%x", vms->pciehb_nodename,
+ PCI_SLOT(bdf), PCI_FUNC(bdf));
qemu_fdt_add_subnode(ms->fdt, node);
qemu_fdt_setprop(ms->fdt, node, "compatible", compat, sizeof(compat));
qemu_fdt_setprop_sized_cells(ms->fdt, node, "reg",
diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index a8d2519141..02adc87527 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -35,6 +35,7 @@
#include "qapi/error.h"
#include "trace.h"
#include "qom/object.h"
+#include "m25p80_sfdp.h"
/* 16 MiB max in 3 byte address mode */
#define MAX_3BYTES_SIZE 0x1000000
@@ -72,6 +73,7 @@ typedef struct FlashPartInfo {
* This field inform how many die is in the chip.
*/
uint8_t die_cnt;
+ uint8_t (*sfdp_read)(uint32_t sfdp_addr);
} FlashPartInfo;
/* adapted from linux */
@@ -230,12 +232,16 @@ static const FlashPartInfo known_devices[] = {
{ INFO("mx25l6405d", 0xc22017, 0, 64 << 10, 128, 0) },
{ INFO("mx25l12805d", 0xc22018, 0, 64 << 10, 256, 0) },
{ INFO("mx25l12855e", 0xc22618, 0, 64 << 10, 256, 0) },
- { INFO6("mx25l25635e", 0xc22019, 0xc22019, 64 << 10, 512, 0) },
+ { INFO6("mx25l25635e", 0xc22019, 0xc22019, 64 << 10, 512,
+ ER_4K | ER_32K), .sfdp_read = m25p80_sfdp_mx25l25635e },
+ { INFO6("mx25l25635f", 0xc22019, 0xc22019, 64 << 10, 512,
+ ER_4K | ER_32K), .sfdp_read = m25p80_sfdp_mx25l25635f },
{ INFO("mx25l25655e", 0xc22619, 0, 64 << 10, 512, 0) },
{ INFO("mx66l51235f", 0xc2201a, 0, 64 << 10, 1024, ER_4K | ER_32K) },
{ INFO("mx66u51235f", 0xc2253a, 0, 64 << 10, 1024, ER_4K | ER_32K) },
{ INFO("mx66u1g45g", 0xc2253b, 0, 64 << 10, 2048, ER_4K | ER_32K) },
- { INFO("mx66l1g45g", 0xc2201b, 0, 64 << 10, 2048, ER_4K | ER_32K) },
+ { INFO("mx66l1g45g", 0xc2201b, 0, 64 << 10, 2048, ER_4K | ER_32K),
+ .sfdp_read = m25p80_sfdp_mx66l1g45g },
/* Micron */
{ INFO("n25q032a11", 0x20bb16, 0, 64 << 10, 64, ER_4K) },
@@ -245,13 +251,15 @@ static const FlashPartInfo known_devices[] = {
{ INFO("n25q128a11", 0x20bb18, 0, 64 << 10, 256, ER_4K) },
{ INFO("n25q128a13", 0x20ba18, 0, 64 << 10, 256, ER_4K) },
{ INFO("n25q256a11", 0x20bb19, 0, 64 << 10, 512, ER_4K) },
- { INFO("n25q256a13", 0x20ba19, 0, 64 << 10, 512, ER_4K) },
+ { INFO("n25q256a13", 0x20ba19, 0, 64 << 10, 512, ER_4K),
+ .sfdp_read = m25p80_sfdp_n25q256a },
{ INFO("n25q512a11", 0x20bb20, 0, 64 << 10, 1024, ER_4K) },
{ INFO("n25q512a13", 0x20ba20, 0, 64 << 10, 1024, ER_4K) },
{ INFO("n25q128", 0x20ba18, 0, 64 << 10, 256, 0) },
{ INFO("n25q256a", 0x20ba19, 0, 64 << 10, 512,
- ER_4K | HAS_SR_BP3_BIT6 | HAS_SR_TB) },
- { INFO("n25q512a", 0x20ba20, 0, 64 << 10, 1024, ER_4K) },
+ ER_4K | HAS_SR_BP3_BIT6 | HAS_SR_TB),
+ .sfdp_read = m25p80_sfdp_n25q256a },
+ { INFO("n25q512a", 0x20ba20, 0, 64 << 10, 1024, ER_4K) },
{ INFO("n25q512ax3", 0x20ba20, 0x1000, 64 << 10, 1024, ER_4K) },
{ INFO("mt25ql512ab", 0x20ba20, 0x1044, 64 << 10, 1024, ER_4K | ER_32K) },
{ INFO_STACKED("mt35xu01g", 0x2c5b1b, 0x104100, 128 << 10, 1024,
@@ -337,9 +345,12 @@ static const FlashPartInfo known_devices[] = {
{ INFO("w25q64", 0xef4017, 0, 64 << 10, 128, ER_4K) },
{ INFO("w25q80", 0xef5014, 0, 64 << 10, 16, ER_4K) },
{ INFO("w25q80bl", 0xef4014, 0, 64 << 10, 16, ER_4K) },
- { INFO("w25q256", 0xef4019, 0, 64 << 10, 512, ER_4K) },
- { INFO("w25q512jv", 0xef4020, 0, 64 << 10, 1024, ER_4K) },
- { INFO("w25q01jvq", 0xef4021, 0, 64 << 10, 2048, ER_4K) },
+ { INFO("w25q256", 0xef4019, 0, 64 << 10, 512, ER_4K),
+ .sfdp_read = m25p80_sfdp_w25q256 },
+ { INFO("w25q512jv", 0xef4020, 0, 64 << 10, 1024, ER_4K),
+ .sfdp_read = m25p80_sfdp_w25q512jv },
+ { INFO("w25q01jvq", 0xef4021, 0, 64 << 10, 2048, ER_4K),
+ .sfdp_read = m25p80_sfdp_w25q01jvq },
};
typedef enum {
@@ -355,6 +366,7 @@ typedef enum {
BULK_ERASE = 0xc7,
READ_FSR = 0x70,
RDCR = 0x15,
+ RDSFDP = 0x5a,
READ = 0x03,
READ4 = 0x13,
@@ -421,6 +433,7 @@ typedef enum {
STATE_COLLECTING_DATA,
STATE_COLLECTING_VAR_LEN_DATA,
STATE_READING_DATA,
+ STATE_READING_SFDP,
} CMDState;
typedef enum {
@@ -679,6 +692,8 @@ static inline int get_addr_length(Flash *s)
}
switch (s->cmd_in_progress) {
+ case RDSFDP:
+ return 3;
case PP4:
case PP4_4:
case QPP_4:
@@ -823,6 +838,11 @@ static void complete_collecting_data(Flash *s)
" by device\n");
}
break;
+
+ case RDSFDP:
+ s->state = STATE_READING_SFDP;
+ break;
+
default:
break;
}
@@ -1431,6 +1451,16 @@ static void decode_new_cmd(Flash *s, uint32_t value)
qemu_log_mask(LOG_GUEST_ERROR, "M25P80: Unknown cmd %x\n", value);
}
break;
+ case RDSFDP:
+ if (s->pi->sfdp_read) {
+ s->needed_bytes = get_addr_length(s) + 1; /* SFDP addr + dummy */
+ s->pos = 0;
+ s->len = 0;
+ s->state = STATE_COLLECTING_DATA;
+ break;
+ }
+ /* Fallthrough */
+
default:
s->pos = 0;
s->len = 1;
@@ -1538,6 +1568,12 @@ static uint32_t m25p80_transfer8(SSIPeripheral *ss, uint32_t tx)
}
}
break;
+ case STATE_READING_SFDP:
+ assert(s->pi->sfdp_read);
+ r = s->pi->sfdp_read(s->cur_addr);
+ trace_m25p80_read_sfdp(s, s->cur_addr, (uint8_t)r);
+ s->cur_addr = (s->cur_addr + 1) & (M25P80_SFDP_MAX_SIZE - 1);
+ break;
default:
case STATE_IDLE:
diff --git a/hw/block/m25p80_sfdp.c b/hw/block/m25p80_sfdp.c
new file mode 100644
index 0000000000..77615fa29e
--- /dev/null
+++ b/hw/block/m25p80_sfdp.c
@@ -0,0 +1,332 @@
+/*
+ * M25P80 Serial Flash Discoverable Parameter (SFDP)
+ *
+ * Copyright (c) 2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+#include "m25p80_sfdp.h"
+
+#define define_sfdp_read(model) \
+ uint8_t m25p80_sfdp_##model(uint32_t addr) \
+ { \
+ assert(is_power_of_2(sizeof(sfdp_##model))); \
+ return sfdp_##model[addr & (sizeof(sfdp_##model) - 1)]; \
+ }
+
+/*
+ * Micron
+ */
+static const uint8_t sfdp_n25q256a[] = {
+ 0x53, 0x46, 0x44, 0x50, 0x00, 0x01, 0x00, 0xff,
+ 0x00, 0x00, 0x01, 0x09, 0x30, 0x00, 0x00, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xe5, 0x20, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x0f,
+ 0x29, 0xeb, 0x27, 0x6b, 0x08, 0x3b, 0x27, 0xbb,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x27, 0xbb,
+ 0xff, 0xff, 0x29, 0xeb, 0x0c, 0x20, 0x10, 0xd8,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+};
+define_sfdp_read(n25q256a);
+
+
+/*
+ * Matronix
+ */
+
+/* mx25l25635e. No 4B opcodes */
+static const uint8_t sfdp_mx25l25635e[] = {
+ 0x53, 0x46, 0x44, 0x50, 0x00, 0x01, 0x01, 0xff,
+ 0x00, 0x00, 0x01, 0x09, 0x30, 0x00, 0x00, 0xff,
+ 0xc2, 0x00, 0x01, 0x04, 0x60, 0x00, 0x00, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xe5, 0x20, 0xf3, 0xff, 0xff, 0xff, 0xff, 0x0f,
+ 0x44, 0xeb, 0x08, 0x6b, 0x08, 0x3b, 0x04, 0xbb,
+ 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0xff,
+ 0xff, 0xff, 0x00, 0xff, 0x0c, 0x20, 0x0f, 0x52,
+ 0x10, 0xd8, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x36, 0x00, 0x27, 0xf7, 0x4f, 0xff, 0xff,
+ 0xd9, 0xc8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+};
+define_sfdp_read(mx25l25635e)
+
+static const uint8_t sfdp_mx25l25635f[] = {
+ 0x53, 0x46, 0x44, 0x50, 0x00, 0x01, 0x01, 0xff,
+ 0x00, 0x00, 0x01, 0x09, 0x30, 0x00, 0x00, 0xff,
+ 0xc2, 0x00, 0x01, 0x04, 0x60, 0x00, 0x00, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xe5, 0x20, 0xf3, 0xff, 0xff, 0xff, 0xff, 0x0f,
+ 0x44, 0xeb, 0x08, 0x6b, 0x08, 0x3b, 0x04, 0xbb,
+ 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0xff,
+ 0xff, 0xff, 0x44, 0xeb, 0x0c, 0x20, 0x0f, 0x52,
+ 0x10, 0xd8, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x36, 0x00, 0x27, 0x9d, 0xf9, 0xc0, 0x64,
+ 0x85, 0xcb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xc2, 0xf5, 0x08, 0x0a,
+ 0x08, 0x04, 0x03, 0x06, 0x00, 0x00, 0x07, 0x29,
+ 0x17, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+};
+define_sfdp_read(mx25l25635f);
+
+static const uint8_t sfdp_mx66l1g45g[] = {
+ 0x53, 0x46, 0x44, 0x50, 0x06, 0x01, 0x02, 0xff,
+ 0x00, 0x06, 0x01, 0x10, 0x30, 0x00, 0x00, 0xff,
+ 0xc2, 0x00, 0x01, 0x04, 0x10, 0x01, 0x00, 0xff,
+ 0x84, 0x00, 0x01, 0x02, 0xc0, 0x00, 0x00, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xe5, 0x20, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x3f,
+ 0x44, 0xeb, 0x08, 0x6b, 0x08, 0x3b, 0x04, 0xbb,
+ 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0xff,
+ 0xff, 0xff, 0x44, 0xeb, 0x0c, 0x20, 0x0f, 0x52,
+ 0x10, 0xd8, 0x00, 0xff, 0xd6, 0x49, 0xc5, 0x00,
+ 0x85, 0xdf, 0x04, 0xe3, 0x44, 0x03, 0x67, 0x38,
+ 0x30, 0xb0, 0x30, 0xb0, 0xf7, 0xbd, 0xd5, 0x5c,
+ 0x4a, 0x9e, 0x29, 0xff, 0xf0, 0x50, 0xf9, 0x85,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x7f, 0xef, 0xff, 0xff, 0x21, 0x5c, 0xdc, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x36, 0x00, 0x27, 0x9d, 0xf9, 0xc0, 0x64,
+ 0x85, 0xcb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xc2, 0xf5, 0x08, 0x00, 0x0c, 0x04, 0x08, 0x08,
+ 0x01, 0x00, 0x19, 0x0f, 0x01, 0x01, 0x06, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+};
+define_sfdp_read(mx66l1g45g);
+
+/*
+ * Windbond
+ */
+
+static const uint8_t sfdp_w25q256[] = {
+ 0x53, 0x46, 0x44, 0x50, 0x00, 0x01, 0x00, 0xff,
+ 0x00, 0x00, 0x01, 0x09, 0x80, 0x00, 0x00, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xe5, 0x20, 0xf3, 0xff, 0xff, 0xff, 0xff, 0x0f,
+ 0x44, 0xeb, 0x08, 0x6b, 0x08, 0x3b, 0x42, 0xbb,
+ 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
+ 0xff, 0xff, 0x21, 0xeb, 0x0c, 0x20, 0x0f, 0x52,
+ 0x10, 0xd8, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+};
+define_sfdp_read(w25q256);
+
+static const uint8_t sfdp_w25q512jv[] = {
+ 0x53, 0x46, 0x44, 0x50, 0x06, 0x01, 0x01, 0xff,
+ 0x00, 0x06, 0x01, 0x10, 0x80, 0x00, 0x00, 0xff,
+ 0x84, 0x00, 0x01, 0x02, 0xd0, 0x00, 0x00, 0xff,
+ 0x03, 0x00, 0x01, 0x02, 0xf0, 0x00, 0x00, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xe5, 0x20, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x1f,
+ 0x44, 0xeb, 0x08, 0x6b, 0x08, 0x3b, 0x42, 0xbb,
+ 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
+ 0xff, 0xff, 0x40, 0xeb, 0x0c, 0x20, 0x0f, 0x52,
+ 0x10, 0xd8, 0x00, 0x00, 0x36, 0x02, 0xa6, 0x00,
+ 0x82, 0xea, 0x14, 0xe2, 0xe9, 0x63, 0x76, 0x33,
+ 0x7a, 0x75, 0x7a, 0x75, 0xf7, 0xa2, 0xd5, 0x5c,
+ 0x19, 0xf7, 0x4d, 0xff, 0xe9, 0x70, 0xf9, 0xa5,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0x0a, 0xf0, 0xff, 0x21, 0xff, 0xdc, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+};
+define_sfdp_read(w25q512jv);
+
+static const uint8_t sfdp_w25q01jvq[] = {
+ 0x53, 0x46, 0x44, 0x50, 0x06, 0x01, 0x01, 0xff,
+ 0x00, 0x06, 0x01, 0x10, 0x80, 0x00, 0x00, 0xff,
+ 0x84, 0x00, 0x01, 0x02, 0xd0, 0x00, 0x00, 0xff,
+ 0x03, 0x00, 0x01, 0x02, 0xf0, 0x00, 0x00, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xe5, 0x20, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x3f,
+ 0x44, 0xeb, 0x08, 0x6b, 0x08, 0x3b, 0x42, 0xbb,
+ 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
+ 0xff, 0xff, 0x40, 0xeb, 0x0c, 0x20, 0x0f, 0x52,
+ 0x10, 0xd8, 0x00, 0x00, 0x36, 0x02, 0xa6, 0x00,
+ 0x82, 0xea, 0x14, 0xe2, 0xe9, 0x63, 0x76, 0x33,
+ 0x7a, 0x75, 0x7a, 0x75, 0xf7, 0xa2, 0xd5, 0x5c,
+ 0x19, 0xf7, 0x4d, 0xff, 0xe9, 0x70, 0xf9, 0xa5,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0x0a, 0xf0, 0xff, 0x21, 0xff, 0xdc, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+};
+define_sfdp_read(w25q01jvq);
diff --git a/hw/block/m25p80_sfdp.h b/hw/block/m25p80_sfdp.h
new file mode 100644
index 0000000000..df7adfb5ce
--- /dev/null
+++ b/hw/block/m25p80_sfdp.h
@@ -0,0 +1,29 @@
+/*
+ * M25P80 SFDP
+ *
+ * Copyright (c) 2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef HW_M25P80_SFDP_H
+#define HW_M25P80_SFDP_H
+
+/*
+ * SFDP area has a 3 bytes address space.
+ */
+#define M25P80_SFDP_MAX_SIZE (1 << 24)
+
+uint8_t m25p80_sfdp_n25q256a(uint32_t addr);
+
+uint8_t m25p80_sfdp_mx25l25635e(uint32_t addr);
+uint8_t m25p80_sfdp_mx25l25635f(uint32_t addr);
+uint8_t m25p80_sfdp_mx66l1g45g(uint32_t addr);
+
+uint8_t m25p80_sfdp_w25q256(uint32_t addr);
+uint8_t m25p80_sfdp_w25q512jv(uint32_t addr);
+
+uint8_t m25p80_sfdp_w25q01jvq(uint32_t addr);
+
+#endif
diff --git a/hw/block/meson.build b/hw/block/meson.build
index 1908abd45c..b434d5654c 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -12,6 +12,7 @@ softmmu_ss.add(when: 'CONFIG_ONENAND', if_true: files('onenand.c'))
softmmu_ss.add(when: 'CONFIG_PFLASH_CFI01', if_true: files('pflash_cfi01.c'))
softmmu_ss.add(when: 'CONFIG_PFLASH_CFI02', if_true: files('pflash_cfi02.c'))
softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c'))
+softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80_sfdp.c'))
softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c'))
softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c'))
diff --git a/hw/block/trace-events b/hw/block/trace-events
index d86b53520c..2c45a62bd5 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -80,5 +80,6 @@ m25p80_page_program(void *s, uint32_t addr, uint8_t tx) "[%p] page program cur_a
m25p80_transfer(void *s, uint8_t state, uint32_t len, uint8_t needed, uint32_t pos, uint32_t cur_addr, uint8_t t) "[%p] Transfer state 0x%"PRIx8" len 0x%"PRIx32" needed 0x%"PRIx8" pos 0x%"PRIx32" addr 0x%"PRIx32" tx 0x%"PRIx8
m25p80_read_byte(void *s, uint32_t addr, uint8_t v) "[%p] Read byte 0x%"PRIx32"=0x%"PRIx8
m25p80_read_data(void *s, uint32_t pos, uint8_t v) "[%p] Read data 0x%"PRIx32"=0x%"PRIx8
+m25p80_read_sfdp(void *s, uint32_t addr, uint8_t v) "[%p] Read SFDP 0x%"PRIx32"=0x%"PRIx8
m25p80_binding(void *s) "[%p] Binding to IF_MTD drive"
m25p80_binding_no_bdrv(void *s) "[%p] No BDRV - binding to RAM"
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 8131ec2dbc..f717550fdc 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -21,6 +21,7 @@
#include "hw/block/block.h"
#include "hw/qdev-properties.h"
#include "sysemu/blockdev.h"
+#include "sysemu/block-ram-registrar.h"
#include "sysemu/sysemu.h"
#include "sysemu/runstate.h"
#include "hw/virtio/virtio-blk.h"
@@ -362,12 +363,14 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
}
}
-static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
+static inline void submit_requests(VirtIOBlock *s, MultiReqBuffer *mrb,
int start, int num_reqs, int niov)
{
+ BlockBackend *blk = s->blk;
QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
int64_t sector_num = mrb->reqs[start]->sector_num;
bool is_write = mrb->is_write;
+ BdrvRequestFlags flags = 0;
if (num_reqs > 1) {
int i;
@@ -398,12 +401,18 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
num_reqs - 1);
}
+ if (blk_ram_registrar_ok(&s->blk_ram_registrar)) {
+ flags |= BDRV_REQ_REGISTERED_BUF;
+ }
+
if (is_write) {
- blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0,
- virtio_blk_rw_complete, mrb->reqs[start]);
+ blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov,
+ flags, virtio_blk_rw_complete,
+ mrb->reqs[start]);
} else {
- blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0,
- virtio_blk_rw_complete, mrb->reqs[start]);
+ blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov,
+ flags, virtio_blk_rw_complete,
+ mrb->reqs[start]);
}
}
@@ -425,14 +434,14 @@ static int multireq_compare(const void *a, const void *b)
}
}
-static void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb)
+static void virtio_blk_submit_multireq(VirtIOBlock *s, MultiReqBuffer *mrb)
{
int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0;
uint32_t max_transfer;
int64_t sector_num = 0;
if (mrb->num_reqs == 1) {
- submit_requests(blk, mrb, 0, 1, -1);
+ submit_requests(s, mrb, 0, 1, -1);
mrb->num_reqs = 0;
return;
}
@@ -452,11 +461,11 @@ static void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb)
* 3. merge would exceed maximum transfer length of backend device
*/
if (sector_num + nb_sectors != req->sector_num ||
- niov > blk_get_max_iov(blk) - req->qiov.niov ||
+ niov > blk_get_max_iov(s->blk) - req->qiov.niov ||
req->qiov.size > max_transfer ||
nb_sectors > (max_transfer -
req->qiov.size) / BDRV_SECTOR_SIZE) {
- submit_requests(blk, mrb, start, num_reqs, niov);
+ submit_requests(s, mrb, start, num_reqs, niov);
num_reqs = 0;
}
}
@@ -472,7 +481,7 @@ static void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb)
num_reqs++;
}
- submit_requests(blk, mrb, start, num_reqs, niov);
+ submit_requests(s, mrb, start, num_reqs, niov);
mrb->num_reqs = 0;
}
@@ -487,7 +496,7 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
* Make sure all outstanding writes are posted to the backing device.
*/
if (mrb->is_write && mrb->num_reqs > 0) {
- virtio_blk_submit_multireq(s->blk, mrb);
+ virtio_blk_submit_multireq(s, mrb);
}
blk_aio_flush(s->blk, virtio_blk_flush_complete, req);
}
@@ -667,7 +676,7 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS ||
is_write != mrb->is_write ||
!s->conf.request_merging)) {
- virtio_blk_submit_multireq(s->blk, mrb);
+ virtio_blk_submit_multireq(s, mrb);
}
assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS);
@@ -774,7 +783,7 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
} while (!virtio_queue_empty(vq));
if (mrb.num_reqs) {
- virtio_blk_submit_multireq(s->blk, &mrb);
+ virtio_blk_submit_multireq(s, &mrb);
}
blk_io_unplug(s->blk);
@@ -823,7 +832,7 @@ void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh)
}
if (mrb.num_reqs) {
- virtio_blk_submit_multireq(s->blk, &mrb);
+ virtio_blk_submit_multireq(s, &mrb);
}
if (is_bh) {
blk_dec_in_flight(s->conf.conf.blk);
@@ -1205,6 +1214,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
}
s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
+ blk_ram_registrar_init(&s->blk_ram_registrar, s->blk);
blk_set_dev_ops(s->blk, &virtio_block_ops, s);
blk_iostatus_enable(s->blk);
@@ -1230,6 +1240,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev)
virtio_del_queue(vdev, i);
}
qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2);
+ blk_ram_registrar_destroy(&s->blk_ram_registrar);
qemu_del_vm_change_state_handler(s->change);
blockdev_mark_auto_del(s->blk);
virtio_cleanup(vdev);
diff --git a/hw/char/exynos4210_uart.c b/hw/char/exynos4210_uart.c
index addcd59b02..7b7c56b6ef 100644
--- a/hw/char/exynos4210_uart.c
+++ b/hw/char/exynos4210_uart.c
@@ -211,7 +211,7 @@ static void fifo_reset(Exynos4210UartFIFO *q)
g_free(q->data);
q->data = NULL;
- q->data = (uint8_t *)g_malloc0(q->size);
+ q->data = g_malloc0(q->size);
q->sp = 0;
q->rp = 0;
diff --git a/hw/core/numa.c b/hw/core/numa.c
index 26d8e5f616..ea24a5fa8c 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -822,6 +822,19 @@ static int ram_block_notify_add_single(RAMBlock *rb, void *opaque)
return 0;
}
+static int ram_block_notify_remove_single(RAMBlock *rb, void *opaque)
+{
+ const ram_addr_t max_size = qemu_ram_get_max_length(rb);
+ const ram_addr_t size = qemu_ram_get_used_length(rb);
+ void *host = qemu_ram_get_host_addr(rb);
+ RAMBlockNotifier *notifier = opaque;
+
+ if (host) {
+ notifier->ram_block_removed(notifier, host, size, max_size);
+ }
+ return 0;
+}
+
void ram_block_notifier_add(RAMBlockNotifier *n)
{
QLIST_INSERT_HEAD(&ram_list.ramblock_notifiers, n, next);
@@ -835,13 +848,18 @@ void ram_block_notifier_add(RAMBlockNotifier *n)
void ram_block_notifier_remove(RAMBlockNotifier *n)
{
QLIST_REMOVE(n, next);
+
+ if (n->ram_block_removed) {
+ qemu_ram_foreach_block(ram_block_notify_remove_single, n);
+ }
}
void ram_block_notify_add(void *host, size_t size, size_t max_size)
{
RAMBlockNotifier *notifier;
+ RAMBlockNotifier *next;
- QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) {
+ QLIST_FOREACH_SAFE(notifier, &ram_list.ramblock_notifiers, next, next) {
if (notifier->ram_block_added) {
notifier->ram_block_added(notifier, host, size, max_size);
}
@@ -851,8 +869,9 @@ void ram_block_notify_add(void *host, size_t size, size_t max_size)
void ram_block_notify_remove(void *host, size_t size, size_t max_size)
{
RAMBlockNotifier *notifier;
+ RAMBlockNotifier *next;
- QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) {
+ QLIST_FOREACH_SAFE(notifier, &ram_list.ramblock_notifiers, next, next) {
if (notifier->ram_block_removed) {
notifier->ram_block_removed(notifier, host, size, max_size);
}
@@ -862,8 +881,9 @@ void ram_block_notify_remove(void *host, size_t size, size_t max_size)
void ram_block_notify_resize(void *host, size_t old_size, size_t new_size)
{
RAMBlockNotifier *notifier;
+ RAMBlockNotifier *next;
- QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) {
+ QLIST_FOREACH_SAFE(notifier, &ram_list.ramblock_notifiers, next, next) {
if (notifier->ram_block_resized) {
notifier->ram_block_resized(notifier, host, old_size, new_size);
}
diff --git a/hw/core/reset.c b/hw/core/reset.c
index 36be82c491..d3263b613e 100644
--- a/hw/core/reset.c
+++ b/hw/core/reset.c
@@ -33,6 +33,7 @@ typedef struct QEMUResetEntry {
QTAILQ_ENTRY(QEMUResetEntry) entry;
QEMUResetHandler *func;
void *opaque;
+ bool skip_on_snapshot_load;
} QEMUResetEntry;
static QTAILQ_HEAD(, QEMUResetEntry) reset_handlers =
@@ -47,6 +48,16 @@ void qemu_register_reset(QEMUResetHandler *func, void *opaque)
QTAILQ_INSERT_TAIL(&reset_handlers, re, entry);
}
+void qemu_register_reset_nosnapshotload(QEMUResetHandler *func, void *opaque)
+{
+ QEMUResetEntry *re = g_new0(QEMUResetEntry, 1);
+
+ re->func = func;
+ re->opaque = opaque;
+ re->skip_on_snapshot_load = true;
+ QTAILQ_INSERT_TAIL(&reset_handlers, re, entry);
+}
+
void qemu_unregister_reset(QEMUResetHandler *func, void *opaque)
{
QEMUResetEntry *re;
@@ -60,12 +71,16 @@ void qemu_unregister_reset(QEMUResetHandler *func, void *opaque)
}
}
-void qemu_devices_reset(void)
+void qemu_devices_reset(ShutdownCause reason)
{
QEMUResetEntry *re, *nre;
/* reset all devices */
QTAILQ_FOREACH_SAFE(re, &reset_handlers, entry, nre) {
+ if (reason == SHUTDOWN_CAUSE_SNAPSHOT_LOAD &&
+ re->skip_on_snapshot_load) {
+ continue;
+ }
re->func(re->opaque);
}
}
diff --git a/hw/core/resettable.c b/hw/core/resettable.c
index 96a99ce39e..c3df75c6ba 100644
--- a/hw/core/resettable.c
+++ b/hw/core/resettable.c
@@ -201,12 +201,11 @@ static void resettable_phase_exit(Object *obj, void *opaque, ResetType type)
resettable_child_foreach(rc, obj, resettable_phase_exit, NULL, type);
assert(s->count > 0);
- if (s->count == 1) {
+ if (--s->count == 0) {
trace_resettable_phase_exit_exec(obj, obj_typename, !!rc->phases.exit);
if (rc->phases.exit && !resettable_get_tr_func(rc, obj)) {
rc->phases.exit(obj);
}
- s->count = 0;
}
s->exit_phase_in_progress = false;
trace_resettable_phase_exit_end(obj, obj_typename, s->count);
diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c
index edb0c49b19..eebcd28f9a 100644
--- a/hw/core/sysbus-fdt.c
+++ b/hw/core/sysbus-fdt.c
@@ -299,7 +299,8 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, void *opaque)
void *guest_fdt = data->fdt, *host_fdt;
const void *r;
int i, prop_len;
- uint32_t *irq_attr, *reg_attr, *host_clock_phandles;
+ uint32_t *irq_attr, *reg_attr;
+ const uint32_t *host_clock_phandles;
uint64_t mmio_base, irq_number;
uint32_t guest_clock_phandles[2];
@@ -339,7 +340,7 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, void *opaque)
error_report("%s clocks property should contain 2 handles", __func__);
exit(1);
}
- host_clock_phandles = (uint32_t *)r;
+ host_clock_phandles = r;
guest_clock_phandles[0] = qemu_fdt_alloc_phandle(guest_fdt);
guest_clock_phandles[1] = qemu_fdt_alloc_phandle(guest_fdt);
diff --git a/hw/display/blizzard.c b/hw/display/blizzard.c
index 105241577d..ebe230dd0a 100644
--- a/hw/display/blizzard.c
+++ b/hw/display/blizzard.c
@@ -1007,7 +1007,7 @@ static const GraphicHwOps blizzard_ops = {
void *s1d13745_init(qemu_irq gpio_int)
{
- BlizzardState *s = (BlizzardState *) g_malloc0(sizeof(*s));
+ BlizzardState *s = g_malloc0(sizeof(*s));
DisplaySurface *surface;
s->fb = g_malloc(0x180000);
diff --git a/hw/dma/pl330.c b/hw/dma/pl330.c
index 08e5938ec7..e5d521c329 100644
--- a/hw/dma/pl330.c
+++ b/hw/dma/pl330.c
@@ -1328,7 +1328,7 @@ static void pl330_debug_exec(PL330State *s)
}
if (!insn) {
pl330_fault(ch, PL330_FAULT_UNDEF_INSTR | PL330_FAULT_DBG_INSTR);
- return ;
+ return;
}
ch->stall = 0;
insn->exec(ch, opcode, args, insn->size - 1);
diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c
index e53d5f0fa7..19ea7c2c66 100644
--- a/hw/hppa/machine.c
+++ b/hw/hppa/machine.c
@@ -411,12 +411,12 @@ static void machine_hppa_init(MachineState *machine)
cpu[0]->env.gr[19] = FW_CFG_IO_BASE;
}
-static void hppa_machine_reset(MachineState *ms)
+static void hppa_machine_reset(MachineState *ms, ShutdownCause reason)
{
unsigned int smp_cpus = ms->smp.cpus;
int i;
- qemu_devices_reset();
+ qemu_devices_reset(reason);
/* Start all CPUs at the firmware entry point.
* Monarch CPU will initialize firmware, secondary CPUs
diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c
index 4a1b59cb9d..57b402b956 100644
--- a/hw/hyperv/hyperv.c
+++ b/hw/hyperv/hyperv.c
@@ -157,7 +157,7 @@ void hyperv_synic_reset(CPUState *cs)
SynICState *synic = get_synic(cs);
if (synic) {
- device_legacy_reset(DEVICE(synic));
+ device_cold_reset(DEVICE(synic));
}
}
diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c
index 42c6d69b82..c166fd20fa 100644
--- a/hw/i2c/aspeed_i2c.c
+++ b/hw/i2c/aspeed_i2c.c
@@ -1131,7 +1131,9 @@ static int aspeed_i2c_bus_slave_event(I2CSlave *slave, enum i2c_event event)
AspeedI2CBus *bus = ASPEED_I2C_BUS(qbus->parent);
uint32_t reg_intr_sts = aspeed_i2c_bus_intr_sts_offset(bus);
uint32_t reg_byte_buf = aspeed_i2c_bus_byte_buf_offset(bus);
- uint32_t value;
+ uint32_t reg_dev_addr = aspeed_i2c_bus_dev_addr_offset(bus);
+ uint32_t dev_addr = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_dev_addr,
+ SLAVE_DEV_ADDR1);
if (aspeed_i2c_is_new_mode(bus->controller)) {
return aspeed_i2c_bus_new_slave_event(bus, event);
@@ -1139,8 +1141,8 @@ static int aspeed_i2c_bus_slave_event(I2CSlave *slave, enum i2c_event event)
switch (event) {
case I2C_START_SEND_ASYNC:
- value = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_byte_buf, TX_BUF);
- SHARED_ARRAY_FIELD_DP32(bus->regs, reg_byte_buf, RX_BUF, value << 1);
+ /* Bit[0] == 0 indicates "send". */
+ SHARED_ARRAY_FIELD_DP32(bus->regs, reg_byte_buf, RX_BUF, dev_addr << 1);
ARRAY_FIELD_DP32(bus->regs, I2CD_INTR_STS, SLAVE_ADDR_RX_MATCH, 1);
SHARED_ARRAY_FIELD_DP32(bus->regs, reg_intr_sts, RX_DONE, 1);
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index 52f9aa9d8c..ffd1884100 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -467,7 +467,7 @@ static void microvm_machine_state_init(MachineState *machine)
microvm_devices_init(mms);
}
-static void microvm_machine_reset(MachineState *machine)
+static void microvm_machine_reset(MachineState *machine, ShutdownCause reason)
{
MicrovmMachineState *mms = MICROVM_MACHINE(machine);
CPUState *cs;
@@ -480,7 +480,7 @@ static void microvm_machine_reset(MachineState *machine)
mms->kernel_cmdline_fixed = true;
}
- qemu_devices_reset();
+ qemu_devices_reset(reason);
CPU_FOREACH(cs) {
cpu = X86_CPU(cs);
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 768982ae9a..3e86083db3 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1847,12 +1847,12 @@ static void pc_machine_initfn(Object *obj)
cxl_machine_init(obj, &pcms->cxl_devices_state);
}
-static void pc_machine_reset(MachineState *machine)
+static void pc_machine_reset(MachineState *machine, ShutdownCause reason)
{
CPUState *cs;
X86CPU *cpu;
- qemu_devices_reset();
+ qemu_devices_reset(reason);
/* Reset APIC after devices have been reset to cancel
* any changes that qemu_devices_reset() might have done.
@@ -1867,7 +1867,7 @@ static void pc_machine_reset(MachineState *machine)
static void pc_machine_wakeup(MachineState *machine)
{
cpu_synchronize_all_states();
- pc_machine_reset(machine);
+ pc_machine_reset(machine, SHUTDOWN_CAUSE_NONE);
cpu_synchronize_all_post_reset();
}
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 1148f70c03..bd50a064a3 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1111,7 +1111,7 @@ void x86_load_linux(X86MachineState *x86ms,
setup_data->type = cpu_to_le32(SETUP_RNG_SEED);
setup_data->len = cpu_to_le32(RNG_SEED_LENGTH);
qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH);
- qemu_register_reset(reset_rng_seed, setup_data);
+ qemu_register_reset_nosnapshotload(reset_rng_seed, setup_data);
fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_KERNEL_DATA, reset_rng_seed, NULL,
setup_data, kernel, kernel_size, true);
} else {
diff --git a/hw/m68k/q800.c b/hw/m68k/q800.c
index e09e244ddc..9d52ca6613 100644
--- a/hw/m68k/q800.c
+++ b/hw/m68k/q800.c
@@ -321,27 +321,23 @@ static const TypeInfo glue_info = {
},
};
-typedef struct {
- M68kCPU *cpu;
- struct bi_record *rng_seed;
-} ResetInfo;
-
static void main_cpu_reset(void *opaque)
{
- ResetInfo *reset_info = opaque;
- M68kCPU *cpu = reset_info->cpu;
+ M68kCPU *cpu = opaque;
CPUState *cs = CPU(cpu);
- if (reset_info->rng_seed) {
- qemu_guest_getrandom_nofail((void *)reset_info->rng_seed->data + 2,
- be16_to_cpu(*(uint16_t *)reset_info->rng_seed->data));
- }
-
cpu_reset(cs);
cpu->env.aregs[7] = ldl_phys(cs->as, 0);
cpu->env.pc = ldl_phys(cs->as, 4);
}
+static void rerandomize_rng_seed(void *opaque)
+{
+ struct bi_record *rng_seed = opaque;
+ qemu_guest_getrandom_nofail((void *)rng_seed->data + 2,
+ be16_to_cpu(*(uint16_t *)rng_seed->data));
+}
+
static uint8_t fake_mac_rom[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -397,7 +393,6 @@ static void q800_init(MachineState *machine)
NubusBus *nubus;
DeviceState *glue;
DriveInfo *dinfo;
- ResetInfo *reset_info;
uint8_t rng_seed[32];
linux_boot = (kernel_filename != NULL);
@@ -408,12 +403,9 @@ static void q800_init(MachineState *machine)
exit(1);
}
- reset_info = g_new0(ResetInfo, 1);
-
/* init CPUs */
cpu = M68K_CPU(cpu_create(machine->cpu_type));
- reset_info->cpu = cpu;
- qemu_register_reset(main_cpu_reset, reset_info);
+ qemu_register_reset(main_cpu_reset, cpu);
/* RAM */
memory_region_add_subregion(get_system_memory(), 0, machine->ram);
@@ -687,9 +679,10 @@ static void q800_init(MachineState *machine)
BOOTINFO0(param_ptr, BI_LAST);
rom_add_blob_fixed_as("bootinfo", param_blob, param_ptr - param_blob,
parameters_base, cs->as);
- reset_info->rng_seed = rom_ptr_for_as(cs->as, parameters_base,
- param_ptr - param_blob) +
- (param_rng_seed - param_blob);
+ qemu_register_reset_nosnapshotload(rerandomize_rng_seed,
+ rom_ptr_for_as(cs->as, parameters_base,
+ param_ptr - param_blob) +
+ (param_rng_seed - param_blob));
g_free(param_blob);
} else {
uint8_t *ptr;
diff --git a/hw/m68k/virt.c b/hw/m68k/virt.c
index 89c4108eb5..da5eafd275 100644
--- a/hw/m68k/virt.c
+++ b/hw/m68k/virt.c
@@ -89,7 +89,6 @@ typedef struct {
M68kCPU *cpu;
hwaddr initial_pc;
hwaddr initial_stack;
- struct bi_record *rng_seed;
} ResetInfo;
static void main_cpu_reset(void *opaque)
@@ -98,16 +97,18 @@ static void main_cpu_reset(void *opaque)
M68kCPU *cpu = reset_info->cpu;
CPUState *cs = CPU(cpu);
- if (reset_info->rng_seed) {
- qemu_guest_getrandom_nofail((void *)reset_info->rng_seed->data + 2,
- be16_to_cpu(*(uint16_t *)reset_info->rng_seed->data));
- }
-
cpu_reset(cs);
cpu->env.aregs[7] = reset_info->initial_stack;
cpu->env.pc = reset_info->initial_pc;
}
+static void rerandomize_rng_seed(void *opaque)
+{
+ struct bi_record *rng_seed = opaque;
+ qemu_guest_getrandom_nofail((void *)rng_seed->data + 2,
+ be16_to_cpu(*(uint16_t *)rng_seed->data));
+}
+
static void virt_init(MachineState *machine)
{
M68kCPU *cpu = NULL;
@@ -289,9 +290,10 @@ static void virt_init(MachineState *machine)
BOOTINFO0(param_ptr, BI_LAST);
rom_add_blob_fixed_as("bootinfo", param_blob, param_ptr - param_blob,
parameters_base, cs->as);
- reset_info->rng_seed = rom_ptr_for_as(cs->as, parameters_base,
- param_ptr - param_blob) +
- (param_rng_seed - param_blob);
+ qemu_register_reset_nosnapshotload(rerandomize_rng_seed,
+ rom_ptr_for_as(cs->as, parameters_base,
+ param_ptr - param_blob) +
+ (param_rng_seed - param_blob));
g_free(param_blob);
}
}
diff --git a/hw/mips/boston.c b/hw/mips/boston.c
index d2ab9da1a0..cab63f43bf 100644
--- a/hw/mips/boston.c
+++ b/hw/mips/boston.c
@@ -41,6 +41,7 @@
#include "sysemu/sysemu.h"
#include "sysemu/qtest.h"
#include "sysemu/runstate.h"
+#include "sysemu/reset.h"
#include <libfdt.h>
#include "qom/object.h"
@@ -810,6 +811,8 @@ static void boston_mach_init(MachineState *machine)
/* Calculate real fdt size after filter */
dt_size = fdt_totalsize(dtb_load_data);
rom_add_blob_fixed("dtb", dtb_load_data, dt_size, dtb_paddr);
+ qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds,
+ rom_ptr(dtb_paddr, dt_size));
} else {
/* Try to load file as FIT */
fit_err = load_fit(&boston_fit_loader, machine->kernel_filename, s);
diff --git a/hw/mips/malta.c b/hw/mips/malta.c
index 0e932988e0..7c3ad0974b 100644
--- a/hw/mips/malta.c
+++ b/hw/mips/malta.c
@@ -26,6 +26,7 @@
#include "qemu/units.h"
#include "qemu/bitops.h"
#include "qemu/datadir.h"
+#include "qemu/guest-random.h"
#include "hw/clock.h"
#include "hw/southbridge/piix.h"
#include "hw/isa/superio.h"
@@ -1017,6 +1018,17 @@ static void G_GNUC_PRINTF(3, 4) prom_set(uint32_t *prom_buf, int index,
va_end(ap);
}
+static void reinitialize_rng_seed(void *opaque)
+{
+ char *rng_seed_hex = opaque;
+ uint8_t rng_seed[32];
+
+ qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed));
+ for (size_t i = 0; i < sizeof(rng_seed); ++i) {
+ sprintf(rng_seed_hex + i * 2, "%02x", rng_seed[i]);
+ }
+}
+
/* Kernel */
static uint64_t load_kernel(void)
{
@@ -1028,6 +1040,9 @@ static uint64_t load_kernel(void)
long prom_size;
int prom_index = 0;
uint64_t (*xlate_to_kseg0) (void *opaque, uint64_t addr);
+ uint8_t rng_seed[32];
+ char rng_seed_hex[sizeof(rng_seed) * 2 + 1];
+ size_t rng_seed_prom_offset;
#if TARGET_BIG_ENDIAN
big_endian = 1;
@@ -1115,9 +1130,21 @@ static uint64_t load_kernel(void)
prom_set(prom_buf, prom_index++, "modetty0");
prom_set(prom_buf, prom_index++, "38400n8r");
+
+ qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed));
+ for (size_t i = 0; i < sizeof(rng_seed); ++i) {
+ sprintf(rng_seed_hex + i * 2, "%02x", rng_seed[i]);
+ }
+ prom_set(prom_buf, prom_index++, "rngseed");
+ rng_seed_prom_offset = prom_index * ENVP_ENTRY_SIZE +
+ sizeof(uint32_t) * ENVP_NB_ENTRIES;
+ prom_set(prom_buf, prom_index++, "%s", rng_seed_hex);
+
prom_set(prom_buf, prom_index++, NULL);
rom_add_blob_fixed("prom", prom_buf, prom_size, ENVP_PADDR);
+ qemu_register_reset_nosnapshotload(reinitialize_rng_seed,
+ rom_ptr(ENVP_PADDR, prom_size) + rng_seed_prom_offset);
g_free(prom_buf);
return kernel_entry;
diff --git a/hw/misc/cbus.c b/hw/misc/cbus.c
index 3c3721ad2d..653e8ddcd5 100644
--- a/hw/misc/cbus.c
+++ b/hw/misc/cbus.c
@@ -133,7 +133,7 @@ static void cbus_sel(void *opaque, int line, int level)
CBus *cbus_init(qemu_irq dat)
{
- CBusPriv *s = (CBusPriv *) g_malloc0(sizeof(*s));
+ CBusPriv *s = g_malloc0(sizeof(*s));
s->dat_out = dat;
s->cbus.clk = qemu_allocate_irq(cbus_clk, s, 0);
@@ -388,7 +388,7 @@ static void retu_io(void *opaque, int rw, int reg, uint16_t *val)
void *retu_init(qemu_irq irq, int vilma)
{
- CBusRetu *s = (CBusRetu *) g_malloc0(sizeof(*s));
+ CBusRetu *s = g_malloc0(sizeof(*s));
s->irq = irq;
s->irqen = 0xffff;
@@ -604,7 +604,7 @@ static void tahvo_io(void *opaque, int rw, int reg, uint16_t *val)
void *tahvo_init(qemu_irq irq, int betty)
{
- CBusTahvo *s = (CBusTahvo *) g_malloc0(sizeof(*s));
+ CBusTahvo *s = g_malloc0(sizeof(*s));
s->irq = irq;
s->irqen = 0xffff;
diff --git a/hw/net/can/can_sja1000.c b/hw/net/can/can_sja1000.c
index e0f76d3eb3..73201f9139 100644
--- a/hw/net/can/can_sja1000.c
+++ b/hw/net/can/can_sja1000.c
@@ -431,7 +431,7 @@ void can_sja_mem_write(CanSJA1000State *s, hwaddr addr, uint64_t val,
(unsigned long long)val, (unsigned int)addr);
if (addr > CAN_SJA_MEM_SIZE) {
- return ;
+ return;
}
if (s->clock & 0x80) { /* PeliCAN Mode */
diff --git a/hw/nvram/eeprom93xx.c b/hw/nvram/eeprom93xx.c
index a1b9c78844..1081e2cc0d 100644
--- a/hw/nvram/eeprom93xx.c
+++ b/hw/nvram/eeprom93xx.c
@@ -315,7 +315,7 @@ eeprom_t *eeprom93xx_new(DeviceState *dev, uint16_t nwords)
addrbits = 6;
}
- eeprom = (eeprom_t *)g_malloc0(sizeof(*eeprom) + nwords * 2);
+ eeprom = g_malloc0(sizeof(*eeprom) + nwords * 2);
eeprom->size = nwords;
eeprom->addrbits = addrbits;
/* Output DO is tristate, read results in 1. */
diff --git a/hw/openrisc/boot.c b/hw/openrisc/boot.c
index 128ccbcba2..007e80cd5a 100644
--- a/hw/openrisc/boot.c
+++ b/hw/openrisc/boot.c
@@ -14,6 +14,7 @@
#include "hw/openrisc/boot.h"
#include "sysemu/device_tree.h"
#include "sysemu/qtest.h"
+#include "sysemu/reset.h"
#include <libfdt.h>
@@ -111,6 +112,8 @@ uint32_t openrisc_load_fdt(void *fdt, hwaddr load_start,
rom_add_blob_fixed_as("fdt", fdt, fdtsize, fdt_addr,
&address_space_memory);
+ qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds,
+ rom_ptr_for_as(&address_space_memory, fdt_addr, fdtsize));
return fdt_addr;
}
diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c
index ecf682b148..bb4d008ba9 100644
--- a/hw/ppc/pegasos2.c
+++ b/hw/ppc/pegasos2.c
@@ -248,14 +248,14 @@ static void pegasos2_pci_config_write(Pegasos2MachineState *pm, int bus,
pegasos2_mv_reg_write(pm, pcicfg + 4, len, val);
}
-static void pegasos2_machine_reset(MachineState *machine)
+static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason)
{
Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine);
void *fdt;
uint64_t d[2];
int sz;
- qemu_devices_reset();
+ qemu_devices_reset(reason);
if (!pm->vof) {
return; /* Firmware should set up machine so nothing to do */
}
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 40bb573d1a..3d01e26f84 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -643,13 +643,13 @@ static void pnv_powerdown_notify(Notifier *n, void *opaque)
}
}
-static void pnv_reset(MachineState *machine)
+static void pnv_reset(MachineState *machine, ShutdownCause reason)
{
PnvMachineState *pnv = PNV_MACHINE(machine);
IPMIBmc *bmc;
void *fdt;
- qemu_devices_reset();
+ qemu_devices_reset(reason);
/*
* The machine should provide by default an internal BMC simulator.
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index f79ac85ca1..66b414d2e9 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1623,7 +1623,7 @@ void spapr_check_mmu_mode(bool guest_radix)
}
}
-static void spapr_machine_reset(MachineState *machine)
+static void spapr_machine_reset(MachineState *machine, ShutdownCause reason)
{
SpaprMachineState *spapr = SPAPR_MACHINE(machine);
PowerPCCPU *first_ppc_cpu;
@@ -1649,7 +1649,7 @@ static void spapr_machine_reset(MachineState *machine)
spapr_setup_hpt(spapr);
}
- qemu_devices_reset();
+ qemu_devices_reset(reason);
spapr_ovec_cleanup(spapr->ov5_cas);
spapr->ov5_cas = spapr_ovec_new();
diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
index e82bf27338..ebd351c840 100644
--- a/hw/riscv/boot.c
+++ b/hw/riscv/boot.c
@@ -30,6 +30,7 @@
#include "sysemu/device_tree.h"
#include "sysemu/qtest.h"
#include "sysemu/kvm.h"
+#include "sysemu/reset.h"
#include <libfdt.h>
@@ -241,6 +242,8 @@ uint64_t riscv_load_fdt(hwaddr dram_base, uint64_t mem_size, void *fdt)
rom_add_blob_fixed_as("fdt", fdt, fdtsize, fdt_addr,
&address_space_memory);
+ qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds,
+ rom_ptr_for_as(&address_space_memory, fdt_addr, fdtsize));
return fdt_addr;
}
diff --git a/hw/rx/rx-gdbsim.c b/hw/rx/rx-gdbsim.c
index 8ffe1b8035..47c17026c7 100644
--- a/hw/rx/rx-gdbsim.c
+++ b/hw/rx/rx-gdbsim.c
@@ -25,6 +25,7 @@
#include "hw/rx/rx62n.h"
#include "sysemu/qtest.h"
#include "sysemu/device_tree.h"
+#include "sysemu/reset.h"
#include "hw/boards.h"
#include "qom/object.h"
@@ -148,6 +149,8 @@ static void rx_gdbsim_init(MachineState *machine)
dtb_offset = ROUND_DOWN(machine->ram_size - dtb_size, 16);
rom_add_blob_fixed("dtb", dtb, dtb_size,
SDRAM_BASE + dtb_offset);
+ qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds,
+ rom_ptr(SDRAM_BASE + dtb_offset, dtb_size));
/* Set dtb address to R1 */
RX_CPU(first_cpu)->env.regs[1] = SDRAM_BASE + dtb_offset;
}
diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c
index 401b63d6cb..728ba24547 100644
--- a/hw/s390x/pv.c
+++ b/hw/s390x/pv.c
@@ -20,6 +20,11 @@
#include "exec/confidential-guest-support.h"
#include "hw/s390x/ipl.h"
#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/kvm_s390x.h"
+
+static bool info_valid;
+static struct kvm_s390_pv_info_vm info_vm;
+static struct kvm_s390_pv_info_dump info_dump;
static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data)
{
@@ -56,6 +61,42 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data)
} \
}
+int s390_pv_query_info(void)
+{
+ struct kvm_s390_pv_info info = {
+ .header.id = KVM_PV_INFO_VM,
+ .header.len_max = sizeof(info.header) + sizeof(info.vm),
+ };
+ int rc;
+
+ /* Info API's first user is dump so they are bundled */
+ if (!kvm_s390_get_protected_dump()) {
+ return 0;
+ }
+
+ rc = s390_pv_cmd(KVM_PV_INFO, &info);
+ if (rc) {
+ error_report("KVM PV INFO cmd %x failed: %s",
+ info.header.id, strerror(-rc));
+ return rc;
+ }
+ memcpy(&info_vm, &info.vm, sizeof(info.vm));
+
+ info.header.id = KVM_PV_INFO_DUMP;
+ info.header.len_max = sizeof(info.header) + sizeof(info.dump);
+ rc = s390_pv_cmd(KVM_PV_INFO, &info);
+ if (rc) {
+ error_report("KVM PV INFO cmd %x failed: %s",
+ info.header.id, strerror(-rc));
+ return rc;
+ }
+
+ memcpy(&info_dump, &info.dump, sizeof(info.dump));
+ info_valid = true;
+
+ return rc;
+}
+
int s390_pv_vm_enable(void)
{
return s390_pv_cmd(KVM_PV_ENABLE, NULL);
@@ -114,6 +155,77 @@ void s390_pv_inject_reset_error(CPUState *cs)
env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV;
}
+uint64_t kvm_s390_pv_dmp_get_size_cpu(void)
+{
+ return info_dump.dump_cpu_buffer_len;
+}
+
+uint64_t kvm_s390_pv_dmp_get_size_completion_data(void)
+{
+ return info_dump.dump_config_finalize_len;
+}
+
+uint64_t kvm_s390_pv_dmp_get_size_mem_state(void)
+{
+ return info_dump.dump_config_mem_buffer_per_1m;
+}
+
+bool kvm_s390_pv_info_basic_valid(void)
+{
+ return info_valid;
+}
+
+static int s390_pv_dump_cmd(uint64_t subcmd, uint64_t uaddr, uint64_t gaddr,
+ uint64_t len)
+{
+ struct kvm_s390_pv_dmp dmp = {
+ .subcmd = subcmd,
+ .buff_addr = uaddr,
+ .buff_len = len,
+ .gaddr = gaddr,
+ };
+ int ret;
+
+ ret = s390_pv_cmd(KVM_PV_DUMP, (void *)&dmp);
+ if (ret) {
+ error_report("KVM DUMP command %ld failed", subcmd);
+ }
+ return ret;
+}
+
+int kvm_s390_dump_cpu(S390CPU *cpu, void *buff)
+{
+ struct kvm_s390_pv_dmp dmp = {
+ .subcmd = KVM_PV_DUMP_CPU,
+ .buff_addr = (uint64_t)buff,
+ .gaddr = 0,
+ .buff_len = info_dump.dump_cpu_buffer_len,
+ };
+ struct kvm_pv_cmd pv = {
+ .cmd = KVM_PV_DUMP,
+ .data = (uint64_t)&dmp,
+ };
+
+ return kvm_vcpu_ioctl(CPU(cpu), KVM_S390_PV_CPU_COMMAND, &pv);
+}
+
+int kvm_s390_dump_init(void)
+{
+ return s390_pv_dump_cmd(KVM_PV_DUMP_INIT, 0, 0, 0);
+}
+
+int kvm_s390_dump_mem_state(uint64_t gaddr, size_t len, void *dest)
+{
+ return s390_pv_dump_cmd(KVM_PV_DUMP_CONFIG_STOR_STATE, (uint64_t)dest,
+ gaddr, len);
+}
+
+int kvm_s390_dump_completion_data(void *buff)
+{
+ return s390_pv_dump_cmd(KVM_PV_DUMP_COMPLETE, (uint64_t)buff, 0,
+ info_dump.dump_config_finalize_len);
+}
+
#define TYPE_S390_PV_GUEST "s390-pv-guest"
OBJECT_DECLARE_SIMPLE_TYPE(S390PVGuest, S390_PV_GUEST)
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 03855c7231..806de32034 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -366,6 +366,12 @@ static int s390_machine_protect(S390CcwMachineState *ms)
ms->pv = true;
+ /* Will return 0 if API is not available since it's not vital */
+ rc = s390_pv_query_info();
+ if (rc) {
+ goto out_err;
+ }
+
/* Set SE header and unpack */
rc = s390_ipl_prepare_pv_header();
if (rc) {
@@ -405,7 +411,7 @@ static void s390_pv_prepare_reset(S390CcwMachineState *ms)
s390_pv_prep_reset();
}
-static void s390_machine_reset(MachineState *machine)
+static void s390_machine_reset(MachineState *machine, ShutdownCause reason)
{
S390CcwMachineState *ms = S390_CCW_MACHINE(machine);
enum s390_reset reset_type;
@@ -427,7 +433,7 @@ static void s390_machine_reset(MachineState *machine)
s390_machine_unprotect(ms);
}
- qemu_devices_reset();
+ qemu_devices_reset(reason);
s390_crypto_reset();
/* configure and start the ipl CPU only */
diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c
index faed7e0cbe..22df4be528 100644
--- a/hw/ssi/aspeed_smc.c
+++ b/hw/ssi/aspeed_smc.c
@@ -388,7 +388,7 @@ static inline int aspeed_smc_flash_cmd(const AspeedSMCFlash *fl)
static inline int aspeed_smc_flash_addr_width(const AspeedSMCFlash *fl)
{
const AspeedSMCState *s = fl->controller;
- AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s);
+ AspeedSMCClass *asc = fl->asc;
if (asc->addr_width) {
return asc->addr_width(s);
@@ -420,7 +420,7 @@ static uint32_t aspeed_smc_check_segment_addr(const AspeedSMCFlash *fl,
uint32_t addr)
{
const AspeedSMCState *s = fl->controller;
- AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s);
+ AspeedSMCClass *asc = fl->asc;
AspeedSegments seg;
asc->reg_to_segment(s, s->regs[R_SEG_ADDR0 + fl->cs], &seg);
@@ -1234,7 +1234,6 @@ static const TypeInfo aspeed_smc_info = {
static void aspeed_smc_flash_realize(DeviceState *dev, Error **errp)
{
AspeedSMCFlash *s = ASPEED_SMC_FLASH(dev);
- AspeedSMCClass *asc;
g_autofree char *name = g_strdup_printf(TYPE_ASPEED_SMC_FLASH ".%d", s->cs);
if (!s->controller) {
@@ -1242,14 +1241,14 @@ static void aspeed_smc_flash_realize(DeviceState *dev, Error **errp)
return;
}
- asc = ASPEED_SMC_GET_CLASS(s->controller);
+ s->asc = ASPEED_SMC_GET_CLASS(s->controller);
/*
* Use the default segment value to size the memory region. This
* can be changed by FW at runtime.
*/
memory_region_init_io(&s->mmio, OBJECT(s), &aspeed_smc_flash_ops,
- s, name, asc->segments[s->cs].size);
+ s, name, s->asc->segments[s->cs].size);
sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mmio);
}
diff --git a/hw/ssi/ssi.c b/hw/ssi/ssi.c
index 003931fb50..d54a109bee 100644
--- a/hw/ssi/ssi.c
+++ b/hw/ssi/ssi.c
@@ -38,9 +38,8 @@ static void ssi_cs_default(void *opaque, int n, int level)
bool cs = !!level;
assert(n == 0);
if (s->cs != cs) {
- SSIPeripheralClass *ssc = SSI_PERIPHERAL_GET_CLASS(s);
- if (ssc->set_cs) {
- ssc->set_cs(s, cs);
+ if (s->spc->set_cs) {
+ s->spc->set_cs(s, cs);
}
}
s->cs = cs;
@@ -48,11 +47,11 @@ static void ssi_cs_default(void *opaque, int n, int level)
static uint32_t ssi_transfer_raw_default(SSIPeripheral *dev, uint32_t val)
{
- SSIPeripheralClass *ssc = SSI_PERIPHERAL_GET_CLASS(dev);
+ SSIPeripheralClass *ssc = dev->spc;
if ((dev->cs && ssc->cs_polarity == SSI_CS_HIGH) ||
- (!dev->cs && ssc->cs_polarity == SSI_CS_LOW) ||
- ssc->cs_polarity == SSI_CS_NONE) {
+ (!dev->cs && ssc->cs_polarity == SSI_CS_LOW) ||
+ ssc->cs_polarity == SSI_CS_NONE) {
return ssc->transfer(dev, val);
}
return 0;
@@ -67,6 +66,7 @@ static void ssi_peripheral_realize(DeviceState *dev, Error **errp)
ssc->cs_polarity != SSI_CS_NONE) {
qdev_init_gpio_in_named(dev, ssi_cs_default, SSI_GPIO_CS, 1);
}
+ s->spc = ssc;
ssc->realize(s, errp);
}
@@ -115,13 +115,11 @@ uint32_t ssi_transfer(SSIBus *bus, uint32_t val)
{
BusState *b = BUS(bus);
BusChild *kid;
- SSIPeripheralClass *ssc;
uint32_t r = 0;
QTAILQ_FOREACH(kid, &b->children, sibling) {
- SSIPeripheral *peripheral = SSI_PERIPHERAL(kid->child);
- ssc = SSI_PERIPHERAL_GET_CLASS(peripheral);
- r |= ssc->transfer_raw(peripheral, val);
+ SSIPeripheral *p = SSI_PERIPHERAL(kid->child);
+ r |= p->spc->transfer_raw(p, val);
}
return r;
diff --git a/hw/timer/imx_epit.c b/hw/timer/imx_epit.c
index 2bf8c754b2..ec0fa440d7 100644
--- a/hw/timer/imx_epit.c
+++ b/hw/timer/imx_epit.c
@@ -275,10 +275,15 @@ static void imx_epit_write(void *opaque, hwaddr offset, uint64_t value,
/* If IOVW bit is set then set the timer value */
ptimer_set_count(s->timer_reload, s->lr);
}
-
+ /*
+ * Commit the change to s->timer_reload, so it can propagate. Otherwise
+ * the timer interrupt may not fire properly. The commit must happen
+ * before calling imx_epit_reload_compare_timer(), which reads
+ * s->timer_reload internally again.
+ */
+ ptimer_transaction_commit(s->timer_reload);
imx_epit_reload_compare_timer(s);
ptimer_transaction_commit(s->timer_cmp);
- ptimer_transaction_commit(s->timer_reload);
break;
case 3: /* CMP */
diff --git a/hw/timer/renesas_cmt.c b/hw/timer/renesas_cmt.c
index 2e0fd21a36..69eabc678a 100644
--- a/hw/timer/renesas_cmt.c
+++ b/hw/timer/renesas_cmt.c
@@ -57,7 +57,7 @@ static void update_events(RCMTState *cmt, int ch)
if ((cmt->cmstr & (1 << ch)) == 0) {
/* count disable, so not happened next event. */
- return ;
+ return;
}
next_time = cmt->cmcor[ch] - cmt->cmcnt[ch];
next_time *= NANOSECONDS_PER_SECOND;
diff --git a/hw/timer/renesas_tmr.c b/hw/timer/renesas_tmr.c
index d96002e1ee..c15f654738 100644
--- a/hw/timer/renesas_tmr.c
+++ b/hw/timer/renesas_tmr.c
@@ -67,18 +67,18 @@ static void update_events(RTMRState *tmr, int ch)
int i, event;
if (tmr->tccr[ch] == 0) {
- return ;
+ return;
}
if (FIELD_EX8(tmr->tccr[ch], TCCR, CSS) == 0) {
/* external clock mode */
/* event not happened */
- return ;
+ return;
}
if (FIELD_EX8(tmr->tccr[0], TCCR, CSS) == CSS_CASCADING) {
/* cascading mode */
if (ch == 1) {
tmr->next[ch] = none;
- return ;
+ return;
}
diff[cmia] = concat_reg(tmr->tcora) - concat_reg(tmr->tcnt);
diff[cmib] = concat_reg(tmr->tcorb) - concat_reg(tmr->tcnt);
@@ -384,7 +384,7 @@ static void timer_events(RTMRState *tmr, int ch)
tmr->tcorb[ch]) & 0xff;
} else {
if (ch == 1) {
- return ;
+ return;
}
tcnt = issue_event(tmr, ch, 16,
concat_reg(tmr->tcnt),
diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c
index 1ddf7297f6..ee41a81801 100644
--- a/hw/usb/ccid-card-emulated.c
+++ b/hw/usb/ccid-card-emulated.c
@@ -140,7 +140,7 @@ static void emulated_apdu_from_guest(CCIDCardState *base,
const uint8_t *apdu, uint32_t len)
{
EmulatedState *card = EMULATED_CCID_CARD(base);
- EmulEvent *event = (EmulEvent *)g_malloc(sizeof(EmulEvent) + len);
+ EmulEvent *event = g_malloc(sizeof(EmulEvent) + len);
assert(event);
event->p.data.type = EMUL_GUEST_APDU;
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index e7d80242b7..34db51e241 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1675,7 +1675,7 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by"
" neither legacy nor transitional device");
- return ;
+ return;
}
/*
* Legacy and transitional devices use specific subsystem IDs.
diff --git a/include/block/block-common.h b/include/block/block-common.h
index fdb7306e78..061606e867 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -80,6 +80,15 @@ typedef enum {
*/
BDRV_REQ_MAY_UNMAP = 0x4,
+ /*
+ * An optimization hint when all QEMUIOVector elements are within
+ * previously registered bdrv_register_buf() memory ranges.
+ *
+ * Code that replaces the user's QEMUIOVector elements with bounce buffers
+ * must take care to clear this flag.
+ */
+ BDRV_REQ_REGISTERED_BUF = 0x8,
+
BDRV_REQ_FUA = 0x10,
BDRV_REQ_WRITE_COMPRESSED = 0x20,
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
index 21265e3966..eba4ed23b4 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -243,9 +243,15 @@ void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
* Register/unregister a buffer for I/O. For example, VFIO drivers are
* interested to know the memory areas that would later be used for I/O, so
* that they can prepare IOMMU mapping etc., to get better performance.
+ *
+ * Buffers must not overlap and they must be unregistered with the same <host,
+ * size> values that they were registered with.
+ *
+ * Returns: true on success, false on failure
*/
-void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
-void bdrv_unregister_buf(BlockDriverState *bs, void *host);
+bool bdrv_register_buf(BlockDriverState *bs, void *host, size_t size,
+ Error **errp);
+void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size);
void bdrv_cancel_in_flight(BlockDriverState *bs);
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 8947abab76..9c569be162 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -433,9 +433,12 @@ struct BlockDriver {
* that it can do IOMMU mapping with VFIO etc., in order to get better
* performance. In the case of VFIO drivers, this callback is used to do
* DMA mapping for hot buffers.
+ *
+ * Returns: true on success, false on failure
*/
- void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
- void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host);
+ bool (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size,
+ Error **errp);
+ void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host, size_t size);
/*
* This field is modified only under the BQL, and is part of
@@ -1051,7 +1054,7 @@ struct BlockDriverState {
/*
* Flags honored during pread
*/
- unsigned int supported_read_flags;
+ BdrvRequestFlags supported_read_flags;
/*
* Flags honored during pwrite (so far: BDRV_REQ_FUA,
* BDRV_REQ_WRITE_UNCHANGED).
@@ -1069,12 +1072,12 @@ struct BlockDriverState {
* flag), or they have to explicitly take the WRITE permission for
* their children.
*/
- unsigned int supported_write_flags;
+ BdrvRequestFlags supported_write_flags;
/*
* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
* BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED)
*/
- unsigned int supported_zero_flags;
+ BdrvRequestFlags supported_zero_flags;
/*
* Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE).
*
@@ -1082,7 +1085,7 @@ struct BlockDriverState {
* that any added space reads as all zeros. If this can't be guaranteed,
* the operation must fail.
*/
- unsigned int supported_truncate_flags;
+ BdrvRequestFlags supported_truncate_flags;
/* the following member gives a name to every node on the bs graph. */
char node_name[32];
diff --git a/include/elf.h b/include/elf.h
index 3d6b9062c0..8bf1e72720 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -1650,6 +1650,8 @@ typedef struct elf64_shdr {
#define NT_TASKSTRUCT 4
#define NT_AUXV 6
#define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */
+#define NT_S390_PV_CPU_DATA 0x30e /* s390 protvirt cpu dump data */
+#define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */
#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */
#define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */
#define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 (lower half) */
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 16b7df41bf..2eb1176538 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -281,28 +281,18 @@ void page_reset_target_data(target_ulong start, target_ulong end);
int page_check_range(target_ulong start, target_ulong len, int flags);
/**
- * page_alloc_target_data(address, size)
+ * page_get_target_data(address)
* @address: guest virtual address
- * @size: size of data to allocate
*
- * Allocate @size bytes of out-of-band data to associate with the
- * guest page at @address. If the page is not mapped, NULL will
- * be returned. If there is existing data associated with @address,
- * no new memory will be allocated.
+ * Return TARGET_PAGE_DATA_SIZE bytes of out-of-band data to associate
+ * with the guest page at @address, allocating it if necessary. The
+ * caller should already have verified that the address is valid.
*
* The memory will be freed when the guest page is deallocated,
* e.g. with the munmap system call.
*/
-void *page_alloc_target_data(target_ulong address, size_t size);
-
-/**
- * page_get_target_data(address)
- * @address: guest virtual address
- *
- * Return any out-of-bound memory assocated with the guest page
- * at @address, as per page_alloc_target_data.
- */
-void *page_get_target_data(target_ulong address);
+void *page_get_target_data(target_ulong address)
+ __attribute__((returns_nonnull));
#endif
CPUArchState *cpu_copy(CPUArchState *env);
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index c493510ee9..6feaa40ca7 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -92,6 +92,7 @@ void qemu_ram_set_uf_zeroable(RAMBlock *rb);
bool qemu_ram_is_migratable(RAMBlock *rb);
void qemu_ram_set_migratable(RAMBlock *rb);
void qemu_ram_unset_migratable(RAMBlock *rb);
+int qemu_ram_get_fd(RAMBlock *rb);
size_t qemu_ram_pagesize(RAMBlock *block);
size_t qemu_ram_pagesize_largest(void);
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index e5f8b224a5..e948992a80 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -39,9 +39,6 @@ typedef ram_addr_t tb_page_addr_t;
#define TB_PAGE_ADDR_FMT RAM_ADDR_FMT
#endif
-void restore_state_to_opc(CPUArchState *env, TranslationBlock *tb,
- target_ulong *data);
-
/**
* cpu_restore_state:
* @cpu: the vCPU state is to be restore to
@@ -610,18 +607,40 @@ static inline uint32_t tb_cflags(const TranslationBlock *tb)
return qatomic_read(&tb->cflags);
}
+static inline tb_page_addr_t tb_page_addr0(const TranslationBlock *tb)
+{
+ return tb->page_addr[0];
+}
+
+static inline tb_page_addr_t tb_page_addr1(const TranslationBlock *tb)
+{
+ return tb->page_addr[1];
+}
+
+static inline void tb_set_page_addr0(TranslationBlock *tb,
+ tb_page_addr_t addr)
+{
+ tb->page_addr[0] = addr;
+}
+
+static inline void tb_set_page_addr1(TranslationBlock *tb,
+ tb_page_addr_t addr)
+{
+ tb->page_addr[1] = addr;
+}
+
/* current cflags for hashing/comparison */
uint32_t curr_cflags(CPUState *cpu);
/* TranslationBlock invalidate API */
#if defined(CONFIG_USER_ONLY)
void tb_invalidate_phys_addr(target_ulong addr);
-void tb_invalidate_phys_range(target_ulong start, target_ulong end);
#else
void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs);
#endif
void tb_flush(CPUState *cpu);
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
+void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end);
void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
/* GETPC is the true target of the return instruction that we'll execute. */
@@ -642,14 +661,6 @@ extern __thread uintptr_t tci_tb_ptr;
smaller than 4 bytes, so we don't worry about special-casing this. */
#define GETPC_ADJ 2
-#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_DEBUG_TCG)
-void assert_no_pages_locked(void);
-#else
-static inline void assert_no_pages_locked(void)
-{
-}
-#endif
-
#if !defined(CONFIG_USER_ONLY)
/**
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index f3e0c78161..1500680458 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -147,8 +147,6 @@ static inline void qemu_ram_block_writeback(RAMBlock *block)
#define DIRTY_CLIENTS_ALL ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))
-void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end);
-
static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
ram_addr_t length,
unsigned client)
diff --git a/include/exec/translate-all.h b/include/exec/translate-all.h
index 9f646389af..3e9cb91565 100644
--- a/include/exec/translate-all.h
+++ b/include/exec/translate-all.h
@@ -29,7 +29,7 @@ void page_collection_unlock(struct page_collection *set);
void tb_invalidate_phys_page_fast(struct page_collection *pages,
tb_page_addr_t start, int len,
uintptr_t retaddr);
-void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end);
+void tb_invalidate_phys_page(tb_page_addr_t addr);
void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr);
#ifdef CONFIG_USER_ONLY
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 311ed17e18..90f1dd3aeb 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -231,7 +231,7 @@ struct MachineClass {
const char *deprecation_reason;
void (*init)(MachineState *state);
- void (*reset)(MachineState *state);
+ void (*reset)(MachineState *state, ShutdownCause reason);
void (*wakeup)(MachineState *state);
int (*kvm_type)(MachineState *machine, const char *arg);
diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
index 78c6c6635d..20e3c0ffbb 100644
--- a/include/hw/core/tcg-cpu-ops.h
+++ b/include/hw/core/tcg-cpu-ops.h
@@ -31,6 +31,17 @@ struct TCGCPUOps {
* function to restore all the state, and register it here.
*/
void (*synchronize_from_tb)(CPUState *cpu, const TranslationBlock *tb);
+ /**
+ * @restore_state_to_opc: Synchronize state from INDEX_op_start_insn
+ *
+ * This is called when we unwind state in the middle of a TB,
+ * usually before raising an exception. Set all part of the CPU
+ * state which are tracked insn-by-insn in the target-specific
+ * arguments to start_insn, passed as @data.
+ */
+ void (*restore_state_to_opc)(CPUState *cpu, const TranslationBlock *tb,
+ const uint64_t *data);
+
/** @cpu_exec_enter: Callback for cpu_exec preparation */
void (*cpu_exec_enter)(CPUState *cpu);
/** @cpu_exec_exit: Callback for cpu_exec cleanup */
diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h
index 7c3b1d0f6c..fbe0b1e956 100644
--- a/include/hw/elf_ops.h
+++ b/include/hw/elf_ops.h
@@ -117,7 +117,7 @@ static void glue(load_symbols, SZ)(struct elfhdr *ehdr, int fd, int must_swab,
shdr_table = load_at(fd, ehdr->e_shoff,
sizeof(struct elf_shdr) * ehdr->e_shnum);
if (!shdr_table) {
- return ;
+ return;
}
if (must_swab) {
diff --git a/include/hw/i2c/aspeed_i2c.h b/include/hw/i2c/aspeed_i2c.h
index 300a89b343..adc904d6c1 100644
--- a/include/hw/i2c/aspeed_i2c.h
+++ b/include/hw/i2c/aspeed_i2c.h
@@ -130,6 +130,7 @@ REG32(I2CD_CMD, 0x14) /* I2CD Command/Status */
SHARED_FIELD(M_TX_CMD, 1, 1)
SHARED_FIELD(M_START_CMD, 0, 1)
REG32(I2CD_DEV_ADDR, 0x18) /* Slave Device Address */
+ SHARED_FIELD(SLAVE_DEV_ADDR1, 0, 7)
REG32(I2CD_POOL_CTRL, 0x1C) /* Pool Buffer Control */
SHARED_FIELD(RX_COUNT, 24, 5)
SHARED_FIELD(RX_SIZE, 16, 5)
diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h
index 1f1f545bfc..9360aa1091 100644
--- a/include/hw/s390x/pv.h
+++ b/include/hw/s390x/pv.h
@@ -38,6 +38,7 @@ static inline bool s390_is_pv(void)
return ccw->pv;
}
+int s390_pv_query_info(void);
int s390_pv_vm_enable(void);
void s390_pv_vm_disable(void);
int s390_pv_set_sec_parms(uint64_t origin, uint64_t length);
@@ -46,8 +47,17 @@ void s390_pv_prep_reset(void);
int s390_pv_verify(void);
void s390_pv_unshare(void);
void s390_pv_inject_reset_error(CPUState *cs);
+uint64_t kvm_s390_pv_dmp_get_size_cpu(void);
+uint64_t kvm_s390_pv_dmp_get_size_mem_state(void);
+uint64_t kvm_s390_pv_dmp_get_size_completion_data(void);
+bool kvm_s390_pv_info_basic_valid(void);
+int kvm_s390_dump_init(void);
+int kvm_s390_dump_cpu(S390CPU *cpu, void *buff);
+int kvm_s390_dump_mem_state(uint64_t addr, size_t len, void *dest);
+int kvm_s390_dump_completion_data(void *buff);
#else /* CONFIG_KVM */
static inline bool s390_is_pv(void) { return false; }
+static inline int s390_pv_query_info(void) { return 0; }
static inline int s390_pv_vm_enable(void) { return 0; }
static inline void s390_pv_vm_disable(void) {}
static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; }
@@ -56,6 +66,15 @@ static inline void s390_pv_prep_reset(void) {}
static inline int s390_pv_verify(void) { return 0; }
static inline void s390_pv_unshare(void) {}
static inline void s390_pv_inject_reset_error(CPUState *cs) {};
+static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; }
+static inline uint64_t kvm_s390_pv_dmp_get_size_mem_state(void) { return 0; }
+static inline uint64_t kvm_s390_pv_dmp_get_size_completion_data(void) { return 0; }
+static inline bool kvm_s390_pv_info_basic_valid(void) { return false; }
+static inline int kvm_s390_dump_init(void) { return 0; }
+static inline int kvm_s390_dump_cpu(S390CPU *cpu, void *buff) { return 0; }
+static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len,
+ void *dest) { return 0; }
+static inline int kvm_s390_dump_completion_data(void *buff) { return 0; }
#endif /* CONFIG_KVM */
int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index 3b1b3d278e..6ea4b64fe7 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -188,7 +188,6 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
const char *serial, Error **errp);
void scsi_bus_set_ua(SCSIBus *bus, SCSISense sense);
void scsi_bus_legacy_handle_cmdline(SCSIBus *bus);
-void scsi_legacy_handle_cmdline(void);
SCSIRequest *scsi_req_alloc(const SCSIReqOps *reqops, SCSIDevice *d,
uint32_t tag, uint32_t lun, void *hba_private);
diff --git a/include/hw/ssi/aspeed_smc.h b/include/hw/ssi/aspeed_smc.h
index 2d5f8f3d8f..8e1dda556b 100644
--- a/include/hw/ssi/aspeed_smc.h
+++ b/include/hw/ssi/aspeed_smc.h
@@ -30,6 +30,7 @@
#include "qom/object.h"
struct AspeedSMCState;
+struct AspeedSMCClass;
#define TYPE_ASPEED_SMC_FLASH "aspeed.smc.flash"
OBJECT_DECLARE_SIMPLE_TYPE(AspeedSMCFlash, ASPEED_SMC_FLASH)
@@ -37,6 +38,7 @@ struct AspeedSMCFlash {
SysBusDevice parent_obj;
struct AspeedSMCState *controller;
+ struct AspeedSMCClass *asc;
uint8_t cs;
MemoryRegion mmio;
diff --git a/include/hw/ssi/ssi.h b/include/hw/ssi/ssi.h
index f411858ab0..6950f86810 100644
--- a/include/hw/ssi/ssi.h
+++ b/include/hw/ssi/ssi.h
@@ -59,6 +59,9 @@ struct SSIPeripheralClass {
struct SSIPeripheral {
DeviceState parent_obj;
+ /* cache the class */
+ SSIPeripheralClass *spc;
+
/* Chip select state */
bool cs;
};
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index d311c57cca..7f589b4146 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -19,6 +19,7 @@
#include "hw/block/block.h"
#include "sysemu/iothread.h"
#include "sysemu/block-backend.h"
+#include "sysemu/block-ram-registrar.h"
#include "qom/object.h"
#define TYPE_VIRTIO_BLK "virtio-blk-device"
@@ -64,6 +65,7 @@ struct VirtIOBlock {
struct VirtIOBlockDataPlane *dataplane;
uint64_t host_features;
size_t config_size;
+ BlockRAMRegistrar blk_ram_registrar;
};
typedef struct VirtIOBlockReq {
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index 7e8fc8e7cd..874134fd19 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -133,7 +133,7 @@
#define qatomic_read(ptr) \
({ \
- QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
+ qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE); \
qatomic_read__nocheck(ptr); \
})
@@ -141,7 +141,7 @@
__atomic_store_n(ptr, i, __ATOMIC_RELAXED)
#define qatomic_set(ptr, i) do { \
- QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
+ qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE); \
qatomic_set__nocheck(ptr, i); \
} while(0)
@@ -159,27 +159,27 @@
#define qatomic_rcu_read(ptr) \
({ \
- QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
+ qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE); \
typeof_strip_qual(*ptr) _val; \
qatomic_rcu_read__nocheck(ptr, &_val); \
_val; \
})
#define qatomic_rcu_set(ptr, i) do { \
- QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
+ qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE); \
__atomic_store_n(ptr, i, __ATOMIC_RELEASE); \
} while(0)
#define qatomic_load_acquire(ptr) \
({ \
- QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
+ qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE); \
typeof_strip_qual(*ptr) _val; \
__atomic_load(ptr, &_val, __ATOMIC_ACQUIRE); \
_val; \
})
#define qatomic_store_release(ptr, i) do { \
- QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
+ qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE); \
__atomic_store_n(ptr, i, __ATOMIC_RELEASE); \
} while(0)
@@ -191,7 +191,7 @@
})
#define qatomic_xchg(ptr, i) ({ \
- QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
+ qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE); \
qatomic_xchg__nocheck(ptr, i); \
})
@@ -204,7 +204,7 @@
})
#define qatomic_cmpxchg(ptr, old, new) ({ \
- QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
+ qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE); \
qatomic_cmpxchg__nocheck(ptr, old, new); \
})
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index aae33cce17..608fe45dcf 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -198,14 +198,25 @@ typedef struct CoQueue {
*/
void qemu_co_queue_init(CoQueue *queue);
+typedef enum {
+ /*
+ * Enqueue at front instead of back. Use this to re-queue a request when
+ * its wait condition is not satisfied after being woken up.
+ */
+ CO_QUEUE_WAIT_FRONT = 0x1,
+} CoQueueWaitFlags;
+
/**
* Adds the current coroutine to the CoQueue and transfers control to the
* caller of the coroutine. The mutex is unlocked during the wait and
* locked again afterwards.
*/
#define qemu_co_queue_wait(queue, lock) \
- qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock))
-void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock);
+ qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock), 0)
+#define qemu_co_queue_wait_flags(queue, lock, flags) \
+ qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock), (flags))
+void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock,
+ CoQueueWaitFlags flags);
/**
* Removes the next coroutine from the CoQueue, and queue it to run after
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index b1c161c035..2276094729 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -186,6 +186,14 @@ void QEMU_ERROR("code path is reachable")
#define qemu_build_not_reached() g_assert_not_reached()
#endif
+/**
+ * qemu_build_assert()
+ *
+ * The compiler, during optimization, is expected to prove that the
+ * assertion is true.
+ */
+#define qemu_build_assert(test) while (!(test)) qemu_build_not_reached()
+
/*
* According to waitpid man page:
* WCOREDUMP
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index af19f2b3fc..20641e5844 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -227,7 +227,7 @@ struct QemuSpin {
static inline void qemu_spin_init(QemuSpin *spin)
{
- __sync_lock_release(&spin->value);
+ qatomic_set(&spin->value, 0);
#ifdef CONFIG_TSAN
__tsan_mutex_create(spin, __tsan_mutex_not_static);
#endif
@@ -246,7 +246,7 @@ static inline void qemu_spin_lock(QemuSpin *spin)
#ifdef CONFIG_TSAN
__tsan_mutex_pre_lock(spin, 0);
#endif
- while (unlikely(__sync_lock_test_and_set(&spin->value, true))) {
+ while (unlikely(qatomic_xchg(&spin->value, 1))) {
while (qatomic_read(&spin->value)) {
cpu_relax();
}
@@ -261,7 +261,7 @@ static inline bool qemu_spin_trylock(QemuSpin *spin)
#ifdef CONFIG_TSAN
__tsan_mutex_pre_lock(spin, __tsan_mutex_try_lock);
#endif
- bool busy = __sync_lock_test_and_set(&spin->value, true);
+ bool busy = qatomic_xchg(&spin->value, true);
#ifdef CONFIG_TSAN
unsigned flags = __tsan_mutex_try_lock;
flags |= busy ? __tsan_mutex_try_lock_failed : 0;
@@ -280,7 +280,7 @@ static inline void qemu_spin_unlock(QemuSpin *spin)
#ifdef CONFIG_TSAN
__tsan_mutex_pre_unlock(spin, 0);
#endif
- __sync_lock_release(&spin->value);
+ qatomic_store_release(&spin->value, 0);
#ifdef CONFIG_TSAN
__tsan_mutex_post_unlock(spin, 0);
#endif
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
index 415f0c91d7..6858e39cb6 100644
--- a/include/sysemu/block-backend-global-state.h
+++ b/include/sysemu/block-backend-global-state.h
@@ -106,8 +106,8 @@ void blk_io_limits_enable(BlockBackend *blk, const char *group);
void blk_io_limits_update_group(BlockBackend *blk, const char *group);
void blk_set_force_allow_inactivate(BlockBackend *blk);
-void blk_register_buf(BlockBackend *blk, void *host, size_t size);
-void blk_unregister_buf(BlockBackend *blk, void *host);
+bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp);
+void blk_unregister_buf(BlockBackend *blk, void *host, size_t size);
const BdrvChild *blk_root(BlockBackend *blk);
diff --git a/include/sysemu/block-ram-registrar.h b/include/sysemu/block-ram-registrar.h
new file mode 100644
index 0000000000..d8b2f7942b
--- /dev/null
+++ b/include/sysemu/block-ram-registrar.h
@@ -0,0 +1,37 @@
+/*
+ * BlockBackend RAM Registrar
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef BLOCK_RAM_REGISTRAR_H
+#define BLOCK_RAM_REGISTRAR_H
+
+#include "exec/ramlist.h"
+
+/**
+ * struct BlockRAMRegistrar:
+ *
+ * Keeps RAMBlock memory registered with a BlockBackend using
+ * blk_register_buf() including hotplugged memory.
+ *
+ * Emulated devices or other BlockBackend users initialize a BlockRAMRegistrar
+ * with blk_ram_registrar_init() before submitting I/O requests with the
+ * BDRV_REQ_REGISTERED_BUF flag set.
+ */
+typedef struct {
+ BlockBackend *blk;
+ RAMBlockNotifier notifier;
+ bool ok;
+} BlockRAMRegistrar;
+
+void blk_ram_registrar_init(BlockRAMRegistrar *r, BlockBackend *blk);
+void blk_ram_registrar_destroy(BlockRAMRegistrar *r);
+
+/* Have all RAMBlocks been registered successfully? */
+static inline bool blk_ram_registrar_ok(BlockRAMRegistrar *r)
+{
+ return r->ok;
+}
+
+#endif /* BLOCK_RAM_REGISTRAR_H */
diff --git a/include/sysemu/device_tree.h b/include/sysemu/device_tree.h
index e7c5441f56..ca5339beae 100644
--- a/include/sysemu/device_tree.h
+++ b/include/sysemu/device_tree.h
@@ -197,6 +197,15 @@ int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
qdt_tmp); \
})
+
+/**
+ * qemu_fdt_randomize_seeds:
+ * @fdt: device tree blob
+ *
+ * Re-randomize all "rng-seed" properties with new seeds.
+ */
+void qemu_fdt_randomize_seeds(void *fdt);
+
#define FDT_PCI_RANGE_RELOCATABLE 0x80000000
#define FDT_PCI_RANGE_PREFETCHABLE 0x40000000
#define FDT_PCI_RANGE_ALIASED 0x20000000
diff --git a/include/sysemu/dump-arch.h b/include/sysemu/dump-arch.h
index e25b02e990..59bbc9be38 100644
--- a/include/sysemu/dump-arch.h
+++ b/include/sysemu/dump-arch.h
@@ -21,6 +21,9 @@ typedef struct ArchDumpInfo {
uint32_t page_size; /* The target's page size. If it's variable and
* unknown, then this should be the maximum. */
uint64_t phys_base; /* The target's physmem base. */
+ void (*arch_sections_add_fn)(DumpState *s);
+ uint64_t (*arch_sections_write_hdr_fn)(DumpState *s, uint8_t *buff);
+ int (*arch_sections_write_fn)(DumpState *s, uint8_t *buff);
} ArchDumpInfo;
struct GuestPhysBlockList; /* memory_mapping.h */
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index b62513d87d..4ffed0b659 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -154,15 +154,8 @@ typedef struct DumpState {
GuestPhysBlockList guest_phys_blocks;
ArchDumpInfo dump_info;
MemoryMappingList list;
- uint32_t phdr_num;
- uint32_t shdr_num;
bool resume;
bool detached;
- ssize_t note_size;
- hwaddr shdr_offset;
- hwaddr phdr_offset;
- hwaddr section_offset;
- hwaddr note_offset;
hwaddr memory_offset;
int fd;
@@ -177,6 +170,20 @@ typedef struct DumpState {
int64_t filter_area_begin; /* Start address of partial guest memory area */
int64_t filter_area_length; /* Length of partial guest memory area */
+ /* Elf dump related data */
+ uint32_t phdr_num;
+ uint32_t shdr_num;
+ ssize_t note_size;
+ hwaddr shdr_offset;
+ hwaddr phdr_offset;
+ hwaddr section_offset;
+ hwaddr note_offset;
+
+ void *elf_section_hdrs; /* Pointer to section header buffer */
+ void *elf_section_data; /* Pointer to section data buffer */
+ uint64_t elf_section_data_size; /* Size of section data */
+ GArray *string_table_buf; /* String table data buffer */
+
uint8_t *note_buf; /* buffer for notes */
size_t note_buf_offset; /* the writing place in note_buf */
uint32_t nr_cpus; /* number of guest's cpu */
@@ -208,4 +215,9 @@ typedef struct DumpState {
uint16_t cpu_to_dump16(DumpState *s, uint16_t val);
uint32_t cpu_to_dump32(DumpState *s, uint32_t val);
uint64_t cpu_to_dump64(DumpState *s, uint64_t val);
+
+int64_t dump_filtered_memblock_size(GuestPhysBlock *block, int64_t filter_area_start,
+ int64_t filter_area_length);
+int64_t dump_filtered_memblock_start(GuestPhysBlock *block, int64_t filter_area_start,
+ int64_t filter_area_length);
#endif
diff --git a/include/sysemu/reset.h b/include/sysemu/reset.h
index 0b0d6d7598..609e4d50c2 100644
--- a/include/sysemu/reset.h
+++ b/include/sysemu/reset.h
@@ -1,10 +1,13 @@
#ifndef QEMU_SYSEMU_RESET_H
#define QEMU_SYSEMU_RESET_H
+#include "qapi/qapi-events-run-state.h"
+
typedef void QEMUResetHandler(void *opaque);
void qemu_register_reset(QEMUResetHandler *func, void *opaque);
+void qemu_register_reset_nosnapshotload(QEMUResetHandler *func, void *opaque);
void qemu_unregister_reset(QEMUResetHandler *func, void *opaque);
-void qemu_devices_reset(void);
+void qemu_devices_reset(ShutdownCause reason);
#endif
diff --git a/linux-user/cpu_loop-common.h b/linux-user/cpu_loop-common.h
index 36ff5b14f2..e644d2ef90 100644
--- a/linux-user/cpu_loop-common.h
+++ b/linux-user/cpu_loop-common.h
@@ -23,18 +23,9 @@
#include "exec/log.h"
#include "special-errno.h"
-#define EXCP_DUMP(env, fmt, ...) \
-do { \
- CPUState *cs = env_cpu(env); \
- fprintf(stderr, fmt , ## __VA_ARGS__); \
- fprintf(stderr, "Failing executable: %s\n", exec_path); \
- cpu_dump_state(cs, stderr, 0); \
- if (qemu_log_separate()) { \
- qemu_log(fmt, ## __VA_ARGS__); \
- qemu_log("Failing executable: %s\n", exec_path); \
- log_cpu_state(cs, 0); \
- } \
-} while (0)
+void target_exception_dump(CPUArchState *env, const char *fmt, int code);
+#define EXCP_DUMP(env, fmt, code) \
+ target_exception_dump(env, fmt, code)
void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs);
#endif
diff --git a/linux-user/i386/cpu_loop.c b/linux-user/i386/cpu_loop.c
index 42837399bc..865413c08f 100644
--- a/linux-user/i386/cpu_loop.c
+++ b/linux-user/i386/cpu_loop.c
@@ -201,7 +201,6 @@ void cpu_loop(CPUX86State *env)
{
CPUState *cs = env_cpu(env);
int trapnr;
- abi_ulong pc;
abi_ulong ret;
for(;;) {
@@ -307,9 +306,8 @@ void cpu_loop(CPUX86State *env)
cpu_exec_step_atomic(cs);
break;
default:
- pc = env->segs[R_CS].base + env->eip;
- EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n",
- (long)pc, trapnr);
+ EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n",
+ trapnr);
abort();
}
process_pending_signals(env);
diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h
index f182d40190..071f7ca253 100644
--- a/linux-user/ioctls.h
+++ b/linux-user/ioctls.h
@@ -96,9 +96,7 @@
IOCTL(BLKROGET, IOC_R, MK_PTR(TYPE_INT))
IOCTL(BLKRRPART, 0, TYPE_NULL)
IOCTL(BLKGETSIZE, IOC_R, MK_PTR(TYPE_ULONG))
-#ifdef BLKGETSIZE64
IOCTL(BLKGETSIZE64, IOC_R, MK_PTR(TYPE_ULONGLONG))
-#endif
IOCTL(BLKFLSBUF, 0, TYPE_NULL)
IOCTL(BLKRASET, 0, TYPE_INT)
IOCTL(BLKRAGET, IOC_R, MK_PTR(TYPE_LONG))
@@ -107,33 +105,15 @@
IOCTL_SPECIAL(BLKPG, IOC_W, do_ioctl_blkpg,
MK_PTR(MK_STRUCT(STRUCT_blkpg_ioctl_arg)))
-#ifdef BLKDISCARD
IOCTL(BLKDISCARD, IOC_W, MK_PTR(MK_ARRAY(TYPE_ULONGLONG, 2)))
-#endif
-#ifdef BLKIOMIN
IOCTL(BLKIOMIN, IOC_R, MK_PTR(TYPE_INT))
-#endif
-#ifdef BLKIOOPT
IOCTL(BLKIOOPT, IOC_R, MK_PTR(TYPE_INT))
-#endif
-#ifdef BLKALIGNOFF
IOCTL(BLKALIGNOFF, IOC_R, MK_PTR(TYPE_INT))
-#endif
-#ifdef BLKPBSZGET
IOCTL(BLKPBSZGET, IOC_R, MK_PTR(TYPE_INT))
-#endif
-#ifdef BLKDISCARDZEROES
IOCTL(BLKDISCARDZEROES, IOC_R, MK_PTR(TYPE_INT))
-#endif
-#ifdef BLKSECDISCARD
IOCTL(BLKSECDISCARD, IOC_W, MK_PTR(MK_ARRAY(TYPE_ULONGLONG, 2)))
-#endif
-#ifdef BLKROTATIONAL
IOCTL(BLKROTATIONAL, IOC_R, MK_PTR(TYPE_SHORT))
-#endif
-#ifdef BLKZEROOUT
IOCTL(BLKZEROOUT, IOC_W, MK_PTR(MK_ARRAY(TYPE_ULONGLONG, 2)))
-#endif
IOCTL(FDMSGON, 0, TYPE_NULL)
IOCTL(FDMSGOFF, 0, TYPE_NULL)
@@ -149,17 +129,13 @@
IOCTL(FDTWADDLE, 0, TYPE_NULL)
IOCTL(FDEJECT, 0, TYPE_NULL)
-#ifdef FIBMAP
IOCTL(FIBMAP, IOC_W | IOC_R, MK_PTR(TYPE_LONG))
-#endif
#ifdef FICLONE
IOCTL(FICLONE, IOC_W, TYPE_INT)
IOCTL(FICLONERANGE, IOC_W, MK_PTR(MK_STRUCT(STRUCT_file_clone_range)))
#endif
-#ifdef FIGETBSZ
IOCTL(FIGETBSZ, IOC_R, MK_PTR(TYPE_LONG))
-#endif
#ifdef CONFIG_FIEMAP
IOCTL_SPECIAL(FS_IOC_FIEMAP, IOC_W | IOC_R, do_ioctl_fs_ioc_fiemap,
MK_PTR(MK_STRUCT(STRUCT_fiemap)))
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index 28f3bc85ed..10f5079331 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -182,7 +182,6 @@ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
}
page_set_flags(start, start + len, page_flags);
- tb_invalidate_phys_range(start, start + len);
ret = 0;
error:
@@ -662,7 +661,6 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
qemu_log_unlock(f);
}
}
- tb_invalidate_phys_range(start, start + len);
mmap_unlock();
return start;
fail:
@@ -766,7 +764,6 @@ int target_munmap(abi_ulong start, abi_ulong len)
if (ret == 0) {
page_set_flags(start, start + len, 0);
- tb_invalidate_phys_range(start, start + len);
}
mmap_unlock();
return ret;
@@ -856,7 +853,6 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
page_set_flags(new_addr, new_addr + new_size,
prot | PAGE_VALID | PAGE_RESET);
}
- tb_invalidate_phys_range(new_addr, new_addr + new_size);
mmap_unlock();
return new_addr;
}
diff --git a/linux-user/strace.c b/linux-user/strace.c
index 37bc96df9b..9ae5a812cd 100644
--- a/linux-user/strace.c
+++ b/linux-user/strace.c
@@ -1969,7 +1969,7 @@ print_execv(CPUArchState *cpu_env, const struct syscallname *name,
}
#endif
-#ifdef TARGET_NR_faccessat
+#if defined(TARGET_NR_faccessat) || defined(TARGET_NR_faccessat2)
static void
print_faccessat(CPUArchState *cpu_env, const struct syscallname *name,
abi_long arg0, abi_long arg1, abi_long arg2,
@@ -3383,10 +3383,10 @@ print_pidfd_send_signal(CPUArchState *cpu_env, const struct syscallname *name,
unlock_user(p, arg2, 0);
} else {
- print_pointer(arg2, 1);
+ print_pointer(arg2, 0);
}
- print_raw_param("%u", arg3, 0);
+ print_raw_param("%u", arg3, 1);
print_syscall_epilogue(name);
}
#endif
diff --git a/linux-user/strace.list b/linux-user/strace.list
index a87415bf3d..3df2184580 100644
--- a/linux-user/strace.list
+++ b/linux-user/strace.list
@@ -178,6 +178,9 @@
#ifdef TARGET_NR_faccessat
{ TARGET_NR_faccessat, "faccessat" , NULL, print_faccessat, NULL },
#endif
+#ifdef TARGET_NR_faccessat2
+{ TARGET_NR_faccessat2, "faccessat2" , NULL, print_faccessat, NULL },
+#endif
#ifdef TARGET_NR_fadvise64
{ TARGET_NR_fadvise64, "fadvise64" , NULL, NULL, NULL },
#endif
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 2e954d8dbd..8402c1399d 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -111,6 +111,31 @@
#define FS_IOC32_SETFLAGS _IOW('f', 2, int)
#define FS_IOC32_GETVERSION _IOR('v', 1, int)
#define FS_IOC32_SETVERSION _IOW('v', 2, int)
+
+#define BLKGETSIZE64 _IOR(0x12,114,size_t)
+#define BLKDISCARD _IO(0x12,119)
+#define BLKIOMIN _IO(0x12,120)
+#define BLKIOOPT _IO(0x12,121)
+#define BLKALIGNOFF _IO(0x12,122)
+#define BLKPBSZGET _IO(0x12,123)
+#define BLKDISCARDZEROES _IO(0x12,124)
+#define BLKSECDISCARD _IO(0x12,125)
+#define BLKROTATIONAL _IO(0x12,126)
+#define BLKZEROOUT _IO(0x12,127)
+
+#define FIBMAP _IO(0x00,1)
+#define FIGETBSZ _IO(0x00,2)
+
+struct file_clone_range {
+ __s64 src_fd;
+ __u64 src_offset;
+ __u64 src_length;
+ __u64 dest_offset;
+};
+
+#define FICLONE _IOW(0x94, 9, int)
+#define FICLONERANGE _IOW(0x94, 13, struct file_clone_range)
+
#else
#include <linux/fs.h>
#endif
@@ -158,6 +183,7 @@
#include "qapi/error.h"
#include "fd-trans.h"
#include "tcg/tcg.h"
+#include "cpu_loop-common.h"
#ifndef CLONE_IO
#define CLONE_IO 0x80000000 /* Clone io context */
@@ -8144,6 +8170,33 @@ static int is_proc_myself(const char *filename, const char *entry)
return 0;
}
+static void excp_dump_file(FILE *logfile, CPUArchState *env,
+ const char *fmt, int code)
+{
+ if (logfile) {
+ CPUState *cs = env_cpu(env);
+
+ fprintf(logfile, fmt, code);
+ fprintf(logfile, "Failing executable: %s\n", exec_path);
+ cpu_dump_state(cs, logfile, 0);
+ open_self_maps(env, fileno(logfile));
+ }
+}
+
+void target_exception_dump(CPUArchState *env, const char *fmt, int code)
+{
+ /* dump to console */
+ excp_dump_file(stderr, env, fmt, code);
+
+ /* dump to log file */
+ if (qemu_log_separate()) {
+ FILE *logfile = qemu_log_trylock();
+
+ excp_dump_file(logfile, env, fmt, code);
+ qemu_log_unlock(logfile);
+ }
+}
+
#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN || \
defined(TARGET_SPARC) || defined(TARGET_M68K) || defined(TARGET_HPPA)
static int is_proc(const char *filename, const char *entry)
@@ -8251,8 +8304,7 @@ static int do_openat(CPUArchState *cpu_env, int dirfd, const char *pathname, int
};
if (is_proc_myself(pathname, "exe")) {
- int execfd = qemu_getauxval(AT_EXECFD);
- return execfd ? execfd : safe_openat(dirfd, exec_path, flags, mode);
+ return safe_openat(dirfd, exec_path, flags, mode);
}
for (fake_open = fakes; fake_open->filename; fake_open++) {
@@ -8679,16 +8731,21 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
#if defined(__NR_pidfd_send_signal) && defined(TARGET_NR_pidfd_send_signal)
case TARGET_NR_pidfd_send_signal:
{
- siginfo_t uinfo;
+ siginfo_t uinfo, *puinfo;
- p = lock_user(VERIFY_READ, arg3, sizeof(target_siginfo_t), 1);
- if (!p) {
- return -TARGET_EFAULT;
+ if (arg3) {
+ p = lock_user(VERIFY_READ, arg3, sizeof(target_siginfo_t), 1);
+ if (!p) {
+ return -TARGET_EFAULT;
+ }
+ target_to_host_siginfo(&uinfo, p);
+ unlock_user(p, arg3, 0);
+ puinfo = &uinfo;
+ } else {
+ puinfo = NULL;
}
- target_to_host_siginfo(&uinfo, p);
- unlock_user(p, arg3, 0);
ret = get_errno(pidfd_send_signal(arg1, target_to_host_signal(arg2),
- &uinfo, arg4));
+ puinfo, arg4));
}
return ret;
#endif
@@ -8855,7 +8912,11 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
* before the execve completes and makes it the other
* program's problem.
*/
- ret = get_errno(safe_execve(p, argp, envp));
+ if (is_proc_myself(p, "exe")) {
+ ret = get_errno(safe_execve(exec_path, argp, envp));
+ } else {
+ ret = get_errno(safe_execve(p, argp, envp));
+ }
unlock_user(p, arg1, 0);
goto execve_end;
@@ -9110,6 +9171,15 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
unlock_user(p, arg2, 0);
return ret;
#endif
+#if defined(TARGET_NR_faccessat2)
+ case TARGET_NR_faccessat2:
+ if (!(p = lock_user_string(arg2))) {
+ return -TARGET_EFAULT;
+ }
+ ret = get_errno(faccessat(arg1, p, arg3, arg4));
+ unlock_user(p, arg2, 0);
+ return ret;
+#endif
#ifdef TARGET_NR_nice /* not on alpha */
case TARGET_NR_nice:
return get_errno(nice(arg1));
@@ -11793,7 +11863,7 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
return -host_to_target_errno(ret);
#endif
-#if TARGET_ABI_BITS == 32
+#if TARGET_ABI_BITS == 32 && !defined(TARGET_ABI_MIPSN32)
#ifdef TARGET_NR_fadvise64_64
case TARGET_NR_fadvise64_64:
@@ -11920,7 +11990,7 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
return get_errno(sys_gettid());
#ifdef TARGET_NR_readahead
case TARGET_NR_readahead:
-#if TARGET_ABI_BITS == 32
+#if TARGET_ABI_BITS == 32 && !defined(TARGET_ABI_MIPSN32)
if (regpairs_aligned(cpu_env, num)) {
arg2 = arg3;
arg3 = arg4;
@@ -12612,7 +12682,7 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
#endif /* CONFIG_EVENTFD */
#if defined(CONFIG_FALLOCATE) && defined(TARGET_NR_fallocate)
case TARGET_NR_fallocate:
-#if TARGET_ABI_BITS == 32
+#if TARGET_ABI_BITS == 32 && !defined(TARGET_ABI_MIPSN32)
ret = get_errno(fallocate(arg1, arg2, target_offset64(arg3, arg4),
target_offset64(arg5, arg6)));
#else
@@ -12623,7 +12693,7 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
#if defined(CONFIG_SYNC_FILE_RANGE)
#if defined(TARGET_NR_sync_file_range)
case TARGET_NR_sync_file_range:
-#if TARGET_ABI_BITS == 32
+#if TARGET_ABI_BITS == 32 && !defined(TARGET_ABI_MIPSN32)
#if defined(TARGET_MIPS)
ret = get_errno(sync_file_range(arg1, target_offset64(arg3, arg4),
target_offset64(arg5, arg6), arg7));
@@ -12645,7 +12715,7 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
case TARGET_NR_arm_sync_file_range:
#endif
/* This is like sync_file_range but the arguments are reordered */
-#if TARGET_ABI_BITS == 32
+#if TARGET_ABI_BITS == 32 && !defined(TARGET_ABI_MIPSN32)
ret = get_errno(sync_file_range(arg1, target_offset64(arg3, arg4),
target_offset64(arg5, arg6), arg2));
#else
diff --git a/meson.build b/meson.build
index 5f114c89d9..f06d465279 100644
--- a/meson.build
+++ b/meson.build
@@ -75,7 +75,7 @@ have_tools = get_option('tools') \
.allowed()
have_ga = get_option('guest_agent') \
.disable_auto_if(not have_system and not have_tools) \
- .require(targetos in ['sunos', 'linux', 'windows'],
+ .require(targetos in ['sunos', 'linux', 'windows', 'freebsd'],
error_message: 'unsupported OS for QEMU guest agent') \
.allowed()
have_block = have_system or have_tools
@@ -777,6 +777,13 @@ if not get_option('virglrenderer').auto() or have_system or have_vhost_user_gpu
required: get_option('virglrenderer'),
kwargs: static_kwargs)
endif
+blkio = not_found
+if not get_option('blkio').auto() or have_block
+ blkio = dependency('blkio',
+ method: 'pkg-config',
+ required: get_option('blkio'),
+ kwargs: static_kwargs)
+endif
curl = not_found
if not get_option('curl').auto() or have_block
curl = dependency('libcurl', version: '>=7.29.0',
@@ -1821,6 +1828,7 @@ config_host_data.set('CONFIG_LIBUDEV', libudev.found())
config_host_data.set('CONFIG_LZO', lzo.found())
config_host_data.set('CONFIG_MPATH', mpathpersist.found())
config_host_data.set('CONFIG_MPATH_NEW_API', mpathpersist_new_api)
+config_host_data.set('CONFIG_BLKIO', blkio.found())
config_host_data.set('CONFIG_CURL', curl.found())
config_host_data.set('CONFIG_CURSES', curses.found())
config_host_data.set('CONFIG_GBM', gbm.found())
@@ -3878,6 +3886,7 @@ summary_info += {'PAM': pam}
summary_info += {'iconv support': iconv}
summary_info += {'curses support': curses}
summary_info += {'virgl support': virgl}
+summary_info += {'blkio support': blkio}
summary_info += {'curl support': curl}
summary_info += {'Multipath support': mpathpersist}
summary_info += {'PNG support': png}
diff --git a/meson_options.txt b/meson_options.txt
index 79c6af18d5..66128178bf 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -117,6 +117,8 @@ option('bzip2', type : 'feature', value : 'auto',
description: 'bzip2 support for DMG images')
option('cap_ng', type : 'feature', value : 'auto',
description: 'cap_ng support')
+option('blkio', type : 'feature', value : 'auto',
+ description: 'libblkio block device driver')
option('bpf', type : 'feature', value : 'auto',
description: 'eBPF support')
option('cocoa', type : 'feature', value : 'auto',
diff --git a/migration/savevm.c b/migration/savevm.c
index 48e85c052c..a0cdb714f7 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -3058,7 +3058,7 @@ bool load_snapshot(const char *name, const char *vmstate,
goto err_drain;
}
- qemu_system_reset(SHUTDOWN_CAUSE_NONE);
+ qemu_system_reset(SHUTDOWN_CAUSE_SNAPSHOT_LOAD);
mis->from_src_file = f;
if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 882b266532..cb5079e645 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2951,11 +2951,18 @@
'file', 'snapshot-access', 'ftp', 'ftps', 'gluster',
{'name': 'host_cdrom', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
{'name': 'host_device', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
- 'http', 'https', 'iscsi',
- 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
- 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
+ 'http', 'https',
+ { 'name': 'io_uring', 'if': 'CONFIG_BLKIO' },
+ 'iscsi',
+ 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme',
+ { 'name': 'nvme-io_uring', 'if': 'CONFIG_BLKIO' },
+ 'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
+ 'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+ 'ssh', 'throttle', 'vdi', 'vhdx',
+ { 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
+ { 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
+ 'vmdk', 'vpc', 'vvfat' ] }
##
# @BlockdevOptionsFile:
@@ -3679,6 +3686,58 @@
'*logfile': 'str' } }
##
+# @BlockdevOptionsIoUring:
+#
+# Driver specific block device options for the io_uring backend.
+#
+# @filename: path to the image file
+#
+# Since: 7.2
+##
+{ 'struct': 'BlockdevOptionsIoUring',
+ 'data': { 'filename': 'str' },
+ 'if': 'CONFIG_BLKIO' }
+
+##
+# @BlockdevOptionsNvmeIoUring:
+#
+# Driver specific block device options for the nvme-io_uring backend.
+#
+# @filename: path to the image file
+#
+# Since: 7.2
+##
+{ 'struct': 'BlockdevOptionsNvmeIoUring',
+ 'data': { 'filename': 'str' },
+ 'if': 'CONFIG_BLKIO' }
+
+##
+# @BlockdevOptionsVirtioBlkVhostUser:
+#
+# Driver specific block device options for the virtio-blk-vhost-user backend.
+#
+# @path: path to the vhost-user UNIX domain socket.
+#
+# Since: 7.2
+##
+{ 'struct': 'BlockdevOptionsVirtioBlkVhostUser',
+ 'data': { 'path': 'str' },
+ 'if': 'CONFIG_BLKIO' }
+
+##
+# @BlockdevOptionsVirtioBlkVhostVdpa:
+#
+# Driver specific block device options for the virtio-blk-vhost-vdpa backend.
+#
+# @path: path to the vhost-vdpa character device.
+#
+# Since: 7.2
+##
+{ 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa',
+ 'data': { 'path': 'str' },
+ 'if': 'CONFIG_BLKIO' }
+
+##
# @IscsiTransport:
#
# An enumeration of libiscsi transport types
@@ -4305,6 +4364,8 @@
'if': 'HAVE_HOST_BLOCK_DEVICE' },
'http': 'BlockdevOptionsCurlHttp',
'https': 'BlockdevOptionsCurlHttps',
+ 'io_uring': { 'type': 'BlockdevOptionsIoUring',
+ 'if': 'CONFIG_BLKIO' },
'iscsi': 'BlockdevOptionsIscsi',
'luks': 'BlockdevOptionsLUKS',
'nbd': 'BlockdevOptionsNbd',
@@ -4312,6 +4373,8 @@
'null-aio': 'BlockdevOptionsNull',
'null-co': 'BlockdevOptionsNull',
'nvme': 'BlockdevOptionsNVMe',
+ 'nvme-io_uring': { 'type': 'BlockdevOptionsNvmeIoUring',
+ 'if': 'CONFIG_BLKIO' },
'parallels': 'BlockdevOptionsGenericFormat',
'preallocate':'BlockdevOptionsPreallocate',
'qcow2': 'BlockdevOptionsQcow2',
@@ -4327,6 +4390,12 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'virtio-blk-vhost-user':
+ { 'type': 'BlockdevOptionsVirtioBlkVhostUser',
+ 'if': 'CONFIG_BLKIO' },
+ 'virtio-blk-vhost-vdpa':
+ { 'type': 'BlockdevOptionsVirtioBlkVhostVdpa',
+ 'if': 'CONFIG_BLKIO' },
'vmdk': 'BlockdevOptionsGenericCOWFormat',
'vpc': 'BlockdevOptionsGenericFormat',
'vvfat': 'BlockdevOptionsVVFAT'
diff --git a/qapi/run-state.json b/qapi/run-state.json
index 49989d30e6..419c188dd1 100644
--- a/qapi/run-state.json
+++ b/qapi/run-state.json
@@ -86,12 +86,16 @@
# ignores --no-reboot. This is useful for sanitizing
# hypercalls on s390 that are used during kexec/kdump/boot
#
+# @snapshot-load: A snapshot is being loaded by the record & replay
+# subsystem. This value is used only within QEMU. It
+# doesn't occur in QMP. (since 7.2)
+#
##
{ 'enum': 'ShutdownCause',
# Beware, shutdown_caused_by_guest() depends on enumeration order
'data': [ 'none', 'host-error', 'host-qmp-quit', 'host-qmp-system-reset',
'host-signal', 'host-ui', 'guest-shutdown', 'guest-reset',
- 'guest-panic', 'subsystem-reset'] }
+ 'guest-panic', 'subsystem-reset', 'snapshot-load'] }
##
# @StatusInfo:
diff --git a/qemu-img.c b/qemu-img.c
index ace3adf8ae..a3b64c88af 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4371,7 +4371,7 @@ static int img_bench(int argc, char **argv)
struct timeval t1, t2;
int i;
bool force_share = false;
- size_t buf_size;
+ size_t buf_size = 0;
for (;;) {
static const struct option long_options[] = {
@@ -4570,7 +4570,7 @@ static int img_bench(int argc, char **argv)
data.buf = blk_blockalign(blk, buf_size);
memset(data.buf, pattern, data.nrreq * data.bufsize);
- blk_register_buf(blk, data.buf, buf_size);
+ blk_register_buf(blk, data.buf, buf_size, &error_fatal);
data.qiov = g_new(QEMUIOVector, data.nrreq);
for (i = 0; i < data.nrreq; i++) {
@@ -4593,7 +4593,7 @@ static int img_bench(int argc, char **argv)
out:
if (data.buf) {
- blk_unregister_buf(blk, data.buf);
+ blk_unregister_buf(blk, data.buf, buf_size);
}
qemu_vfree(data.buf);
blk_unref(blk);
diff --git a/qga/channel-posix.c b/qga/channel-posix.c
index 6796a02cff..0c5175d957 100644
--- a/qga/channel-posix.c
+++ b/qga/channel-posix.c
@@ -138,7 +138,7 @@ static gboolean ga_channel_open(GAChannel *c, const gchar *path,
0
);
if (fd == -1) {
- error_setg_errno(errp, errno, "error opening channel");
+ error_setg_errno(errp, errno, "error opening channel '%s'", path);
return false;
}
#ifdef CONFIG_SOLARIS
@@ -149,6 +149,25 @@ static gboolean ga_channel_open(GAChannel *c, const gchar *path,
return false;
}
#endif
+#ifdef __FreeBSD__
+ /*
+ * In the default state channel sends echo of every command to a
+ * client. The client programm doesn't expect this and raises an
+ * error. Suppress echo by resetting ECHO terminal flag.
+ */
+ struct termios tio;
+ if (tcgetattr(fd, &tio) < 0) {
+ error_setg_errno(errp, errno, "error getting channel termios attrs");
+ close(fd);
+ return false;
+ }
+ tio.c_lflag &= ~ECHO;
+ if (tcsetattr(fd, TCSAFLUSH, &tio) < 0) {
+ error_setg_errno(errp, errno, "error setting channel termios attrs");
+ close(fd);
+ return false;
+ }
+#endif /* __FreeBSD__ */
ret = ga_channel_client_add(c, fd);
if (ret) {
error_setg(errp, "error adding channel to main loop");
@@ -163,7 +182,7 @@ static gboolean ga_channel_open(GAChannel *c, const gchar *path,
assert(fd < 0);
fd = qga_open_cloexec(path, O_RDWR | O_NOCTTY | O_NONBLOCK, 0);
if (fd == -1) {
- error_setg_errno(errp, errno, "error opening channel");
+ error_setg_errno(errp, errno, "error opening channel '%s'", path);
return false;
}
tcgetattr(fd, &tio);
diff --git a/qga/commands-bsd.c b/qga/commands-bsd.c
new file mode 100644
index 0000000000..15cade2d4c
--- /dev/null
+++ b/qga/commands-bsd.c
@@ -0,0 +1,200 @@
+/*
+ * QEMU Guest Agent BSD-specific command implementations
+ *
+ * Copyright (c) Virtuozzo International GmbH.
+ *
+ * Authors:
+ * Alexander Ivanov <alexander.ivanov@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qga-qapi-commands.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/error.h"
+#include "qemu/queue.h"
+#include "commands-common.h"
+#include <sys/ioctl.h>
+#include <sys/param.h>
+#include <sys/ucred.h>
+#include <sys/mount.h>
+#include <net/if_dl.h>
+#include <net/ethernet.h>
+#include <paths.h>
+
+#if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
+bool build_fs_mount_list(FsMountList *mounts, Error **errp)
+{
+ FsMount *mount;
+ struct statfs *mntbuf, *mntp;
+ struct stat statbuf;
+ int i, count, ret;
+
+ count = getmntinfo(&mntbuf, MNT_NOWAIT);
+ if (count == 0) {
+ error_setg_errno(errp, errno, "getmntinfo failed");
+ return false;
+ }
+
+ for (i = 0; i < count; i++) {
+ mntp = &mntbuf[i];
+ ret = stat(mntp->f_mntonname, &statbuf);
+ if (ret != 0) {
+ error_setg_errno(errp, errno, "stat failed on %s",
+ mntp->f_mntonname);
+ return false;
+ }
+
+ mount = g_new0(FsMount, 1);
+
+ mount->dirname = g_strdup(mntp->f_mntonname);
+ mount->devtype = g_strdup(mntp->f_fstypename);
+ mount->devmajor = major(mount->dev);
+ mount->devminor = minor(mount->dev);
+ mount->fsid = mntp->f_fsid;
+ mount->dev = statbuf.st_dev;
+
+ QTAILQ_INSERT_TAIL(mounts, mount, next);
+ }
+ return true;
+}
+#endif /* CONFIG_FSFREEZE || CONFIG_FSTRIM */
+
+#if defined(CONFIG_FSFREEZE)
+static int ufssuspend_fd = -1;
+static int ufssuspend_cnt;
+
+int64_t qmp_guest_fsfreeze_do_freeze_list(bool has_mountpoints,
+ strList *mountpoints,
+ FsMountList mounts,
+ Error **errp)
+{
+ int ret;
+ strList *list;
+ struct FsMount *mount;
+
+ if (ufssuspend_fd != -1) {
+ error_setg(errp, "filesystems have already frozen");
+ return -1;
+ }
+
+ ufssuspend_cnt = 0;
+ ufssuspend_fd = qemu_open(_PATH_UFSSUSPEND, O_RDWR, errp);
+ if (ufssuspend_fd == -1) {
+ return -1;
+ }
+
+ QTAILQ_FOREACH_REVERSE(mount, &mounts, next) {
+ /*
+ * To issue fsfreeze in the reverse order of mounts, check if the
+ * mount is listed in the list here
+ */
+ if (has_mountpoints) {
+ for (list = mountpoints; list; list = list->next) {
+ if (g_str_equal(list->value, mount->dirname)) {
+ break;
+ }
+ }
+ if (!list) {
+ continue;
+ }
+ }
+
+ /* Only UFS supports suspend */
+ if (!g_str_equal(mount->devtype, "ufs")) {
+ continue;
+ }
+
+ ret = ioctl(ufssuspend_fd, UFSSUSPEND, &mount->fsid);
+ if (ret == -1) {
+ /*
+ * ioctl returns EBUSY for all the FS except the first one
+ * that was suspended
+ */
+ if (errno == EBUSY) {
+ continue;
+ }
+ error_setg_errno(errp, errno, "failed to freeze %s",
+ mount->dirname);
+ goto error;
+ }
+ ufssuspend_cnt++;
+ }
+ return ufssuspend_cnt;
+error:
+ close(ufssuspend_fd);
+ ufssuspend_fd = -1;
+ return -1;
+
+}
+
+/*
+ * We don't need to call UFSRESUME ioctl because all the frozen FS
+ * are thawed on /dev/ufssuspend closing.
+ */
+int qmp_guest_fsfreeze_do_thaw(Error **errp)
+{
+ int ret = ufssuspend_cnt;
+ ufssuspend_cnt = 0;
+ if (ufssuspend_fd != -1) {
+ close(ufssuspend_fd);
+ ufssuspend_fd = -1;
+ }
+ return ret;
+}
+
+GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
+{
+ error_setg(errp, QERR_UNSUPPORTED);
+ return NULL;
+}
+
+GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
+{
+ error_setg(errp, QERR_UNSUPPORTED);
+ return NULL;
+}
+
+GuestDiskStatsInfoList *qmp_guest_get_diskstats(Error **errp)
+{
+ error_setg(errp, QERR_UNSUPPORTED);
+ return NULL;
+}
+
+GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
+{
+ error_setg(errp, QERR_UNSUPPORTED);
+ return NULL;
+}
+#endif /* CONFIG_FSFREEZE */
+
+#ifdef HAVE_GETIFADDRS
+/*
+ * Fill "buf" with MAC address by ifaddrs. Pointer buf must point to a
+ * buffer with ETHER_ADDR_LEN length at least.
+ *
+ * Returns false in case of an error, otherwise true. "obtained" arguument
+ * is true if a MAC address was obtained successful, otherwise false.
+ */
+bool guest_get_hw_addr(struct ifaddrs *ifa, unsigned char *buf,
+ bool *obtained, Error **errp)
+{
+ struct sockaddr_dl *sdp;
+
+ *obtained = false;
+
+ if (ifa->ifa_addr->sa_family != AF_LINK) {
+ /* We can get HW address only for AF_LINK family. */
+ g_debug("failed to get MAC address of %s", ifa->ifa_name);
+ return true;
+ }
+
+ sdp = (struct sockaddr_dl *)ifa->ifa_addr;
+ memcpy(buf, sdp->sdl_data + sdp->sdl_nlen, ETHER_ADDR_LEN);
+ *obtained = true;
+
+ return true;
+}
+#endif /* HAVE_GETIFADDRS */
diff --git a/qga/commands-common.h b/qga/commands-common.h
index d0e4a9696f..8c1c56aac9 100644
--- a/qga/commands-common.h
+++ b/qga/commands-common.h
@@ -10,6 +10,57 @@
#define QGA_COMMANDS_COMMON_H
#include "qga-qapi-types.h"
+#include "guest-agent-core.h"
+#include "qemu/queue.h"
+
+#if defined(__linux__)
+#include <linux/fs.h>
+#ifdef FIFREEZE
+#define CONFIG_FSFREEZE
+#endif
+#ifdef FITRIM
+#define CONFIG_FSTRIM
+#endif
+#endif /* __linux__ */
+
+#ifdef __FreeBSD__
+#include <ufs/ffs/fs.h>
+#ifdef UFSSUSPEND
+#define CONFIG_FSFREEZE
+#endif
+#endif /* __FreeBSD__ */
+
+#if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
+typedef struct FsMount {
+ char *dirname;
+ char *devtype;
+ unsigned int devmajor, devminor;
+#if defined(__FreeBSD__)
+ dev_t dev;
+ fsid_t fsid;
+#endif
+ QTAILQ_ENTRY(FsMount) next;
+} FsMount;
+
+typedef QTAILQ_HEAD(FsMountList, FsMount) FsMountList;
+
+bool build_fs_mount_list(FsMountList *mounts, Error **errp);
+void free_fs_mount_list(FsMountList *mounts);
+#endif /* CONFIG_FSFREEZE || CONFIG_FSTRIM */
+
+#if defined(CONFIG_FSFREEZE)
+int64_t qmp_guest_fsfreeze_do_freeze_list(bool has_mountpoints,
+ strList *mountpoints,
+ FsMountList mounts,
+ Error **errp);
+int qmp_guest_fsfreeze_do_thaw(Error **errp);
+#endif /* CONFIG_FSFREEZE */
+
+#ifdef HAVE_GETIFADDRS
+#include <ifaddrs.h>
+bool guest_get_hw_addr(struct ifaddrs *ifa, unsigned char *buf,
+ bool *obtained, Error **errp);
+#endif
typedef struct GuestFileHandle GuestFileHandle;
diff --git a/qga/commands-linux.c b/qga/commands-linux.c
new file mode 100644
index 0000000000..214e408fcd
--- /dev/null
+++ b/qga/commands-linux.c
@@ -0,0 +1,286 @@
+/*
+ * QEMU Guest Agent Linux-specific command implementations
+ *
+ * Copyright IBM Corp. 2011
+ *
+ * Authors:
+ * Michael Roth <mdroth@linux.vnet.ibm.com>
+ * Michal Privoznik <mprivozn@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "commands-common.h"
+#include "cutils.h"
+#include <mntent.h>
+#include <sys/ioctl.h>
+
+#if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
+static int dev_major_minor(const char *devpath,
+ unsigned int *devmajor, unsigned int *devminor)
+{
+ struct stat st;
+
+ *devmajor = 0;
+ *devminor = 0;
+
+ if (stat(devpath, &st) < 0) {
+ slog("failed to stat device file '%s': %s", devpath, strerror(errno));
+ return -1;
+ }
+ if (S_ISDIR(st.st_mode)) {
+ /* It is bind mount */
+ return -2;
+ }
+ if (S_ISBLK(st.st_mode)) {
+ *devmajor = major(st.st_rdev);
+ *devminor = minor(st.st_rdev);
+ return 0;
+ }
+ return -1;
+}
+
+static bool build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp)
+{
+ struct mntent *ment;
+ FsMount *mount;
+ char const *mtab = "/proc/self/mounts";
+ FILE *fp;
+ unsigned int devmajor, devminor;
+
+ fp = setmntent(mtab, "r");
+ if (!fp) {
+ error_setg(errp, "failed to open mtab file: '%s'", mtab);
+ return false;
+ }
+
+ while ((ment = getmntent(fp))) {
+ /*
+ * An entry which device name doesn't start with a '/' is
+ * either a dummy file system or a network file system.
+ * Add special handling for smbfs and cifs as is done by
+ * coreutils as well.
+ */
+ if ((ment->mnt_fsname[0] != '/') ||
+ (strcmp(ment->mnt_type, "smbfs") == 0) ||
+ (strcmp(ment->mnt_type, "cifs") == 0)) {
+ continue;
+ }
+ if (dev_major_minor(ment->mnt_fsname, &devmajor, &devminor) == -2) {
+ /* Skip bind mounts */
+ continue;
+ }
+
+ mount = g_new0(FsMount, 1);
+ mount->dirname = g_strdup(ment->mnt_dir);
+ mount->devtype = g_strdup(ment->mnt_type);
+ mount->devmajor = devmajor;
+ mount->devminor = devminor;
+
+ QTAILQ_INSERT_TAIL(mounts, mount, next);
+ }
+
+ endmntent(fp);
+ return true;
+}
+
+static void decode_mntname(char *name, int len)
+{
+ int i, j = 0;
+ for (i = 0; i <= len; i++) {
+ if (name[i] != '\\') {
+ name[j++] = name[i];
+ } else if (name[i + 1] == '\\') {
+ name[j++] = '\\';
+ i++;
+ } else if (name[i + 1] >= '0' && name[i + 1] <= '3' &&
+ name[i + 2] >= '0' && name[i + 2] <= '7' &&
+ name[i + 3] >= '0' && name[i + 3] <= '7') {
+ name[j++] = (name[i + 1] - '0') * 64 +
+ (name[i + 2] - '0') * 8 +
+ (name[i + 3] - '0');
+ i += 3;
+ } else {
+ name[j++] = name[i];
+ }
+ }
+}
+
+/*
+ * Walk the mount table and build a list of local file systems
+ */
+bool build_fs_mount_list(FsMountList *mounts, Error **errp)
+{
+ FsMount *mount;
+ char const *mountinfo = "/proc/self/mountinfo";
+ FILE *fp;
+ char *line = NULL, *dash;
+ size_t n;
+ char check;
+ unsigned int devmajor, devminor;
+ int ret, dir_s, dir_e, type_s, type_e, dev_s, dev_e;
+
+ fp = fopen(mountinfo, "r");
+ if (!fp) {
+ return build_fs_mount_list_from_mtab(mounts, errp);
+ }
+
+ while (getline(&line, &n, fp) != -1) {
+ ret = sscanf(line, "%*u %*u %u:%u %*s %n%*s%n%c",
+ &devmajor, &devminor, &dir_s, &dir_e, &check);
+ if (ret < 3) {
+ continue;
+ }
+ dash = strstr(line + dir_e, " - ");
+ if (!dash) {
+ continue;
+ }
+ ret = sscanf(dash, " - %n%*s%n %n%*s%n%c",
+ &type_s, &type_e, &dev_s, &dev_e, &check);
+ if (ret < 1) {
+ continue;
+ }
+ line[dir_e] = 0;
+ dash[type_e] = 0;
+ dash[dev_e] = 0;
+ decode_mntname(line + dir_s, dir_e - dir_s);
+ decode_mntname(dash + dev_s, dev_e - dev_s);
+ if (devmajor == 0) {
+ /* btrfs reports major number = 0 */
+ if (strcmp("btrfs", dash + type_s) != 0 ||
+ dev_major_minor(dash + dev_s, &devmajor, &devminor) < 0) {
+ continue;
+ }
+ }
+
+ mount = g_new0(FsMount, 1);
+ mount->dirname = g_strdup(line + dir_s);
+ mount->devtype = g_strdup(dash + type_s);
+ mount->devmajor = devmajor;
+ mount->devminor = devminor;
+
+ QTAILQ_INSERT_TAIL(mounts, mount, next);
+ }
+ free(line);
+
+ fclose(fp);
+ return true;
+}
+#endif /* CONFIG_FSFREEZE || CONFIG_FSTRIM */
+
+#ifdef CONFIG_FSFREEZE
+/*
+ * Walk list of mounted file systems in the guest, and freeze the ones which
+ * are real local file systems.
+ */
+int64_t qmp_guest_fsfreeze_do_freeze_list(bool has_mountpoints,
+ strList *mountpoints,
+ FsMountList mounts,
+ Error **errp)
+{
+ struct FsMount *mount;
+ strList *list;
+ int fd, ret, i = 0;
+
+ QTAILQ_FOREACH_REVERSE(mount, &mounts, next) {
+ /* To issue fsfreeze in the reverse order of mounts, check if the
+ * mount is listed in the list here */
+ if (has_mountpoints) {
+ for (list = mountpoints; list; list = list->next) {
+ if (strcmp(list->value, mount->dirname) == 0) {
+ break;
+ }
+ }
+ if (!list) {
+ continue;
+ }
+ }
+
+ fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
+ if (fd == -1) {
+ error_setg_errno(errp, errno, "failed to open %s", mount->dirname);
+ return -1;
+ }
+
+ /* we try to cull filesystems we know won't work in advance, but other
+ * filesystems may not implement fsfreeze for less obvious reasons.
+ * these will report EOPNOTSUPP. we simply ignore these when tallying
+ * the number of frozen filesystems.
+ * if a filesystem is mounted more than once (aka bind mount) a
+ * consecutive attempt to freeze an already frozen filesystem will
+ * return EBUSY.
+ *
+ * any other error means a failure to freeze a filesystem we
+ * expect to be freezable, so return an error in those cases
+ * and return system to thawed state.
+ */
+ ret = ioctl(fd, FIFREEZE);
+ if (ret == -1) {
+ if (errno != EOPNOTSUPP && errno != EBUSY) {
+ error_setg_errno(errp, errno, "failed to freeze %s",
+ mount->dirname);
+ close(fd);
+ return -1;
+ }
+ } else {
+ i++;
+ }
+ close(fd);
+ }
+ return i;
+}
+
+int qmp_guest_fsfreeze_do_thaw(Error **errp)
+{
+ int ret;
+ FsMountList mounts;
+ FsMount *mount;
+ int fd, i = 0, logged;
+ Error *local_err = NULL;
+
+ QTAILQ_INIT(&mounts);
+ if (!build_fs_mount_list(&mounts, &local_err)) {
+ error_propagate(errp, local_err);
+ return -1;
+ }
+
+ QTAILQ_FOREACH(mount, &mounts, next) {
+ logged = false;
+ fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
+ if (fd == -1) {
+ continue;
+ }
+ /* we have no way of knowing whether a filesystem was actually unfrozen
+ * as a result of a successful call to FITHAW, only that if an error
+ * was returned the filesystem was *not* unfrozen by that particular
+ * call.
+ *
+ * since multiple preceding FIFREEZEs require multiple calls to FITHAW
+ * to unfreeze, continuing issuing FITHAW until an error is returned,
+ * in which case either the filesystem is in an unfreezable state, or,
+ * more likely, it was thawed previously (and remains so afterward).
+ *
+ * also, since the most recent successful call is the one that did
+ * the actual unfreeze, we can use this to provide an accurate count
+ * of the number of filesystems unfrozen by guest-fsfreeze-thaw, which
+ * may * be useful for determining whether a filesystem was unfrozen
+ * during the freeze/thaw phase by a process other than qemu-ga.
+ */
+ do {
+ ret = ioctl(fd, FITHAW);
+ if (ret == 0 && !logged) {
+ i++;
+ logged = true;
+ }
+ } while (ret == 0);
+ close(fd);
+ }
+
+ free_fs_mount_list(&mounts);
+
+ return i;
+}
+#endif /* CONFIG_FSFREEZE */
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index eea819cff0..32493d6383 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -16,11 +16,9 @@
#include <sys/utsname.h>
#include <sys/wait.h>
#include <dirent.h>
-#include "guest-agent-core.h"
#include "qga-qapi-commands.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
-#include "qemu/queue.h"
#include "qemu/host-utils.h"
#include "qemu/sockets.h"
#include "qemu/base64.h"
@@ -35,28 +33,20 @@
#if defined(__linux__)
#include <mntent.h>
-#include <linux/fs.h>
#include <sys/statvfs.h>
#include <linux/nvme_ioctl.h>
#ifdef CONFIG_LIBUDEV
#include <libudev.h>
#endif
-
-#ifdef FIFREEZE
-#define CONFIG_FSFREEZE
-#endif
-#ifdef FITRIM
-#define CONFIG_FSTRIM
-#endif
#endif
#ifdef HAVE_GETIFADDRS
#include <arpa/inet.h>
#include <sys/socket.h>
#include <net/if.h>
+#include <net/ethernet.h>
#include <sys/types.h>
-#include <ifaddrs.h>
#ifdef CONFIG_SOLARIS
#include <sys/sockio.h>
#endif
@@ -92,6 +82,10 @@ void qmp_guest_shutdown(bool has_mode, const char *mode, Error **errp)
const char *powerdown_flag = "-i5";
const char *halt_flag = "-i0";
const char *reboot_flag = "-i6";
+#elif defined(CONFIG_BSD)
+ const char *powerdown_flag = "-p";
+ const char *halt_flag = "-h";
+ const char *reboot_flag = "-r";
#else
const char *powerdown_flag = "-P";
const char *halt_flag = "-H";
@@ -122,6 +116,9 @@ void qmp_guest_shutdown(bool has_mode, const char *mode, Error **errp)
#ifdef CONFIG_SOLARIS
execl("/sbin/shutdown", "shutdown", shutdown_flag, "-g0", "-y",
"hypervisor initiated shutdown", (char *)NULL);
+#elif defined(CONFIG_BSD)
+ execl("/sbin/shutdown", "shutdown", shutdown_flag, "+0",
+ "hypervisor initiated shutdown", (char *)NULL);
#else
execl("/sbin/shutdown", "shutdown", "-h", shutdown_flag, "+0",
"hypervisor initiated shutdown", (char *)NULL);
@@ -617,20 +614,8 @@ void qmp_guest_file_flush(int64_t handle, Error **errp)
}
}
-/* linux-specific implementations. avoid this if at all possible. */
-#if defined(__linux__)
-
#if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
-typedef struct FsMount {
- char *dirname;
- char *devtype;
- unsigned int devmajor, devminor;
- QTAILQ_ENTRY(FsMount) next;
-} FsMount;
-
-typedef QTAILQ_HEAD(FsMountList, FsMount) FsMountList;
-
-static void free_fs_mount_list(FsMountList *mounts)
+void free_fs_mount_list(FsMountList *mounts)
{
FsMount *mount, *temp;
@@ -645,159 +630,158 @@ static void free_fs_mount_list(FsMountList *mounts)
g_free(mount);
}
}
+#endif
+
+#if defined(CONFIG_FSFREEZE)
+typedef enum {
+ FSFREEZE_HOOK_THAW = 0,
+ FSFREEZE_HOOK_FREEZE,
+} FsfreezeHookArg;
+
+static const char *fsfreeze_hook_arg_string[] = {
+ "thaw",
+ "freeze",
+};
-static int dev_major_minor(const char *devpath,
- unsigned int *devmajor, unsigned int *devminor)
+static void execute_fsfreeze_hook(FsfreezeHookArg arg, Error **errp)
{
- struct stat st;
+ int status;
+ pid_t pid;
+ const char *hook;
+ const char *arg_str = fsfreeze_hook_arg_string[arg];
+ Error *local_err = NULL;
+
+ hook = ga_fsfreeze_hook(ga_state);
+ if (!hook) {
+ return;
+ }
+ if (access(hook, X_OK) != 0) {
+ error_setg_errno(errp, errno, "can't access fsfreeze hook '%s'", hook);
+ return;
+ }
- *devmajor = 0;
- *devminor = 0;
+ slog("executing fsfreeze hook with arg '%s'", arg_str);
+ pid = fork();
+ if (pid == 0) {
+ setsid();
+ reopen_fd_to_null(0);
+ reopen_fd_to_null(1);
+ reopen_fd_to_null(2);
- if (stat(devpath, &st) < 0) {
- slog("failed to stat device file '%s': %s", devpath, strerror(errno));
- return -1;
+ execl(hook, hook, arg_str, NULL);
+ _exit(EXIT_FAILURE);
+ } else if (pid < 0) {
+ error_setg_errno(errp, errno, "failed to create child process");
+ return;
}
- if (S_ISDIR(st.st_mode)) {
- /* It is bind mount */
- return -2;
+
+ ga_wait_child(pid, &status, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
}
- if (S_ISBLK(st.st_mode)) {
- *devmajor = major(st.st_rdev);
- *devminor = minor(st.st_rdev);
- return 0;
+
+ if (!WIFEXITED(status)) {
+ error_setg(errp, "fsfreeze hook has terminated abnormally");
+ return;
+ }
+
+ status = WEXITSTATUS(status);
+ if (status) {
+ error_setg(errp, "fsfreeze hook has failed with status %d", status);
+ return;
}
- return -1;
}
/*
- * Walk the mount table and build a list of local file systems
+ * Return status of freeze/thaw
*/
-static bool build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp)
+GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **errp)
{
- struct mntent *ment;
- FsMount *mount;
- char const *mtab = "/proc/self/mounts";
- FILE *fp;
- unsigned int devmajor, devminor;
-
- fp = setmntent(mtab, "r");
- if (!fp) {
- error_setg(errp, "failed to open mtab file: '%s'", mtab);
- return false;
+ if (ga_is_frozen(ga_state)) {
+ return GUEST_FSFREEZE_STATUS_FROZEN;
}
- while ((ment = getmntent(fp))) {
- /*
- * An entry which device name doesn't start with a '/' is
- * either a dummy file system or a network file system.
- * Add special handling for smbfs and cifs as is done by
- * coreutils as well.
- */
- if ((ment->mnt_fsname[0] != '/') ||
- (strcmp(ment->mnt_type, "smbfs") == 0) ||
- (strcmp(ment->mnt_type, "cifs") == 0)) {
- continue;
- }
- if (dev_major_minor(ment->mnt_fsname, &devmajor, &devminor) == -2) {
- /* Skip bind mounts */
- continue;
- }
+ return GUEST_FSFREEZE_STATUS_THAWED;
+}
+
+int64_t qmp_guest_fsfreeze_freeze(Error **errp)
+{
+ return qmp_guest_fsfreeze_freeze_list(false, NULL, errp);
+}
+
+int64_t qmp_guest_fsfreeze_freeze_list(bool has_mountpoints,
+ strList *mountpoints,
+ Error **errp)
+{
+ int ret;
+ FsMountList mounts;
+ Error *local_err = NULL;
- mount = g_new0(FsMount, 1);
- mount->dirname = g_strdup(ment->mnt_dir);
- mount->devtype = g_strdup(ment->mnt_type);
- mount->devmajor = devmajor;
- mount->devminor = devminor;
+ slog("guest-fsfreeze called");
- QTAILQ_INSERT_TAIL(mounts, mount, next);
+ execute_fsfreeze_hook(FSFREEZE_HOOK_FREEZE, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return -1;
}
- endmntent(fp);
- return true;
-}
+ QTAILQ_INIT(&mounts);
+ if (!build_fs_mount_list(&mounts, &local_err)) {
+ error_propagate(errp, local_err);
+ return -1;
+ }
-static void decode_mntname(char *name, int len)
-{
- int i, j = 0;
- for (i = 0; i <= len; i++) {
- if (name[i] != '\\') {
- name[j++] = name[i];
- } else if (name[i + 1] == '\\') {
- name[j++] = '\\';
- i++;
- } else if (name[i + 1] >= '0' && name[i + 1] <= '3' &&
- name[i + 2] >= '0' && name[i + 2] <= '7' &&
- name[i + 3] >= '0' && name[i + 3] <= '7') {
- name[j++] = (name[i + 1] - '0') * 64 +
- (name[i + 2] - '0') * 8 +
- (name[i + 3] - '0');
- i += 3;
- } else {
- name[j++] = name[i];
- }
+ /* cannot risk guest agent blocking itself on a write in this state */
+ ga_set_frozen(ga_state);
+
+ ret = qmp_guest_fsfreeze_do_freeze_list(has_mountpoints, mountpoints,
+ mounts, errp);
+
+ free_fs_mount_list(&mounts);
+ /* We may not issue any FIFREEZE here.
+ * Just unset ga_state here and ready for the next call.
+ */
+ if (ret == 0) {
+ ga_unset_frozen(ga_state);
+ } else if (ret < 0) {
+ qmp_guest_fsfreeze_thaw(NULL);
}
+ return ret;
}
-static bool build_fs_mount_list(FsMountList *mounts, Error **errp)
+int64_t qmp_guest_fsfreeze_thaw(Error **errp)
{
- FsMount *mount;
- char const *mountinfo = "/proc/self/mountinfo";
- FILE *fp;
- char *line = NULL, *dash;
- size_t n;
- char check;
- unsigned int devmajor, devminor;
- int ret, dir_s, dir_e, type_s, type_e, dev_s, dev_e;
+ int ret;
- fp = fopen(mountinfo, "r");
- if (!fp) {
- return build_fs_mount_list_from_mtab(mounts, errp);
+ ret = qmp_guest_fsfreeze_do_thaw(errp);
+ if (ret >= 0) {
+ ga_unset_frozen(ga_state);
+ execute_fsfreeze_hook(FSFREEZE_HOOK_THAW, errp);
+ } else {
+ ret = 0;
}
- while (getline(&line, &n, fp) != -1) {
- ret = sscanf(line, "%*u %*u %u:%u %*s %n%*s%n%c",
- &devmajor, &devminor, &dir_s, &dir_e, &check);
- if (ret < 3) {
- continue;
- }
- dash = strstr(line + dir_e, " - ");
- if (!dash) {
- continue;
- }
- ret = sscanf(dash, " - %n%*s%n %n%*s%n%c",
- &type_s, &type_e, &dev_s, &dev_e, &check);
- if (ret < 1) {
- continue;
- }
- line[dir_e] = 0;
- dash[type_e] = 0;
- dash[dev_e] = 0;
- decode_mntname(line + dir_s, dir_e - dir_s);
- decode_mntname(dash + dev_s, dev_e - dev_s);
- if (devmajor == 0) {
- /* btrfs reports major number = 0 */
- if (strcmp("btrfs", dash + type_s) != 0 ||
- dev_major_minor(dash + dev_s, &devmajor, &devminor) < 0) {
- continue;
- }
- }
+ return ret;
+}
- mount = g_new0(FsMount, 1);
- mount->dirname = g_strdup(line + dir_s);
- mount->devtype = g_strdup(dash + type_s);
- mount->devmajor = devmajor;
- mount->devminor = devminor;
+static void guest_fsfreeze_cleanup(void)
+{
+ Error *err = NULL;
- QTAILQ_INSERT_TAIL(mounts, mount, next);
+ if (ga_is_frozen(ga_state) == GUEST_FSFREEZE_STATUS_FROZEN) {
+ qmp_guest_fsfreeze_thaw(&err);
+ if (err) {
+ slog("failed to clean up frozen filesystems: %s",
+ error_get_pretty(err));
+ error_free(err);
+ }
}
- free(line);
-
- fclose(fp);
- return true;
}
#endif
+/* linux-specific implementations. avoid this if at all possible. */
+#if defined(__linux__)
#if defined(CONFIG_FSFREEZE)
static char *get_pci_driver(char const *syspath, int pathlen, Error **errp)
@@ -1621,248 +1605,6 @@ GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
free_fs_mount_list(&mounts);
return ret;
}
-
-
-typedef enum {
- FSFREEZE_HOOK_THAW = 0,
- FSFREEZE_HOOK_FREEZE,
-} FsfreezeHookArg;
-
-static const char *fsfreeze_hook_arg_string[] = {
- "thaw",
- "freeze",
-};
-
-static void execute_fsfreeze_hook(FsfreezeHookArg arg, Error **errp)
-{
- int status;
- pid_t pid;
- const char *hook;
- const char *arg_str = fsfreeze_hook_arg_string[arg];
- Error *local_err = NULL;
-
- hook = ga_fsfreeze_hook(ga_state);
- if (!hook) {
- return;
- }
- if (access(hook, X_OK) != 0) {
- error_setg_errno(errp, errno, "can't access fsfreeze hook '%s'", hook);
- return;
- }
-
- slog("executing fsfreeze hook with arg '%s'", arg_str);
- pid = fork();
- if (pid == 0) {
- setsid();
- reopen_fd_to_null(0);
- reopen_fd_to_null(1);
- reopen_fd_to_null(2);
-
- execl(hook, hook, arg_str, NULL);
- _exit(EXIT_FAILURE);
- } else if (pid < 0) {
- error_setg_errno(errp, errno, "failed to create child process");
- return;
- }
-
- ga_wait_child(pid, &status, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return;
- }
-
- if (!WIFEXITED(status)) {
- error_setg(errp, "fsfreeze hook has terminated abnormally");
- return;
- }
-
- status = WEXITSTATUS(status);
- if (status) {
- error_setg(errp, "fsfreeze hook has failed with status %d", status);
- return;
- }
-}
-
-/*
- * Return status of freeze/thaw
- */
-GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **errp)
-{
- if (ga_is_frozen(ga_state)) {
- return GUEST_FSFREEZE_STATUS_FROZEN;
- }
-
- return GUEST_FSFREEZE_STATUS_THAWED;
-}
-
-int64_t qmp_guest_fsfreeze_freeze(Error **errp)
-{
- return qmp_guest_fsfreeze_freeze_list(false, NULL, errp);
-}
-
-/*
- * Walk list of mounted file systems in the guest, and freeze the ones which
- * are real local file systems.
- */
-int64_t qmp_guest_fsfreeze_freeze_list(bool has_mountpoints,
- strList *mountpoints,
- Error **errp)
-{
- int ret = 0, i = 0;
- strList *list;
- FsMountList mounts;
- struct FsMount *mount;
- Error *local_err = NULL;
- int fd;
-
- slog("guest-fsfreeze called");
-
- execute_fsfreeze_hook(FSFREEZE_HOOK_FREEZE, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return -1;
- }
-
- QTAILQ_INIT(&mounts);
- if (!build_fs_mount_list(&mounts, &local_err)) {
- error_propagate(errp, local_err);
- return -1;
- }
-
- /* cannot risk guest agent blocking itself on a write in this state */
- ga_set_frozen(ga_state);
-
- QTAILQ_FOREACH_REVERSE(mount, &mounts, next) {
- /* To issue fsfreeze in the reverse order of mounts, check if the
- * mount is listed in the list here */
- if (has_mountpoints) {
- for (list = mountpoints; list; list = list->next) {
- if (strcmp(list->value, mount->dirname) == 0) {
- break;
- }
- }
- if (!list) {
- continue;
- }
- }
-
- fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
- if (fd == -1) {
- error_setg_errno(errp, errno, "failed to open %s", mount->dirname);
- goto error;
- }
-
- /* we try to cull filesystems we know won't work in advance, but other
- * filesystems may not implement fsfreeze for less obvious reasons.
- * these will report EOPNOTSUPP. we simply ignore these when tallying
- * the number of frozen filesystems.
- * if a filesystem is mounted more than once (aka bind mount) a
- * consecutive attempt to freeze an already frozen filesystem will
- * return EBUSY.
- *
- * any other error means a failure to freeze a filesystem we
- * expect to be freezable, so return an error in those cases
- * and return system to thawed state.
- */
- ret = ioctl(fd, FIFREEZE);
- if (ret == -1) {
- if (errno != EOPNOTSUPP && errno != EBUSY) {
- error_setg_errno(errp, errno, "failed to freeze %s",
- mount->dirname);
- close(fd);
- goto error;
- }
- } else {
- i++;
- }
- close(fd);
- }
-
- free_fs_mount_list(&mounts);
- /* We may not issue any FIFREEZE here.
- * Just unset ga_state here and ready for the next call.
- */
- if (i == 0) {
- ga_unset_frozen(ga_state);
- }
- return i;
-
-error:
- free_fs_mount_list(&mounts);
- qmp_guest_fsfreeze_thaw(NULL);
- return 0;
-}
-
-/*
- * Walk list of frozen file systems in the guest, and thaw them.
- */
-int64_t qmp_guest_fsfreeze_thaw(Error **errp)
-{
- int ret;
- FsMountList mounts;
- FsMount *mount;
- int fd, i = 0, logged;
- Error *local_err = NULL;
-
- QTAILQ_INIT(&mounts);
- if (!build_fs_mount_list(&mounts, &local_err)) {
- error_propagate(errp, local_err);
- return 0;
- }
-
- QTAILQ_FOREACH(mount, &mounts, next) {
- logged = false;
- fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
- if (fd == -1) {
- continue;
- }
- /* we have no way of knowing whether a filesystem was actually unfrozen
- * as a result of a successful call to FITHAW, only that if an error
- * was returned the filesystem was *not* unfrozen by that particular
- * call.
- *
- * since multiple preceding FIFREEZEs require multiple calls to FITHAW
- * to unfreeze, continuing issuing FITHAW until an error is returned,
- * in which case either the filesystem is in an unfreezable state, or,
- * more likely, it was thawed previously (and remains so afterward).
- *
- * also, since the most recent successful call is the one that did
- * the actual unfreeze, we can use this to provide an accurate count
- * of the number of filesystems unfrozen by guest-fsfreeze-thaw, which
- * may * be useful for determining whether a filesystem was unfrozen
- * during the freeze/thaw phase by a process other than qemu-ga.
- */
- do {
- ret = ioctl(fd, FITHAW);
- if (ret == 0 && !logged) {
- i++;
- logged = true;
- }
- } while (ret == 0);
- close(fd);
- }
-
- ga_unset_frozen(ga_state);
- free_fs_mount_list(&mounts);
-
- execute_fsfreeze_hook(FSFREEZE_HOOK_THAW, errp);
-
- return i;
-}
-
-static void guest_fsfreeze_cleanup(void)
-{
- Error *err = NULL;
-
- if (ga_is_frozen(ga_state) == GUEST_FSFREEZE_STATUS_FROZEN) {
- qmp_guest_fsfreeze_thaw(&err);
- if (err) {
- slog("failed to clean up frozen filesystems: %s",
- error_get_pretty(err));
- error_free(err);
- }
- }
-}
#endif /* CONFIG_FSFREEZE */
#if defined(CONFIG_FSTRIM)
@@ -2372,7 +2114,9 @@ int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
return processed;
}
+#endif /* __linux__ */
+#if defined(__linux__) || defined(__FreeBSD__)
void qmp_guest_set_user_password(const char *username,
const char *password,
bool crypted,
@@ -2406,10 +2150,15 @@ void qmp_guest_set_user_password(const char *username,
goto out;
}
+#ifdef __FreeBSD__
+ chpasswddata = g_strdup(rawpasswddata);
+ passwd_path = g_find_program_in_path("pw");
+#else
chpasswddata = g_strdup_printf("%s:%s\n", username, rawpasswddata);
- chpasswdlen = strlen(chpasswddata);
-
passwd_path = g_find_program_in_path("chpasswd");
+#endif
+
+ chpasswdlen = strlen(chpasswddata);
if (!passwd_path) {
error_setg(errp, "cannot find 'passwd' program in PATH");
@@ -2430,11 +2179,17 @@ void qmp_guest_set_user_password(const char *username,
reopen_fd_to_null(1);
reopen_fd_to_null(2);
+#ifdef __FreeBSD__
+ const char *h_arg;
+ h_arg = (crypted) ? "-H" : "-h";
+ execl(passwd_path, "pw", "usermod", "-n", username, h_arg, "0", NULL);
+#else
if (crypted) {
execl(passwd_path, "chpasswd", "-e", NULL);
} else {
execl(passwd_path, "chpasswd", NULL);
}
+#endif
_exit(EXIT_FAILURE);
} else if (pid < 0) {
error_setg_errno(errp, errno, "failed to create child process");
@@ -2477,7 +2232,17 @@ out:
close(datafd[1]);
}
}
+#else /* __linux__ || __FreeBSD__ */
+void qmp_guest_set_user_password(const char *username,
+ const char *password,
+ bool crypted,
+ Error **errp)
+{
+ error_setg(errp, QERR_UNSUPPORTED);
+}
+#endif /* __linux__ || __FreeBSD__ */
+#ifdef __linux__
static void ga_read_sysfs_file(int dirfd, const char *pathname, char *buf,
int size, Error **errp)
{
@@ -3014,14 +2779,6 @@ int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
return -1;
}
-void qmp_guest_set_user_password(const char *username,
- const char *password,
- bool crypted,
- Error **errp)
-{
- error_setg(errp, QERR_UNSUPPORTED);
-}
-
GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp)
{
error_setg(errp, QERR_UNSUPPORTED);
@@ -3124,6 +2881,57 @@ static int guest_get_network_stats(const char *name,
return -1;
}
+#ifndef __FreeBSD__
+/*
+ * Fill "buf" with MAC address by ifaddrs. Pointer buf must point to a
+ * buffer with ETHER_ADDR_LEN length at least.
+ *
+ * Returns false in case of an error, otherwise true. "obtained" argument
+ * is true if a MAC address was obtained successful, otherwise false.
+ */
+bool guest_get_hw_addr(struct ifaddrs *ifa, unsigned char *buf,
+ bool *obtained, Error **errp)
+{
+ struct ifreq ifr;
+ int sock;
+
+ *obtained = false;
+
+ /* we haven't obtained HW address yet */
+ sock = socket(PF_INET, SOCK_STREAM, 0);
+ if (sock == -1) {
+ error_setg_errno(errp, errno, "failed to create socket");
+ return false;
+ }
+
+ memset(&ifr, 0, sizeof(ifr));
+ pstrcpy(ifr.ifr_name, IF_NAMESIZE, ifa->ifa_name);
+ if (ioctl(sock, SIOCGIFHWADDR, &ifr) == -1) {
+ /*
+ * We can't get the hw addr of this interface, but that's not a
+ * fatal error.
+ */
+ if (errno == EADDRNOTAVAIL) {
+ /* The interface doesn't have a hw addr (e.g. loopback). */
+ g_debug("failed to get MAC address of %s: %s",
+ ifa->ifa_name, strerror(errno));
+ } else{
+ g_warning("failed to get MAC address of %s: %s",
+ ifa->ifa_name, strerror(errno));
+ }
+ } else {
+#ifdef CONFIG_SOLARIS
+ memcpy(buf, &ifr.ifr_addr.sa_data, ETHER_ADDR_LEN);
+#else
+ memcpy(buf, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
+#endif
+ *obtained = true;
+ }
+ close(sock);
+ return true;
+}
+#endif /* __FreeBSD__ */
+
/*
* Build information about guest interfaces
*/
@@ -3144,9 +2952,8 @@ GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
GuestNetworkInterfaceStat *interface_stat = NULL;
char addr4[INET_ADDRSTRLEN];
char addr6[INET6_ADDRSTRLEN];
- int sock;
- struct ifreq ifr;
- unsigned char *mac_addr;
+ unsigned char mac_addr[ETHER_ADDR_LEN];
+ bool obtained;
void *p;
g_debug("Processing %s interface", ifa->ifa_name);
@@ -3161,45 +2968,17 @@ GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
}
if (!info->has_hardware_address) {
- /* we haven't obtained HW address yet */
- sock = socket(PF_INET, SOCK_STREAM, 0);
- if (sock == -1) {
- error_setg_errno(errp, errno, "failed to create socket");
+ if (!guest_get_hw_addr(ifa, mac_addr, &obtained, errp)) {
goto error;
}
-
- memset(&ifr, 0, sizeof(ifr));
- pstrcpy(ifr.ifr_name, IF_NAMESIZE, info->name);
- if (ioctl(sock, SIOCGIFHWADDR, &ifr) == -1) {
- /*
- * We can't get the hw addr of this interface, but that's not a
- * fatal error. Don't set info->hardware_address, but keep
- * going.
- */
- if (errno == EADDRNOTAVAIL) {
- /* The interface doesn't have a hw addr (e.g. loopback). */
- g_debug("failed to get MAC address of %s: %s",
- ifa->ifa_name, strerror(errno));
- } else{
- g_warning("failed to get MAC address of %s: %s",
- ifa->ifa_name, strerror(errno));
- }
-
- } else {
-#ifdef CONFIG_SOLARIS
- mac_addr = (unsigned char *) &ifr.ifr_addr.sa_data;
-#else
- mac_addr = (unsigned char *) &ifr.ifr_hwaddr.sa_data;
-#endif
+ if (obtained) {
info->hardware_address =
g_strdup_printf("%02x:%02x:%02x:%02x:%02x:%02x",
(int) mac_addr[0], (int) mac_addr[1],
(int) mac_addr[2], (int) mac_addr[3],
(int) mac_addr[4], (int) mac_addr[5]);
-
info->has_hardware_address = true;
}
- close(sock);
}
if (ifa->ifa_addr &&
diff --git a/qga/main.c b/qga/main.c
index 5a9d8252e0..b3580508fa 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -37,17 +37,16 @@
#include "qga/service-win32.h"
#include "qga/vss-win32.h"
#endif
-#ifdef __linux__
-#include <linux/fs.h>
-#ifdef FIFREEZE
-#define CONFIG_FSFREEZE
-#endif
-#endif
+#include "commands-common.h"
#ifndef _WIN32
+#ifdef __FreeBSD__
+#define QGA_VIRTIO_PATH_DEFAULT "/dev/vtcon/org.qemu.guest_agent.0"
+#else /* __FreeBSD__ */
#define QGA_VIRTIO_PATH_DEFAULT "/dev/virtio-ports/org.qemu.guest_agent.0"
-#define QGA_STATE_RELATIVE_DIR "run"
+#endif /* __FreeBSD__ */
#define QGA_SERIAL_PATH_DEFAULT "/dev/ttyS0"
+#define QGA_STATE_RELATIVE_DIR "run"
#else
#define QGA_VIRTIO_PATH_DEFAULT "\\\\.\\Global\\org.qemu.guest_agent.0"
#define QGA_STATE_RELATIVE_DIR "qemu-ga"
diff --git a/qga/meson.build b/qga/meson.build
index a0ffd6d268..3cfb9166e5 100644
--- a/qga/meson.build
+++ b/qga/meson.build
@@ -72,6 +72,12 @@ qga_ss.add(when: 'CONFIG_POSIX', if_true: files(
'commands-posix.c',
'commands-posix-ssh.c',
))
+qga_ss.add(when: 'CONFIG_LINUX', if_true: files(
+ 'commands-linux.c',
+))
+qga_ss.add(when: 'CONFIG_BSD', if_true: files(
+ 'commands-bsd.c',
+))
qga_ss.add(when: 'CONFIG_WIN32', if_true: files(
'channel-win32.c',
'commands-win32.c',
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index eb3267bef5..2cb0de5601 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -67,6 +67,7 @@ meson_options_help() {
printf "%s\n" ' auth-pam PAM access control'
printf "%s\n" ' avx2 AVX2 optimizations'
printf "%s\n" ' avx512f AVX512F optimizations'
+ printf "%s\n" ' blkio libblkio block device driver'
printf "%s\n" ' bochs bochs image format support'
printf "%s\n" ' bpf eBPF support'
printf "%s\n" ' brlapi brlapi character device driver'
@@ -198,6 +199,8 @@ _meson_option_parse() {
--disable-gcov) printf "%s" -Db_coverage=false ;;
--enable-lto) printf "%s" -Db_lto=true ;;
--disable-lto) printf "%s" -Db_lto=false ;;
+ --enable-blkio) printf "%s" -Dblkio=enabled ;;
+ --disable-blkio) printf "%s" -Dblkio=disabled ;;
--block-drv-ro-whitelist=*) quote_sh "-Dblock_drv_ro_whitelist=$2" ;;
--block-drv-rw-whitelist=*) quote_sh "-Dblock_drv_rw_whitelist=$2" ;;
--enable-block-drv-whitelist-in-tools) printf "%s" -Dblock_drv_whitelist_in_tools=true ;;
diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py
index b369388360..dfeee8231a 100755
--- a/scripts/vmstate-static-checker.py
+++ b/scripts/vmstate-static-checker.py
@@ -367,7 +367,6 @@ def check_machine_type(s, d):
if s["Name"] != d["Name"]:
print("Warning: checking incompatible machine types:", end=' ')
print("\"" + s["Name"] + "\", \"" + d["Name"] + "\"")
- return
def main():
diff --git a/softmmu/device_tree.c b/softmmu/device_tree.c
index ce74f3d48d..30aa3aea9f 100644
--- a/softmmu/device_tree.c
+++ b/softmmu/device_tree.c
@@ -22,6 +22,7 @@
#include "qemu/option.h"
#include "qemu/bswap.h"
#include "qemu/cutils.h"
+#include "qemu/guest-random.h"
#include "sysemu/device_tree.h"
#include "hw/loader.h"
#include "hw/boards.h"
@@ -680,3 +681,23 @@ void hmp_dumpdtb(Monitor *mon, const QDict *qdict)
info_report("dtb dumped to %s", filename);
}
+
+void qemu_fdt_randomize_seeds(void *fdt)
+{
+ int noffset, poffset, len;
+ const char *name;
+ uint8_t *data;
+
+ for (noffset = fdt_next_node(fdt, 0, NULL);
+ noffset >= 0;
+ noffset = fdt_next_node(fdt, noffset, NULL)) {
+ for (poffset = fdt_first_property_offset(fdt, noffset);
+ poffset >= 0;
+ poffset = fdt_next_property_offset(fdt, poffset)) {
+ data = (uint8_t *)fdt_getprop_by_offset(fdt, poffset, &name, &len);
+ if (!data || strcmp(name, "rng-seed"))
+ continue;
+ qemu_guest_getrandom_nofail(data, len);
+ }
+ }
+}
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 56e03e07b5..d9578ccfd4 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -1748,6 +1748,11 @@ void qemu_ram_unset_migratable(RAMBlock *rb)
rb->flags &= ~RAM_MIGRATABLE;
}
+int qemu_ram_get_fd(RAMBlock *rb)
+{
+ return rb->fd;
+}
+
/* Called with iothread lock held. */
void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
{
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index 1e68680b9d..3dd83d5e5d 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -441,11 +441,16 @@ void qemu_system_reset(ShutdownCause reason)
cpu_synchronize_all_states();
if (mc && mc->reset) {
- mc->reset(current_machine);
+ mc->reset(current_machine, reason);
} else {
- qemu_devices_reset();
+ qemu_devices_reset(reason);
}
- if (reason && reason != SHUTDOWN_CAUSE_SUBSYSTEM_RESET) {
+ switch (reason) {
+ case SHUTDOWN_CAUSE_NONE:
+ case SHUTDOWN_CAUSE_SUBSYSTEM_RESET:
+ case SHUTDOWN_CAUSE_SNAPSHOT_LOAD:
+ break;
+ default:
qapi_event_send_reset(shutdown_caused_by_guest(reason), reason);
}
cpu_synchronize_all_post_reset();
diff --git a/stubs/meson.build b/stubs/meson.build
index d8f3fd5c44..4314161f5f 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -29,6 +29,7 @@ stub_ss.add(files('migr-blocker.c'))
stub_ss.add(files('module-opts.c'))
stub_ss.add(files('monitor.c'))
stub_ss.add(files('monitor-core.c'))
+stub_ss.add(files('physmem.c'))
stub_ss.add(files('qemu-timer-notify-cb.c'))
stub_ss.add(files('qmp_memory_device.c'))
stub_ss.add(files('qmp-command-available.c'))
diff --git a/stubs/physmem.c b/stubs/physmem.c
new file mode 100644
index 0000000000..1fc5f2df29
--- /dev/null
+++ b/stubs/physmem.c
@@ -0,0 +1,13 @@
+#include "qemu/osdep.h"
+#include "exec/cpu-common.h"
+
+RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
+ ram_addr_t *offset)
+{
+ return NULL;
+}
+
+int qemu_ram_get_fd(RAMBlock *rb)
+{
+ return -1;
+}
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
index 979a629d59..270ae787b1 100644
--- a/target/alpha/cpu.c
+++ b/target/alpha/cpu.c
@@ -40,6 +40,14 @@ static vaddr alpha_cpu_get_pc(CPUState *cs)
return cpu->env.pc;
}
+static void alpha_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+ AlphaCPU *cpu = ALPHA_CPU(cs);
+
+ cpu->env.pc = data[0];
+}
static bool alpha_cpu_has_work(CPUState *cs)
{
@@ -226,6 +234,7 @@ static const struct SysemuCPUOps alpha_sysemu_ops = {
static const struct TCGCPUOps alpha_tcg_ops = {
.initialize = alpha_translate_init,
+ .restore_state_to_opc = alpha_restore_state_to_opc,
#ifdef CONFIG_USER_ONLY
.record_sigsegv = alpha_cpu_record_sigsegv,
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index 6766350f56..f9bcdeb717 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -3049,9 +3049,3 @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
DisasContext dc;
translator_loop(cpu, tb, max_insns, pc, host_pc, &alpha_tr_ops, &dc.base);
}
-
-void restore_state_to_opc(CPUAlphaState *env, TranslationBlock *tb,
- target_ulong *data)
-{
- env->pc = data[0];
-}
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 0bc5e9b125..a021df9e9e 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -90,6 +90,31 @@ void arm_cpu_synchronize_from_tb(CPUState *cs,
}
}
}
+
+static void arm_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+ CPUARMState *env = cs->env_ptr;
+
+ if (is_a64(env)) {
+ if (TARGET_TB_PCREL) {
+ env->pc = (env->pc & TARGET_PAGE_MASK) | data[0];
+ } else {
+ env->pc = data[0];
+ }
+ env->condexec_bits = 0;
+ env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
+ } else {
+ if (TARGET_TB_PCREL) {
+ env->regs[15] = (env->regs[15] & TARGET_PAGE_MASK) | data[0];
+ } else {
+ env->regs[15] = data[0];
+ }
+ env->condexec_bits = data[1];
+ env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
+ }
+}
#endif /* CONFIG_TCG */
static bool arm_cpu_has_work(CPUState *cs)
@@ -562,14 +587,24 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
if ((target_el > cur_el) && (target_el != 1)) {
/* Exceptions targeting a higher EL may not be maskable */
if (arm_feature(env, ARM_FEATURE_AARCH64)) {
- /*
- * 64-bit masking rules are simple: exceptions to EL3
- * can't be masked, and exceptions to EL2 can only be
- * masked from Secure state. The HCR and SCR settings
- * don't affect the masking logic, only the interrupt routing.
- */
- if (target_el == 3 || !secure || (env->cp15.scr_el3 & SCR_EEL2)) {
+ switch (target_el) {
+ case 2:
+ /*
+ * According to ARM DDI 0487H.a, an interrupt can be masked
+ * when HCR_E2H and HCR_TGE are both set regardless of the
+ * current Security state. Note that we need to revisit this
+ * part again once we need to support NMI.
+ */
+ if ((hcr_el2 & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
+ unmasked = true;
+ }
+ break;
+ case 3:
+ /* Interrupt cannot be masked when the target EL is 3 */
unmasked = true;
+ break;
+ default:
+ g_assert_not_reached();
}
} else {
/*
@@ -2152,6 +2187,7 @@ static const struct TCGCPUOps arm_tcg_ops = {
.initialize = arm_translate_init,
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
.debug_excp_handler = arm_debug_excp_handler,
+ .restore_state_to_opc = arm_restore_state_to_opc,
#ifdef CONFIG_USER_ONLY
.record_sigsegv = arm_cpu_record_sigsegv,
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 64fc03214c..9aeed3c848 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3410,6 +3410,14 @@ extern const uint64_t pred_esz_masks[5];
#define PAGE_MTE PAGE_TARGET_2
#define PAGE_TARGET_STICKY PAGE_MTE
+/* We associate one allocation tag per 16 bytes, the minimum. */
+#define LOG2_TAG_GRANULE 4
+#define TAG_GRANULE (1 << LOG2_TAG_GRANULE)
+
+#ifdef CONFIG_USER_ONLY
+#define TARGET_PAGE_DATA_SIZE (TARGET_PAGE_SIZE >> (LOG2_TAG_GRANULE + 1))
+#endif
+
#ifdef TARGET_TAGGED_ADDRESSES
/**
* cpu_untagged_addr:
@@ -4139,6 +4147,21 @@ static inline bool isar_feature_aa64_lva(const ARMISARegisters *id)
return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, VARANGE) != 0;
}
+static inline bool isar_feature_aa64_e0pd(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, E0PD) != 0;
+}
+
+static inline bool isar_feature_aa64_hafs(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) != 0;
+}
+
+static inline bool isar_feature_aa64_hdbs(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) >= 2;
+}
+
static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id)
{
return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0;
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 85e0d1daf1..3d74f134f5 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -1165,6 +1165,7 @@ static void aarch64_max_initfn(Object *obj)
cpu->isar.id_aa64mmfr0 = t;
t = cpu->isar.id_aa64mmfr1;
+ t = FIELD_DP64(t, ID_AA64MMFR1, HAFDBS, 2); /* FEAT_HAFDBS */
t = FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2); /* FEAT_VMID16 */
t = FIELD_DP64(t, ID_AA64MMFR1, VH, 1); /* FEAT_VHE */
t = FIELD_DP64(t, ID_AA64MMFR1, HPDS, 1); /* FEAT_HPDS */
@@ -1185,6 +1186,7 @@ static void aarch64_max_initfn(Object *obj)
t = FIELD_DP64(t, ID_AA64MMFR2, FWB, 1); /* FEAT_S2FWB */
t = FIELD_DP64(t, ID_AA64MMFR2, TTL, 1); /* FEAT_TTL */
t = FIELD_DP64(t, ID_AA64MMFR2, BBM, 2); /* FEAT_BBM at level 2 */
+ t = FIELD_DP64(t, ID_AA64MMFR2, E0PD, 1); /* FEAT_E0PD */
cpu->isar.id_aa64mmfr2 = t;
t = cpu->isar.id_aa64zfr0;
diff --git a/target/arm/helper.c b/target/arm/helper.c
index c672903f43..b070a20f1a 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -10352,7 +10352,7 @@ int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx)
{
if (regime_has_2_ranges(mmu_idx)) {
return extract64(tcr, 37, 2);
- } else if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) {
+ } else if (regime_is_stage2(mmu_idx)) {
return 0; /* VTCR_EL2 */
} else {
/* Replicate the single TBI bit so we always have 2 bits. */
@@ -10364,7 +10364,7 @@ int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx)
{
if (regime_has_2_ranges(mmu_idx)) {
return extract64(tcr, 51, 2);
- } else if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) {
+ } else if (regime_is_stage2(mmu_idx)) {
return 0; /* VTCR_EL2 */
} else {
/* Replicate the single TBID bit so we always have 2 bits. */
@@ -10470,11 +10470,11 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
ARMMMUIdx mmu_idx, bool data)
{
uint64_t tcr = regime_tcr(env, mmu_idx);
- bool epd, hpd, tsz_oob, ds;
+ bool epd, hpd, tsz_oob, ds, ha, hd;
int select, tsz, tbi, max_tsz, min_tsz, ps, sh;
ARMGranuleSize gran;
ARMCPU *cpu = env_archcpu(env);
- bool stage2 = mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S;
+ bool stage2 = regime_is_stage2(mmu_idx);
if (!regime_has_2_ranges(mmu_idx)) {
select = 0;
@@ -10489,8 +10489,12 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
epd = false;
sh = extract32(tcr, 12, 2);
ps = extract32(tcr, 16, 3);
+ ha = extract32(tcr, 21, 1) && cpu_isar_feature(aa64_hafs, cpu);
+ hd = extract32(tcr, 22, 1) && cpu_isar_feature(aa64_hdbs, cpu);
ds = extract64(tcr, 32, 1);
} else {
+ bool e0pd;
+
/*
* Bit 55 is always between the two regions, and is canonical for
* determining if address tagging is enabled.
@@ -10502,15 +10506,24 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
epd = extract32(tcr, 7, 1);
sh = extract32(tcr, 12, 2);
hpd = extract64(tcr, 41, 1);
+ e0pd = extract64(tcr, 55, 1);
} else {
tsz = extract32(tcr, 16, 6);
gran = tg1_to_gran_size(extract32(tcr, 30, 2));
epd = extract32(tcr, 23, 1);
sh = extract32(tcr, 28, 2);
hpd = extract64(tcr, 42, 1);
+ e0pd = extract64(tcr, 56, 1);
}
ps = extract64(tcr, 32, 3);
+ ha = extract64(tcr, 39, 1) && cpu_isar_feature(aa64_hafs, cpu);
+ hd = extract64(tcr, 40, 1) && cpu_isar_feature(aa64_hdbs, cpu);
ds = extract64(tcr, 59, 1);
+
+ if (e0pd && cpu_isar_feature(aa64_e0pd, cpu) &&
+ regime_is_user(env, mmu_idx)) {
+ epd = true;
+ }
}
gran = sanitize_gran_size(cpu, gran, stage2);
@@ -10532,22 +10545,18 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
}
ds = false;
} else if (ds) {
- switch (mmu_idx) {
- case ARMMMUIdx_Stage2:
- case ARMMMUIdx_Stage2_S:
+ if (regime_is_stage2(mmu_idx)) {
if (gran == Gran16K) {
ds = cpu_isar_feature(aa64_tgran16_2_lpa2, cpu);
} else {
ds = cpu_isar_feature(aa64_tgran4_2_lpa2, cpu);
}
- break;
- default:
+ } else {
if (gran == Gran16K) {
ds = cpu_isar_feature(aa64_tgran16_lpa2, cpu);
} else {
ds = cpu_isar_feature(aa64_tgran4_lpa2, cpu);
}
- break;
}
if (ds) {
min_tsz = 12;
@@ -10581,6 +10590,8 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
.hpd = hpd,
.tsz_oob = tsz_oob,
.ds = ds,
+ .ha = ha,
+ .hd = ha && hd,
.gran = gran,
};
}
diff --git a/target/arm/internals.h b/target/arm/internals.h
index c3c3920ded..d9121d9ff8 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -338,6 +338,7 @@ typedef enum ARMFaultType {
ARMFault_AsyncExternal,
ARMFault_Debug,
ARMFault_TLBConflict,
+ ARMFault_UnsuppAtomicUpdate,
ARMFault_Lockdown,
ARMFault_Exclusive,
ARMFault_ICacheMaint,
@@ -524,6 +525,9 @@ static inline uint32_t arm_fi_to_lfsc(ARMMMUFaultInfo *fi)
case ARMFault_TLBConflict:
fsc = 0x30;
break;
+ case ARMFault_UnsuppAtomicUpdate:
+ fsc = 0x31;
+ break;
case ARMFault_Lockdown:
fsc = 0x34;
break;
@@ -673,6 +677,11 @@ static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx)
}
}
+static inline bool regime_is_stage2(ARMMMUIdx mmu_idx)
+{
+ return mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S;
+}
+
/* Return the exception level which controls this address translation regime */
static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx)
{
@@ -707,6 +716,25 @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx)
}
}
+static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
+{
+ switch (mmu_idx) {
+ case ARMMMUIdx_E20_0:
+ case ARMMMUIdx_Stage1_E0:
+ case ARMMMUIdx_MUser:
+ case ARMMMUIdx_MSUser:
+ case ARMMMUIdx_MUserNegPri:
+ case ARMMMUIdx_MSUserNegPri:
+ return true;
+ default:
+ return false;
+ case ARMMMUIdx_E10_0:
+ case ARMMMUIdx_E10_1:
+ case ARMMMUIdx_E10_1_PAN:
+ g_assert_not_reached();
+ }
+}
+
/* Return the SCTLR value which controls this address translation regime */
static inline uint64_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx)
{
@@ -1041,6 +1069,8 @@ typedef struct ARMVAParameters {
bool hpd : 1;
bool tsz_oob : 1; /* tsz has been clamped to legal range */
bool ds : 1;
+ bool ha : 1;
+ bool hd : 1;
ARMGranuleSize gran : 2;
} ARMVAParameters;
@@ -1164,10 +1194,6 @@ void arm_log_exception(CPUState *cs);
*/
#define GMID_EL1_BS 6
-/* We associate one allocation tag per 16 bytes, the minimum. */
-#define LOG2_TAG_GRANULE 4
-#define TAG_GRANULE (1 << LOG2_TAG_GRANULE)
-
/*
* SVE predicates are 1/8 the size of SVE vectors, and cannot use
* the same simd_desc() encoding due to restrictions on size.
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index e85208339e..86b3754838 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -95,11 +95,6 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
}
tags = page_get_target_data(clean_ptr);
- if (tags == NULL) {
- size_t alloc_size = TARGET_PAGE_SIZE >> (LOG2_TAG_GRANULE + 1);
- tags = page_alloc_target_data(clean_ptr, alloc_size);
- assert(tags != NULL);
- }
index = extract32(ptr, LOG2_TAG_GRANULE + 1,
TARGET_PAGE_BITS - LOG2_TAG_GRANULE - 1);
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index 6c5ed56a10..58a7bbda50 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -9,6 +9,7 @@
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/range.h"
+#include "qemu/main-loop.h"
#include "exec/exec-all.h"
#include "cpu.h"
#include "internals.h"
@@ -17,10 +18,13 @@
typedef struct S1Translate {
ARMMMUIdx in_mmu_idx;
+ ARMMMUIdx in_ptw_idx;
bool in_secure;
bool in_debug;
bool out_secure;
+ bool out_rw;
bool out_be;
+ hwaddr out_virt;
hwaddr out_phys;
void *out_host;
} S1Translate;
@@ -104,25 +108,6 @@ static bool regime_translation_big_endian(CPUARMState *env, ARMMMUIdx mmu_idx)
return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0;
}
-static bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
-{
- switch (mmu_idx) {
- case ARMMMUIdx_E20_0:
- case ARMMMUIdx_Stage1_E0:
- case ARMMMUIdx_MUser:
- case ARMMMUIdx_MSUser:
- case ARMMMUIdx_MUserNegPri:
- case ARMMMUIdx_MSUserNegPri:
- return true;
- default:
- return false;
- case ARMMMUIdx_E10_0:
- case ARMMMUIdx_E10_1:
- case ARMMMUIdx_E10_1_PAN:
- g_assert_not_reached();
- }
-}
-
/* Return the TTBR associated with this translation regime */
static uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn)
{
@@ -233,33 +218,26 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw,
{
bool is_secure = ptw->in_secure;
ARMMMUIdx mmu_idx = ptw->in_mmu_idx;
- ARMMMUIdx s2_mmu_idx = is_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2;
- bool s2_phys = false;
+ ARMMMUIdx s2_mmu_idx = ptw->in_ptw_idx;
uint8_t pte_attrs;
bool pte_secure;
- if (!arm_mmu_idx_is_stage1_of_2(mmu_idx)
- || regime_translation_disabled(env, s2_mmu_idx, is_secure)) {
- s2_mmu_idx = is_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS;
- s2_phys = true;
- }
+ ptw->out_virt = addr;
if (unlikely(ptw->in_debug)) {
/*
* From gdbstub, do not use softmmu so that we don't modify the
* state of the cpu at all, including softmmu tlb contents.
*/
- if (s2_phys) {
- ptw->out_phys = addr;
- pte_attrs = 0;
- pte_secure = is_secure;
- } else {
+ if (regime_is_stage2(s2_mmu_idx)) {
S1Translate s2ptw = {
.in_mmu_idx = s2_mmu_idx,
+ .in_ptw_idx = is_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS,
.in_secure = is_secure,
.in_debug = true,
};
GetPhysAddrResult s2 = { };
+
if (!get_phys_addr_lpae(env, &s2ptw, addr, MMU_DATA_LOAD,
false, &s2, fi)) {
goto fail;
@@ -267,8 +245,14 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw,
ptw->out_phys = s2.f.phys_addr;
pte_attrs = s2.cacheattrs.attrs;
pte_secure = s2.f.attrs.secure;
+ } else {
+ /* Regime is physical. */
+ ptw->out_phys = addr;
+ pte_attrs = 0;
+ pte_secure = is_secure;
}
ptw->out_host = NULL;
+ ptw->out_rw = false;
} else {
CPUTLBEntryFull *full;
int flags;
@@ -283,11 +267,12 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw,
goto fail;
}
ptw->out_phys = full->phys_addr;
+ ptw->out_rw = full->prot & PAGE_WRITE;
pte_attrs = full->pte_attrs;
pte_secure = full->attrs.secure;
}
- if (!s2_phys) {
+ if (regime_is_stage2(s2_mmu_idx)) {
uint64_t hcr = arm_hcr_el2_eff_secstate(env, is_secure);
if ((hcr & HCR_PTW) && S2_attrs_are_device(hcr, pte_attrs)) {
@@ -322,24 +307,20 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw,
}
/* All loads done in the course of a page table walk go through here. */
-static uint32_t arm_ldl_ptw(CPUARMState *env, S1Translate *ptw, hwaddr addr,
+static uint32_t arm_ldl_ptw(CPUARMState *env, S1Translate *ptw,
ARMMMUFaultInfo *fi)
{
CPUState *cs = env_cpu(env);
+ void *host = ptw->out_host;
uint32_t data;
- if (!S1_ptw_translate(env, ptw, addr, fi)) {
- /* Failure. */
- assert(fi->s1ptw);
- return 0;
- }
-
- if (likely(ptw->out_host)) {
+ if (likely(host)) {
/* Page tables are in RAM, and we have the host address. */
+ data = qatomic_read((uint32_t *)host);
if (ptw->out_be) {
- data = ldl_be_p(ptw->out_host);
+ data = be32_to_cpu(data);
} else {
- data = ldl_le_p(ptw->out_host);
+ data = le32_to_cpu(data);
}
} else {
/* Page tables are in MMIO. */
@@ -361,25 +342,29 @@ static uint32_t arm_ldl_ptw(CPUARMState *env, S1Translate *ptw, hwaddr addr,
return data;
}
-static uint64_t arm_ldq_ptw(CPUARMState *env, S1Translate *ptw, hwaddr addr,
+static uint64_t arm_ldq_ptw(CPUARMState *env, S1Translate *ptw,
ARMMMUFaultInfo *fi)
{
CPUState *cs = env_cpu(env);
+ void *host = ptw->out_host;
uint64_t data;
- if (!S1_ptw_translate(env, ptw, addr, fi)) {
- /* Failure. */
- assert(fi->s1ptw);
- return 0;
- }
-
- if (likely(ptw->out_host)) {
+ if (likely(host)) {
/* Page tables are in RAM, and we have the host address. */
+#ifdef CONFIG_ATOMIC64
+ data = qatomic_read__nocheck((uint64_t *)host);
+ if (ptw->out_be) {
+ data = be64_to_cpu(data);
+ } else {
+ data = le64_to_cpu(data);
+ }
+#else
if (ptw->out_be) {
- data = ldq_be_p(ptw->out_host);
+ data = ldq_be_p(host);
} else {
- data = ldq_le_p(ptw->out_host);
+ data = ldq_le_p(host);
}
+#endif
} else {
/* Page tables are in MMIO. */
MemTxAttrs attrs = { .secure = ptw->out_secure };
@@ -400,6 +385,91 @@ static uint64_t arm_ldq_ptw(CPUARMState *env, S1Translate *ptw, hwaddr addr,
return data;
}
+static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val,
+ uint64_t new_val, S1Translate *ptw,
+ ARMMMUFaultInfo *fi)
+{
+ uint64_t cur_val;
+ void *host = ptw->out_host;
+
+ if (unlikely(!host)) {
+ fi->type = ARMFault_UnsuppAtomicUpdate;
+ fi->s1ptw = true;
+ return 0;
+ }
+
+ /*
+ * Raising a stage2 Protection fault for an atomic update to a read-only
+ * page is delayed until it is certain that there is a change to make.
+ */
+ if (unlikely(!ptw->out_rw)) {
+ int flags;
+ void *discard;
+
+ env->tlb_fi = fi;
+ flags = probe_access_flags(env, ptw->out_virt, MMU_DATA_STORE,
+ arm_to_core_mmu_idx(ptw->in_ptw_idx),
+ true, &discard, 0);
+ env->tlb_fi = NULL;
+
+ if (unlikely(flags & TLB_INVALID_MASK)) {
+ assert(fi->type != ARMFault_None);
+ fi->s2addr = ptw->out_virt;
+ fi->stage2 = true;
+ fi->s1ptw = true;
+ fi->s1ns = !ptw->in_secure;
+ return 0;
+ }
+
+ /* In case CAS mismatches and we loop, remember writability. */
+ ptw->out_rw = true;
+ }
+
+#ifdef CONFIG_ATOMIC64
+ if (ptw->out_be) {
+ old_val = cpu_to_be64(old_val);
+ new_val = cpu_to_be64(new_val);
+ cur_val = qatomic_cmpxchg__nocheck((uint64_t *)host, old_val, new_val);
+ cur_val = be64_to_cpu(cur_val);
+ } else {
+ old_val = cpu_to_le64(old_val);
+ new_val = cpu_to_le64(new_val);
+ cur_val = qatomic_cmpxchg__nocheck((uint64_t *)host, old_val, new_val);
+ cur_val = le64_to_cpu(cur_val);
+ }
+#else
+ /*
+ * We can't support the full 64-bit atomic cmpxchg on the host.
+ * Because this is only used for FEAT_HAFDBS, which is only for AA64,
+ * we know that TCG_OVERSIZED_GUEST is set, which means that we are
+ * running in round-robin mode and could only race with dma i/o.
+ */
+#ifndef TCG_OVERSIZED_GUEST
+# error "Unexpected configuration"
+#endif
+ bool locked = qemu_mutex_iothread_locked();
+ if (!locked) {
+ qemu_mutex_lock_iothread();
+ }
+ if (ptw->out_be) {
+ cur_val = ldq_be_p(host);
+ if (cur_val == old_val) {
+ stq_be_p(host, new_val);
+ }
+ } else {
+ cur_val = ldq_le_p(host);
+ if (cur_val == old_val) {
+ stq_le_p(host, new_val);
+ }
+ }
+ if (!locked) {
+ qemu_mutex_unlock_iothread();
+ }
+#endif
+
+ return cur_val;
+}
+
static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
uint32_t *table, uint32_t address)
{
@@ -529,7 +599,10 @@ static bool get_phys_addr_v5(CPUARMState *env, S1Translate *ptw,
fi->type = ARMFault_Translation;
goto do_fault;
}
- desc = arm_ldl_ptw(env, ptw, table, fi);
+ if (!S1_ptw_translate(env, ptw, table, fi)) {
+ goto do_fault;
+ }
+ desc = arm_ldl_ptw(env, ptw, fi);
if (fi->type != ARMFault_None) {
goto do_fault;
}
@@ -567,7 +640,10 @@ static bool get_phys_addr_v5(CPUARMState *env, S1Translate *ptw,
/* Fine pagetable. */
table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
}
- desc = arm_ldl_ptw(env, ptw, table, fi);
+ if (!S1_ptw_translate(env, ptw, table, fi)) {
+ goto do_fault;
+ }
+ desc = arm_ldl_ptw(env, ptw, fi);
if (fi->type != ARMFault_None) {
goto do_fault;
}
@@ -652,7 +728,10 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw,
fi->type = ARMFault_Translation;
goto do_fault;
}
- desc = arm_ldl_ptw(env, ptw, table, fi);
+ if (!S1_ptw_translate(env, ptw, table, fi)) {
+ goto do_fault;
+ }
+ desc = arm_ldl_ptw(env, ptw, fi);
if (fi->type != ARMFault_None) {
goto do_fault;
}
@@ -705,7 +784,10 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw,
ns = extract32(desc, 3, 1);
/* Lookup l2 entry. */
table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc);
- desc = arm_ldl_ptw(env, ptw, table, fi);
+ if (!S1_ptw_translate(env, ptw, table, fi)) {
+ goto do_fault;
+ }
+ desc = arm_ldl_ptw(env, ptw, fi);
if (fi->type != ARMFault_None) {
goto do_fault;
}
@@ -842,8 +924,7 @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
bool have_wxn;
int wxn = 0;
- assert(mmu_idx != ARMMMUIdx_Stage2);
- assert(mmu_idx != ARMMMUIdx_Stage2_S);
+ assert(!regime_is_stage2(mmu_idx));
user_rw = simple_ap_to_rw_prot_is_user(ap, true);
if (is_user) {
@@ -1067,15 +1148,13 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
ARMCPU *cpu = env_archcpu(env);
ARMMMUIdx mmu_idx = ptw->in_mmu_idx;
bool is_secure = ptw->in_secure;
- /* Read an LPAE long-descriptor translation table. */
- ARMFaultType fault_type = ARMFault_Translation;
uint32_t level;
ARMVAParameters param;
uint64_t ttbr;
hwaddr descaddr, indexmask, indexmask_grainsize;
uint32_t tableattrs;
target_ulong page_size;
- uint32_t attrs;
+ uint64_t attrs;
int32_t stride;
int addrsize, inputsize, outputsize;
uint64_t tcr = regime_tcr(env, mmu_idx);
@@ -1083,7 +1162,8 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
uint32_t el = regime_el(env, mmu_idx);
uint64_t descaddrmask;
bool aarch64 = arm_el_is_aa64(env, el);
- bool guarded = false;
+ uint64_t descriptor, new_descriptor;
+ bool nstable;
/* TODO: This code does not support shareability levels. */
if (aarch64) {
@@ -1103,8 +1183,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
* so our choice is to always raise the fault.
*/
if (param.tsz_oob) {
- fault_type = ARMFault_Translation;
- goto do_fault;
+ goto do_translation_fault;
}
addrsize = 64 - 8 * param.tbi;
@@ -1141,8 +1220,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
addrsize - inputsize);
if (-top_bits != param.select) {
/* The gap between the two regions is a Translation fault */
- fault_type = ARMFault_Translation;
- goto do_fault;
+ goto do_translation_fault;
}
}
@@ -1168,10 +1246,10 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
* Translation table walk disabled => Translation fault on TLB miss
* Note: This is always 0 on 64-bit EL2 and EL3.
*/
- goto do_fault;
+ goto do_translation_fault;
}
- if (mmu_idx != ARMMMUIdx_Stage2 && mmu_idx != ARMMMUIdx_Stage2_S) {
+ if (!regime_is_stage2(mmu_idx)) {
/*
* The starting level depends on the virtual address size (which can
* be up to 48 bits) and the translation granule size. It indicates
@@ -1199,8 +1277,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
if (param.ds && stride == 9 && sl2) {
if (sl0 != 0) {
level = 0;
- fault_type = ARMFault_Translation;
- goto do_fault;
+ goto do_translation_fault;
}
startlevel = -1;
} else if (!aarch64 || stride == 9) {
@@ -1219,8 +1296,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
ok = check_s2_mmu_setup(cpu, aarch64, startlevel,
inputsize, stride, outputsize);
if (!ok) {
- fault_type = ARMFault_Translation;
- goto do_fault;
+ goto do_translation_fault;
}
level = startlevel;
}
@@ -1242,7 +1318,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
descaddr |= extract64(ttbr, 2, 4) << 48;
} else if (descaddr >> outputsize) {
level = 0;
- fault_type = ARMFault_AddressSize;
+ fi->type = ARMFault_AddressSize;
goto do_fault;
}
@@ -1276,120 +1352,173 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
* bits at each step.
*/
tableattrs = is_secure ? 0 : (1 << 4);
- for (;;) {
- uint64_t descriptor;
- bool nstable;
-
- descaddr |= (address >> (stride * (4 - level))) & indexmask;
- descaddr &= ~7ULL;
- nstable = extract32(tableattrs, 4, 1);
- ptw->in_secure = !nstable;
- descriptor = arm_ldq_ptw(env, ptw, descaddr, fi);
- if (fi->type != ARMFault_None) {
- goto do_fault;
- }
- if (!(descriptor & 1) ||
- (!(descriptor & 2) && (level == 3))) {
- /* Invalid, or the Reserved level 3 encoding */
- goto do_fault;
+ next_level:
+ descaddr |= (address >> (stride * (4 - level))) & indexmask;
+ descaddr &= ~7ULL;
+ nstable = extract32(tableattrs, 4, 1);
+ if (!nstable) {
+ /*
+ * Stage2_S -> Stage2 or Phys_S -> Phys_NS
+ * Assert that the non-secure idx are even, and relative order.
+ */
+ QEMU_BUILD_BUG_ON((ARMMMUIdx_Phys_NS & 1) != 0);
+ QEMU_BUILD_BUG_ON((ARMMMUIdx_Stage2 & 1) != 0);
+ QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_NS + 1 != ARMMMUIdx_Phys_S);
+ QEMU_BUILD_BUG_ON(ARMMMUIdx_Stage2 + 1 != ARMMMUIdx_Stage2_S);
+ ptw->in_ptw_idx &= ~1;
+ ptw->in_secure = false;
+ }
+ if (!S1_ptw_translate(env, ptw, descaddr, fi)) {
+ goto do_fault;
+ }
+ descriptor = arm_ldq_ptw(env, ptw, fi);
+ if (fi->type != ARMFault_None) {
+ goto do_fault;
+ }
+ new_descriptor = descriptor;
+
+ restart_atomic_update:
+ if (!(descriptor & 1) || (!(descriptor & 2) && (level == 3))) {
+ /* Invalid, or the Reserved level 3 encoding */
+ goto do_translation_fault;
+ }
+
+ descaddr = descriptor & descaddrmask;
+
+ /*
+ * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [15:12]
+ * of descriptor. For FEAT_LPA2 and effective DS, bits [51:50] of
+ * descaddr are in [9:8]. Otherwise, if descaddr is out of range,
+ * raise AddressSizeFault.
+ */
+ if (outputsize > 48) {
+ if (param.ds) {
+ descaddr |= extract64(descriptor, 8, 2) << 50;
+ } else {
+ descaddr |= extract64(descriptor, 12, 4) << 48;
}
+ } else if (descaddr >> outputsize) {
+ fi->type = ARMFault_AddressSize;
+ goto do_fault;
+ }
+
+ if ((descriptor & 2) && (level < 3)) {
+ /*
+ * Table entry. The top five bits are attributes which may
+ * propagate down through lower levels of the table (and
+ * which are all arranged so that 0 means "no effect", so
+ * we can gather them up by ORing in the bits at each level).
+ */
+ tableattrs |= extract64(descriptor, 59, 5);
+ level++;
+ indexmask = indexmask_grainsize;
+ goto next_level;
+ }
- descaddr = descriptor & descaddrmask;
+ /*
+ * Block entry at level 1 or 2, or page entry at level 3.
+ * These are basically the same thing, although the number
+ * of bits we pull in from the vaddr varies. Note that although
+ * descaddrmask masks enough of the low bits of the descriptor
+ * to give a correct page or table address, the address field
+ * in a block descriptor is smaller; so we need to explicitly
+ * clear the lower bits here before ORing in the low vaddr bits.
+ *
+ * Afterward, descaddr is the final physical address.
+ */
+ page_size = (1ULL << ((stride * (4 - level)) + 3));
+ descaddr &= ~(hwaddr)(page_size - 1);
+ descaddr |= (address & (page_size - 1));
+ if (likely(!ptw->in_debug)) {
/*
- * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [15:12]
- * of descriptor. For FEAT_LPA2 and effective DS, bits [51:50] of
- * descaddr are in [9:8]. Otherwise, if descaddr is out of range,
- * raise AddressSizeFault.
+ * Access flag.
+ * If HA is enabled, prepare to update the descriptor below.
+ * Otherwise, pass the access fault on to software.
*/
- if (outputsize > 48) {
- if (param.ds) {
- descaddr |= extract64(descriptor, 8, 2) << 50;
+ if (!(descriptor & (1 << 10))) {
+ if (param.ha) {
+ new_descriptor |= 1 << 10; /* AF */
} else {
- descaddr |= extract64(descriptor, 12, 4) << 48;
+ fi->type = ARMFault_AccessFlag;
+ goto do_fault;
}
- } else if (descaddr >> outputsize) {
- fault_type = ARMFault_AddressSize;
- goto do_fault;
}
- if ((descriptor & 2) && (level < 3)) {
- /*
- * Table entry. The top five bits are attributes which may
- * propagate down through lower levels of the table (and
- * which are all arranged so that 0 means "no effect", so
- * we can gather them up by ORing in the bits at each level).
- */
- tableattrs |= extract64(descriptor, 59, 5);
- level++;
- indexmask = indexmask_grainsize;
- continue;
- }
/*
- * Block entry at level 1 or 2, or page entry at level 3.
- * These are basically the same thing, although the number
- * of bits we pull in from the vaddr varies. Note that although
- * descaddrmask masks enough of the low bits of the descriptor
- * to give a correct page or table address, the address field
- * in a block descriptor is smaller; so we need to explicitly
- * clear the lower bits here before ORing in the low vaddr bits.
+ * Dirty Bit.
+ * If HD is enabled, pre-emptively set/clear the appropriate AP/S2AP
+ * bit for writeback. The actual write protection test may still be
+ * overridden by tableattrs, to be merged below.
*/
- page_size = (1ULL << ((stride * (4 - level)) + 3));
- descaddr &= ~(hwaddr)(page_size - 1);
- descaddr |= (address & (page_size - 1));
- /* Extract attributes from the descriptor */
- attrs = extract64(descriptor, 2, 10)
- | (extract64(descriptor, 52, 12) << 10);
-
- if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) {
- /* Stage 2 table descriptors do not include any attribute fields */
- break;
- }
- /* Merge in attributes from table descriptors */
- attrs |= nstable << 3; /* NS */
- guarded = extract64(descriptor, 50, 1); /* GP */
- if (param.hpd) {
- /* HPD disables all the table attributes except NSTable. */
- break;
+ if (param.hd
+ && extract64(descriptor, 51, 1) /* DBM */
+ && access_type == MMU_DATA_STORE) {
+ if (regime_is_stage2(mmu_idx)) {
+ new_descriptor |= 1ull << 7; /* set S2AP[1] */
+ } else {
+ new_descriptor &= ~(1ull << 7); /* clear AP[2] */
+ }
}
- attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */
- /*
- * The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1
- * means "force PL1 access only", which means forcing AP[1] to 0.
- */
- attrs &= ~(extract32(tableattrs, 2, 1) << 4); /* !APT[0] => AP[1] */
- attrs |= extract32(tableattrs, 3, 1) << 5; /* APT[1] => AP[2] */
- break;
}
+
/*
- * Here descaddr is the final physical address, and attributes
- * are all in attrs.
+ * Extract attributes from the (modified) descriptor, and apply
+ * table descriptors. Stage 2 table descriptors do not include
+ * any attribute fields. HPD disables all the table attributes
+ * except NSTable.
*/
- fault_type = ARMFault_AccessFlag;
- if ((attrs & (1 << 8)) == 0) {
- /* Access flag */
- goto do_fault;
+ attrs = new_descriptor & (MAKE_64BIT_MASK(2, 10) | MAKE_64BIT_MASK(50, 14));
+ if (!regime_is_stage2(mmu_idx)) {
+ attrs |= nstable << 5; /* NS */
+ if (!param.hpd) {
+ attrs |= extract64(tableattrs, 0, 2) << 53; /* XN, PXN */
+ /*
+ * The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1
+ * means "force PL1 access only", which means forcing AP[1] to 0.
+ */
+ attrs &= ~(extract64(tableattrs, 2, 1) << 6); /* !APT[0] => AP[1] */
+ attrs |= extract32(tableattrs, 3, 1) << 7; /* APT[1] => AP[2] */
+ }
}
- ap = extract32(attrs, 4, 2);
-
- if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) {
+ ap = extract32(attrs, 6, 2);
+ if (regime_is_stage2(mmu_idx)) {
ns = mmu_idx == ARMMMUIdx_Stage2;
- xn = extract32(attrs, 11, 2);
+ xn = extract64(attrs, 53, 2);
result->f.prot = get_S2prot(env, ap, xn, s1_is_el0);
} else {
- ns = extract32(attrs, 3, 1);
- xn = extract32(attrs, 12, 1);
- pxn = extract32(attrs, 11, 1);
+ ns = extract32(attrs, 5, 1);
+ xn = extract64(attrs, 54, 1);
+ pxn = extract64(attrs, 53, 1);
result->f.prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn);
}
- fault_type = ARMFault_Permission;
if (!(result->f.prot & (1 << access_type))) {
+ fi->type = ARMFault_Permission;
goto do_fault;
}
+ /* If FEAT_HAFDBS has made changes, update the PTE. */
+ if (new_descriptor != descriptor) {
+ new_descriptor = arm_casq_ptw(env, descriptor, new_descriptor, ptw, fi);
+ if (fi->type != ARMFault_None) {
+ goto do_fault;
+ }
+ /*
+ * I_YZSVV says that if the in-memory descriptor has changed,
+ * then we must use the information in that new value
+ * (which might include a different output address, different
+ * attributes, or generate a fault).
+ * Restart the handling of the descriptor value from scratch.
+ */
+ if (new_descriptor != descriptor) {
+ descriptor = new_descriptor;
+ goto restart_atomic_update;
+ }
+ }
+
if (ns) {
/*
* The NS bit will (as required by the architecture) have no effect if
@@ -1401,15 +1530,15 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
/* When in aarch64 mode, and BTI is enabled, remember GP in the TLB. */
if (aarch64 && cpu_isar_feature(aa64_bti, cpu)) {
- result->f.guarded = guarded;
+ result->f.guarded = extract64(attrs, 50, 1); /* GP */
}
- if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) {
+ if (regime_is_stage2(mmu_idx)) {
result->cacheattrs.is_s2_format = true;
- result->cacheattrs.attrs = extract32(attrs, 0, 4);
+ result->cacheattrs.attrs = extract32(attrs, 2, 4);
} else {
/* Index into MAIR registers for cache attributes */
- uint8_t attrindx = extract32(attrs, 0, 3);
+ uint8_t attrindx = extract32(attrs, 2, 3);
uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)];
assert(attrindx <= 7);
result->cacheattrs.is_s2_format = false;
@@ -1424,19 +1553,19 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
if (param.ds) {
result->cacheattrs.shareability = param.sh;
} else {
- result->cacheattrs.shareability = extract32(attrs, 6, 2);
+ result->cacheattrs.shareability = extract32(attrs, 8, 2);
}
result->f.phys_addr = descaddr;
result->f.lg_page_size = ctz64(page_size);
return false;
-do_fault:
- fi->type = fault_type;
+ do_translation_fault:
+ fi->type = ARMFault_Translation;
+ do_fault:
fi->level = level;
/* Tag the error as S2 for failed S1 PTW at S2 or ordinary S2. */
- fi->stage2 = fi->s1ptw || (mmu_idx == ARMMMUIdx_Stage2 ||
- mmu_idx == ARMMMUIdx_Stage2_S);
+ fi->stage2 = fi->s1ptw || regime_is_stage2(mmu_idx);
fi->s1ns = mmu_idx == ARMMMUIdx_Stage2;
return true;
}
@@ -2442,7 +2571,7 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw,
ARMMMUFaultInfo *fi)
{
hwaddr ipa;
- int s1_prot;
+ int s1_prot, s1_lgpgsz;
bool is_secure = ptw->in_secure;
bool ret, ipa_secure, s2walk_secure;
ARMCacheAttrs cacheattrs1;
@@ -2470,6 +2599,7 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw,
is_el0 = ptw->in_mmu_idx == ARMMMUIdx_Stage1_E0;
ptw->in_mmu_idx = s2walk_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2;
+ ptw->in_ptw_idx = s2walk_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS;
ptw->in_secure = s2walk_secure;
/*
@@ -2477,6 +2607,7 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw,
* Save the stage1 results so that we may merge prot and cacheattrs later.
*/
s1_prot = result->f.prot;
+ s1_lgpgsz = result->f.lg_page_size;
cacheattrs1 = result->cacheattrs;
memset(result, 0, sizeof(*result));
@@ -2491,6 +2622,14 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw,
return ret;
}
+ /*
+ * Use the maximum of the S1 & S2 page size, so that invalidation
+ * of pages > TARGET_PAGE_SIZE works correctly.
+ */
+ if (result->f.lg_page_size < s1_lgpgsz) {
+ result->f.lg_page_size = s1_lgpgsz;
+ }
+
/* Combine the S1 and S2 cache attributes. */
hcr = arm_hcr_el2_eff_secstate(env, is_secure);
if (hcr & HCR_DC) {
@@ -2529,10 +2668,32 @@ static bool get_phys_addr_with_struct(CPUARMState *env, S1Translate *ptw,
ARMMMUFaultInfo *fi)
{
ARMMMUIdx mmu_idx = ptw->in_mmu_idx;
- ARMMMUIdx s1_mmu_idx = stage_1_mmu_idx(mmu_idx);
bool is_secure = ptw->in_secure;
+ ARMMMUIdx s1_mmu_idx;
+
+ switch (mmu_idx) {
+ case ARMMMUIdx_Phys_S:
+ case ARMMMUIdx_Phys_NS:
+ /* Checking Phys early avoids special casing later vs regime_el. */
+ return get_phys_addr_disabled(env, address, access_type, mmu_idx,
+ is_secure, result, fi);
+
+ case ARMMMUIdx_Stage1_E0:
+ case ARMMMUIdx_Stage1_E1:
+ case ARMMMUIdx_Stage1_E1_PAN:
+ /* First stage lookup uses second stage for ptw. */
+ ptw->in_ptw_idx = is_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2;
+ break;
- if (mmu_idx != s1_mmu_idx) {
+ case ARMMMUIdx_E10_0:
+ s1_mmu_idx = ARMMMUIdx_Stage1_E0;
+ goto do_twostage;
+ case ARMMMUIdx_E10_1:
+ s1_mmu_idx = ARMMMUIdx_Stage1_E1;
+ goto do_twostage;
+ case ARMMMUIdx_E10_1_PAN:
+ s1_mmu_idx = ARMMMUIdx_Stage1_E1_PAN;
+ do_twostage:
/*
* Call ourselves recursively to do the stage 1 and then stage 2
* translations if mmu_idx is a two-stage regime, and EL2 present.
@@ -2543,6 +2704,12 @@ static bool get_phys_addr_with_struct(CPUARMState *env, S1Translate *ptw,
return get_phys_addr_twostage(env, ptw, address, access_type,
result, fi);
}
+ /* fall through */
+
+ default:
+ /* Single stage and second stage uses physical for ptw. */
+ ptw->in_ptw_idx = is_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS;
+ break;
}
/*
diff --git a/target/arm/translate.c b/target/arm/translate.c
index d1b868430e..74a903072f 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -9939,25 +9939,3 @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
}
-
-void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
- target_ulong *data)
-{
- if (is_a64(env)) {
- if (TARGET_TB_PCREL) {
- env->pc = (env->pc & TARGET_PAGE_MASK) | data[0];
- } else {
- env->pc = data[0];
- }
- env->condexec_bits = 0;
- env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
- } else {
- if (TARGET_TB_PCREL) {
- env->regs[15] = (env->regs[15] & TARGET_PAGE_MASK) | data[0];
- } else {
- env->regs[15] = data[0];
- }
- env->condexec_bits = data[1];
- env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
- }
-}
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
index 0d2861179d..c7295b488d 100644
--- a/target/avr/cpu.c
+++ b/target/avr/cpu.c
@@ -57,6 +57,16 @@ static void avr_cpu_synchronize_from_tb(CPUState *cs,
env->pc_w = tb_pc(tb) / 2; /* internally PC points to words */
}
+static void avr_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+ AVRCPU *cpu = AVR_CPU(cs);
+ CPUAVRState *env = &cpu->env;
+
+ env->pc_w = data[0];
+}
+
static void avr_cpu_reset(DeviceState *ds)
{
CPUState *cs = CPU(ds);
@@ -202,6 +212,7 @@ static const struct SysemuCPUOps avr_sysemu_ops = {
static const struct TCGCPUOps avr_tcg_ops = {
.initialize = avr_cpu_tcg_init,
.synchronize_from_tb = avr_cpu_synchronize_from_tb,
+ .restore_state_to_opc = avr_restore_state_to_opc,
.cpu_exec_interrupt = avr_cpu_exec_interrupt,
.tlb_fill = avr_cpu_tlb_fill,
.do_interrupt = avr_cpu_do_interrupt,
diff --git a/target/avr/translate.c b/target/avr/translate.c
index e65b6008c0..2bed56f135 100644
--- a/target/avr/translate.c
+++ b/target/avr/translate.c
@@ -3055,9 +3055,3 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
DisasContext dc = { };
translator_loop(cs, tb, max_insns, pc, host_pc, &avr_tr_ops, &dc.base);
}
-
-void restore_state_to_opc(CPUAVRState *env, TranslationBlock *tb,
- target_ulong *data)
-{
- env->pc_w = data[0];
-}
diff --git a/target/cris/cpu.c b/target/cris/cpu.c
index 22f5c70f39..fb05dc6f9a 100644
--- a/target/cris/cpu.c
+++ b/target/cris/cpu.c
@@ -42,6 +42,15 @@ static vaddr cris_cpu_get_pc(CPUState *cs)
return cpu->env.pc;
}
+static void cris_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+ CRISCPU *cpu = CRIS_CPU(cs);
+
+ cpu->env.pc = data[0];
+}
+
static bool cris_cpu_has_work(CPUState *cs)
{
return cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI);
@@ -212,6 +221,7 @@ static const struct SysemuCPUOps cris_sysemu_ops = {
static const struct TCGCPUOps crisv10_tcg_ops = {
.initialize = cris_initialize_crisv10_tcg,
+ .restore_state_to_opc = cris_restore_state_to_opc,
#ifndef CONFIG_USER_ONLY
.tlb_fill = cris_cpu_tlb_fill,
@@ -222,6 +232,7 @@ static const struct TCGCPUOps crisv10_tcg_ops = {
static const struct TCGCPUOps crisv32_tcg_ops = {
.initialize = cris_initialize_tcg,
+ .restore_state_to_opc = cris_restore_state_to_opc,
#ifndef CONFIG_USER_ONLY
.tlb_fill = cris_cpu_tlb_fill,
diff --git a/target/cris/translate.c b/target/cris/translate.c
index 73385b0b3c..fbc3fd5865 100644
--- a/target/cris/translate.c
+++ b/target/cris/translate.c
@@ -3392,9 +3392,3 @@ void cris_initialize_tcg(void)
pregnames_v32[i]);
}
}
-
-void restore_state_to_opc(CPUCRISState *env, TranslationBlock *tb,
- target_ulong *data)
-{
- env->pc = data[0];
-}
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
index fa6d722555..03221fbdc2 100644
--- a/target/hexagon/cpu.c
+++ b/target/hexagon/cpu.c
@@ -271,9 +271,13 @@ static bool hexagon_cpu_has_work(CPUState *cs)
return true;
}
-void restore_state_to_opc(CPUHexagonState *env, TranslationBlock *tb,
- target_ulong *data)
+static void hexagon_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
{
+ HexagonCPU *cpu = HEXAGON_CPU(cs);
+ CPUHexagonState *env = &cpu->env;
+
env->gpr[HEX_REG_PC] = data[0];
}
@@ -327,6 +331,7 @@ static void hexagon_cpu_init(Object *obj)
static const struct TCGCPUOps hexagon_tcg_ops = {
.initialize = hexagon_translate_init,
.synchronize_from_tb = hexagon_cpu_synchronize_from_tb,
+ .restore_state_to_opc = hexagon_restore_state_to_opc,
};
static void hexagon_cpu_class_init(ObjectClass *c, void *data)
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
index e677ca09d4..55c190280e 100644
--- a/target/hppa/cpu.c
+++ b/target/hppa/cpu.c
@@ -68,6 +68,24 @@ static void hppa_cpu_synchronize_from_tb(CPUState *cs,
cpu->env.psw_n = (tb->flags & PSW_N) != 0;
}
+static void hppa_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+ HPPACPU *cpu = HPPA_CPU(cs);
+
+ cpu->env.iaoq_f = data[0];
+ if (data[1] != (target_ureg)-1) {
+ cpu->env.iaoq_b = data[1];
+ }
+ /*
+ * Since we were executing the instruction at IAOQ_F, and took some
+ * sort of action that provoked the cpu_restore_state, we can infer
+ * that the instruction was not nullified.
+ */
+ cpu->env.psw_n = 0;
+}
+
static bool hppa_cpu_has_work(CPUState *cs)
{
return cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI);
@@ -153,6 +171,7 @@ static const struct SysemuCPUOps hppa_sysemu_ops = {
static const struct TCGCPUOps hppa_tcg_ops = {
.initialize = hppa_translate_init,
.synchronize_from_tb = hppa_cpu_synchronize_from_tb,
+ .restore_state_to_opc = hppa_restore_state_to_opc,
#ifndef CONFIG_USER_ONLY
.tlb_fill = hppa_cpu_tlb_fill,
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index 8b861957e0..1af77473da 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -4346,16 +4346,3 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
DisasContext ctx;
translator_loop(cs, tb, max_insns, pc, host_pc, &hppa_tr_ops, &ctx.base);
}
-
-void restore_state_to_opc(CPUHPPAState *env, TranslationBlock *tb,
- target_ulong *data)
-{
- env->iaoq_f = data[0];
- if (data[1] != (target_ureg)-1) {
- env->iaoq_b = data[1];
- }
- /* Since we were executing the instruction at IAOQ_F, and took some
- sort of action that provoked the cpu_restore_state, we can infer
- that the instruction was not nullified. */
- env->psw_n = 0;
-}
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index dac100c67c..4df0428089 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -2262,8 +2262,7 @@ static int kvm_get_supported_feature_msrs(KVMState *s)
}
assert(msr_list.nmsrs > 0);
- kvm_feature_msrs = (struct kvm_msr_list *) \
- g_malloc0(sizeof(msr_list) +
+ kvm_feature_msrs = g_malloc0(sizeof(msr_list) +
msr_list.nmsrs * sizeof(msr_list.indices[0]));
kvm_feature_msrs->nmsrs = msr_list.nmsrs;
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
index 828244abe2..79ac5908f7 100644
--- a/target/i386/tcg/tcg-cpu.c
+++ b/target/i386/tcg/tcg-cpu.c
@@ -56,6 +56,24 @@ static void x86_cpu_synchronize_from_tb(CPUState *cs,
}
}
+static void x86_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+ X86CPU *cpu = X86_CPU(cs);
+ CPUX86State *env = &cpu->env;
+ int cc_op = data[1];
+
+ if (TARGET_TB_PCREL) {
+ env->eip = (env->eip & TARGET_PAGE_MASK) | data[0];
+ } else {
+ env->eip = data[0] - tb->cs_base;
+ }
+ if (cc_op != CC_OP_DYNAMIC) {
+ env->cc_op = cc_op;
+ }
+}
+
#ifndef CONFIG_USER_ONLY
static bool x86_debug_check_breakpoint(CPUState *cs)
{
@@ -72,6 +90,7 @@ static bool x86_debug_check_breakpoint(CPUState *cs)
static const struct TCGCPUOps x86_tcg_ops = {
.initialize = tcg_x86_init,
.synchronize_from_tb = x86_cpu_synchronize_from_tb,
+ .restore_state_to_opc = x86_restore_state_to_opc,
.cpu_exec_enter = x86_cpu_exec_enter,
.cpu_exec_exit = x86_cpu_exec_exit,
#ifdef CONFIG_USER_ONLY
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 85be2e58c2..546c427c23 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -7023,18 +7023,3 @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
translator_loop(cpu, tb, max_insns, pc, host_pc, &i386_tr_ops, &dc.base);
}
-
-void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
- target_ulong *data)
-{
- int cc_op = data[1];
-
- if (TARGET_TB_PCREL) {
- env->eip = (env->eip & TARGET_PAGE_MASK) | data[0];
- } else {
- env->eip = data[0] - tb->cs_base;
- }
- if (cc_op != CC_OP_DYNAMIC) {
- env->cc_op = cc_op;
- }
-}
diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index 8e4969edeb..e738d83e81 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -1164,9 +1164,8 @@ static void whpx_translate_cpu_breakpoints(
(breakpoints->breakpoints ? breakpoints->breakpoints->used : 0);
struct whpx_breakpoint_collection *new_breakpoints =
- (struct whpx_breakpoint_collection *)g_malloc0(
- sizeof(struct whpx_breakpoint_collection) +
- max_breakpoints * sizeof(struct whpx_breakpoint));
+ g_malloc0(sizeof(struct whpx_breakpoint_collection)
+ + max_breakpoints * sizeof(struct whpx_breakpoint));
new_breakpoints->allocated = max_breakpoints;
new_breakpoints->used = 0;
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 1722ed2a4d..49393d95d8 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -319,6 +319,16 @@ static void loongarch_cpu_synchronize_from_tb(CPUState *cs,
env->pc = tb_pc(tb);
}
+
+static void loongarch_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+ LoongArchCPU *cpu = LOONGARCH_CPU(cs);
+ CPULoongArchState *env = &cpu->env;
+
+ env->pc = data[0];
+}
#endif /* CONFIG_TCG */
static bool loongarch_cpu_has_work(CPUState *cs)
@@ -651,6 +661,7 @@ void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags)
static struct TCGCPUOps loongarch_tcg_ops = {
.initialize = loongarch_translate_init,
.synchronize_from_tb = loongarch_cpu_synchronize_from_tb,
+ .restore_state_to_opc = loongarch_restore_state_to_opc,
#ifndef CONFIG_USER_ONLY
.tlb_fill = loongarch_cpu_tlb_fill,
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
index 95b37ea180..6091772349 100644
--- a/target/loongarch/translate.c
+++ b/target/loongarch/translate.c
@@ -272,9 +272,3 @@ void loongarch_translate_init(void)
cpu_llval = tcg_global_mem_new(cpu_env,
offsetof(CPULoongArchState, llval), "llval");
}
-
-void restore_state_to_opc(CPULoongArchState *env, TranslationBlock *tb,
- target_ulong *data)
-{
- env->pc = data[0];
-}
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
index 1e902e1ef0..b67ddea2ae 100644
--- a/target/m68k/cpu.c
+++ b/target/m68k/cpu.c
@@ -38,6 +38,19 @@ static vaddr m68k_cpu_get_pc(CPUState *cs)
return cpu->env.pc;
}
+static void m68k_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+ M68kCPU *cpu = M68K_CPU(cs);
+ int cc_op = data[1];
+
+ cpu->env.pc = data[0];
+ if (cc_op != CC_OP_DYNAMIC) {
+ cpu->env.cc_op = cc_op;
+ }
+}
+
static bool m68k_cpu_has_work(CPUState *cs)
{
return cs->interrupt_request & CPU_INTERRUPT_HARD;
@@ -524,6 +537,7 @@ static const struct SysemuCPUOps m68k_sysemu_ops = {
static const struct TCGCPUOps m68k_tcg_ops = {
.initialize = m68k_tcg_init,
+ .restore_state_to_opc = m68k_restore_state_to_opc,
#ifndef CONFIG_USER_ONLY
.tlb_fill = m68k_cpu_tlb_fill,
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index 9df17aa4b2..5cbde4be34 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -6479,13 +6479,3 @@ void m68k_cpu_dump_state(CPUState *cs, FILE *f, int flags)
env->mmu.mmusr, env->mmu.ar);
#endif
}
-
-void restore_state_to_opc(CPUM68KState *env, TranslationBlock *tb,
- target_ulong *data)
-{
- int cc_op = data[1];
- env->pc = data[0];
- if (cc_op != CC_OP_DYNAMIC) {
- env->cc_op = cc_op;
- }
-}
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
index c10b8ac029..89e493f3ff 100644
--- a/target/microblaze/cpu.c
+++ b/target/microblaze/cpu.c
@@ -100,6 +100,16 @@ static void mb_cpu_synchronize_from_tb(CPUState *cs,
cpu->env.iflags = tb->flags & IFLAGS_TB_MASK;
}
+static void mb_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+ MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
+
+ cpu->env.pc = data[0];
+ cpu->env.iflags = data[1];
+}
+
static bool mb_cpu_has_work(CPUState *cs)
{
return cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI);
@@ -373,6 +383,7 @@ static const struct SysemuCPUOps mb_sysemu_ops = {
static const struct TCGCPUOps mb_tcg_ops = {
.initialize = mb_tcg_init,
.synchronize_from_tb = mb_cpu_synchronize_from_tb,
+ .restore_state_to_opc = mb_restore_state_to_opc,
#ifndef CONFIG_USER_ONLY
.tlb_fill = mb_cpu_tlb_fill,
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
index c5546f93aa..974f21eb31 100644
--- a/target/microblaze/translate.c
+++ b/target/microblaze/translate.c
@@ -1946,10 +1946,3 @@ void mb_tcg_init(void)
cpu_res_addr =
tcg_global_mem_new(cpu_env, offsetof(CPUMBState, res_addr), "res_addr");
}
-
-void restore_state_to_opc(CPUMBState *env, TranslationBlock *tb,
- target_ulong *data)
-{
- env->pc = data[0];
- env->iflags = data[1];
-}
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index da58eb8892..e997c1b9cb 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -538,6 +538,7 @@ static const struct SysemuCPUOps mips_sysemu_ops = {
static const struct TCGCPUOps mips_tcg_ops = {
.initialize = mips_tcg_init,
.synchronize_from_tb = mips_cpu_synchronize_from_tb,
+ .restore_state_to_opc = mips_restore_state_to_opc,
#if !defined(CONFIG_USER_ONLY)
.tlb_fill = mips_cpu_tlb_fill,
diff --git a/target/mips/tcg/tcg-internal.h b/target/mips/tcg/tcg-internal.h
index 1d27fa2ff9..aef032c48d 100644
--- a/target/mips/tcg/tcg-internal.h
+++ b/target/mips/tcg/tcg-internal.h
@@ -21,6 +21,9 @@ void mips_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
G_NORETURN void mips_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
MMUAccessType access_type, int mmu_idx,
uintptr_t retaddr);
+void mips_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data);
const char *mips_exception_name(int32_t exception);
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
index c3f92ea652..2f2d707a12 100644
--- a/target/mips/tcg/translate.c
+++ b/target/mips/tcg/translate.c
@@ -16229,9 +16229,13 @@ void mips_tcg_init(void)
}
}
-void restore_state_to_opc(CPUMIPSState *env, TranslationBlock *tb,
- target_ulong *data)
+void mips_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
{
+ MIPSCPU *cpu = MIPS_CPU(cs);
+ CPUMIPSState *env = &cpu->env;
+
env->active_tc.PC = data[0];
env->hflags &= ~MIPS_HFLAG_BMASK;
env->hflags |= data[1];
diff --git a/target/nios2/cpu.c b/target/nios2/cpu.c
index 2b28429c08..9a5351bc81 100644
--- a/target/nios2/cpu.c
+++ b/target/nios2/cpu.c
@@ -42,6 +42,16 @@ static vaddr nios2_cpu_get_pc(CPUState *cs)
return env->pc;
}
+static void nios2_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+ Nios2CPU *cpu = NIOS2_CPU(cs);
+ CPUNios2State *env = &cpu->env;
+
+ env->pc = data[0];
+}
+
static bool nios2_cpu_has_work(CPUState *cs)
{
return cs->interrupt_request & CPU_INTERRUPT_HARD;
@@ -346,6 +356,7 @@ static const struct SysemuCPUOps nios2_sysemu_ops = {
static const struct TCGCPUOps nios2_tcg_ops = {
.initialize = nios2_tcg_init,
+ .restore_state_to_opc = nios2_restore_state_to_opc,
#ifndef CONFIG_USER_ONLY
.tlb_fill = nios2_cpu_tlb_fill,
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
index 8dc0a32c6c..4db8b47744 100644
--- a/target/nios2/translate.c
+++ b/target/nios2/translate.c
@@ -1110,9 +1110,3 @@ void nios2_tcg_init(void)
cpu_pc = tcg_global_mem_new(cpu_env,
offsetof(CPUNios2State, pc), "pc");
}
-
-void restore_state_to_opc(CPUNios2State *env, TranslationBlock *tb,
- target_ulong *data)
-{
- env->pc = data[0];
-}
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index f6fd437785..de0176cd20 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -46,6 +46,18 @@ static void openrisc_cpu_synchronize_from_tb(CPUState *cs,
cpu->env.pc = tb_pc(tb);
}
+static void openrisc_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+ OpenRISCCPU *cpu = OPENRISC_CPU(cs);
+
+ cpu->env.pc = data[0];
+ cpu->env.dflag = data[1] & 1;
+ if (data[1] & 2) {
+ cpu->env.ppc = cpu->env.pc - 4;
+ }
+}
static bool openrisc_cpu_has_work(CPUState *cs)
{
@@ -203,6 +215,7 @@ static const struct SysemuCPUOps openrisc_sysemu_ops = {
static const struct TCGCPUOps openrisc_tcg_ops = {
.initialize = openrisc_translate_init,
.synchronize_from_tb = openrisc_cpu_synchronize_from_tb,
+ .restore_state_to_opc = openrisc_restore_state_to_opc,
#ifndef CONFIG_USER_ONLY
.tlb_fill = openrisc_cpu_tlb_fill,
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
index 8154f9d744..2f3d7c5fd1 100644
--- a/target/openrisc/translate.c
+++ b/target/openrisc/translate.c
@@ -1726,13 +1726,3 @@ void openrisc_cpu_dump_state(CPUState *cs, FILE *f, int flags)
(i % 4) == 3 ? '\n' : ' ');
}
}
-
-void restore_state_to_opc(CPUOpenRISCState *env, TranslationBlock *tb,
- target_ulong *data)
-{
- env->pc = data[0];
- env->dflag = data[1] & 1;
- if (data[1] & 2) {
- env->ppc = env->pc - 4;
- }
-}
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 763a8431be..335351c226 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -7221,6 +7221,15 @@ static vaddr ppc_cpu_get_pc(CPUState *cs)
return cpu->env.nip;
}
+static void ppc_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+ cpu->env.nip = data[0];
+}
+
static bool ppc_cpu_has_work(CPUState *cs)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -7446,6 +7455,7 @@ static const struct SysemuCPUOps ppc_sysemu_ops = {
static const struct TCGCPUOps ppc_tcg_ops = {
.initialize = ppc_translate_init,
+ .restore_state_to_opc = ppc_restore_state_to_opc,
#ifdef CONFIG_USER_ONLY
.record_sigsegv = ppc_cpu_record_sigsegv,
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index e810842925..7228857e23 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -7739,9 +7739,3 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
translator_loop(cs, tb, max_insns, pc, host_pc, &ppc_tr_ops, &ctx.base);
}
-
-void restore_state_to_opc(CPUPPCState *env, TranslationBlock *tb,
- target_ulong *data)
-{
- env->nip = data[0];
-}
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index e6d9c706bb..d14e95c9dc 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -503,10 +503,14 @@ static bool riscv_cpu_has_work(CPUState *cs)
#endif
}
-void restore_state_to_opc(CPURISCVState *env, TranslationBlock *tb,
- target_ulong *data)
+static void riscv_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
{
+ RISCVCPU *cpu = RISCV_CPU(cs);
+ CPURISCVState *env = &cpu->env;
RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
+
if (xl == MXL_RV32) {
env->pc = (int32_t)data[0];
} else {
@@ -1138,6 +1142,7 @@ static const struct SysemuCPUOps riscv_sysemu_ops = {
static const struct TCGCPUOps riscv_tcg_ops = {
.initialize = riscv_translate_init,
.synchronize_from_tb = riscv_cpu_synchronize_from_tb,
+ .restore_state_to_opc = riscv_restore_state_to_opc,
#ifndef CONFIG_USER_ONLY
.tlb_fill = riscv_cpu_tlb_fill,
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index b94f809eb3..0020b9a95d 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -211,7 +211,7 @@ static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
return;
}
if (tot - cnt == 0) {
- return ;
+ return;
}
memset(base + cnt, -1, tot - cnt);
}
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
index 2f28099723..9003c6e9fe 100644
--- a/target/rx/cpu.c
+++ b/target/rx/cpu.c
@@ -47,6 +47,15 @@ static void rx_cpu_synchronize_from_tb(CPUState *cs,
cpu->env.pc = tb_pc(tb);
}
+static void rx_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+ RXCPU *cpu = RX_CPU(cs);
+
+ cpu->env.pc = data[0];
+}
+
static bool rx_cpu_has_work(CPUState *cs)
{
return cs->interrupt_request &
@@ -192,6 +201,7 @@ static const struct SysemuCPUOps rx_sysemu_ops = {
static const struct TCGCPUOps rx_tcg_ops = {
.initialize = rx_translate_init,
.synchronize_from_tb = rx_cpu_synchronize_from_tb,
+ .restore_state_to_opc = rx_restore_state_to_opc,
.tlb_fill = rx_cpu_tlb_fill,
#ifndef CONFIG_USER_ONLY
diff --git a/target/rx/op_helper.c b/target/rx/op_helper.c
index 9ca32dcc82..acce650185 100644
--- a/target/rx/op_helper.c
+++ b/target/rx/op_helper.c
@@ -286,7 +286,7 @@ void helper_suntil(CPURXState *env, uint32_t sz)
uint32_t tmp;
tcg_debug_assert(sz < 3);
if (env->regs[3] == 0) {
- return ;
+ return;
}
do {
tmp = cpu_ldufn[sz](env, env->regs[1], GETPC());
@@ -305,7 +305,7 @@ void helper_swhile(CPURXState *env, uint32_t sz)
uint32_t tmp;
tcg_debug_assert(sz < 3);
if (env->regs[3] == 0) {
- return ;
+ return;
}
do {
tmp = cpu_ldufn[sz](env, env->regs[1], GETPC());
diff --git a/target/rx/translate.c b/target/rx/translate.c
index ea5653bc95..87a3f54adb 100644
--- a/target/rx/translate.c
+++ b/target/rx/translate.c
@@ -2371,12 +2371,6 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
translator_loop(cs, tb, max_insns, pc, host_pc, &rx_tr_ops, &dc.base);
}
-void restore_state_to_opc(CPURXState *env, TranslationBlock *tb,
- target_ulong *data)
-{
- env->pc = data[0];
-}
-
#define ALLOC_REGISTER(sym, name) \
cpu_##sym = tcg_global_mem_new_i32(cpu_env, \
offsetof(CPURXState, sym), name)
diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c
index f60a14920d..a2329141e8 100644
--- a/target/s390x/arch_dump.c
+++ b/target/s390x/arch_dump.c
@@ -12,11 +12,13 @@
*/
#include "qemu/osdep.h"
+#include "qemu/units.h"
#include "cpu.h"
#include "s390x-internal.h"
#include "elf.h"
#include "sysemu/dump.h"
-
+#include "hw/s390x/pv.h"
+#include "kvm/kvm_s390x.h"
struct S390xUserRegsStruct {
uint64_t psw[2];
@@ -76,9 +78,16 @@ typedef struct noteStruct {
uint64_t todcmp;
uint32_t todpreg;
uint64_t ctrs[16];
+ uint8_t dynamic[1]; /*
+ * Would be a flexible array member, if
+ * that was legal inside a union. Real
+ * size comes from PV info interface.
+ */
} contents;
} QEMU_PACKED Note;
+static bool pv_dump_initialized;
+
static void s390x_write_elf64_prstatus(Note *note, S390CPU *cpu, int id)
{
int i;
@@ -177,28 +186,39 @@ static void s390x_write_elf64_prefix(Note *note, S390CPU *cpu, int id)
note->contents.prefix = cpu_to_be32((uint32_t)(cpu->env.psa));
}
+static void s390x_write_elf64_pv(Note *note, S390CPU *cpu, int id)
+{
+ note->hdr.n_type = cpu_to_be32(NT_S390_PV_CPU_DATA);
+ if (!pv_dump_initialized) {
+ return;
+ }
+ kvm_s390_dump_cpu(cpu, &note->contents.dynamic);
+}
typedef struct NoteFuncDescStruct {
int contents_size;
+ uint64_t (*note_size_func)(void); /* NULL for non-dynamic sized contents */
void (*note_contents_func)(Note *note, S390CPU *cpu, int id);
+ bool pvonly;
} NoteFuncDesc;
static const NoteFuncDesc note_core[] = {
- {sizeof_field(Note, contents.prstatus), s390x_write_elf64_prstatus},
- {sizeof_field(Note, contents.fpregset), s390x_write_elf64_fpregset},
- { 0, NULL}
+ {sizeof_field(Note, contents.prstatus), NULL, s390x_write_elf64_prstatus, false},
+ {sizeof_field(Note, contents.fpregset), NULL, s390x_write_elf64_fpregset, false},
+ { 0, NULL, NULL, false}
};
static const NoteFuncDesc note_linux[] = {
- {sizeof_field(Note, contents.prefix), s390x_write_elf64_prefix},
- {sizeof_field(Note, contents.ctrs), s390x_write_elf64_ctrs},
- {sizeof_field(Note, contents.timer), s390x_write_elf64_timer},
- {sizeof_field(Note, contents.todcmp), s390x_write_elf64_todcmp},
- {sizeof_field(Note, contents.todpreg), s390x_write_elf64_todpreg},
- {sizeof_field(Note, contents.vregslo), s390x_write_elf64_vregslo},
- {sizeof_field(Note, contents.vregshi), s390x_write_elf64_vregshi},
- {sizeof_field(Note, contents.gscb), s390x_write_elf64_gscb},
- { 0, NULL}
+ {sizeof_field(Note, contents.prefix), NULL, s390x_write_elf64_prefix, false},
+ {sizeof_field(Note, contents.ctrs), NULL, s390x_write_elf64_ctrs, false},
+ {sizeof_field(Note, contents.timer), NULL, s390x_write_elf64_timer, false},
+ {sizeof_field(Note, contents.todcmp), NULL, s390x_write_elf64_todcmp, false},
+ {sizeof_field(Note, contents.todpreg), NULL, s390x_write_elf64_todpreg, false},
+ {sizeof_field(Note, contents.vregslo), NULL, s390x_write_elf64_vregslo, false},
+ {sizeof_field(Note, contents.vregshi), NULL, s390x_write_elf64_vregshi, false},
+ {sizeof_field(Note, contents.gscb), NULL, s390x_write_elf64_gscb, false},
+ {0, kvm_s390_pv_dmp_get_size_cpu, s390x_write_elf64_pv, true},
+ { 0, NULL, NULL, false}
};
static int s390x_write_elf64_notes(const char *note_name,
@@ -207,22 +227,41 @@ static int s390x_write_elf64_notes(const char *note_name,
DumpState *s,
const NoteFuncDesc *funcs)
{
- Note note;
+ Note note, *notep;
const NoteFuncDesc *nf;
- int note_size;
+ int note_size, content_size;
int ret = -1;
assert(strlen(note_name) < sizeof(note.name));
for (nf = funcs; nf->note_contents_func; nf++) {
- memset(&note, 0, sizeof(note));
- note.hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1);
- note.hdr.n_descsz = cpu_to_be32(nf->contents_size);
- g_strlcpy(note.name, note_name, sizeof(note.name));
- (*nf->note_contents_func)(&note, cpu, id);
+ notep = &note;
+ if (nf->pvonly && !s390_is_pv()) {
+ continue;
+ }
+
+ content_size = nf->note_size_func ? nf->note_size_func() : nf->contents_size;
+ note_size = sizeof(note) - sizeof(notep->contents) + content_size;
+
+ /* Notes with dynamic sizes need to allocate a note */
+ if (nf->note_size_func) {
+ notep = g_malloc(note_size);
+ }
+
+ memset(notep, 0, sizeof(note));
- note_size = sizeof(note) - sizeof(note.contents) + nf->contents_size;
- ret = f(&note, note_size, s);
+ /* Setup note header data */
+ notep->hdr.n_descsz = cpu_to_be32(content_size);
+ notep->hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1);
+ g_strlcpy(notep->name, note_name, sizeof(notep->name));
+
+ /* Get contents and write them out */
+ (*nf->note_contents_func)(notep, cpu, id);
+ ret = f(notep, note_size, s);
+
+ if (nf->note_size_func) {
+ g_free(notep);
+ }
if (ret < 0) {
return -1;
@@ -247,13 +286,179 @@ int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, s, note_linux);
}
+/* PV dump section size functions */
+static uint64_t get_mem_state_size_from_len(uint64_t len)
+{
+ return (len / (MiB)) * kvm_s390_pv_dmp_get_size_mem_state();
+}
+
+static uint64_t get_size_mem_state(DumpState *s)
+{
+ return get_mem_state_size_from_len(s->total_size);
+}
+
+static uint64_t get_size_completion_data(DumpState *s)
+{
+ return kvm_s390_pv_dmp_get_size_completion_data();
+}
+
+/* PV dump section data functions*/
+static int get_data_completion(DumpState *s, uint8_t *buff)
+{
+ int rc;
+
+ if (!pv_dump_initialized) {
+ return 0;
+ }
+ rc = kvm_s390_dump_completion_data(buff);
+ if (!rc) {
+ pv_dump_initialized = false;
+ }
+ return rc;
+}
+
+static int get_mem_state(DumpState *s, uint8_t *buff)
+{
+ int64_t memblock_size, memblock_start;
+ GuestPhysBlock *block;
+ uint64_t off;
+ int rc;
+
+ QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
+ memblock_start = dump_filtered_memblock_start(block, s->filter_area_begin,
+ s->filter_area_length);
+ if (memblock_start == -1) {
+ continue;
+ }
+
+ memblock_size = dump_filtered_memblock_size(block, s->filter_area_begin,
+ s->filter_area_length);
+
+ off = get_mem_state_size_from_len(block->target_start);
+
+ rc = kvm_s390_dump_mem_state(block->target_start,
+ get_mem_state_size_from_len(memblock_size),
+ buff + off);
+ if (rc) {
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+static struct sections {
+ uint64_t (*sections_size_func)(DumpState *s);
+ int (*sections_contents_func)(DumpState *s, uint8_t *buff);
+ char sctn_str[12];
+} sections[] = {
+ { get_size_mem_state, get_mem_state, "pv_mem_meta"},
+ { get_size_completion_data, get_data_completion, "pv_compl"},
+ {NULL , NULL, ""}
+};
+
+static uint64_t arch_sections_write_hdr(DumpState *s, uint8_t *buff)
+{
+ Elf64_Shdr *shdr = (void *)buff;
+ struct sections *sctn = sections;
+ uint64_t off = s->section_offset;
+
+ if (!pv_dump_initialized) {
+ return 0;
+ }
+
+ for (; sctn->sections_size_func; off += shdr->sh_size, sctn++, shdr++) {
+ memset(shdr, 0, sizeof(*shdr));
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_offset = off;
+ shdr->sh_size = sctn->sections_size_func(s);
+ shdr->sh_name = s->string_table_buf->len;
+ g_array_append_vals(s->string_table_buf, sctn->sctn_str, sizeof(sctn->sctn_str));
+ }
+
+ return (uintptr_t)shdr - (uintptr_t)buff;
+}
+
+
+/* Add arch specific number of sections and their respective sizes */
+static void arch_sections_add(DumpState *s)
+{
+ struct sections *sctn = sections;
+
+ /*
+ * We only do a PV dump if we are running a PV guest, KVM supports
+ * the dump API and we got valid dump length information.
+ */
+ if (!s390_is_pv() || !kvm_s390_get_protected_dump() ||
+ !kvm_s390_pv_info_basic_valid()) {
+ return;
+ }
+
+ /*
+ * Start the UV dump process by doing the initialize dump call via
+ * KVM as the proxy.
+ */
+ if (!kvm_s390_dump_init()) {
+ pv_dump_initialized = true;
+ } else {
+ /*
+ * Dump init failed, maybe the guest owner disabled dumping.
+ * We'll continue the non-PV dump process since this is no
+ * reason to crash qemu.
+ */
+ return;
+ }
+
+ for (; sctn->sections_size_func; sctn++) {
+ s->shdr_num += 1;
+ s->elf_section_data_size += sctn->sections_size_func(s);
+ }
+}
+
+/*
+ * After the PV dump has been initialized, the CPU data has been
+ * fetched and memory has been dumped, we need to grab the tweak data
+ * and the completion data.
+ */
+static int arch_sections_write(DumpState *s, uint8_t *buff)
+{
+ struct sections *sctn = sections;
+ int rc;
+
+ if (!pv_dump_initialized) {
+ return -EINVAL;
+ }
+
+ for (; sctn->sections_size_func; sctn++) {
+ rc = sctn->sections_contents_func(s, buff);
+ buff += sctn->sections_size_func(s);
+ if (rc) {
+ return rc;
+ }
+ }
+ return 0;
+}
+
int cpu_get_dump_info(ArchDumpInfo *info,
const struct GuestPhysBlockList *guest_phys_blocks)
{
info->d_machine = EM_S390;
info->d_endian = ELFDATA2MSB;
info->d_class = ELFCLASS64;
-
+ /*
+ * This is evaluated for each dump so we can freely switch
+ * between PV and non-PV.
+ */
+ if (s390_is_pv() && kvm_s390_get_protected_dump() &&
+ kvm_s390_pv_info_basic_valid()) {
+ info->arch_sections_add_fn = *arch_sections_add;
+ info->arch_sections_write_hdr_fn = *arch_sections_write_hdr;
+ info->arch_sections_write_fn = *arch_sections_write;
+ } else {
+ info->arch_sections_add_fn = NULL;
+ info->arch_sections_write_hdr_fn = NULL;
+ info->arch_sections_write_fn = NULL;
+ }
return 0;
}
@@ -261,7 +466,7 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
{
int name_size = 8; /* "LINUX" or "CORE" + pad */
size_t elf_note_size = 0;
- int note_head_size;
+ int note_head_size, content_size;
const NoteFuncDesc *nf;
assert(class == ELFCLASS64);
@@ -270,12 +475,15 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
note_head_size = sizeof(Elf64_Nhdr);
for (nf = note_core; nf->note_contents_func; nf++) {
- elf_note_size = elf_note_size + note_head_size + name_size +
- nf->contents_size;
+ elf_note_size = elf_note_size + note_head_size + name_size + nf->contents_size;
}
for (nf = note_linux; nf->note_contents_func; nf++) {
+ if (nf->pvonly && !s390_is_pv()) {
+ continue;
+ }
+ content_size = nf->contents_size ? nf->contents_size : nf->note_size_func();
elf_note_size = elf_note_size + note_head_size + name_size +
- nf->contents_size;
+ content_size;
}
return (elf_note_size) * nr_cpus;
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index df00040e95..96562c516d 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -272,6 +272,7 @@ static void s390_cpu_reset_full(DeviceState *dev)
static const struct TCGCPUOps s390_tcg_ops = {
.initialize = s390x_translate_init,
+ .restore_state_to_opc = s390x_restore_state_to_opc,
#ifdef CONFIG_USER_ONLY
.record_sigsegv = s390_cpu_record_sigsegv,
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 508c24cfec..3ac7ec9acf 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -158,6 +158,7 @@ static int cap_hpage_1m;
static int cap_vcpu_resets;
static int cap_protected;
static int cap_zpci_op;
+static int cap_protected_dump;
static bool mem_op_storage_key_support;
@@ -364,6 +365,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS);
cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED);
cap_zpci_op = kvm_check_extension(s, KVM_CAP_S390_ZPCI_OP);
+ cap_protected_dump = kvm_check_extension(s, KVM_CAP_S390_PROTECTED_DUMP);
kvm_vm_enable_cap(s, KVM_CAP_S390_USER_SIGP, 0);
kvm_vm_enable_cap(s, KVM_CAP_S390_VECTOR_REGISTERS, 0);
@@ -1033,7 +1035,7 @@ int kvm_arch_remove_hw_breakpoint(target_ulong addr,
}
size = nb_hw_breakpoints * sizeof(struct kvm_hw_breakpoint);
hw_breakpoints =
- (struct kvm_hw_breakpoint *)g_realloc(hw_breakpoints, size);
+ g_realloc(hw_breakpoints, size);
} else {
g_free(hw_breakpoints);
hw_breakpoints = NULL;
@@ -2045,6 +2047,11 @@ int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch,
return kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
}
+int kvm_s390_get_protected_dump(void)
+{
+ return cap_protected_dump;
+}
+
int kvm_s390_get_ri(void)
{
return cap_ri;
diff --git a/target/s390x/kvm/kvm_s390x.h b/target/s390x/kvm/kvm_s390x.h
index aaae8570de..f9785564d0 100644
--- a/target/s390x/kvm/kvm_s390x.h
+++ b/target/s390x/kvm/kvm_s390x.h
@@ -26,6 +26,7 @@ int kvm_s390_set_cpu_state(S390CPU *cpu, uint8_t cpu_state);
void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu);
int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu);
int kvm_s390_get_hpage_1m(void);
+int kvm_s390_get_protected_dump(void);
int kvm_s390_get_ri(void);
int kvm_s390_get_zpci_op(void);
int kvm_s390_get_clock(uint8_t *tod_high, uint64_t *tod_clock);
diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build
index d1356356b1..aef52b6686 100644
--- a/target/s390x/kvm/meson.build
+++ b/target/s390x/kvm/meson.build
@@ -1,6 +1,8 @@
s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
'kvm.c'
+), if_false: files(
+ 'stubs.c'
))
# Newer kernels on s390 check for an S390_PGSTE program header and
diff --git a/target/s390x/kvm/stubs.c b/target/s390x/kvm/stubs.c
new file mode 100644
index 0000000000..5fd63b9a7e
--- /dev/null
+++ b/target/s390x/kvm/stubs.c
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+
+#include "kvm_s390x.h"
+
+int kvm_s390_get_protected_dump(void)
+{
+ return false;
+}
diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h
index b5ae0ae364..5d4361d35b 100644
--- a/target/s390x/s390x-internal.h
+++ b/target/s390x/s390x-internal.h
@@ -398,7 +398,9 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3,
/* translate.c */
void s390x_translate_init(void);
-
+void s390x_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data);
/* sigp.c */
int handle_sigp(CPUS390XState *env, uint8_t order, uint64_t r1, uint64_t r3);
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 1d2dddab1c..5798928473 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -6691,9 +6691,12 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
translator_loop(cs, tb, max_insns, pc, host_pc, &s390x_tr_ops, &dc.base);
}
-void restore_state_to_opc(CPUS390XState *env, TranslationBlock *tb,
- target_ulong *data)
+void s390x_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
{
+ S390CPU *cpu = S390_CPU(cs);
+ CPUS390XState *env = &cpu->env;
int cc_op = data[1];
env->psw.addr = data[0];
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index 56c50530da..453268392b 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -50,6 +50,21 @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
cpu->env.flags = tb->flags;
}
+static void superh_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+ SuperHCPU *cpu = SUPERH_CPU(cs);
+
+ cpu->env.pc = data[0];
+ cpu->env.flags = data[1];
+ /*
+ * Theoretically delayed_pc should also be restored. In practice the
+ * branch instruction is re-executed after exception, so the delayed
+ * branch target will be recomputed.
+ */
+}
+
#ifndef CONFIG_USER_ONLY
static bool superh_io_recompile_replay_branch(CPUState *cs,
const TranslationBlock *tb)
@@ -243,6 +258,7 @@ static const struct SysemuCPUOps sh4_sysemu_ops = {
static const struct TCGCPUOps superh_tcg_ops = {
.initialize = sh4_translate_init,
.synchronize_from_tb = superh_cpu_synchronize_from_tb,
+ .restore_state_to_opc = superh_restore_state_to_opc,
#ifndef CONFIG_USER_ONLY
.tlb_fill = superh_cpu_tlb_fill,
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 26231b2a5a..7db3468b01 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -2381,13 +2381,3 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
translator_loop(cs, tb, max_insns, pc, host_pc, &sh4_tr_ops, &ctx.base);
}
-
-void restore_state_to_opc(CPUSH4State *env, TranslationBlock *tb,
- target_ulong *data)
-{
- env->pc = data[0];
- env->flags = data[1];
- /* Theoretically delayed_pc should also be restored. In practice the
- branch instruction is re-executed after exception, so the delayed
- branch target will be recomputed. */
-}
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
index 1f9ef7afd8..4c3d08a875 100644
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@@ -872,6 +872,7 @@ static const struct SysemuCPUOps sparc_sysemu_ops = {
static const struct TCGCPUOps sparc_tcg_ops = {
.initialize = sparc_tcg_init,
.synchronize_from_tb = sparc_cpu_synchronize_from_tb,
+ .restore_state_to_opc = sparc_restore_state_to_opc,
#ifndef CONFIG_USER_ONLY
.tlb_fill = sparc_cpu_tlb_fill,
diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
index f80ea2e8cf..e478c5eb16 100644
--- a/target/sparc/cpu.h
+++ b/target/sparc/cpu.h
@@ -600,6 +600,9 @@ int sparc_cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
/* translate.c */
void sparc_tcg_init(void);
+void sparc_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data);
/* cpu-exec.c */
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index 2cbbe2396a..34858eb95f 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -6011,9 +6011,12 @@ void sparc_tcg_init(void)
}
}
-void restore_state_to_opc(CPUSPARCState *env, TranslationBlock *tb,
- target_ulong *data)
+void sparc_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
{
+ SPARCCPU *cpu = SPARC_CPU(cs);
+ CPUSPARCState *env = &cpu->env;
target_ulong pc = data[0];
target_ulong npc = data[1];
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
index ab7a1e3a6d..2c54a2825f 100644
--- a/target/tricore/cpu.c
+++ b/target/tricore/cpu.c
@@ -58,6 +58,16 @@ static void tricore_cpu_synchronize_from_tb(CPUState *cs,
env->PC = tb_pc(tb);
}
+static void tricore_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+ TriCoreCPU *cpu = TRICORE_CPU(cs);
+ CPUTriCoreState *env = &cpu->env;
+
+ env->PC = data[0];
+}
+
static void tricore_cpu_reset(DeviceState *dev)
{
CPUState *s = CPU(dev);
@@ -161,6 +171,7 @@ static const struct SysemuCPUOps tricore_sysemu_ops = {
static const struct TCGCPUOps tricore_tcg_ops = {
.initialize = tricore_tcg_init,
.synchronize_from_tb = tricore_cpu_synchronize_from_tb,
+ .restore_state_to_opc = tricore_restore_state_to_opc,
.tlb_fill = tricore_cpu_tlb_fill,
};
diff --git a/target/tricore/translate.c b/target/tricore/translate.c
index a0558ead71..c5b7bfbf20 100644
--- a/target/tricore/translate.c
+++ b/target/tricore/translate.c
@@ -8886,12 +8886,6 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
&tricore_tr_ops, &ctx.base);
}
-void
-restore_state_to_opc(CPUTriCoreState *env, TranslationBlock *tb,
- target_ulong *data)
-{
- env->PC = data[0];
-}
/*
*
* Initialization
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
index cbbe0e84a2..09923301c4 100644
--- a/target/xtensa/cpu.c
+++ b/target/xtensa/cpu.c
@@ -51,6 +51,15 @@ static vaddr xtensa_cpu_get_pc(CPUState *cs)
return cpu->env.pc;
}
+static void xtensa_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+ XtensaCPU *cpu = XTENSA_CPU(cs);
+
+ cpu->env.pc = data[0];
+}
+
static bool xtensa_cpu_has_work(CPUState *cs)
{
#ifndef CONFIG_USER_ONLY
@@ -215,6 +224,7 @@ static const struct SysemuCPUOps xtensa_sysemu_ops = {
static const struct TCGCPUOps xtensa_tcg_ops = {
.initialize = xtensa_translate_init,
.debug_excp_handler = xtensa_breakpoint_handler,
+ .restore_state_to_opc = xtensa_restore_state_to_opc,
#ifndef CONFIG_USER_ONLY
.tlb_fill = xtensa_cpu_tlb_fill,
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
index bdd4690a5c..77bcd71030 100644
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -1355,12 +1355,6 @@ void xtensa_cpu_dump_state(CPUState *cs, FILE *f, int flags)
}
}
-void restore_state_to_opc(CPUXtensaState *env, TranslationBlock *tb,
- target_ulong *data)
-{
- env->pc = data[0];
-}
-
static void translate_abs(DisasContext *dc, const OpcodeArg arg[],
const uint32_t par[])
{
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index d997f7922a..344b63e20f 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1916,24 +1916,21 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_goto_tb:
- if (s->tb_jmp_insn_offset != NULL) {
- /* TCG_TARGET_HAS_direct_jump */
- /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
- write can be used to patch the target address. */
- if ((uintptr_t)s->code_ptr & 7) {
- tcg_out32(s, NOP);
- }
- s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
- /* actual branch destination will be patched by
- tb_target_set_jmp_target later. */
- tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
- tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
- } else {
- /* !TCG_TARGET_HAS_direct_jump */
- tcg_debug_assert(s->tb_jmp_target_addr != NULL);
- intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
- tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
+ tcg_debug_assert(s->tb_jmp_insn_offset != NULL);
+ /*
+ * Ensure that ADRP+ADD are 8-byte aligned so that an atomic
+ * write can be used to patch the target address.
+ */
+ if ((uintptr_t)s->code_ptr & 7) {
+ tcg_out32(s, NOP);
}
+ s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
+ /*
+ * actual branch destination will be patched by
+ * tb_target_set_jmp_target later
+ */
+ tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
+ tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
set_jmp_reset_offset(s, a0);
break;
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index a3debf6da7..d326e28740 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1031,6 +1031,36 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
#endif
}
+/* LoongArch uses `andi zero, zero, 0` as NOP. */
+#define NOP OPC_ANDI
+static void tcg_out_nop(TCGContext *s)
+{
+ tcg_out32(s, NOP);
+}
+
+void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+ uintptr_t jmp_rw, uintptr_t addr)
+{
+ tcg_insn_unit i1, i2;
+ ptrdiff_t upper, lower;
+ ptrdiff_t offset = (ptrdiff_t)(addr - jmp_rx) >> 2;
+
+ if (offset == sextreg(offset, 0, 26)) {
+ i1 = encode_sd10k16_insn(OPC_B, offset);
+ i2 = NOP;
+ } else {
+ tcg_debug_assert(offset == sextreg(offset, 0, 36));
+ lower = (int16_t)offset;
+ upper = (offset - lower) >> 16;
+
+ i1 = encode_dsj20_insn(OPC_PCADDU18I, TCG_REG_TMP0, upper);
+ i2 = encode_djsk16_insn(OPC_JIRL, TCG_REG_ZERO, TCG_REG_TMP0, lower);
+ }
+ uint64_t pair = ((uint64_t)i2 << 32) | i1;
+ qatomic_set((uint64_t *)jmp_rw, pair);
+ flush_idcache_range(jmp_rx, jmp_rw, 8);
+}
+
/*
* Entry-points
*/
@@ -1058,10 +1088,20 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_goto_tb:
- assert(s->tb_jmp_insn_offset == 0);
- /* indirect jump method */
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
- (uintptr_t)(s->tb_jmp_target_addr + a0));
+ tcg_debug_assert(s->tb_jmp_insn_offset != NULL);
+ /*
+ * Ensure that patch area is 8-byte aligned so that an
+ * atomic write can be used to patch the target address.
+ */
+ if ((uintptr_t)s->code_ptr & 7) {
+ tcg_out_nop(s);
+ }
+ s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
+ /*
+ * actual branch destination will be patched by
+ * tb_target_set_jmp_target later
+ */
+ tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
set_jmp_reset_offset(s, a0);
break;
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index d58a6162f2..a659c8d6fd 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -42,7 +42,11 @@
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_NB_REGS 32
-#define MAX_CODE_GEN_BUFFER_SIZE SIZE_MAX
+/*
+ * PCADDU18I + JIRL sequence can give 20 + 16 + 2 = 38 bits
+ * signed offset, which is +/- 128 GiB.
+ */
+#define MAX_CODE_GEN_BUFFER_SIZE (128 * GiB)
typedef enum {
TCG_REG_ZERO,
@@ -123,7 +127,7 @@ typedef enum {
#define TCG_TARGET_HAS_clz_i32 1
#define TCG_TARGET_HAS_ctz_i32 1
#define TCG_TARGET_HAS_ctpop_i32 0
-#define TCG_TARGET_HAS_direct_jump 0
+#define TCG_TARGET_HAS_direct_jump 1
#define TCG_TARGET_HAS_brcond2 0
#define TCG_TARGET_HAS_setcond2 0
#define TCG_TARGET_HAS_qemu_st8_i32 0
@@ -166,7 +170,6 @@ typedef enum {
#define TCG_TARGET_HAS_muluh_i64 1
#define TCG_TARGET_HAS_mulsh_i64 1
-/* not defined -- call should be eliminated at compile time */
void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
#define TCG_TARGET_DEFAULT_MO (0)
diff --git a/tests/avocado/machine_aspeed.py b/tests/avocado/machine_aspeed.py
index 124649a24b..fba6527026 100644
--- a/tests/avocado/machine_aspeed.py
+++ b/tests/avocado/machine_aspeed.py
@@ -92,7 +92,7 @@ class AST2x00Machine(QemuSystemTest):
self.do_test_arm_aspeed(image_path)
- def do_test_arm_aspeed_buidroot_start(self, image, cpu_id):
+ def do_test_arm_aspeed_buildroot_start(self, image, cpu_id):
self.require_netdev('user')
self.vm.set_console()
@@ -111,11 +111,11 @@ class AST2x00Machine(QemuSystemTest):
exec_command(self, 'root')
time.sleep(0.1)
- def do_test_arm_aspeed_buidroot_poweroff(self):
+ def do_test_arm_aspeed_buildroot_poweroff(self):
exec_command_and_wait_for_pattern(self, 'poweroff',
'reboot: System halted');
- def test_arm_ast2500_evb_builroot(self):
+ def test_arm_ast2500_evb_buildroot(self):
"""
:avocado: tags=arch:arm
:avocado: tags=machine:ast2500-evb
@@ -129,7 +129,7 @@ class AST2x00Machine(QemuSystemTest):
self.vm.add_args('-device',
'tmp105,bus=aspeed.i2c.bus.3,address=0x4d,id=tmp-test');
- self.do_test_arm_aspeed_buidroot_start(image_path, '0x0')
+ self.do_test_arm_aspeed_buildroot_start(image_path, '0x0')
exec_command_and_wait_for_pattern(self,
'echo lm75 0x4d > /sys/class/i2c-dev/i2c-3/device/new_device',
@@ -141,9 +141,9 @@ class AST2x00Machine(QemuSystemTest):
exec_command_and_wait_for_pattern(self,
'cat /sys/class/hwmon/hwmon1/temp1_input', '18000')
- self.do_test_arm_aspeed_buidroot_poweroff()
+ self.do_test_arm_aspeed_buildroot_poweroff()
- def test_arm_ast2600_evb_builroot(self):
+ def test_arm_ast2600_evb_buildroot(self):
"""
:avocado: tags=arch:arm
:avocado: tags=machine:ast2600-evb
@@ -159,7 +159,7 @@ class AST2x00Machine(QemuSystemTest):
'tmp105,bus=aspeed.i2c.bus.3,address=0x4d,id=tmp-test');
self.vm.add_args('-device',
'ds1338,bus=aspeed.i2c.bus.3,address=0x32');
- self.do_test_arm_aspeed_buidroot_start(image_path, '0xf00')
+ self.do_test_arm_aspeed_buildroot_start(image_path, '0xf00')
exec_command_and_wait_for_pattern(self,
'echo lm75 0x4d > /sys/class/i2c-dev/i2c-3/device/new_device',
@@ -177,7 +177,7 @@ class AST2x00Machine(QemuSystemTest):
year = time.strftime("%Y")
exec_command_and_wait_for_pattern(self, 'hwclock -f /dev/rtc1', year);
- self.do_test_arm_aspeed_buidroot_poweroff()
+ self.do_test_arm_aspeed_buildroot_poweroff()
class AST2x00MachineSDK(QemuSystemTest):
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index ef4427ff4d..aa1ba179fa 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -2481,8 +2481,8 @@ int main(int argc, char **argv)
tmpfs = g_dir_make_tmp("migration-test-XXXXXX", &err);
if (!tmpfs) {
- g_test_message("g_dir_make_tmp on path (%s): %s", tmpfs,
- err->message);
+ g_test_message("Can't create temporary directory in %s: %s",
+ g_get_tmp_dir(), err->message);
}
g_assert(tmpfs);
diff --git a/tests/qtest/modules-test.c b/tests/qtest/modules-test.c
index 88217686e1..be2575ae6d 100644
--- a/tests/qtest/modules-test.c
+++ b/tests/qtest/modules-test.c
@@ -16,6 +16,9 @@ static void test_modules_load(const void *data)
int main(int argc, char *argv[])
{
const char *modules[] = {
+#ifdef CONFIG_BLKIO
+ "block-", "blkio",
+#endif
#ifdef CONFIG_CURL
"block-", "curl",
#endif
diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c
index e8d2da7228..bf9f7c4248 100644
--- a/tests/qtest/vhost-user-test.c
+++ b/tests/qtest/vhost-user-test.c
@@ -571,8 +571,8 @@ static TestServer *test_server_new(const gchar *name,
tmpfs = g_dir_make_tmp("vhost-test-XXXXXX", &err);
if (!tmpfs) {
- g_test_message("g_dir_make_tmp on path (%s): %s", tmpfs,
- err->message);
+ g_test_message("Can't create temporary directory in %s: %s",
+ g_get_tmp_dir(), err->message);
g_error_free(err);
}
g_assert(tmpfs);
diff --git a/ui/console.c b/ui/console.c
index 49da6a91df..65c117874c 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -1297,7 +1297,7 @@ static void kbd_send_chars(QemuConsole *s)
uint32_t size;
buf = fifo8_pop_buf(&s->out_fifo, MIN(len, avail), &size);
- qemu_chr_be_write(s->chr, (uint8_t *)buf, size);
+ qemu_chr_be_write(s->chr, buf, size);
len = qemu_chr_be_can_write(s->chr);
avail -= size;
}
diff --git a/ui/gtk.c b/ui/gtk.c
index 92daaa6a6e..7ec21f7798 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -1763,7 +1763,7 @@ static void gd_vc_send_chars(VirtualConsole *vc)
uint32_t size;
buf = fifo8_pop_buf(&vc->vte.out_fifo, MIN(len, avail), &size);
- qemu_chr_be_write(vc->vte.chr, (uint8_t *)buf, size);
+ qemu_chr_be_write(vc->vte.chr, buf, size);
len = qemu_chr_be_can_write(vc->vte.chr);
avail -= size;
}
diff --git a/ui/vnc-enc-hextile.c b/ui/vnc-enc-hextile.c
index 4215bd7daf..c763256f29 100644
--- a/ui/vnc-enc-hextile.c
+++ b/ui/vnc-enc-hextile.c
@@ -50,8 +50,8 @@ int vnc_hextile_send_framebuffer_update(VncState *vs, int x,
int has_fg, has_bg;
uint8_t *last_fg, *last_bg;
- last_fg = (uint8_t *) g_malloc(VNC_SERVER_FB_BYTES);
- last_bg = (uint8_t *) g_malloc(VNC_SERVER_FB_BYTES);
+ last_fg = g_malloc(VNC_SERVER_FB_BYTES);
+ last_bg = g_malloc(VNC_SERVER_FB_BYTES);
has_fg = has_bg = 0;
for (j = y; j < (y + h); j += 16) {
for (i = x; i < (x + w); i += 16) {
diff --git a/ui/vnc-jobs.c b/ui/vnc-jobs.c
index 4562bf8928..886f9bf611 100644
--- a/ui/vnc-jobs.c
+++ b/ui/vnc-jobs.c
@@ -373,7 +373,7 @@ void vnc_start_worker_thread(void)
VncJobQueue *q;
if (vnc_worker_thread_running())
- return ;
+ return;
q = vnc_queue_init();
qemu_thread_create(&q->thread, "vnc_worker", vnc_worker_thread, q,
diff --git a/ui/vnc.c b/ui/vnc.c
index acb3629cd8..88f55cbf3c 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3085,7 +3085,7 @@ static void vnc_rect_updated(VncDisplay *vd, int x, int y, struct timeval * tv)
rect = vnc_stat_rect(vd, x, y);
if (rect->updated) {
- return ;
+ return;
}
rect->times[rect->idx] = *tv;
rect->idx = (rect->idx + 1) % ARRAY_SIZE(rect->times);
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
index 15c82d9348..45c6b57374 100644
--- a/util/qemu-coroutine-lock.c
+++ b/util/qemu-coroutine-lock.c
@@ -39,10 +39,15 @@ void qemu_co_queue_init(CoQueue *queue)
QSIMPLEQ_INIT(&queue->entries);
}
-void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock)
+void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock,
+ CoQueueWaitFlags flags)
{
Coroutine *self = qemu_coroutine_self();
- QSIMPLEQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
+ if (flags & CO_QUEUE_WAIT_FRONT) {
+ QSIMPLEQ_INSERT_HEAD(&queue->entries, self, co_queue_next);
+ } else {
+ QSIMPLEQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
+ }
if (lock) {
qemu_lockable_unlock(lock);
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 5ba01177bf..0d1520caac 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -847,10 +847,13 @@ void qemu_vfio_close(QEMUVFIOState *s)
if (!s) {
return;
}
+
+ ram_block_notifier_remove(&s->ram_notifier);
+
for (i = 0; i < s->nr_mappings; ++i) {
qemu_vfio_undo_mapping(s, &s->mappings[i], NULL);
}
- ram_block_notifier_remove(&s->ram_notifier);
+
g_free(s->usable_iova_ranges);
s->nb_iova_ranges = 0;
qemu_vfio_reset(s);