aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitlab-ci.d/crossbuilds.yml24
-rw-r--r--.gitlab-ci.d/opensbi.yml4
-rw-r--r--.gitlab-ci.d/opensbi/Dockerfile1
-rw-r--r--MAINTAINERS27
-rw-r--r--accel/xen/xen-all.c69
-rwxr-xr-xconfigure1
-rw-r--r--disas/riscv.c2
-rw-r--r--docs/about/build-platforms.rst3
-rw-r--r--docs/about/deprecated.rst20
-rw-r--r--docs/config/mach-virt-graphical.cfg4
-rw-r--r--docs/config/mach-virt-serial.cfg4
-rw-r--r--docs/config/q35-emulated.cfg2
-rw-r--r--docs/config/q35-virtio-graphical.cfg2
-rw-r--r--docs/config/q35-virtio-serial.cfg2
-rw-r--r--docs/devel/atomics.rst26
-rw-r--r--docs/system/i386/xen.rst30
-rw-r--r--docs/system/target-mips.rst14
-rw-r--r--hw/9pfs/meson.build2
-rw-r--r--hw/9pfs/xen-9p-backend.c32
-rw-r--r--hw/arm/aspeed.c9
-rw-r--r--hw/arm/aspeed_eeprom.c10
-rw-r--r--hw/audio/trace-events6
-rw-r--r--hw/audio/via-ac97.c455
-rw-r--r--hw/block/block.c3
-rw-r--r--hw/block/dataplane/meson.build2
-rw-r--r--hw/block/dataplane/xen-block.c12
-rw-r--r--hw/block/m25p80.c4
-rw-r--r--hw/block/meson.build2
-rw-r--r--hw/block/xen-block.c12
-rw-r--r--hw/char/meson.build2
-rw-r--r--hw/char/xen_console.c57
-rw-r--r--hw/display/meson.build2
-rw-r--r--hw/display/sm501.c18
-rw-r--r--hw/display/xenfb.c32
-rw-r--r--hw/i386/kvm/meson.build1
-rw-r--r--hw/i386/kvm/trace-events15
-rw-r--r--hw/i386/kvm/xen_evtchn.c15
-rw-r--r--hw/i386/kvm/xen_gnttab.c325
-rw-r--r--hw/i386/kvm/xen_gnttab.h1
-rw-r--r--hw/i386/kvm/xen_xenstore.c1251
-rw-r--r--hw/i386/kvm/xenstore_impl.c1927
-rw-r--r--hw/i386/kvm/xenstore_impl.h63
-rw-r--r--hw/i386/pc.c7
-rw-r--r--hw/i386/pc_piix.c4
-rw-r--r--hw/i386/xen/xen-hvm.c38
-rw-r--r--hw/i386/xen/xen-mapcache.c2
-rw-r--r--hw/i386/xen/xen_platform.c7
-rw-r--r--hw/intc/i8259.c10
-rw-r--r--hw/intc/i8259_common.c24
-rw-r--r--hw/intc/mips_gic.c4
-rw-r--r--hw/intc/riscv_aclint.c16
-rw-r--r--hw/intc/riscv_aplic.c4
-rw-r--r--hw/intc/riscv_imsic.c6
-rw-r--r--hw/isa/i82378.c10
-rw-r--r--hw/isa/trace-events1
-rw-r--r--hw/isa/vt82c686.c54
-rw-r--r--hw/mips/boston.c2
-rw-r--r--hw/mips/cps.c35
-rw-r--r--hw/mips/malta.c2
-rw-r--r--hw/misc/edu.c5
-rw-r--r--hw/misc/mips_cmgcr.c2
-rw-r--r--hw/misc/mips_itu.c30
-rw-r--r--hw/net/xen_nic.c25
-rw-r--r--hw/pci-host/mv64361.c4
-rw-r--r--hw/ppc/pegasos2.c26
-rw-r--r--hw/riscv/Kconfig1
-rw-r--r--hw/riscv/meson.build1
-rw-r--r--hw/riscv/virt-acpi-build.c416
-rw-r--r--hw/riscv/virt.c70
-rw-r--r--hw/usb/hcd-ohci.c23
-rw-r--r--hw/usb/meson.build2
-rw-r--r--hw/usb/vt82c686-uhci-pci.c12
-rw-r--r--hw/usb/xen-usb.c29
-rw-r--r--hw/xen/meson.build6
-rw-r--r--hw/xen/trace-events2
-rw-r--r--hw/xen/xen-bus-helper.c62
-rw-r--r--hw/xen/xen-bus.c411
-rw-r--r--hw/xen/xen-legacy-backend.c254
-rw-r--r--hw/xen/xen-operations.c478
-rw-r--r--hw/xen/xen_devconfig.c4
-rw-r--r--hw/xen/xen_pt.c2
-rw-r--r--hw/xen/xen_pt.h2
-rw-r--r--hw/xen/xen_pt_config_init.c2
-rw-r--r--hw/xen/xen_pt_graphics.c1
-rw-r--r--hw/xen/xen_pt_msi.c4
-rw-r--r--hw/xen/xen_pvdev.c63
-rw-r--r--include/block/aio-wait.h2
-rw-r--r--include/hw/i386/x86.h2
-rw-r--r--include/hw/intc/mips_gic.h4
-rw-r--r--include/hw/isa/i8259_internal.h1
-rw-r--r--include/hw/isa/vt82c686.h25
-rw-r--r--include/hw/misc/mips_cmgcr.h2
-rw-r--r--include/hw/misc/mips_itu.h9
-rw-r--r--include/hw/riscv/virt.h6
-rw-r--r--include/hw/xen/xen-bus-helper.h26
-rw-r--r--include/hw/xen/xen-bus.h21
-rw-r--r--include/hw/xen/xen-legacy-backend.h24
-rw-r--r--include/hw/xen/xen.h24
-rw-r--r--include/hw/xen/xen_backend_ops.h408
-rw-r--r--include/hw/xen/xen_native.h (renamed from include/hw/xen/xen_common.h)75
-rw-r--r--include/hw/xen/xen_pvdev.h6
-rw-r--r--include/qemu/atomic.h17
-rw-r--r--pc-bios/opensbi-riscv32-generic-fw_dynamic.binbin117704 -> 123072 bytes
-rw-r--r--pc-bios/opensbi-riscv64-generic-fw_dynamic.binbin115344 -> 121800 bytes
-rw-r--r--pc-bios/s390-ccw.imgbin42608 -> 42608 bytes
-rw-r--r--pc-bios/s390-ccw/bootmap.c157
-rw-r--r--pc-bios/s390-ccw/bootmap.h30
m---------roms/opensbi0
-rw-r--r--softmmu/globals.c4
-rw-r--r--softmmu/physmem.c17
-rw-r--r--target/hexagon/meson.build2
-rw-r--r--target/i386/kvm/xen-emu.c5
-rw-r--r--target/mips/cpu-defs.c.inc13
-rw-r--r--target/mips/cpu.c4
-rw-r--r--target/mips/cpu.h1
-rw-r--r--target/mips/sysemu/physaddr.c3
-rw-r--r--target/mips/tcg/ldst_helper.c4
-rw-r--r--target/mips/tcg/msa_helper.c104
-rw-r--r--target/mips/tcg/translate.c8
-rw-r--r--target/riscv/cpu.c303
-rw-r--r--target/riscv/cpu.h29
-rw-r--r--target/riscv/csr.c29
-rw-r--r--target/riscv/helper.h5
-rw-r--r--target/riscv/insn32.decode16
-rw-r--r--target/riscv/insn_trans/trans_rvzicbo.c.inc57
-rw-r--r--target/riscv/op_helper.c135
-rw-r--r--target/riscv/translate.c1
-rw-r--r--tests/qtest/meson.build10
-rw-r--r--tests/qtest/readconfig-test.c36
-rw-r--r--tests/unit/meson.build1
-rw-r--r--tests/unit/test-xs-node.c871
-rw-r--r--ui/cocoa.m11
-rw-r--r--util/async.c43
-rw-r--r--util/log.c2
-rw-r--r--util/qemu-coroutine-lock.c9
-rw-r--r--util/qemu-thread-posix.c69
-rw-r--r--util/qemu-thread-win32.c82
137 files changed, 8054 insertions, 1315 deletions
diff --git a/.gitlab-ci.d/crossbuilds.yml b/.gitlab-ci.d/crossbuilds.yml
index d3a31a2112..61b8ac86ee 100644
--- a/.gitlab-ci.d/crossbuilds.yml
+++ b/.gitlab-ci.d/crossbuilds.yml
@@ -1,13 +1,6 @@
include:
- local: '/.gitlab-ci.d/crossbuild-template.yml'
-cross-armel-system:
- extends: .cross_system_build_job
- needs:
- job: armel-debian-cross-container
- variables:
- IMAGE: debian-armel-cross
-
cross-armel-user:
extends: .cross_user_build_job
needs:
@@ -15,13 +8,6 @@ cross-armel-user:
variables:
IMAGE: debian-armel-cross
-cross-armhf-system:
- extends: .cross_system_build_job
- needs:
- job: armhf-debian-cross-container
- variables:
- IMAGE: debian-armhf-cross
-
cross-armhf-user:
extends: .cross_user_build_job
needs:
@@ -43,16 +29,6 @@ cross-arm64-user:
variables:
IMAGE: debian-arm64-cross
-cross-i386-system:
- extends:
- - .cross_system_build_job
- - .cross_test_artifacts
- needs:
- job: i386-fedora-cross-container
- variables:
- IMAGE: fedora-i386-cross
- MAKE_CHECK_ARGS: check-qtest
-
cross-i386-user:
extends:
- .cross_user_build_job
diff --git a/.gitlab-ci.d/opensbi.yml b/.gitlab-ci.d/opensbi.yml
index 04ed5a3ea1..9a651465d8 100644
--- a/.gitlab-ci.d/opensbi.yml
+++ b/.gitlab-ci.d/opensbi.yml
@@ -42,9 +42,9 @@
docker-opensbi:
extends: .opensbi_job_rules
stage: containers
- image: docker:19.03.1
+ image: docker:stable
services:
- - docker:19.03.1-dind
+ - docker:stable-dind
variables:
GIT_DEPTH: 3
IMAGE_TAG: $CI_REGISTRY_IMAGE:opensbi-cross-build
diff --git a/.gitlab-ci.d/opensbi/Dockerfile b/.gitlab-ci.d/opensbi/Dockerfile
index 4ba8a4de86..5ccf4151f4 100644
--- a/.gitlab-ci.d/opensbi/Dockerfile
+++ b/.gitlab-ci.d/opensbi/Dockerfile
@@ -15,6 +15,7 @@ RUN apt update \
ca-certificates \
git \
make \
+ python3 \
wget \
&& \
\
diff --git a/MAINTAINERS b/MAINTAINERS
index da29661b37..640deb2895 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -443,6 +443,15 @@ F: target/i386/kvm/
F: target/i386/sev*
F: scripts/kvm/vmxcap
+Xen emulation on X86 KVM CPUs
+M: David Woodhouse <dwmw2@infradead.org>
+M: Paul Durrant <paul@xen.org>
+S: Supported
+F: include/sysemu/kvm_xen.h
+F: target/i386/kvm/xen*
+F: hw/i386/kvm/xen*
+F: tests/avocado/xen_guest.py
+
Guest CPU Cores (other accelerators)
------------------------------------
Overall
@@ -999,12 +1008,6 @@ S: Maintained
F: hw/ssi/xlnx-versal-ospi.c
F: include/hw/ssi/xlnx-versal-ospi.h
-ARM ACPI Subsystem
-M: Shannon Zhao <shannon.zhaosl@gmail.com>
-L: qemu-arm@nongnu.org
-S: Maintained
-F: hw/arm/virt-acpi-build.c
-
STM32F100
M: Alexandre Iooss <erdnaxe@crans.org>
L: qemu-arm@nongnu.org
@@ -1892,6 +1895,18 @@ F: docs/specs/acpi_nvdimm.rst
F: docs/specs/acpi_pci_hotplug.rst
F: docs/specs/acpi_hw_reduced_hotplug.rst
+ARM ACPI Subsystem
+M: Shannon Zhao <shannon.zhaosl@gmail.com>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/virt-acpi-build.c
+
+RISC-V ACPI Subsystem
+M: Sunil V L <sunilvl@ventanamicro.com>
+L: qemu-riscv@nongnu.org
+S: Maintained
+F: hw/riscv/virt-acpi-build.c
+
ACPI/VIOT
M: Jean-Philippe Brucker <jean-philippe@linaro.org>
S: Supported
diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c
index e85e4aeba5..00221e23c5 100644
--- a/accel/xen/xen-all.c
+++ b/accel/xen/xen-all.c
@@ -12,6 +12,7 @@
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "qapi/error.h"
+#include "hw/xen/xen_native.h"
#include "hw/xen/xen-legacy-backend.h"
#include "hw/xen/xen_pt.h"
#include "chardev/char.h"
@@ -29,73 +30,15 @@ xc_interface *xen_xc;
xenforeignmemory_handle *xen_fmem;
xendevicemodel_handle *xen_dmod;
-static int store_dev_info(int domid, Chardev *cs, const char *string)
+static void xenstore_record_dm_state(const char *state)
{
- struct xs_handle *xs = NULL;
- char *path = NULL;
- char *newpath = NULL;
- char *pts = NULL;
- int ret = -1;
-
- /* Only continue if we're talking to a pty. */
- if (!CHARDEV_IS_PTY(cs)) {
- return 0;
- }
- pts = cs->filename + 4;
+ struct xs_handle *xs;
+ char path[50];
/* We now have everything we need to set the xenstore entry. */
xs = xs_open(0);
if (xs == NULL) {
fprintf(stderr, "Could not contact XenStore\n");
- goto out;
- }
-
- path = xs_get_domain_path(xs, domid);
- if (path == NULL) {
- fprintf(stderr, "xs_get_domain_path() error\n");
- goto out;
- }
- newpath = realloc(path, (strlen(path) + strlen(string) +
- strlen("/tty") + 1));
- if (newpath == NULL) {
- fprintf(stderr, "realloc error\n");
- goto out;
- }
- path = newpath;
-
- strcat(path, string);
- strcat(path, "/tty");
- if (!xs_write(xs, XBT_NULL, path, pts, strlen(pts))) {
- fprintf(stderr, "xs_write for '%s' fail", string);
- goto out;
- }
- ret = 0;
-
-out:
- free(path);
- xs_close(xs);
-
- return ret;
-}
-
-void xenstore_store_pv_console_info(int i, Chardev *chr)
-{
- if (i == 0) {
- store_dev_info(xen_domid, chr, "/console");
- } else {
- char buf[32];
- snprintf(buf, sizeof(buf), "/device/console/%d", i);
- store_dev_info(xen_domid, chr, buf);
- }
-}
-
-
-static void xenstore_record_dm_state(struct xs_handle *xs, const char *state)
-{
- char path[50];
-
- if (xs == NULL) {
- error_report("xenstore connection not initialized");
exit(1);
}
@@ -109,6 +52,8 @@ static void xenstore_record_dm_state(struct xs_handle *xs, const char *state)
error_report("error recording dm state");
exit(1);
}
+
+ xs_close(xs);
}
@@ -117,7 +62,7 @@ static void xen_change_state_handler(void *opaque, bool running,
{
if (running) {
/* record state running */
- xenstore_record_dm_state(xenstore, "running");
+ xenstore_record_dm_state("running");
}
}
diff --git a/configure b/configure
index 219ff13748..7290493729 100755
--- a/configure
+++ b/configure
@@ -2262,6 +2262,7 @@ fi
# tests might fail. Prefer to keep the relevant files in their own
# directory and symlink the directory instead.
LINKS="Makefile"
+LINKS="$LINKS docs/config"
LINKS="$LINKS pc-bios/optionrom/Makefile"
LINKS="$LINKS pc-bios/s390-ccw/Makefile"
LINKS="$LINKS pc-bios/vof/Makefile"
diff --git a/disas/riscv.c b/disas/riscv.c
index ddda687c13..54455aaaa8 100644
--- a/disas/riscv.c
+++ b/disas/riscv.c
@@ -1645,7 +1645,7 @@ const rv_opcode_data opcode_data[] = {
{ "max", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
{ "maxu", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
{ "clzw", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 },
- { "clzw", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 },
+ { "ctzw", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 },
{ "cpopw", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 },
{ "slli.uw", rv_codec_i_sh5, rv_fmt_rd_rs1_imm, NULL, 0, 0, 0 },
{ "add.uw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
diff --git a/docs/about/build-platforms.rst b/docs/about/build-platforms.rst
index 20b97c3310..89cae5a6bb 100644
--- a/docs/about/build-platforms.rst
+++ b/docs/about/build-platforms.rst
@@ -67,7 +67,8 @@ Non-supported architectures may be removed in the future following the
Linux OS, macOS, FreeBSD, NetBSD, OpenBSD
-----------------------------------------
-The project aims to support the most recent major version at all times. Support
+The project aims to support the most recent major version at all times for
+up to five years after its initial release. Support
for the previous major version will be dropped 2 years after the new major
version is released or when the vendor itself drops support, whichever comes
first. In this context, third-party efforts to extend the lifetime of a distro
diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 15084f7bea..33b942283f 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -196,6 +196,26 @@ CI coverage support may bitrot away before the deprecation process
completes. The little endian variants of MIPS (both 32 and 64 bit) are
still a supported host architecture.
+System emulation on 32-bit x86 hosts (since 8.0)
+''''''''''''''''''''''''''''''''''''''''''''''''
+
+Support for 32-bit x86 host deployments is increasingly uncommon in mainstream
+OS distributions given the widespread availability of 64-bit x86 hardware.
+The QEMU project no longer considers 32-bit x86 support for system emulation to
+be an effective use of its limited resources, and thus intends to discontinue
+it. Since all recent x86 hardware from the past >10 years is capable of the
+64-bit x86 extensions, a corresponding 64-bit OS should be used instead.
+
+System emulation on 32-bit arm hosts (since 8.0)
+''''''''''''''''''''''''''''''''''''''''''''''''
+
+Since QEMU needs a strong host machine for running full system emulation, and
+all recent powerful arm hosts support 64-bit, the QEMU project deprecates the
+support for running any system emulation on 32-bit arm hosts in general. Use
+64-bit arm hosts for system emulation instead. (Note: "user" mode emulation
+continues to be supported on 32-bit arm hosts, too)
+
+
QEMU API (QAPI) events
----------------------
diff --git a/docs/config/mach-virt-graphical.cfg b/docs/config/mach-virt-graphical.cfg
index d6d31b17f5..eba76eb198 100644
--- a/docs/config/mach-virt-graphical.cfg
+++ b/docs/config/mach-virt-graphical.cfg
@@ -56,9 +56,11 @@
[machine]
type = "virt"
- accel = "kvm"
gic-version = "host"
+[accel]
+ accel = "kvm"
+
[memory]
size = "1024"
diff --git a/docs/config/mach-virt-serial.cfg b/docs/config/mach-virt-serial.cfg
index 18a7c83731..324b0542ff 100644
--- a/docs/config/mach-virt-serial.cfg
+++ b/docs/config/mach-virt-serial.cfg
@@ -62,9 +62,11 @@
[machine]
type = "virt"
- accel = "kvm"
gic-version = "host"
+[accel]
+ accel = "kvm"
+
[memory]
size = "1024"
diff --git a/docs/config/q35-emulated.cfg b/docs/config/q35-emulated.cfg
index 99ac918e78..c8806e6d36 100644
--- a/docs/config/q35-emulated.cfg
+++ b/docs/config/q35-emulated.cfg
@@ -61,6 +61,8 @@
[machine]
type = "q35"
+
+[accel]
accel = "kvm"
[memory]
diff --git a/docs/config/q35-virtio-graphical.cfg b/docs/config/q35-virtio-graphical.cfg
index 4207f11e4f..148b5d2c5e 100644
--- a/docs/config/q35-virtio-graphical.cfg
+++ b/docs/config/q35-virtio-graphical.cfg
@@ -55,6 +55,8 @@
[machine]
type = "q35"
+
+[accel]
accel = "kvm"
[memory]
diff --git a/docs/config/q35-virtio-serial.cfg b/docs/config/q35-virtio-serial.cfg
index d2830aec5e..023291390e 100644
--- a/docs/config/q35-virtio-serial.cfg
+++ b/docs/config/q35-virtio-serial.cfg
@@ -60,6 +60,8 @@
[machine]
type = "q35"
+
+[accel]
accel = "kvm"
[memory]
diff --git a/docs/devel/atomics.rst b/docs/devel/atomics.rst
index 7957310071..633df65a97 100644
--- a/docs/devel/atomics.rst
+++ b/docs/devel/atomics.rst
@@ -27,7 +27,8 @@ provides macros that fall in three camps:
- weak atomic access and manual memory barriers: ``qatomic_read()``,
``qatomic_set()``, ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``,
- ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``;
+ ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``,
+ ``smp_mb__before_rmw()``, ``smp_mb__after_rmw()``;
- sequentially consistent atomic access: everything else.
@@ -472,7 +473,7 @@ and memory barriers, and the equivalents in QEMU:
sequential consistency.
- in QEMU, ``qatomic_read()`` and ``qatomic_set()`` do not participate in
- the total ordering enforced by sequentially-consistent operations.
+ the ordering enforced by read-modify-write operations.
This is because QEMU uses the C11 memory model. The following example
is correct in Linux but not in QEMU:
@@ -488,9 +489,24 @@ and memory barriers, and the equivalents in QEMU:
because the read of ``y`` can be moved (by either the processor or the
compiler) before the write of ``x``.
- Fixing this requires an ``smp_mb()`` memory barrier between the write
- of ``x`` and the read of ``y``. In the common case where only one thread
- writes ``x``, it is also possible to write it like this:
+ Fixing this requires a full memory barrier between the write of ``x`` and
+ the read of ``y``. QEMU provides ``smp_mb__before_rmw()`` and
+ ``smp_mb__after_rmw()``; they act both as an optimization,
+ avoiding the memory barrier on processors where it is unnecessary,
+ and as a clarification of this corner case of the C11 memory model:
+
+ +--------------------------------+
+ | QEMU (correct) |
+ +================================+
+ | :: |
+ | |
+ | a = qatomic_fetch_add(&x, 2);|
+ | smp_mb__after_rmw(); |
+ | b = qatomic_read(&y); |
+ +--------------------------------+
+
+ In the common case where only one thread writes ``x``, it is also possible
+ to write it like this:
+--------------------------------+
| QEMU (correct) |
diff --git a/docs/system/i386/xen.rst b/docs/system/i386/xen.rst
index a00523b492..f06765e88c 100644
--- a/docs/system/i386/xen.rst
+++ b/docs/system/i386/xen.rst
@@ -9,6 +9,8 @@ KVM has support for hosting Xen guests, intercepting Xen hypercalls and event
channel (Xen PV interrupt) delivery. This allows guests which expect to be
run under Xen to be hosted in QEMU under Linux/KVM instead.
+Using the split irqchip is mandatory for Xen support.
+
Setup
-----
@@ -17,14 +19,14 @@ accelerator, for example for Xen 4.10:
.. parsed-literal::
- |qemu_system| --accel kvm,xen-version=0x4000a
+ |qemu_system| --accel kvm,xen-version=0x4000a,kernel-irqchip=split
Additionally, virtual APIC support can be advertised to the guest through the
``xen-vapic`` CPU flag:
.. parsed-literal::
- |qemu_system| --accel kvm,xen-version=0x4000a --cpu host,+xen_vapic
+ |qemu_system| --accel kvm,xen-version=0x4000a,kernel-irqchip=split --cpu host,+xen_vapic
When Xen support is enabled, QEMU changes hypervisor identification (CPUID
0x40000000..0x4000000A) to Xen. The KVM identification and features are not
@@ -33,11 +35,25 @@ moves to leaves 0x40000100..0x4000010A.
The Xen platform device is enabled automatically for a Xen guest. This allows
a guest to unplug all emulated devices, in order to use Xen PV block and network
-drivers instead. Note that until the Xen PV device back ends are enabled to work
-with Xen mode in QEMU, that is unlikely to cause significant joy. Linux guests
-can be dissuaded from this by adding 'xen_emul_unplug=never' on their command
-line, and it can also be noted that AHCI disk controllers are exempt from being
-unplugged, as are passthrough VFIO PCI devices.
+drivers instead. Under Xen, the boot disk is typically available both via IDE
+emulation, and as a PV block device. Guest bootloaders typically use IDE to load
+the guest kernel, which then unplugs the IDE and continues with the Xen PV block
+device.
+
+This configuration can be achieved as follows:
+
+.. parsed-literal::
+
+ |qemu_system| -M pc --accel kvm,xen-version=0x4000a,kernel-irqchip=split \\
+ -drive file=${GUEST_IMAGE},if=none,id=disk,file.locking=off -device xen-disk,drive=disk,vdev=xvda \\
+ -drive file=${GUEST_IMAGE},index=2,media=disk,file.locking=off,if=ide
+
+It is necessary to use the pc machine type, as the q35 machine uses AHCI instead
+of legacy IDE, and AHCI disks are not unplugged through the Xen PV unplug
+mechanism.
+
+VirtIO devices can also be used; Linux guests may need to be dissuaded from
+unplugging them by adding 'xen_emul_unplug=never' on their command line.
Properties
----------
diff --git a/docs/system/target-mips.rst b/docs/system/target-mips.rst
index 138441bdec..83239fb9df 100644
--- a/docs/system/target-mips.rst
+++ b/docs/system/target-mips.rst
@@ -8,8 +8,6 @@ endian options, ``qemu-system-mips``, ``qemu-system-mipsel``
``qemu-system-mips64`` and ``qemu-system-mips64el``. Five different
machine types are emulated:
-- A generic ISA PC-like machine \"mips\"
-
- The MIPS Malta prototype board \"malta\"
- An ACER Pica \"pica61\". This machine needs the 64-bit emulator.
@@ -19,18 +17,6 @@ machine types are emulated:
- A MIPS Magnum R4000 machine \"magnum\". This machine needs the
64-bit emulator.
-The generic emulation is supported by Debian 'Etch' and is able to
-install Debian into a virtual disk image. The following devices are
-emulated:
-
-- A range of MIPS CPUs, default is the 24Kf
-
-- PC style serial port
-
-- PC style IDE disk
-
-- NE2000 network card
-
The Malta emulation supports the following devices:
- Core board with MIPS 24Kf CPU and Galileo system controller
diff --git a/hw/9pfs/meson.build b/hw/9pfs/meson.build
index 12443b6ad5..fd37b7a02d 100644
--- a/hw/9pfs/meson.build
+++ b/hw/9pfs/meson.build
@@ -15,7 +15,7 @@ fs_ss.add(files(
))
fs_ss.add(when: 'CONFIG_LINUX', if_true: files('9p-util-linux.c'))
fs_ss.add(when: 'CONFIG_DARWIN', if_true: files('9p-util-darwin.c'))
-fs_ss.add(when: 'CONFIG_XEN', if_true: files('xen-9p-backend.c'))
+fs_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-9p-backend.c'))
softmmu_ss.add_all(when: 'CONFIG_FSDEV_9P', if_true: fs_ss)
specific_ss.add(when: 'CONFIG_VIRTIO_9P', if_true: files('virtio-9p-device.c'))
diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index 65c4979c3c..74f3a05f88 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -22,6 +22,7 @@
#include "qemu/config-file.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
+#include "qemu/iov.h"
#include "fsdev/qemu-fsdev.h"
#define VERSIONS "1"
@@ -241,7 +242,7 @@ static void xen_9pfs_push_and_notify(V9fsPDU *pdu)
xen_wmb();
ring->inprogress = false;
- xenevtchn_notify(ring->evtchndev, ring->local_port);
+ qemu_xen_evtchn_notify(ring->evtchndev, ring->local_port);
qemu_bh_schedule(ring->bh);
}
@@ -324,8 +325,8 @@ static void xen_9pfs_evtchn_event(void *opaque)
Xen9pfsRing *ring = opaque;
evtchn_port_t port;
- port = xenevtchn_pending(ring->evtchndev);
- xenevtchn_unmask(ring->evtchndev, port);
+ port = qemu_xen_evtchn_pending(ring->evtchndev);
+ qemu_xen_evtchn_unmask(ring->evtchndev, port);
qemu_bh_schedule(ring->bh);
}
@@ -337,10 +338,10 @@ static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev)
for (i = 0; i < xen_9pdev->num_rings; i++) {
if (xen_9pdev->rings[i].evtchndev != NULL) {
- qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev),
- NULL, NULL, NULL);
- xenevtchn_unbind(xen_9pdev->rings[i].evtchndev,
- xen_9pdev->rings[i].local_port);
+ qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev),
+ NULL, NULL, NULL);
+ qemu_xen_evtchn_unbind(xen_9pdev->rings[i].evtchndev,
+ xen_9pdev->rings[i].local_port);
xen_9pdev->rings[i].evtchndev = NULL;
}
}
@@ -359,12 +360,13 @@ static int xen_9pfs_free(struct XenLegacyDevice *xendev)
if (xen_9pdev->rings[i].data != NULL) {
xen_be_unmap_grant_refs(&xen_9pdev->xendev,
xen_9pdev->rings[i].data,
+ xen_9pdev->rings[i].intf->ref,
(1 << xen_9pdev->rings[i].ring_order));
}
if (xen_9pdev->rings[i].intf != NULL) {
- xen_be_unmap_grant_refs(&xen_9pdev->xendev,
- xen_9pdev->rings[i].intf,
- 1);
+ xen_be_unmap_grant_ref(&xen_9pdev->xendev,
+ xen_9pdev->rings[i].intf,
+ xen_9pdev->rings[i].ref);
}
if (xen_9pdev->rings[i].bh != NULL) {
qemu_bh_delete(xen_9pdev->rings[i].bh);
@@ -447,12 +449,12 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
xen_9pdev->rings[i].inprogress = false;
- xen_9pdev->rings[i].evtchndev = xenevtchn_open(NULL, 0);
+ xen_9pdev->rings[i].evtchndev = qemu_xen_evtchn_open();
if (xen_9pdev->rings[i].evtchndev == NULL) {
goto out;
}
- qemu_set_cloexec(xenevtchn_fd(xen_9pdev->rings[i].evtchndev));
- xen_9pdev->rings[i].local_port = xenevtchn_bind_interdomain
+ qemu_set_cloexec(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev));
+ xen_9pdev->rings[i].local_port = qemu_xen_evtchn_bind_interdomain
(xen_9pdev->rings[i].evtchndev,
xendev->dom,
xen_9pdev->rings[i].evtchn);
@@ -463,8 +465,8 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
goto out;
}
xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port);
- qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev),
- xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]);
+ qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev),
+ xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]);
}
xen_9pdev->security_model = xenstore_read_be_str(xendev, "security_model");
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 86601cb1a5..c1f2b9cfca 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -524,6 +524,11 @@ static void yosemitev2_bmc_i2c_init(AspeedMachineState *bmc)
at24c_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 4), 0x51, 128 * KiB);
at24c_eeprom_init_rom(aspeed_i2c_get_bus(&soc->i2c, 8), 0x51, 128 * KiB,
yosemitev2_bmc_fruid, yosemitev2_bmc_fruid_len);
+ /* TMP421 */
+ i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 11), "tmp421", 0x1f);
+ i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 9), "tmp421", 0x4e);
+ i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 9), "tmp421", 0x4f);
+
}
static void romulus_bmc_i2c_init(AspeedMachineState *bmc)
@@ -542,6 +547,10 @@ static void tiogapass_bmc_i2c_init(AspeedMachineState *bmc)
at24c_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 4), 0x54, 128 * KiB);
at24c_eeprom_init_rom(aspeed_i2c_get_bus(&soc->i2c, 6), 0x54, 128 * KiB,
tiogapass_bmc_fruid, tiogapass_bmc_fruid_len);
+ /* TMP421 */
+ i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 8), "tmp421", 0x1f);
+ i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 6), "tmp421", 0x4f);
+ i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 6), "tmp421", 0x4e);
}
static void create_pca9552(AspeedSoCState *soc, int bus_id, int addr)
diff --git a/hw/arm/aspeed_eeprom.c b/hw/arm/aspeed_eeprom.c
index 2fb2d5dbb7..dc33a88a54 100644
--- a/hw/arm/aspeed_eeprom.c
+++ b/hw/arm/aspeed_eeprom.c
@@ -101,17 +101,17 @@ const uint8_t fby35_bmc_fruid[] = {
/* Yosemite V2 BMC FRU */
const uint8_t yosemitev2_bmc_fruid[] = {
0x01, 0x00, 0x00, 0x01, 0x0d, 0x00, 0x00, 0xf1, 0x01, 0x0c, 0x00, 0x36,
- 0xe6, 0xd0, 0xc6, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xd2, 0x42, 0x4d,
- 0x43, 0x20, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x20, 0x4d, 0x6f,
- 0x64, 0x75, 0x6c, 0x65, 0xcd, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58,
+ 0xe6, 0xd0, 0xc6, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xd2, 0x42, 0x61,
+ 0x73, 0x65, 0x62, 0x6f, 0x61, 0x72, 0x64, 0x20, 0x4d, 0x50, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xcd, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58,
0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xce, 0x58, 0x58, 0x58, 0x58, 0x58,
0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc3, 0x31, 0x2e,
0x30, 0xc9, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xd2,
0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58,
0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc1, 0x39, 0x01, 0x0c, 0x00, 0xc6,
0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xd2, 0x59, 0x6f, 0x73, 0x65, 0x6d,
- 0x69, 0x74, 0x65, 0x20, 0x56, 0x32, 0x2e, 0x30, 0x20, 0x45, 0x56, 0x54,
- 0x32, 0xce, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58,
+ 0x69, 0x74, 0x65, 0x20, 0x56, 0x32, 0x20, 0x4d, 0x50, 0x00, 0x00, 0x00,
+ 0x00, 0xce, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58,
0x58, 0x58, 0x58, 0x58, 0xc4, 0x45, 0x56, 0x54, 0x32, 0xcd, 0x58, 0x58,
0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc7,
0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc3, 0x31, 0x2e, 0x30, 0xc9,
diff --git a/hw/audio/trace-events b/hw/audio/trace-events
index e0e71cd9b1..4dec48a4fd 100644
--- a/hw/audio/trace-events
+++ b/hw/audio/trace-events
@@ -11,3 +11,9 @@ hda_audio_running(const char *stream, int nr, bool running) "st %s, nr %d, run %
hda_audio_format(const char *stream, int chan, const char *fmt, int freq) "st %s, %d x %s @ %d Hz"
hda_audio_adjust(const char *stream, int pos) "st %s, pos %d"
hda_audio_overrun(const char *stream) "st %s"
+
+#via-ac97.c
+via_ac97_codec_write(uint8_t addr, uint16_t val) "0x%x <- 0x%x"
+via_ac97_sgd_fetch(uint32_t curr, uint32_t addr, char stop, char eol, char flag, uint32_t len) "curr=0x%x addr=0x%x %c%c%c len=%d"
+via_ac97_sgd_read(uint64_t addr, unsigned size, uint64_t val) "0x%"PRIx64" %d -> 0x%"PRIx64
+via_ac97_sgd_write(uint64_t addr, unsigned size, uint64_t val) "0x%"PRIx64" %d <- 0x%"PRIx64
diff --git a/hw/audio/via-ac97.c b/hw/audio/via-ac97.c
index d1a856f63d..676254b7a4 100644
--- a/hw/audio/via-ac97.c
+++ b/hw/audio/via-ac97.c
@@ -1,39 +1,482 @@
/*
* VIA south bridges sound support
*
+ * Copyright (c) 2022-2023 BALATON Zoltan
+ *
* This work is licensed under the GNU GPL license version 2 or later.
*/
/*
- * TODO: This is entirely boiler plate just registering empty PCI devices
- * with the right ID guests expect, functionality should be added here.
+ * TODO: This is only a basic implementation of one audio playback channel
+ * more functionality should be added here.
*/
#include "qemu/osdep.h"
+#include "qemu/log.h"
#include "hw/isa/vt82c686.h"
-#include "hw/pci/pci_device.h"
+#include "ac97.h"
+#include "trace.h"
+
+#define CLEN_IS_EOL(x) ((x)->clen & BIT(31))
+#define CLEN_IS_FLAG(x) ((x)->clen & BIT(30))
+#define CLEN_IS_STOP(x) ((x)->clen & BIT(29))
+#define CLEN_LEN(x) ((x)->clen & 0xffffff)
+
+#define STAT_ACTIVE BIT(7)
+#define STAT_PAUSED BIT(6)
+#define STAT_TRIG BIT(3)
+#define STAT_STOP BIT(2)
+#define STAT_EOL BIT(1)
+#define STAT_FLAG BIT(0)
+
+#define CNTL_START BIT(7)
+#define CNTL_TERM BIT(6)
+#define CNTL_PAUSE BIT(3)
+
+static void open_voice_out(ViaAC97State *s);
+
+static uint16_t codec_rates[] = { 8000, 11025, 16000, 22050, 32000, 44100,
+ 48000 };
+
+#define CODEC_REG(s, o) ((s)->codec_regs[(o) / 2])
+#define CODEC_VOL(vol, mask) ((255 * ((vol) & mask)) / mask)
+
+static void codec_volume_set_out(ViaAC97State *s)
+{
+ int lvol, rvol, mute;
+
+ lvol = 255 - CODEC_VOL(CODEC_REG(s, AC97_Master_Volume_Mute) >> 8, 0x1f);
+ lvol *= 255 - CODEC_VOL(CODEC_REG(s, AC97_PCM_Out_Volume_Mute) >> 8, 0x1f);
+ lvol /= 255;
+ rvol = 255 - CODEC_VOL(CODEC_REG(s, AC97_Master_Volume_Mute), 0x1f);
+ rvol *= 255 - CODEC_VOL(CODEC_REG(s, AC97_PCM_Out_Volume_Mute), 0x1f);
+ rvol /= 255;
+ mute = CODEC_REG(s, AC97_Master_Volume_Mute) >> MUTE_SHIFT;
+ mute |= CODEC_REG(s, AC97_PCM_Out_Volume_Mute) >> MUTE_SHIFT;
+ AUD_set_volume_out(s->vo, mute, lvol, rvol);
+}
+
+static void codec_reset(ViaAC97State *s)
+{
+ memset(s->codec_regs, 0, sizeof(s->codec_regs));
+ CODEC_REG(s, AC97_Reset) = 0x6a90;
+ CODEC_REG(s, AC97_Master_Volume_Mute) = 0x8000;
+ CODEC_REG(s, AC97_Headphone_Volume_Mute) = 0x8000;
+ CODEC_REG(s, AC97_Master_Volume_Mono_Mute) = 0x8000;
+ CODEC_REG(s, AC97_Phone_Volume_Mute) = 0x8008;
+ CODEC_REG(s, AC97_Mic_Volume_Mute) = 0x8008;
+ CODEC_REG(s, AC97_Line_In_Volume_Mute) = 0x8808;
+ CODEC_REG(s, AC97_CD_Volume_Mute) = 0x8808;
+ CODEC_REG(s, AC97_Video_Volume_Mute) = 0x8808;
+ CODEC_REG(s, AC97_Aux_Volume_Mute) = 0x8808;
+ CODEC_REG(s, AC97_PCM_Out_Volume_Mute) = 0x8808;
+ CODEC_REG(s, AC97_Record_Gain_Mute) = 0x8000;
+ CODEC_REG(s, AC97_Powerdown_Ctrl_Stat) = 0x000f;
+ CODEC_REG(s, AC97_Extended_Audio_ID) = 0x0a05;
+ CODEC_REG(s, AC97_Extended_Audio_Ctrl_Stat) = 0x0400;
+ CODEC_REG(s, AC97_PCM_Front_DAC_Rate) = 48000;
+ CODEC_REG(s, AC97_PCM_LR_ADC_Rate) = 48000;
+ /* Sigmatel 9766 (STAC9766) */
+ CODEC_REG(s, AC97_Vendor_ID1) = 0x8384;
+ CODEC_REG(s, AC97_Vendor_ID2) = 0x7666;
+}
+
+static uint16_t codec_read(ViaAC97State *s, uint8_t addr)
+{
+ return CODEC_REG(s, addr);
+}
+
+static void codec_write(ViaAC97State *s, uint8_t addr, uint16_t val)
+{
+ trace_via_ac97_codec_write(addr, val);
+ switch (addr) {
+ case AC97_Reset:
+ codec_reset(s);
+ return;
+ case AC97_Master_Volume_Mute:
+ case AC97_PCM_Out_Volume_Mute:
+ if (addr == AC97_Master_Volume_Mute) {
+ if (val & BIT(13)) {
+ val |= 0x1f00;
+ }
+ if (val & BIT(5)) {
+ val |= 0x1f;
+ }
+ }
+ CODEC_REG(s, addr) = val & 0x9f1f;
+ codec_volume_set_out(s);
+ return;
+ case AC97_Extended_Audio_Ctrl_Stat:
+ CODEC_REG(s, addr) &= ~EACS_VRA;
+ CODEC_REG(s, addr) |= val & EACS_VRA;
+ if (!(val & EACS_VRA)) {
+ CODEC_REG(s, AC97_PCM_Front_DAC_Rate) = 48000;
+ CODEC_REG(s, AC97_PCM_LR_ADC_Rate) = 48000;
+ open_voice_out(s);
+ }
+ return;
+ case AC97_PCM_Front_DAC_Rate:
+ case AC97_PCM_LR_ADC_Rate:
+ if (CODEC_REG(s, AC97_Extended_Audio_Ctrl_Stat) & EACS_VRA) {
+ int i;
+ uint16_t rate = val;
+
+ for (i = 0; i < ARRAY_SIZE(codec_rates) - 1; i++) {
+ if (rate < codec_rates[i] +
+ (codec_rates[i + 1] - codec_rates[i]) / 2) {
+ rate = codec_rates[i];
+ break;
+ }
+ }
+ if (rate > 48000) {
+ rate = 48000;
+ }
+ CODEC_REG(s, addr) = rate;
+ open_voice_out(s);
+ }
+ return;
+ case AC97_Powerdown_Ctrl_Stat:
+ CODEC_REG(s, addr) = (val & 0xff00) | (CODEC_REG(s, addr) & 0xff);
+ return;
+ case AC97_Extended_Audio_ID:
+ case AC97_Vendor_ID1:
+ case AC97_Vendor_ID2:
+ /* Read only registers */
+ return;
+ default:
+ qemu_log_mask(LOG_UNIMP,
+ "via-ac97: Unimplemented codec register 0x%x\n", addr);
+ CODEC_REG(s, addr) = val;
+ }
+}
+
+static void fetch_sgd(ViaAC97SGDChannel *c, PCIDevice *d)
+{
+ uint32_t b[2];
+
+ if (c->curr < c->base) {
+ c->curr = c->base;
+ }
+ if (unlikely(pci_dma_read(d, c->curr, b, sizeof(b)) != MEMTX_OK)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "via-ac97: DMA error reading SGD table\n");
+ return;
+ }
+ c->addr = le32_to_cpu(b[0]);
+ c->clen = le32_to_cpu(b[1]);
+ trace_via_ac97_sgd_fetch(c->curr, c->addr, CLEN_IS_STOP(c) ? 'S' : '-',
+ CLEN_IS_EOL(c) ? 'E' : '-',
+ CLEN_IS_FLAG(c) ? 'F' : '-', CLEN_LEN(c));
+}
+
+static void out_cb(void *opaque, int avail)
+{
+ ViaAC97State *s = opaque;
+ ViaAC97SGDChannel *c = &s->aur;
+ int temp, to_copy, copied;
+ bool stop = false;
+ uint8_t tmpbuf[4096];
+
+ if (c->stat & STAT_PAUSED) {
+ return;
+ }
+ c->stat |= STAT_ACTIVE;
+ while (avail && !stop) {
+ if (!c->clen) {
+ fetch_sgd(c, &s->dev);
+ }
+ temp = MIN(CLEN_LEN(c), avail);
+ while (temp) {
+ to_copy = MIN(temp, sizeof(tmpbuf));
+ pci_dma_read(&s->dev, c->addr, tmpbuf, to_copy);
+ copied = AUD_write(s->vo, tmpbuf, to_copy);
+ if (!copied) {
+ stop = true;
+ break;
+ }
+ temp -= copied;
+ avail -= copied;
+ c->addr += copied;
+ c->clen -= copied;
+ }
+ if (CLEN_LEN(c) == 0) {
+ c->curr += 8;
+ if (CLEN_IS_EOL(c)) {
+ c->stat |= STAT_EOL;
+ if (c->type & CNTL_START) {
+ c->curr = c->base;
+ c->stat |= STAT_PAUSED;
+ } else {
+ c->stat &= ~STAT_ACTIVE;
+ AUD_set_active_out(s->vo, 0);
+ }
+ if (c->type & STAT_EOL) {
+ pci_set_irq(&s->dev, 1);
+ }
+ }
+ if (CLEN_IS_FLAG(c)) {
+ c->stat |= STAT_FLAG;
+ c->stat |= STAT_PAUSED;
+ if (c->type & STAT_FLAG) {
+ pci_set_irq(&s->dev, 1);
+ }
+ }
+ if (CLEN_IS_STOP(c)) {
+ c->stat |= STAT_STOP;
+ c->stat |= STAT_PAUSED;
+ }
+ c->clen = 0;
+ stop = true;
+ }
+ }
+}
+
+static void open_voice_out(ViaAC97State *s)
+{
+ struct audsettings as = {
+ .freq = CODEC_REG(s, AC97_PCM_Front_DAC_Rate),
+ .nchannels = s->aur.type & BIT(4) ? 2 : 1,
+ .fmt = s->aur.type & BIT(5) ? AUDIO_FORMAT_S16 : AUDIO_FORMAT_S8,
+ .endianness = 0,
+ };
+ s->vo = AUD_open_out(&s->card, s->vo, "via-ac97.out", s, out_cb, &as);
+}
+
+static uint64_t sgd_read(void *opaque, hwaddr addr, unsigned size)
+{
+ ViaAC97State *s = opaque;
+ uint64_t val = 0;
+
+ switch (addr) {
+ case 0:
+ val = s->aur.stat;
+ if (s->aur.type & CNTL_START) {
+ val |= STAT_TRIG;
+ }
+ break;
+ case 1:
+ val = s->aur.stat & STAT_PAUSED ? BIT(3) : 0;
+ break;
+ case 2:
+ val = s->aur.type;
+ break;
+ case 4:
+ val = s->aur.curr;
+ break;
+ case 0xc:
+ val = CLEN_LEN(&s->aur);
+ break;
+ case 0x10:
+ /* silence unimplemented log message that happens at every IRQ */
+ break;
+ case 0x80:
+ val = s->ac97_cmd;
+ break;
+ case 0x84:
+ val = s->aur.stat & STAT_FLAG;
+ if (s->aur.stat & STAT_EOL) {
+ val |= BIT(4);
+ }
+ if (s->aur.stat & STAT_STOP) {
+ val |= BIT(8);
+ }
+ if (s->aur.stat & STAT_ACTIVE) {
+ val |= BIT(12);
+ }
+ break;
+ default:
+ qemu_log_mask(LOG_UNIMP, "via-ac97: Unimplemented register read 0x%"
+ HWADDR_PRIx"\n", addr);
+ }
+ trace_via_ac97_sgd_read(addr, size, val);
+ return val;
+}
+
+static void sgd_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
+{
+ ViaAC97State *s = opaque;
+
+ trace_via_ac97_sgd_write(addr, size, val);
+ switch (addr) {
+ case 0:
+ if (val & STAT_STOP) {
+ s->aur.stat &= ~STAT_PAUSED;
+ }
+ if (val & STAT_EOL) {
+ s->aur.stat &= ~(STAT_EOL | STAT_PAUSED);
+ if (s->aur.type & STAT_EOL) {
+ pci_set_irq(&s->dev, 0);
+ }
+ }
+ if (val & STAT_FLAG) {
+ s->aur.stat &= ~(STAT_FLAG | STAT_PAUSED);
+ if (s->aur.type & STAT_FLAG) {
+ pci_set_irq(&s->dev, 0);
+ }
+ }
+ break;
+ case 1:
+ if (val & CNTL_START) {
+ AUD_set_active_out(s->vo, 1);
+ s->aur.stat = STAT_ACTIVE;
+ }
+ if (val & CNTL_TERM) {
+ AUD_set_active_out(s->vo, 0);
+ s->aur.stat &= ~(STAT_ACTIVE | STAT_PAUSED);
+ s->aur.clen = 0;
+ }
+ if (val & CNTL_PAUSE) {
+ AUD_set_active_out(s->vo, 0);
+ s->aur.stat &= ~STAT_ACTIVE;
+ s->aur.stat |= STAT_PAUSED;
+ } else if (!(val & CNTL_PAUSE) && (s->aur.stat & STAT_PAUSED)) {
+ AUD_set_active_out(s->vo, 1);
+ s->aur.stat |= STAT_ACTIVE;
+ s->aur.stat &= ~STAT_PAUSED;
+ }
+ break;
+ case 2:
+ {
+ uint32_t oldval = s->aur.type;
+ s->aur.type = val;
+ if ((oldval & 0x30) != (val & 0x30)) {
+ open_voice_out(s);
+ }
+ break;
+ }
+ case 4:
+ s->aur.base = val & ~1ULL;
+ s->aur.curr = s->aur.base;
+ break;
+ case 0x80:
+ if (val >> 30) {
+ /* we only have primary codec */
+ break;
+ }
+ if (val & BIT(23)) { /* read reg */
+ s->ac97_cmd = val & 0xc0ff0000ULL;
+ s->ac97_cmd |= codec_read(s, (val >> 16) & 0x7f);
+ s->ac97_cmd |= BIT(25); /* data valid */
+ } else {
+ s->ac97_cmd = val & 0xc0ffffffULL;
+ codec_write(s, (val >> 16) & 0x7f, val);
+ }
+ break;
+ case 0xc:
+ case 0x84:
+ /* Read only */
+ break;
+ default:
+ qemu_log_mask(LOG_UNIMP, "via-ac97: Unimplemented register write 0x%"
+ HWADDR_PRIx"\n", addr);
+ }
+}
+
+static const MemoryRegionOps sgd_ops = {
+ .read = sgd_read,
+ .write = sgd_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static uint64_t fm_read(void *opaque, hwaddr addr, unsigned size)
+{
+ qemu_log_mask(LOG_UNIMP, "%s: 0x%"HWADDR_PRIx" %d\n", __func__, addr, size);
+ return 0;
+}
+
+static void fm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
+{
+ qemu_log_mask(LOG_UNIMP, "%s: 0x%"HWADDR_PRIx" %d <= 0x%"PRIX64"\n",
+ __func__, addr, size, val);
+}
+
+static const MemoryRegionOps fm_ops = {
+ .read = fm_read,
+ .write = fm_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static uint64_t midi_read(void *opaque, hwaddr addr, unsigned size)
+{
+ qemu_log_mask(LOG_UNIMP, "%s: 0x%"HWADDR_PRIx" %d\n", __func__, addr, size);
+ return 0;
+}
+
+static void midi_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
+{
+ qemu_log_mask(LOG_UNIMP, "%s: 0x%"HWADDR_PRIx" %d <= 0x%"PRIX64"\n",
+ __func__, addr, size, val);
+}
+
+static const MemoryRegionOps midi_ops = {
+ .read = midi_read,
+ .write = midi_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void via_ac97_reset(DeviceState *dev)
+{
+ ViaAC97State *s = VIA_AC97(dev);
+
+ codec_reset(s);
+}
static void via_ac97_realize(PCIDevice *pci_dev, Error **errp)
{
- pci_set_word(pci_dev->config + PCI_COMMAND,
- PCI_COMMAND_INVALIDATE | PCI_COMMAND_PARITY);
+ ViaAC97State *s = VIA_AC97(pci_dev);
+ Object *o = OBJECT(s);
+
+ /*
+ * Command register Bus Master bit is documented to be fixed at 0 but it's
+ * needed for PCI DMA to work in QEMU. The pegasos2 firmware writes 0 here
+ * and the AmigaOS driver writes 1 only enabling IO bit which works on
+ * real hardware. So set it here and fix it to 1 to allow DMA.
+ */
+ pci_set_word(pci_dev->config + PCI_COMMAND, PCI_COMMAND_MASTER);
+ pci_set_word(pci_dev->wmask + PCI_COMMAND, PCI_COMMAND_IO);
pci_set_word(pci_dev->config + PCI_STATUS,
PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_MEDIUM);
pci_set_long(pci_dev->config + PCI_INTERRUPT_PIN, 0x03);
+ pci_set_byte(pci_dev->config + 0x40, 1); /* codec ready */
+
+ memory_region_init_io(&s->sgd, o, &sgd_ops, s, "via-ac97.sgd", 256);
+ pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &s->sgd);
+ memory_region_init_io(&s->fm, o, &fm_ops, s, "via-ac97.fm", 4);
+ pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &s->fm);
+ memory_region_init_io(&s->midi, o, &midi_ops, s, "via-ac97.midi", 4);
+ pci_register_bar(pci_dev, 2, PCI_BASE_ADDRESS_SPACE_IO, &s->midi);
+
+ AUD_register_card ("via-ac97", &s->card);
}
+static void via_ac97_exit(PCIDevice *dev)
+{
+ ViaAC97State *s = VIA_AC97(dev);
+
+ AUD_close_out(&s->card, s->vo);
+ AUD_remove_card(&s->card);
+}
+
+static Property via_ac97_properties[] = {
+ DEFINE_AUDIO_PROPERTIES(ViaAC97State, card),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
static void via_ac97_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
k->realize = via_ac97_realize;
+ k->exit = via_ac97_exit;
k->vendor_id = PCI_VENDOR_ID_VIA;
k->device_id = PCI_DEVICE_ID_VIA_AC97;
k->revision = 0x50;
k->class_id = PCI_CLASS_MULTIMEDIA_AUDIO;
+ device_class_set_props(dc, via_ac97_properties);
set_bit(DEVICE_CATEGORY_SOUND, dc->categories);
dc->desc = "VIA AC97";
+ dc->reset = via_ac97_reset;
/* Reason: Part of a south bridge chip */
dc->user_creatable = false;
}
@@ -41,7 +484,7 @@ static void via_ac97_class_init(ObjectClass *klass, void *data)
static const TypeInfo via_ac97_info = {
.name = TYPE_VIA_AC97,
.parent = TYPE_PCI_DEVICE,
- .instance_size = sizeof(PCIDevice),
+ .instance_size = sizeof(ViaAC97State),
.class_init = via_ac97_class_init,
.interfaces = (InterfaceInfo[]) {
{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
diff --git a/hw/block/block.c b/hw/block/block.c
index af0710e477..9f52ee6e72 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -39,8 +39,7 @@ static int blk_pread_nonzeroes(BlockBackend *blk, hwaddr size, void *buf)
return ret;
}
if (!(ret & BDRV_BLOCK_ZERO)) {
- ret = bdrv_pread(bs->file, offset, bytes,
- (uint8_t *) buf + offset, 0);
+ ret = blk_pread(blk, offset, bytes, (uint8_t *) buf + offset, 0);
if (ret < 0) {
return ret;
}
diff --git a/hw/block/dataplane/meson.build b/hw/block/dataplane/meson.build
index 12c6a264f1..78d7ac1a11 100644
--- a/hw/block/dataplane/meson.build
+++ b/hw/block/dataplane/meson.build
@@ -1,2 +1,2 @@
specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
-specific_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
+specific_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c'))
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 2785b9e849..734da42ea7 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -23,8 +23,9 @@
#include "qemu/main-loop.h"
#include "qemu/memalign.h"
#include "qapi/error.h"
-#include "hw/xen/xen_common.h"
+#include "hw/xen/xen.h"
#include "hw/block/xen_blkif.h"
+#include "hw/xen/interface/io/ring.h"
#include "sysemu/block-backend.h"
#include "sysemu/iothread.h"
#include "xen-block.h"
@@ -101,9 +102,9 @@ static XenBlockRequest *xen_block_start_request(XenBlockDataPlane *dataplane)
* re-use requests, allocate the memory once here. It will be freed
* xen_block_dataplane_destroy() when the request list is freed.
*/
- request->buf = qemu_memalign(XC_PAGE_SIZE,
+ request->buf = qemu_memalign(XEN_PAGE_SIZE,
BLKIF_MAX_SEGMENTS_PER_REQUEST *
- XC_PAGE_SIZE);
+ XEN_PAGE_SIZE);
dataplane->requests_total++;
qemu_iovec_init(&request->v, 1);
} else {
@@ -185,7 +186,7 @@ static int xen_block_parse_request(XenBlockRequest *request)
goto err;
}
if (request->req.seg[i].last_sect * dataplane->sector_size >=
- XC_PAGE_SIZE) {
+ XEN_PAGE_SIZE) {
error_report("error: page crossing");
goto err;
}
@@ -705,6 +706,7 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane)
Error *local_err = NULL;
xen_device_unmap_grant_refs(xendev, dataplane->sring,
+ dataplane->ring_ref,
dataplane->nr_ring_ref, &local_err);
dataplane->sring = NULL;
@@ -739,7 +741,7 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
dataplane->protocol = protocol;
- ring_size = XC_PAGE_SIZE * dataplane->nr_ring_ref;
+ ring_size = XEN_PAGE_SIZE * dataplane->nr_ring_ref;
switch (dataplane->protocol) {
case BLKIF_PROTOCOL_NATIVE:
{
diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 802d2eb021..dc5ffbc4ff 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -24,6 +24,7 @@
#include "qemu/osdep.h"
#include "qemu/units.h"
#include "sysemu/block-backend.h"
+#include "hw/block/block.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "hw/ssi/ssi.h"
@@ -1615,8 +1616,7 @@ static void m25p80_realize(SSIPeripheral *ss, Error **errp)
trace_m25p80_binding(s);
s->storage = blk_blockalign(s->blk, s->size);
- if (blk_pread(s->blk, 0, s->size, s->storage, 0) < 0) {
- error_setg(errp, "failed to read the initial flash content");
+ if (!blk_check_size_and_read_all(s->blk, s->storage, s->size, errp)) {
return;
}
} else {
diff --git a/hw/block/meson.build b/hw/block/meson.build
index b434d5654c..cc2a75cc50 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -14,7 +14,7 @@ softmmu_ss.add(when: 'CONFIG_PFLASH_CFI02', if_true: files('pflash_cfi02.c'))
softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c'))
softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80_sfdp.c'))
softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c'))
-softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
+softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c'))
softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c'))
specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c', 'virtio-blk-common.c'))
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 345b284d70..f5a744589d 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -19,7 +19,6 @@
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qstring.h"
#include "qom/object_interfaces.h"
-#include "hw/xen/xen_common.h"
#include "hw/block/xen_blkif.h"
#include "hw/qdev-properties.h"
#include "hw/xen/xen-block.h"
@@ -84,7 +83,8 @@ static void xen_block_connect(XenDevice *xendev, Error **errp)
g_free(ring_ref);
return;
}
- } else if (order <= blockdev->props.max_ring_page_order) {
+ } else if (qemu_xen_gnttab_can_map_multi() &&
+ order <= blockdev->props.max_ring_page_order) {
unsigned int i;
nr_ring_ref = 1 << order;
@@ -256,8 +256,12 @@ static void xen_block_realize(XenDevice *xendev, Error **errp)
}
xen_device_backend_printf(xendev, "feature-flush-cache", "%u", 1);
- xen_device_backend_printf(xendev, "max-ring-page-order", "%u",
- blockdev->props.max_ring_page_order);
+
+ if (qemu_xen_gnttab_can_map_multi()) {
+ xen_device_backend_printf(xendev, "max-ring-page-order", "%u",
+ blockdev->props.max_ring_page_order);
+ }
+
xen_device_backend_printf(xendev, "info", "%u", blockdev->info);
xen_device_frontend_printf(xendev, "virtual-device", "%lu",
diff --git a/hw/char/meson.build b/hw/char/meson.build
index 7b594f51b8..e02c60dd54 100644
--- a/hw/char/meson.build
+++ b/hw/char/meson.build
@@ -18,7 +18,7 @@ softmmu_ss.add(when: 'CONFIG_SERIAL_PCI', if_true: files('serial-pci.c'))
softmmu_ss.add(when: 'CONFIG_SERIAL_PCI_MULTI', if_true: files('serial-pci-multi.c'))
softmmu_ss.add(when: 'CONFIG_SHAKTI_UART', if_true: files('shakti_uart.c'))
softmmu_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-console.c'))
-softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen_console.c'))
+softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen_console.c'))
softmmu_ss.add(when: 'CONFIG_XILINX', if_true: files('xilinx_uartlite.c'))
softmmu_ss.add(when: 'CONFIG_AVR_USART', if_true: files('avr_usart.c'))
diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index 63153dfde4..c7a19c0e7c 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -173,6 +173,48 @@ static void xencons_send(struct XenConsole *con)
/* -------------------------------------------------------------------- */
+static int store_con_info(struct XenConsole *con)
+{
+ Chardev *cs = qemu_chr_fe_get_driver(&con->chr);
+ char *pts = NULL;
+ char *dom_path;
+ GString *path;
+ int ret = -1;
+
+ /* Only continue if we're talking to a pty. */
+ if (!CHARDEV_IS_PTY(cs)) {
+ return 0;
+ }
+ pts = cs->filename + 4;
+
+ dom_path = qemu_xen_xs_get_domain_path(xenstore, xen_domid);
+ if (!dom_path) {
+ return 0;
+ }
+
+ path = g_string_new(dom_path);
+ free(dom_path);
+
+ if (con->xendev.dev) {
+ g_string_append_printf(path, "/device/console/%d", con->xendev.dev);
+ } else {
+ g_string_append(path, "/console");
+ }
+ g_string_append(path, "/tty");
+
+ if (xenstore_write_str(con->console, path->str, pts)) {
+ fprintf(stderr, "xenstore_write_str for '%s' fail", path->str);
+ goto out;
+ }
+ ret = 0;
+
+out:
+ g_string_free(path, true);
+ free(path);
+
+ return ret;
+}
+
static int con_init(struct XenLegacyDevice *xendev)
{
struct XenConsole *con = container_of(xendev, struct XenConsole, xendev);
@@ -181,7 +223,7 @@ static int con_init(struct XenLegacyDevice *xendev)
const char *output;
/* setup */
- dom = xs_get_domain_path(xenstore, con->xendev.dom);
+ dom = qemu_xen_xs_get_domain_path(xenstore, con->xendev.dom);
if (!xendev->dev) {
snprintf(con->console, sizeof(con->console), "%s/console", dom);
} else {
@@ -215,8 +257,7 @@ static int con_init(struct XenLegacyDevice *xendev)
&error_abort);
}
- xenstore_store_pv_console_info(con->xendev.dev,
- qemu_chr_fe_get_driver(&con->chr));
+ store_con_info(con);
out:
g_free(type);
@@ -237,9 +278,9 @@ static int con_initialise(struct XenLegacyDevice *xendev)
if (!xendev->dev) {
xen_pfn_t mfn = con->ring_ref;
- con->sring = xenforeignmemory_map(xen_fmem, con->xendev.dom,
- PROT_READ | PROT_WRITE,
- 1, &mfn, NULL);
+ con->sring = qemu_xen_foreignmem_map(con->xendev.dom, NULL,
+ PROT_READ | PROT_WRITE,
+ 1, &mfn, NULL);
} else {
con->sring = xen_be_map_grant_ref(xendev, con->ring_ref,
PROT_READ | PROT_WRITE);
@@ -269,9 +310,9 @@ static void con_disconnect(struct XenLegacyDevice *xendev)
if (con->sring) {
if (!xendev->dev) {
- xenforeignmemory_unmap(xen_fmem, con->sring, 1);
+ qemu_xen_foreignmem_unmap(con->sring, 1);
} else {
- xen_be_unmap_grant_ref(xendev, con->sring);
+ xen_be_unmap_grant_ref(xendev, con->sring, con->ring_ref);
}
con->sring = NULL;
}
diff --git a/hw/display/meson.build b/hw/display/meson.build
index f470179122..4191694380 100644
--- a/hw/display/meson.build
+++ b/hw/display/meson.build
@@ -14,7 +14,7 @@ softmmu_ss.add(when: 'CONFIG_PL110', if_true: files('pl110.c'))
softmmu_ss.add(when: 'CONFIG_SII9022', if_true: files('sii9022.c'))
softmmu_ss.add(when: 'CONFIG_SSD0303', if_true: files('ssd0303.c'))
softmmu_ss.add(when: 'CONFIG_SSD0323', if_true: files('ssd0323.c'))
-softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xenfb.c'))
+softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xenfb.c'))
softmmu_ss.add(when: 'CONFIG_VGA_PCI', if_true: files('vga-pci.c'))
softmmu_ss.add(when: 'CONFIG_VGA_ISA', if_true: files('vga-isa.c'))
diff --git a/hw/display/sm501.c b/hw/display/sm501.c
index 17835159fc..dbabbc4339 100644
--- a/hw/display/sm501.c
+++ b/hw/display/sm501.c
@@ -465,6 +465,7 @@ typedef struct SM501State {
uint32_t last_width;
uint32_t last_height;
bool do_full_update; /* perform a full update next time */
+ uint8_t use_pixman;
I2CBus *i2c_bus;
/* mmio registers */
@@ -827,7 +828,7 @@ static void sm501_2d_operation(SM501State *s)
de = db + (width + (height - 1) * dst_pitch) * bypp;
overlap = (db < se && sb < de);
}
- if (overlap) {
+ if (overlap && (s->use_pixman & BIT(2))) {
/* pixman can't do reverse blit: copy via temporary */
int tmp_stride = DIV_ROUND_UP(width * bypp, sizeof(uint32_t));
uint32_t *tmp = tmp_buf;
@@ -852,13 +853,15 @@ static void sm501_2d_operation(SM501State *s)
if (tmp != tmp_buf) {
g_free(tmp);
}
- } else {
+ } else if (!overlap && (s->use_pixman & BIT(1))) {
fallback = !pixman_blt((uint32_t *)&s->local_mem[src_base],
(uint32_t *)&s->local_mem[dst_base],
src_pitch * bypp / sizeof(uint32_t),
dst_pitch * bypp / sizeof(uint32_t),
8 * bypp, 8 * bypp, src_x, src_y,
dst_x, dst_y, width, height);
+ } else {
+ fallback = true;
}
if (fallback) {
uint8_t *sp = s->local_mem + src_base;
@@ -891,7 +894,7 @@ static void sm501_2d_operation(SM501State *s)
color = cpu_to_le16(color);
}
- if ((width == 1 && height == 1) ||
+ if (!(s->use_pixman & BIT(0)) || (width == 1 && height == 1) ||
!pixman_fill((uint32_t *)&s->local_mem[dst_base],
dst_pitch * bypp / sizeof(uint32_t), 8 * bypp,
dst_x, dst_y, width, height, color)) {
@@ -2035,6 +2038,7 @@ static void sm501_realize_sysbus(DeviceState *dev, Error **errp)
static Property sm501_sysbus_properties[] = {
DEFINE_PROP_UINT32("vram-size", SM501SysBusState, vram_size, 0),
+ DEFINE_PROP_UINT8("x-pixman", SM501SysBusState, state.use_pixman, 7),
DEFINE_PROP_END_OF_LIST(),
};
@@ -2122,6 +2126,7 @@ static void sm501_realize_pci(PCIDevice *dev, Error **errp)
static Property sm501_pci_properties[] = {
DEFINE_PROP_UINT32("vram-size", SM501PCIState, vram_size, 64 * MiB),
+ DEFINE_PROP_UINT8("x-pixman", SM501PCIState, state.use_pixman, 7),
DEFINE_PROP_END_OF_LIST(),
};
@@ -2162,11 +2167,18 @@ static void sm501_pci_class_init(ObjectClass *klass, void *data)
dc->vmsd = &vmstate_sm501_pci;
}
+static void sm501_pci_init(Object *o)
+{
+ object_property_set_description(o, "x-pixman", "Use pixman for: "
+ "1: fill, 2: blit, 4: overlap blit");
+}
+
static const TypeInfo sm501_pci_info = {
.name = TYPE_PCI_SM501,
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(SM501PCIState),
.class_init = sm501_pci_class_init,
+ .instance_init = sm501_pci_init,
.interfaces = (InterfaceInfo[]) {
{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
{ },
diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
index 260eb38a76..0074a9b6f8 100644
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -98,8 +98,9 @@ static int common_bind(struct common *c)
if (xenstore_read_fe_int(&c->xendev, "event-channel", &c->xendev.remote_port) == -1)
return -1;
- c->page = xenforeignmemory_map(xen_fmem, c->xendev.dom,
- PROT_READ | PROT_WRITE, 1, &mfn, NULL);
+ c->page = qemu_xen_foreignmem_map(c->xendev.dom, NULL,
+ PROT_READ | PROT_WRITE, 1, &mfn,
+ NULL);
if (c->page == NULL)
return -1;
@@ -115,7 +116,7 @@ static void common_unbind(struct common *c)
{
xen_pv_unbind_evtchn(&c->xendev);
if (c->page) {
- xenforeignmemory_unmap(xen_fmem, c->page, 1);
+ qemu_xen_foreignmem_unmap(c->page, 1);
c->page = NULL;
}
}
@@ -488,27 +489,28 @@ static int xenfb_map_fb(struct XenFB *xenfb)
}
if (xenfb->pixels) {
- munmap(xenfb->pixels, xenfb->fbpages * XC_PAGE_SIZE);
+ munmap(xenfb->pixels, xenfb->fbpages * XEN_PAGE_SIZE);
xenfb->pixels = NULL;
}
- xenfb->fbpages = DIV_ROUND_UP(xenfb->fb_len, XC_PAGE_SIZE);
+ xenfb->fbpages = DIV_ROUND_UP(xenfb->fb_len, XEN_PAGE_SIZE);
n_fbdirs = xenfb->fbpages * mode / 8;
- n_fbdirs = DIV_ROUND_UP(n_fbdirs, XC_PAGE_SIZE);
+ n_fbdirs = DIV_ROUND_UP(n_fbdirs, XEN_PAGE_SIZE);
pgmfns = g_new0(xen_pfn_t, n_fbdirs);
fbmfns = g_new0(xen_pfn_t, xenfb->fbpages);
xenfb_copy_mfns(mode, n_fbdirs, pgmfns, pd);
- map = xenforeignmemory_map(xen_fmem, xenfb->c.xendev.dom,
- PROT_READ, n_fbdirs, pgmfns, NULL);
+ map = qemu_xen_foreignmem_map(xenfb->c.xendev.dom, NULL, PROT_READ,
+ n_fbdirs, pgmfns, NULL);
if (map == NULL)
goto out;
xenfb_copy_mfns(mode, xenfb->fbpages, fbmfns, map);
- xenforeignmemory_unmap(xen_fmem, map, n_fbdirs);
+ qemu_xen_foreignmem_unmap(map, n_fbdirs);
- xenfb->pixels = xenforeignmemory_map(xen_fmem, xenfb->c.xendev.dom,
- PROT_READ, xenfb->fbpages, fbmfns, NULL);
+ xenfb->pixels = qemu_xen_foreignmem_map(xenfb->c.xendev.dom, NULL,
+ PROT_READ, xenfb->fbpages,
+ fbmfns, NULL);
if (xenfb->pixels == NULL)
goto out;
@@ -526,8 +528,8 @@ static int xenfb_configure_fb(struct XenFB *xenfb, size_t fb_len_lim,
{
size_t mfn_sz = sizeof_field(struct xenfb_page, pd[0]);
size_t pd_len = sizeof_field(struct xenfb_page, pd) / mfn_sz;
- size_t fb_pages = pd_len * XC_PAGE_SIZE / mfn_sz;
- size_t fb_len_max = fb_pages * XC_PAGE_SIZE;
+ size_t fb_pages = pd_len * XEN_PAGE_SIZE / mfn_sz;
+ size_t fb_len_max = fb_pages * XEN_PAGE_SIZE;
int max_width, max_height;
if (fb_len_lim > fb_len_max) {
@@ -927,8 +929,8 @@ static void fb_disconnect(struct XenLegacyDevice *xendev)
* Replacing the framebuffer with anonymous shared memory
* instead. This releases the guest pages and keeps qemu happy.
*/
- xenforeignmemory_unmap(xen_fmem, fb->pixels, fb->fbpages);
- fb->pixels = mmap(fb->pixels, fb->fbpages * XC_PAGE_SIZE,
+ qemu_xen_foreignmem_unmap(fb->pixels, fb->fbpages);
+ fb->pixels = mmap(fb->pixels, fb->fbpages * XEN_PAGE_SIZE,
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON,
-1, 0);
if (fb->pixels == MAP_FAILED) {
diff --git a/hw/i386/kvm/meson.build b/hw/i386/kvm/meson.build
index 82dd6ae7c6..6621ba5cd7 100644
--- a/hw/i386/kvm/meson.build
+++ b/hw/i386/kvm/meson.build
@@ -9,6 +9,7 @@ i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files(
'xen_evtchn.c',
'xen_gnttab.c',
'xen_xenstore.c',
+ 'xenstore_impl.c',
))
i386_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss)
diff --git a/hw/i386/kvm/trace-events b/hw/i386/kvm/trace-events
index b83c3eb965..e4c82de6f3 100644
--- a/hw/i386/kvm/trace-events
+++ b/hw/i386/kvm/trace-events
@@ -3,3 +3,18 @@ kvm_xen_unmap_pirq(int pirq, int gsi) "pirq %d gsi %d"
kvm_xen_get_free_pirq(int pirq, int type) "pirq %d type %d"
kvm_xen_bind_pirq(int pirq, int port) "pirq %d port %d"
kvm_xen_unmask_pirq(int pirq, char *dev, int vector) "pirq %d dev %s vector %d"
+xenstore_error(unsigned int id, unsigned int tx_id, const char *err) "req %u tx %u err %s"
+xenstore_read(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_write(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_mkdir(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_directory(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_directory_part(unsigned int tx_id, const char *path, unsigned int offset) "tx %u path %s offset %u"
+xenstore_transaction_start(unsigned int new_tx) "new_tx %u"
+xenstore_transaction_end(unsigned int tx_id, bool commit) "tx %u commit %d"
+xenstore_rm(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_get_perms(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_set_perms(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_watch(const char *path, const char *token) "path %s token %s"
+xenstore_unwatch(const char *path, const char *token) "path %s token %s"
+xenstore_reset_watches(void) ""
+xenstore_watch_event(const char *path, const char *token) "path %s token %s"
diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index 886fbf6b3b..98a7b85047 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -34,6 +34,7 @@
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/irq.h"
+#include "hw/xen/xen_backend_ops.h"
#include "xen_evtchn.h"
#include "xen_overlay.h"
@@ -278,6 +279,17 @@ static const TypeInfo xen_evtchn_info = {
.class_init = xen_evtchn_class_init,
};
+static struct evtchn_backend_ops emu_evtchn_backend_ops = {
+ .open = xen_be_evtchn_open,
+ .bind_interdomain = xen_be_evtchn_bind_interdomain,
+ .unbind = xen_be_evtchn_unbind,
+ .close = xen_be_evtchn_close,
+ .get_fd = xen_be_evtchn_fd,
+ .notify = xen_be_evtchn_notify,
+ .unmask = xen_be_evtchn_unmask,
+ .pending = xen_be_evtchn_pending,
+};
+
static void gsi_assert_bh(void *opaque)
{
struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
@@ -318,6 +330,9 @@ void xen_evtchn_create(void)
s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64);
s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words);
s->pirq = g_new0(struct pirq_info, s->nr_pirqs);
+
+ /* Set event channel functions for backend drivers to use */
+ xen_evtchn_ops = &emu_evtchn_backend_ops;
}
void xen_evtchn_connect_gsis(qemu_irq *system_gsis)
diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c
index 1e691ded32..21c30e3659 100644
--- a/hw/i386/kvm/xen_gnttab.c
+++ b/hw/i386/kvm/xen_gnttab.c
@@ -22,6 +22,7 @@
#include "hw/sysbus.h"
#include "hw/xen/xen.h"
+#include "hw/xen/xen_backend_ops.h"
#include "xen_overlay.h"
#include "xen_gnttab.h"
@@ -34,11 +35,10 @@
#define TYPE_XEN_GNTTAB "xen-gnttab"
OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB)
-#define XEN_PAGE_SHIFT 12
-#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)
-
#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
+static struct gnttab_backend_ops emu_gnttab_backend_ops;
+
struct XenGnttabState {
/*< private >*/
SysBusDevice busdev;
@@ -57,6 +57,8 @@ struct XenGnttabState {
MemoryRegion gnt_frames;
MemoryRegion *gnt_aliases;
uint64_t *gnt_frame_gpas;
+
+ uint8_t *map_track;
};
struct XenGnttabState *xen_gnttab_singleton;
@@ -70,13 +72,11 @@ static void xen_gnttab_realize(DeviceState *dev, Error **errp)
error_setg(errp, "Xen grant table support is for Xen emulation");
return;
}
- s->nr_frames = 0;
s->max_frames = kvm_xen_get_gnttab_max_frames();
memory_region_init_ram(&s->gnt_frames, OBJECT(dev), "xen:grant_table",
XEN_PAGE_SIZE * s->max_frames, &error_abort);
memory_region_set_enabled(&s->gnt_frames, true);
s->entries.v1 = memory_region_get_ram_ptr(&s->gnt_frames);
- memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
/* Create individual page-sizes aliases for overlays */
s->gnt_aliases = (void *)g_new0(MemoryRegion, s->max_frames);
@@ -88,9 +88,18 @@ static void xen_gnttab_realize(DeviceState *dev, Error **errp)
s->gnt_frame_gpas[i] = INVALID_GPA;
}
+ s->nr_frames = 0;
+ memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
+ s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
+ s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
+
qemu_mutex_init(&s->gnt_lock);
xen_gnttab_singleton = s;
+
+ s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1);
+
+ xen_gnttab_ops = &emu_gnttab_backend_ops;
}
static int xen_gnttab_post_load(void *opaque, int version_id)
@@ -230,3 +239,309 @@ int xen_gnttab_query_size_op(struct gnttab_query_size *size)
size->max_nr_frames = s->max_frames;
return 0;
}
+
/* Track per-open refs, to allow close() to clean up. */
struct active_ref {
    MemoryRegionSection mrs;  /* guest RAM section backing the granted frame */
    void *virtaddr;           /* host pointer returned to the mapper */
    uint32_t refcnt;          /* outstanding map_refs() calls for this gref */
    int prot;                 /* union of PROT_READ/PROT_WRITE granted so far */
};
+
/*
 * Drop one mapping of grant @ref: release the memory-region reference
 * (marking the page dirty first if it was mapped writable) and, when the
 * last mapping goes away, clear the GTF_reading/GTF_writing status bits
 * so the guest can see the grant is no longer in use.
 */
static void gnt_unref(XenGnttabState *s, grant_ref_t ref,
                      MemoryRegionSection *mrs, int prot)
{
    if (mrs && mrs->mr) {
        if (prot & PROT_WRITE) {
            memory_region_set_dirty(mrs->mr, mrs->offset_within_region,
                                    XEN_PAGE_SIZE);
        }
        memory_region_unref(mrs->mr);
        mrs->mr = NULL;
    }
    /* Caller must hold a mapping; map_track is the per-gref map count */
    assert(s->map_track[ref] != 0);

    if (--s->map_track[ref] == 0) {
        grant_entry_v1_t *gnt_p = &s->entries.v1[ref];
        /* Atomic: the guest may concurrently access the flags word */
        qatomic_and(&gnt_p->flags, (uint16_t)~(GTF_reading | GTF_writing));
    }
}
+
/*
 * Take a reference on grant @ref for a mapping with protection @prot.
 * On success, sets GTF_reading (and GTF_writing for writable mappings)
 * in the grant entry and returns the guest physical address of the
 * granted frame. Returns INVALID_GPA if the ref is out of range, already
 * mapped UINT8_MAX times, not granted to DOMID_QEMU, or not accessible
 * with the requested protection.
 */
static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot)
{
    uint16_t mask = GTF_type_mask | GTF_sub_page;
    grant_entry_v1_t gnt, *gnt_p;
    int retries = 0;

    if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
        s->map_track[ref] == UINT8_MAX) {
        return INVALID_GPA;
    }

    if (prot & PROT_WRITE) {
        /* A GTF_readonly grant must not be mapped writable */
        mask |= GTF_readonly;
    }

    gnt_p = &s->entries.v1[ref];

    /*
     * The guest can legitimately be changing the GTF_readonly flag. Allow
     * that, but don't let a malicious guest cause a livelock.
     */
    for (retries = 0; retries < 5; retries++) {
        uint16_t new_flags;

        /* Read the entry before an atomic operation on its flags */
        gnt = *(volatile grant_entry_v1_t *)gnt_p;

        if ((gnt.flags & mask) != GTF_permit_access ||
            gnt.domid != DOMID_QEMU) {
            return INVALID_GPA;
        }

        new_flags = gnt.flags | GTF_reading;
        if (prot & PROT_WRITE) {
            new_flags |= GTF_writing;
        }

        /* Only claim the grant if the flags did not change under us */
        if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) {
            return (uint64_t)gnt.frame << XEN_PAGE_SHIFT;
        }
    }

    return INVALID_GPA;
}
+
/* Per-open gntdev handle: tracks this opener's active grant mappings. */
struct xengntdev_handle {
    GHashTable *active_maps;  /* gref (direct int key) -> struct active_ref */
};
+
/*
 * No-op for the emulated backend: the table size is fixed when the
 * device is realized, so there is no per-handle limit to configure.
 */
static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt,
                                        uint32_t nr_grants)
{
    return 0;
}
+
/*
 * Map grant references from @domid and return a host pointer to the
 * mapped page. Only single-page mappings (count == 1) of the local
 * emulated domain are supported. Repeat mappings of the same gref share
 * one active_ref: the refcount is bumped and the mapping is upgraded to
 * writable on demand. Returns NULL (with errno set on most paths) on
 * failure.
 */
static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt,
                                    uint32_t count, uint32_t domid,
                                    uint32_t *refs, int prot)
{
    XenGnttabState *s = xen_gnttab_singleton;
    struct active_ref *act;

    if (!s) {
        errno = ENOTSUP;
        return NULL;
    }

    if (domid != xen_domid) {
        errno = EINVAL;
        return NULL;
    }

    if (!count || count > 4096) {
        errno = EINVAL;
        return NULL;
    }

    /*
     * Making a contiguous mapping from potentially discontiguous grant
     * references would be... distinctly non-trivial. We don't support it.
     * Even changing the API to return an array of pointers, one per page,
     * wouldn't be simple to use in PV backends because some structures
     * actually cross page boundaries (e.g. 32-bit blkif_response ring
     * entries are 12 bytes).
     */
    if (count != 1) {
        errno = EINVAL;
        return NULL;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    if (act) {
        /* Upgrade an existing read-only mapping if write access is wanted */
        if ((prot & PROT_WRITE) && !(act->prot & PROT_WRITE)) {
            if (gnt_ref(s, refs[0], prot) == INVALID_GPA) {
                /* NOTE(review): errno is not set on this path — confirm */
                return NULL;
            }
            act->prot |= PROT_WRITE;
        }
        act->refcnt++;
    } else {
        uint64_t gpa = gnt_ref(s, refs[0], prot);
        if (gpa == INVALID_GPA) {
            errno = EINVAL;
            return NULL;
        }

        act = g_new0(struct active_ref, 1);
        act->prot = prot;
        act->refcnt = 1;
        act->mrs = memory_region_find(get_system_memory(), gpa, XEN_PAGE_SIZE);

        /* Only RAM-backed, full-page sections can be mapped into QEMU */
        if (act->mrs.mr &&
            !int128_lt(act->mrs.size, int128_make64(XEN_PAGE_SIZE)) &&
            memory_region_get_ram_addr(act->mrs.mr) != RAM_ADDR_INVALID) {
            act->virtaddr = qemu_map_ram_ptr(act->mrs.mr->ram_block,
                                             act->mrs.offset_within_region);
        }
        if (!act->virtaddr) {
            gnt_unref(s, refs[0], &act->mrs, 0);
            g_free(act);
            errno = EINVAL;
            return NULL;
        }

        s->map_track[refs[0]]++;
        g_hash_table_insert(xgt->active_maps, GINT_TO_POINTER(refs[0]), act);
    }

    return act->virtaddr;
}
+
+static gboolean do_unmap(gpointer key, gpointer value, gpointer user_data)
+{
+ XenGnttabState *s = user_data;
+ grant_ref_t gref = GPOINTER_TO_INT(key);
+ struct active_ref *act = value;
+
+ gnt_unref(s, gref, &act->mrs, act->prot);
+ g_free(act);
+ return true;
+}
+
/*
 * Undo one xen_be_gnttab_map_refs() of @refs[0]. The mapping is torn
 * down only when the last reference is dropped. @start_address must be
 * the pointer the matching map call returned. Returns 0 or -errno.
 */
static int xen_be_gnttab_unmap(struct xengntdev_handle *xgt,
                               void *start_address, uint32_t *refs,
                               uint32_t count)
{
    XenGnttabState *s = xen_gnttab_singleton;
    struct active_ref *act;

    if (!s) {
        return -ENOTSUP;
    }

    /* Only single-page mappings exist in the emulated backend */
    if (count != 1) {
        return -EINVAL;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    if (!act) {
        return -ENOENT;
    }

    if (act->virtaddr != start_address) {
        return -EINVAL;
    }

    if (!--act->refcnt) {
        do_unmap(GINT_TO_POINTER(refs[0]), act, s);
        g_hash_table_remove(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    }

    return 0;
}
+
/*
 * This looks a bit like the one for true Xen in xen-operations.c but
 * in emulation we don't support multi-page mappings. And under Xen we
 * *want* the multi-page mappings so we have fewer bounces through the
 * kernel and the hypervisor. So the code paths end up being similar,
 * but different.
 *
 * Copy each segment to (@to_domain) or from the guest by transiently
 * mapping the foreign gref around a memcpy. Returns 0 or -errno, with
 * an optional Error for diagnostics.
 */
static int xen_be_gnttab_copy(struct xengntdev_handle *xgt, bool to_domain,
                              uint32_t domid, XenGrantCopySegment *segs,
                              uint32_t nr_segs, Error **errp)
{
    int prot = to_domain ? PROT_WRITE : PROT_READ;
    unsigned int i;

    for (i = 0; i < nr_segs; i++) {
        XenGrantCopySegment *seg = &segs[i];
        void *page;
        uint32_t ref = to_domain ? seg->dest.foreign.ref :
            seg->source.foreign.ref;

        page = xen_be_gnttab_map_refs(xgt, 1, domid, &ref, prot);
        if (!page) {
            if (errp) {
                error_setg_errno(errp, errno,
                                 "xen_be_gnttab_map_refs failed");
            }
            return -errno;
        }

        if (to_domain) {
            memcpy(page + seg->dest.foreign.offset, seg->source.virt,
                   seg->len);
        } else {
            memcpy(seg->dest.virt, page + seg->source.foreign.offset,
                   seg->len);
        }

        if (xen_be_gnttab_unmap(xgt, page, &ref, 1)) {
            if (errp) {
                error_setg_errno(errp, errno, "xen_be_gnttab_unmap failed");
            }
            return -errno;
        }
    }

    return 0;
}
+
+static struct xengntdev_handle *xen_be_gnttab_open(void)
+{
+ struct xengntdev_handle *xgt = g_new0(struct xengntdev_handle, 1);
+
+ xgt->active_maps = g_hash_table_new(g_direct_hash, g_direct_equal);
+ return xgt;
+}
+
+static int xen_be_gnttab_close(struct xengntdev_handle *xgt)
+{
+ XenGnttabState *s = xen_gnttab_singleton;
+
+ if (!s) {
+ return -ENOTSUP;
+ }
+
+ g_hash_table_foreach_remove(xgt->active_maps, do_unmap, s);
+ g_hash_table_destroy(xgt->active_maps);
+ g_free(xgt);
+ return 0;
+}
+
/*
 * Operations installed as xen_gnttab_ops (in xen_gnttab_realize) so PV
 * backend drivers are redirected to this emulated grant table.
 */
static struct gnttab_backend_ops emu_gnttab_backend_ops = {
    .open = xen_be_gnttab_open,
    .close = xen_be_gnttab_close,
    .grant_copy = xen_be_gnttab_copy,
    .set_max_grants = xen_be_gnttab_set_max_grants,
    .map_refs = xen_be_gnttab_map_refs,
    .unmap = xen_be_gnttab_unmap,
};
+
/*
 * Reinitialise the grant table to its post-realize state: no frames
 * in use, all entries cleared except the reserved xenstore grant, and
 * all map-track counts zeroed. Returns 0, or -ENOTSUP if the device
 * does not exist.
 */
int xen_gnttab_reset(void)
{
    XenGnttabState *s = xen_gnttab_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    s->nr_frames = 0;

    memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);

    /* The xenstore ring grant is always present */
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);

    memset(s->map_track, 0, s->max_frames * ENTRIES_PER_FRAME_V1);

    return 0;
}
diff --git a/hw/i386/kvm/xen_gnttab.h b/hw/i386/kvm/xen_gnttab.h
index 3bdbe96191..ee215239b0 100644
--- a/hw/i386/kvm/xen_gnttab.h
+++ b/hw/i386/kvm/xen_gnttab.h
@@ -13,6 +13,7 @@
#define QEMU_XEN_GNTTAB_H
void xen_gnttab_create(void);
+int xen_gnttab_reset(void);
int xen_gnttab_map_page(uint64_t idx, uint64_t gfn);
struct gnttab_set_version;
diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 14193ef3f9..2cadafd56a 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -21,6 +21,7 @@
#include "hw/sysbus.h"
#include "hw/xen/xen.h"
+#include "hw/xen/xen_backend_ops.h"
#include "xen_overlay.h"
#include "xen_evtchn.h"
#include "xen_xenstore.h"
@@ -28,15 +29,17 @@
#include "sysemu/kvm.h"
#include "sysemu/kvm_xen.h"
+#include "trace.h"
+
+#include "xenstore_impl.h"
+
#include "hw/xen/interface/io/xs_wire.h"
#include "hw/xen/interface/event_channel.h"
+#include "hw/xen/interface/grant_table.h"
#define TYPE_XEN_XENSTORE "xen-xenstore"
OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)
-#define XEN_PAGE_SHIFT 12
-#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)
-
#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
#define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))
@@ -47,6 +50,9 @@ struct XenXenstoreState {
SysBusDevice busdev;
/*< public >*/
+ XenstoreImplState *impl;
+ GList *watch_events; /* for the guest */
+
MemoryRegion xenstore_page;
struct xenstore_domain_interface *xs;
uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
@@ -59,15 +65,54 @@ struct XenXenstoreState {
evtchn_port_t guest_port;
evtchn_port_t be_port;
struct xenevtchn_handle *eh;
+
+ uint8_t *impl_state;
+ uint32_t impl_state_size;
+
+ struct xengntdev_handle *gt;
+ void *granted_xs;
};
struct XenXenstoreState *xen_xenstore_singleton;
static void xen_xenstore_event(void *opaque);
+static void fire_watch_cb(void *opaque, const char *path, const char *token);
+
+static struct xenstore_backend_ops emu_xenstore_backend_ops;
+
/*
 * Format a value and store it at /local/domain/<domid>/<relpath> in the
 * emulated xenstore, acting as DOMID_QEMU (i.e. as dom0), then apply
 * @perms to the node. Used only to seed default nodes at realize time,
 * so any failure is fatal (asserts).
 */
static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s,
                                                GList *perms,
                                                const char *relpath,
                                                const char *fmt, ...)
{
    gchar *abspath;
    gchar *value;
    va_list args;
    GByteArray *data;
    int err;

    abspath = g_strdup_printf("/local/domain/%u/%s", xen_domid, relpath);
    va_start(args, fmt);
    value = g_strdup_vprintf(fmt, args);
    va_end(args);

    /* The byte array takes ownership of 'value' and frees it on unref */
    data = g_byte_array_new_take((void *)value, strlen(value));

    err = xs_impl_write(s->impl, DOMID_QEMU, XBT_NULL, abspath, data);
    assert(!err);

    g_byte_array_unref(data);

    err = xs_impl_set_perms(s->impl, DOMID_QEMU, XBT_NULL, abspath, perms);
    assert(!err);

    g_free(abspath);
}
static void xen_xenstore_realize(DeviceState *dev, Error **errp)
{
XenXenstoreState *s = XEN_XENSTORE(dev);
+ GList *perms;
if (xen_mode != XEN_EMULATE) {
error_setg(errp, "Xen xenstore support is for Xen emulation");
@@ -89,6 +134,50 @@ static void xen_xenstore_realize(DeviceState *dev, Error **errp)
}
aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true,
xen_xenstore_event, NULL, NULL, NULL, s);
+
+ s->impl = xs_impl_create(xen_domid);
+
+ /* Populate the default nodes */
+
+ /* Nodes owned by 'dom0' but readable by the guest */
+ perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
+ perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));
+
+ relpath_printf(s, perms, "", "%s", "");
+
+ relpath_printf(s, perms, "domid", "%u", xen_domid);
+
+ relpath_printf(s, perms, "control/platform-feature-xs_reset_watches", "%u", 1);
+ relpath_printf(s, perms, "control/platform-feature-multiprocessor-suspend", "%u", 1);
+
+ relpath_printf(s, perms, "platform/acpi", "%u", 1);
+ relpath_printf(s, perms, "platform/acpi_s3", "%u", 1);
+ relpath_printf(s, perms, "platform/acpi_s4", "%u", 1);
+ relpath_printf(s, perms, "platform/acpi_laptop_slate", "%u", 0);
+
+ g_list_free_full(perms, g_free);
+
+ /* Nodes owned by the guest */
+ perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, xen_domid));
+
+ relpath_printf(s, perms, "attr", "%s", "");
+
+ relpath_printf(s, perms, "control/shutdown", "%s", "");
+ relpath_printf(s, perms, "control/feature-poweroff", "%u", 1);
+ relpath_printf(s, perms, "control/feature-reboot", "%u", 1);
+ relpath_printf(s, perms, "control/feature-suspend", "%u", 1);
+ relpath_printf(s, perms, "control/feature-s3", "%u", 1);
+ relpath_printf(s, perms, "control/feature-s4", "%u", 1);
+
+ relpath_printf(s, perms, "data", "%s", "");
+ relpath_printf(s, perms, "device", "%s", "");
+ relpath_printf(s, perms, "drivers", "%s", "");
+ relpath_printf(s, perms, "error", "%s", "");
+ relpath_printf(s, perms, "feature", "%s", "");
+
+ g_list_free_full(perms, g_free);
+
+ xen_xenstore_ops = &emu_xenstore_backend_ops;
}
static bool xen_xenstore_is_needed(void *opaque)
@@ -99,16 +188,26 @@ static bool xen_xenstore_is_needed(void *opaque)
/*
 * Migration pre-save hook: record the guest's event-channel port and
 * serialize the xenstore contents into impl_state / impl_state_size for
 * the VMState VARRAY to transfer.
 */
static int xen_xenstore_pre_save(void *opaque)
{
    XenXenstoreState *s = opaque;
    GByteArray *save;

    if (s->eh) {
        s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
    }

    g_free(s->impl_state);
    save = xs_impl_serialize(s->impl);
    s->impl_state = save->data;
    s->impl_state_size = save->len;
    /* Free only the wrapper: ownership of the data moved to impl_state */
    g_byte_array_free(save, false);

    return 0;
}
static int xen_xenstore_post_load(void *opaque, int ver)
{
XenXenstoreState *s = opaque;
+ GByteArray *save;
+ int ret;
/*
* As qemu/dom0, rebind to the guest's port. The Windows drivers may
@@ -125,11 +224,18 @@ static int xen_xenstore_post_load(void *opaque, int ver)
}
s->be_port = be_port;
}
- return 0;
+
+ save = g_byte_array_new_take(s->impl_state, s->impl_state_size);
+ s->impl_state = NULL;
+ s->impl_state_size = 0;
+
+ ret = xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s);
+ return ret;
}
static const VMStateDescription xen_xenstore_vmstate = {
.name = "xen_xenstore",
+ .unmigratable = 1, /* The PV back ends don't migrate yet */
.version_id = 1,
.minimum_version_id = 1,
.needed = xen_xenstore_is_needed,
@@ -145,6 +251,10 @@ static const VMStateDescription xen_xenstore_vmstate = {
VMSTATE_BOOL(rsp_pending, XenXenstoreState),
VMSTATE_UINT32(guest_port, XenXenstoreState),
VMSTATE_BOOL(fatal_error, XenXenstoreState),
+ VMSTATE_UINT32(impl_state_size, XenXenstoreState),
+ VMSTATE_VARRAY_UINT32_ALLOC(impl_state, XenXenstoreState,
+ impl_state_size, 0,
+ vmstate_info_uint8, uint8_t),
VMSTATE_END_OF_LIST()
}
};
@@ -213,20 +323,761 @@ static void reset_rsp(XenXenstoreState *s)
s->rsp_offset = 0;
}
/*
 * Build an XS_ERROR response in the response buffer, mapping @errnum to
 * its canonical xenstore error string from xsd_errors[]. @errnum must
 * be one of the protocol-defined errors; anything else trips the assert.
 */
static void xs_error(XenXenstoreState *s, unsigned int id,
                     xs_transaction_t tx_id, int errnum)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    const char *errstr = NULL;

    for (unsigned int i = 0; i < ARRAY_SIZE(xsd_errors); i++) {
        struct xsd_errors *xsd_error = &xsd_errors[i];

        if (xsd_error->errnum == errnum) {
            errstr = xsd_error->errstring;
            break;
        }
    }
    assert(errstr);

    trace_xenstore_error(id, tx_id, errstr);

    rsp->type = XS_ERROR;
    rsp->req_id = id;
    rsp->tx_id = tx_id;
    /* Payload is the error string including its NUL terminator */
    rsp->len = (uint32_t)strlen(errstr) + 1;

    memcpy(&rsp[1], errstr, rsp->len);
}
+
+static void xs_ok(XenXenstoreState *s, unsigned int type, unsigned int req_id,
+ xs_transaction_t tx_id)
+{
+ struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
+ const char *okstr = "OK";
+
+ rsp->type = type;
+ rsp->req_id = req_id;
+ rsp->tx_id = tx_id;
+ rsp->len = (uint32_t)strlen(okstr) + 1;
+
+ memcpy(&rsp[1], okstr, rsp->len);
+}
+
+/*
+ * The correct request and response formats are documented in xen.git:
+ * docs/misc/xenstore.txt. A summary is given below for convenience.
+ * The '|' symbol represents a NUL character.
+ *
+ * ---------- Database read, write and permissions operations ----------
+ *
+ * READ <path>| <value|>
+ * WRITE <path>|<value|>
+ * Store and read the octet string <value> at <path>.
+ * WRITE creates any missing parent paths, with empty values.
+ *
+ * MKDIR <path>|
+ * Ensures that the <path> exists, if necessary by creating
+ * it and any missing parents with empty values. If <path>
+ * or any parent already exists, its value is left unchanged.
+ *
+ * RM <path>|
+ * Ensures that the <path> does not exist, by deleting
+ * it and all of its children. It is not an error if <path> does
+ * not exist, but it _is_ an error if <path>'s immediate parent
+ * does not exist either.
+ *
+ * DIRECTORY <path>| <child-leaf-name>|*
+ * Gives a list of the immediate children of <path>, as only the
+ * leafnames. The resulting children are each named
+ * <path>/<child-leaf-name>.
+ *
+ * DIRECTORY_PART <path>|<offset> <gencnt>|<child-leaf-name>|*
+ * Same as DIRECTORY, but to be used for children lists longer than
+ * XENSTORE_PAYLOAD_MAX. Input are <path> and the byte offset into
+ * the list of children to return. Return values are the generation
+ * count <gencnt> of the node (to be used to ensure the node hasn't
+ * changed between two reads: <gencnt> being the same for multiple
+ * reads guarantees the node hasn't changed) and the list of children
+ * starting at the specified <offset> of the complete list.
+ *
+ * GET_PERMS <path>| <perm-as-string>|+
+ * SET_PERMS <path>|<perm-as-string>|+?
+ * <perm-as-string> is one of the following
+ * w<domid> write only
+ * r<domid> read only
+ * b<domid> both read and write
+ * n<domid> no access
+ * See https://wiki.xen.org/wiki/XenBus section
+ * `Permissions' for details of the permissions system.
+ * It is possible to set permissions for the special watch paths
+ * "@introduceDomain" and "@releaseDomain" to enable receiving those
+ * watches in unprivileged domains.
+ *
+ * ---------- Watches ----------
+ *
+ * WATCH <wpath>|<token>|?
+ * Adds a watch.
+ *
+ * When a <path> is modified (including path creation, removal,
+ * contents change or permissions change) this generates an event
+ * on the changed <path>. Changes made in transactions cause an
+ * event only if and when committed. Each occurring event is
+ * matched against all the watches currently set up, and each
+ * matching watch results in a WATCH_EVENT message (see below).
+ *
+ * The event's path matches the watch's <wpath> if it is a child
+ * of <wpath>.
+ *
+ * <wpath> can be a <path> to watch or @<wspecial>. In the
+ * latter case <wspecial> may have any syntax but it matches
+ * (according to the rules above) only the following special
+ * events which are invented by xenstored:
+ * @introduceDomain occurs on INTRODUCE
+ * @releaseDomain occurs on any domain crash or
+ * shutdown, and also on RELEASE
+ * and domain destruction
+ * <wspecial> events are sent to privileged callers or explicitly
+ * via SET_PERMS enabled domains only.
+ *
+ * When a watch is first set up it is triggered once straight
+ * away, with <path> equal to <wpath>. Watches may be triggered
+ * spuriously. The tx_id in a WATCH request is ignored.
+ *
+ * Watches are supposed to be restricted by the permissions
+ * system but in practice the implementation is imperfect.
+ * Applications should not rely on being sent a notification for
+ * paths that they cannot read; however, an application may rely
+ * on being sent a watch when a path which it _is_ able to read
+ * is deleted even if that leaves only a nonexistent unreadable
+ * parent. A notification may be omitted if a node's permissions
+ * are changed so as to make it unreadable, in which case future
+ * notifications may be suppressed (and if the node is later made
+ * readable, some notifications may have been lost).
+ *
+ * WATCH_EVENT <epath>|<token>|
+ * Unsolicited `reply' generated for matching modification events
+ * as described above. req_id and tx_id are both 0.
+ *
+ * <epath> is the event's path, ie the actual path that was
+ * modified; however if the event was the recursive removal of a
+ * parent of <wpath>, <epath> is just
+ * <wpath> (rather than the actual path which was removed). So
+ * <epath> is a child of <wpath>, regardless.
+ *
+ * Iff <wpath> for the watch was specified as a relative pathname,
+ * the <epath> path will also be relative (with the same base,
+ * obviously).
+ *
+ * UNWATCH <wpath>|<token>|?
+ *
+ * RESET_WATCHES |
+ * Reset all watches and transactions of the caller.
+ *
+ * ---------- Transactions ----------
+ *
+ * TRANSACTION_START | <transid>|
+ * <transid> is an opaque uint32_t allocated by xenstored
+ * represented as unsigned decimal. After this, transaction may
+ * be referenced by using <transid> (as 32-bit binary) in the
+ * tx_id request header field. When transaction is started whole
+ * db is copied; reads and writes happen on the copy.
+ * It is not legal to send non-0 tx_id in TRANSACTION_START.
+ *
+ * TRANSACTION_END T|
+ * TRANSACTION_END F|
+ * tx_id must refer to existing transaction. After this
+ * request the tx_id is no longer valid and may be reused by
+ * xenstore. If F, the transaction is discarded. If T,
+ * it is committed: if there were any other intervening writes
+ * then our END gets EAGAIN.
+ *
+ * The plan is that in the future only intervening `conflicting'
+ * writes cause EAGAIN, meaning only writes or other commits
+ * which changed paths which were read or written in the
+ * transaction at hand.
+ *
+ */
+
/*
 * XS_READ: read a node's value. The payload is a single NUL-terminated
 * path; the response carries the raw value bytes (not NUL-terminated).
 */
static void xs_read(XenXenstoreState *s, unsigned int req_id,
                    xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
{
    const char *path = (const char *)req_data;
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    uint8_t *rsp_data = (uint8_t *)&rsp[1];
    g_autoptr(GByteArray) data = g_byte_array_new();
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_read(tx_id, path);
    err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_READ;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    len = data->len;
    if (len > XENSTORE_PAYLOAD_MAX) {
        /* The stored value cannot fit in a single response packet */
        xs_error(s, req_id, tx_id, E2BIG);
        return;
    }

    memcpy(&rsp_data[rsp->len], data->data, len);
    rsp->len += len;
}
+
/*
 * XS_WRITE: store a value. The payload is <path>|<value> where only the
 * path is NUL-terminated; everything after that NUL is the raw value.
 */
static void xs_write(XenXenstoreState *s, unsigned int req_id,
                     xs_transaction_t tx_id, uint8_t *req_data,
                     unsigned int len)
{
    g_autoptr(GByteArray) data = g_byte_array_new();
    const char *path;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    path = (const char *)req_data;

    /* Skip past the path; fail if no NUL separator is present */
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /* Remaining bytes (after the NUL) are the value */
    g_byte_array_append(data, req_data, len);

    trace_xenstore_write(tx_id, path);
    err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_WRITE, req_id, tx_id);
}
+
+static void xs_mkdir(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *req_data,
+ unsigned int len)
+{
+ g_autoptr(GByteArray) data = g_byte_array_new();
+ const char *path;
+ int err;
+
+ if (len == 0 || req_data[len - 1] != '\0') {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ path = (const char *)req_data;
+
+ trace_xenstore_mkdir(tx_id, path);
+ err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
+ if (err == ENOENT) {
+ err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
+ }
+
+ if (!err) {
+ xs_error(s, req_id, tx_id, err);
+ return;
+ }
+
+ xs_ok(s, XS_MKDIR, req_id, tx_id);
+}
+
/*
 * Append a list of NUL-terminated strings to the response payload,
 * starting @start bytes into the flattened list. With @truncate set
 * (XS_DIRECTORY_PART semantics) the output is cut at the payload limit
 * and terminated with an extra NUL; otherwise overflow yields E2BIG.
 */
static void xs_append_strings(XenXenstoreState *s, struct xsd_sockmsg *rsp,
                              GList *strings, unsigned int start, bool truncate)
{
    uint8_t *rsp_data = (uint8_t *)&rsp[1];
    GList *l;

    for (l = strings; l; l = l->next) {
        size_t len = strlen(l->data) + 1; /* Including the NUL termination */
        char *str = l->data;

        if (rsp->len + len > XENSTORE_PAYLOAD_MAX) {
            if (truncate) {
                len = XENSTORE_PAYLOAD_MAX - rsp->len;
                if (!len) {
                    return;
                }
            } else {
                xs_error(s, rsp->req_id, rsp->tx_id, E2BIG);
                return;
            }
        }

        /* Consume the requested starting offset before emitting bytes */
        if (start) {
            if (start >= len) {
                start -= len;
                continue;
            }

            str += start;
            len -= start;
            start = 0;
        }

        memcpy(&rsp_data[rsp->len], str, len);
        rsp->len += len;
    }
    /* XS_DIRECTORY_PART wants an extra NUL to indicate the end */
    if (truncate && rsp->len < XENSTORE_PAYLOAD_MAX) {
        rsp_data[rsp->len++] = '\0';
    }
}
+
/*
 * XS_DIRECTORY: list the immediate children of a node. The payload is a
 * NUL-terminated path; the response is a sequence of NUL-terminated
 * leaf names (E2BIG if they do not all fit).
 */
static void xs_directory(XenXenstoreState *s, unsigned int req_id,
                         xs_transaction_t tx_id, uint8_t *req_data,
                         unsigned int len)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    GList *items = NULL;
    const char *path;
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    path = (const char *)req_data;

    trace_xenstore_directory(tx_id, path);
    err = xs_impl_directory(s->impl, xen_domid, tx_id, path, NULL, &items);
    if (err != 0) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_DIRECTORY;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    xs_append_strings(s, rsp, items, 0, false);

    g_list_free_full(items, g_free);
}
+
/*
 * XS_DIRECTORY_PART: like XS_DIRECTORY but for child lists longer than
 * one payload. The request is <path>|<offset>| (offset in decimal); the
 * response is <gencnt>| followed by as many leaf names as fit, starting
 * at the given byte offset into the flattened list.
 */
static void xs_directory_part(XenXenstoreState *s, unsigned int req_id,
                              xs_transaction_t tx_id, uint8_t *req_data,
                              unsigned int len)
{
    const char *offset_str, *path = (const char *)req_data;
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    char *rsp_data = (char *)&rsp[1];
    uint64_t gencnt = 0;
    unsigned int offset;
    GList *items = NULL;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    /* Skip past the NUL-terminated path */
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /* Then the NUL-terminated decimal offset */
    offset_str = (const char *)req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /* Nothing may follow the offset */
    if (len) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    if (qemu_strtoui(offset_str, NULL, 10, &offset) < 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_directory_part(tx_id, path, offset);
    err = xs_impl_directory(s->impl, xen_domid, tx_id, path, &gencnt, &items);
    if (err != 0) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_DIRECTORY_PART;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    /* Generation count first, so the caller can detect concurrent changes */
    rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%" PRIu64, gencnt) + 1;

    xs_append_strings(s, rsp, items, offset, true);

    g_list_free_full(items, g_free);
}
+
/*
 * XS_TRANSACTION_START: open a new transaction. The payload must be a
 * single empty NUL-terminated string; the response carries the new
 * transaction id as a decimal string.
 */
static void xs_transaction_start(XenXenstoreState *s, unsigned int req_id,
                                 xs_transaction_t tx_id, uint8_t *req_data,
                                 unsigned int len)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    char *rsp_data = (char *)&rsp[1];
    int err;

    if (len != 1 || req_data[0] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    rsp->type = XS_TRANSACTION_START;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    /* tx_id is updated in place with the newly allocated transaction id */
    err = xs_impl_transaction_start(s->impl, xen_domid, &tx_id);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    trace_xenstore_transaction_start(tx_id);

    rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%u", tx_id);
    assert(rsp->len < XENSTORE_PAYLOAD_MAX);
    rsp->len++;
}
+
+static void xs_transaction_end(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *req_data,
+ unsigned int len)
+{
+ bool commit;
+ int err;
+
+ if (len != 2 || req_data[1] != '\0') {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ switch (req_data[0]) {
+ case 'T':
+ commit = true;
+ break;
+ case 'F':
+ commit = false;
+ break;
+ default:
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ trace_xenstore_transaction_end(tx_id, commit);
+ err = xs_impl_transaction_end(s->impl, xen_domid, tx_id, commit);
+ if (err) {
+ xs_error(s, req_id, tx_id, err);
+ return;
+ }
+
+ xs_ok(s, XS_TRANSACTION_END, req_id, tx_id);
+}
+
+static void xs_rm(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
+{
+ const char *path = (const char *)req_data;
+ int err;
+
+ if (len == 0 || req_data[len - 1] != '\0') {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ trace_xenstore_rm(tx_id, path);
+ err = xs_impl_rm(s->impl, xen_domid, tx_id, path);
+ if (err) {
+ xs_error(s, req_id, tx_id, err);
+ return;
+ }
+
+ xs_ok(s, XS_RM, req_id, tx_id);
+}
+
/*
 * XS_GET_PERMS: return the permission list of a node as a sequence of
 * NUL-terminated <perm-as-string> entries (e.g. "r0", "b1").
 */
static void xs_get_perms(XenXenstoreState *s, unsigned int req_id,
                         xs_transaction_t tx_id, uint8_t *req_data,
                         unsigned int len)
{
    const char *path = (const char *)req_data;
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    GList *perms = NULL;
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_get_perms(tx_id, path);
    err = xs_impl_get_perms(s->impl, xen_domid, tx_id, path, &perms);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_GET_PERMS;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    xs_append_strings(s, rsp, perms, 0, false);

    g_list_free_full(perms, g_free);
}
+
/*
 * XS_SET_PERMS: replace a node's permissions. The payload is
 * <path>|<perm-as-string>|+ — a path followed by one or more
 * NUL-terminated permission strings.
 */
static void xs_set_perms(XenXenstoreState *s, unsigned int req_id,
                         xs_transaction_t tx_id, uint8_t *req_data,
                         unsigned int len)
{
    const char *path = (const char *)req_data;
    uint8_t *perm;
    GList *perms = NULL;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    /* Skip past the NUL-terminated path */
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /* Collect each NUL-terminated permission string in place */
    perm = req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            perms = g_list_append(perms, perm);
            perm = req_data;
        }
    }

    /*
     * Note that there may be trailing garbage at the end of the buffer.
     * This is explicitly permitted by the '?' at the end of the definition:
     *
     *    SET_PERMS         <path>|<perm-as-string>|+?
     */

    trace_xenstore_set_perms(tx_id, path);
    err = xs_impl_set_perms(s->impl, xen_domid, tx_id, path, perms);
    g_list_free(perms);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_SET_PERMS, req_id, tx_id);
}
+
/*
 * XS_WATCH: register a watch. The payload is <wpath>|<token>|; matching
 * changes are later delivered through fire_watch_cb().
 */
static void xs_watch(XenXenstoreState *s, unsigned int req_id,
                     xs_transaction_t tx_id, uint8_t *req_data,
                     unsigned int len)
{
    const char *token, *path = (const char *)req_data;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    /* Skip past the NUL-terminated watch path */
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /* Then the NUL-terminated token */
    token = (const char *)req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /*
     * Note that there may be trailing garbage at the end of the buffer.
     * This is explicitly permitted by the '?' at the end of the definition:
     *
     *    WATCH             <wpath>|<token>|?
     */

    trace_xenstore_watch(path, token);
    err = xs_impl_watch(s->impl, xen_domid, path, token, fire_watch_cb, s);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_WATCH, req_id, tx_id);
}
+
/*
 * XS_UNWATCH: remove a previously registered watch. The payload is the
 * same <wpath>|<token>| pair that was passed to XS_WATCH.
 */
static void xs_unwatch(XenXenstoreState *s, unsigned int req_id,
                       xs_transaction_t tx_id, uint8_t *req_data,
                       unsigned int len)
{
    const char *token, *path = (const char *)req_data;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    /* Skip past the NUL-terminated watch path */
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /* Then the NUL-terminated token */
    token = (const char *)req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    trace_xenstore_unwatch(path, token);
    err = xs_impl_unwatch(s->impl, xen_domid, path, token, fire_watch_cb, s);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_UNWATCH, req_id, tx_id);
}
+
+static void xs_reset_watches(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *req_data,
+ unsigned int len)
+{
+ if (len == 0 || req_data[len - 1] != '\0') {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ trace_xenstore_reset_watches();
+ xs_impl_reset_watches(s->impl, xen_domid);
+
+ xs_ok(s, XS_RESET_WATCHES, req_id, tx_id);
+}
+
/*
 * Handler for privileged operations (XS_INTRODUCE, XS_RELEASE, ...)
 * which the guest may never invoke on itself: always answers EACCES.
 */
static void xs_priv(XenXenstoreState *s, unsigned int req_id,
                    xs_transaction_t tx_id, uint8_t *data,
                    unsigned int len)
{
    xs_error(s, req_id, tx_id, EACCES);
}
+
/* Fallback handler for unknown or unimplemented request types: ENOSYS. */
static void xs_unimpl(XenXenstoreState *s, unsigned int req_id,
                      xs_transaction_t tx_id, uint8_t *data,
                      unsigned int len)
{
    xs_error(s, req_id, tx_id, ENOSYS);
}
+
/* Common signature shared by every request handler */
typedef void (*xs_impl)(XenXenstoreState *s, unsigned int req_id,
                        xs_transaction_t tx_id, uint8_t *data,
                        unsigned int len);

/* One dispatch-table slot: request name (for debugging) plus handler */
struct xsd_req {
    const char *name;
    xs_impl fn;
};
#define XSD_REQ(_type, _fn)                           \
    [_type] = { .name = #_type, .fn = _fn }

/* Dispatch table indexed by the XS_* wire request type */
struct xsd_req xsd_reqs[] = {
    XSD_REQ(XS_READ, xs_read),
    XSD_REQ(XS_WRITE, xs_write),
    XSD_REQ(XS_MKDIR, xs_mkdir),
    XSD_REQ(XS_DIRECTORY, xs_directory),
    XSD_REQ(XS_DIRECTORY_PART, xs_directory_part),
    XSD_REQ(XS_TRANSACTION_START, xs_transaction_start),
    XSD_REQ(XS_TRANSACTION_END, xs_transaction_end),
    XSD_REQ(XS_RM, xs_rm),
    XSD_REQ(XS_GET_PERMS, xs_get_perms),
    XSD_REQ(XS_SET_PERMS, xs_set_perms),
    XSD_REQ(XS_WATCH, xs_watch),
    XSD_REQ(XS_UNWATCH, xs_unwatch),
    XSD_REQ(XS_CONTROL, xs_priv),
    XSD_REQ(XS_INTRODUCE, xs_priv),
    XSD_REQ(XS_RELEASE, xs_priv),
    XSD_REQ(XS_IS_DOMAIN_INTRODUCED, xs_priv),
    XSD_REQ(XS_RESUME, xs_priv),
    XSD_REQ(XS_SET_TARGET, xs_priv),
    XSD_REQ(XS_RESET_WATCHES, xs_reset_watches),
};
+
static void process_req(XenXenstoreState *s)
{
struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
- struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
- const char enosys[] = "ENOSYS";
+ xs_impl handler = NULL;
assert(req_pending(s));
assert(!s->rsp_pending);
- rsp->type = XS_ERROR;
- rsp->req_id = req->req_id;
- rsp->tx_id = req->tx_id;
- rsp->len = sizeof(enosys);
- memcpy((void *)&rsp[1], enosys, sizeof(enosys));
+ if (req->type < ARRAY_SIZE(xsd_reqs)) {
+ handler = xsd_reqs[req->type].fn;
+ }
+ if (!handler) {
+ handler = &xs_unimpl;
+ }
+
+ handler(s, req->req_id, req->tx_id, (uint8_t *)&req[1], req->len);
s->rsp_pending = true;
reset_req(s);
@@ -415,6 +1266,113 @@ static unsigned int put_rsp(XenXenstoreState *s)
return copylen;
}
+/*
+ * Write an XS_WATCH_EVENT message ("<path>\0<token>\0") directly into
+ * the response buffer. Must only be called when no response is pending.
+ * If the token makes the payload too large, the event is silently
+ * dropped and s->rsp_pending is left unset.
+ */
+static void deliver_watch(XenXenstoreState *s, const char *path,
+                          const char *token)
+{
+    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
+    uint8_t *rsp_data = (uint8_t *)&rsp[1];
+    unsigned int len;
+
+    assert(!s->rsp_pending);
+
+    trace_xenstore_watch_event(path, token);
+
+    rsp->type = XS_WATCH_EVENT;
+    rsp->req_id = 0;
+    rsp->tx_id = 0;
+    rsp->len = 0;
+
+    len = strlen(path);
+
+    /* XENSTORE_ABS/REL_PATH_MAX should ensure there can be no overflow */
+    assert(rsp->len + len < XENSTORE_PAYLOAD_MAX);
+
+    memcpy(&rsp_data[rsp->len], path, len);
+    rsp->len += len;
+    rsp_data[rsp->len] = '\0';
+    rsp->len++;
+
+    len = strlen(token);
+    /*
+     * It is possible for the guest to have chosen a token that will
+     * not fit (along with the path) into a watch event. We have no
+     * choice but to drop the event if this is the case.
+     */
+    if (rsp->len + len >= XENSTORE_PAYLOAD_MAX) {
+        return;
+    }
+
+    memcpy(&rsp_data[rsp->len], token, len);
+    rsp->len += len;
+    rsp_data[rsp->len] = '\0';
+    rsp->len++;
+
+    s->rsp_pending = true;
+}
+
+struct watch_event {
+ char *path;
+ char *token;
+};
+
+/* Release a queued watch event and its owned strings; NULL is a no-op. */
+static void free_watch_event(struct watch_event *ev)
+{
+    if (!ev) {
+        return;
+    }
+
+    g_free(ev->path);
+    g_free(ev->token);
+    g_free(ev);
+}
+
+/*
+ * Defer a watch event: copy the path/token strings and append them to
+ * the list to be delivered once the response buffer is free again.
+ */
+static void queue_watch(XenXenstoreState *s, const char *path,
+                        const char *token)
+{
+    struct watch_event *ev = g_new0(struct watch_event, 1);
+
+    ev->path = g_strdup(path);
+    ev->token = g_strdup(token);
+
+    s->watch_events = g_list_append(s->watch_events, ev);
+}
+
+/*
+ * Callback invoked by the xenstore implementation when a watch fires
+ * for the guest domain. Runs with the iothread lock held. Delivers the
+ * event straight into the response buffer when the ring is idle,
+ * otherwise queues it for later delivery.
+ */
+static void fire_watch_cb(void *opaque, const char *path, const char *token)
+{
+    XenXenstoreState *s = opaque;
+
+    assert(qemu_mutex_iothread_locked());
+
+    /*
+     * If there's a response pending, we obviously can't scribble over
+     * it. But if there's a request pending, it has dibs on the buffer
+     * too.
+     *
+     * In the common case of a watch firing due to backend activity
+     * when the ring was otherwise idle, we should be able to copy the
+     * strings directly into the rsp_data and thence the actual ring,
+     * without needing to perform any allocations and queue them.
+     */
+    if (s->rsp_pending || req_pending(s)) {
+        queue_watch(s, path, token);
+    } else {
+        deliver_watch(s, path, token);
+        /*
+         * If the message was queued because there was already ring activity,
+         * no need to wake the guest. But if not, we need to send the evtchn.
+         */
+        xen_be_evtchn_notify(s->eh, s->be_port);
+    }
+}
+
+/* Deliver the oldest queued watch event into the response buffer. */
+static void process_watch_events(XenXenstoreState *s)
+{
+    GList *head = s->watch_events;
+    struct watch_event *ev = head->data;
+
+    deliver_watch(s, ev->path, ev->token);
+
+    s->watch_events = g_list_delete_link(s->watch_events, head);
+    free_watch_event(ev);
+}
+
static void xen_xenstore_event(void *opaque)
{
XenXenstoreState *s = opaque;
@@ -433,6 +1391,10 @@ static void xen_xenstore_event(void *opaque)
copied_to = copied_from = 0;
processed = false;
+ if (!s->rsp_pending && s->watch_events) {
+ process_watch_events(s);
+ }
+
if (s->rsp_pending) {
copied_to = put_rsp(s);
}
@@ -441,7 +1403,7 @@ static void xen_xenstore_event(void *opaque)
copied_from = get_req(s);
}
- if (req_pending(s) && !s->rsp_pending) {
+ if (req_pending(s) && !s->rsp_pending && !s->watch_events) {
process_req(s);
processed = true;
}
@@ -496,5 +1458,270 @@ int xen_xenstore_reset(void)
}
s->be_port = err;
+ /*
+ * We don't actually access the guest's page through the grant, because
+ * this isn't real Xen, and we can just use the page we gave it in the
+ * first place. Map the grant anyway, mostly for cosmetic purposes so
+ * it *looks* like it's in use in the guest-visible grant table.
+ */
+ s->gt = qemu_xen_gnttab_open();
+ uint32_t xs_gntref = GNTTAB_RESERVED_XENSTORE;
+ s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid, &xs_gntref,
+ PROT_READ | PROT_WRITE);
+
return 0;
}
+
+struct qemu_xs_handle {
+ XenstoreImplState *impl;
+ GList *watches;
+ QEMUBH *watch_bh;
+};
+
+struct qemu_xs_watch {
+ struct qemu_xs_handle *h;
+ char *path;
+ xs_watch_fn fn;
+ void *opaque;
+ GList *events;
+};
+
+/* Return the home path "/local/domain/<domid>"; caller must g_free() it. */
+static char *xs_be_get_domain_path(struct qemu_xs_handle *h, unsigned int domid)
+{
+    return g_strdup_printf("/local/domain/%u", domid);
+}
+
+/*
+ * List the children of 'path' as a NULL-terminated array of strings
+ * (ownership of the strings and the array passes to the caller).
+ * Returns NULL and sets errno on failure; *num receives the count.
+ */
+static char **xs_be_directory(struct qemu_xs_handle *h, xs_transaction_t t,
+                              const char *path, unsigned int *num)
+{
+    GList *items = NULL, *l;
+    unsigned int count = 0;
+    char **ret;
+    int err;
+
+    err = xs_impl_directory(h->impl, DOMID_QEMU, t, path, NULL, &items);
+    if (err) {
+        errno = err;
+        return NULL;
+    }
+
+    /* One extra slot keeps the array NULL-terminated. */
+    ret = g_new0(char *, g_list_length(items) + 1);
+    for (l = items; l; l = l->next) {
+        ret[count++] = l->data;
+    }
+    *num = count;
+
+    g_list_free(items);
+    return ret;
+}
+
+/*
+ * Read the content of 'path'. On success returns a NUL-terminated
+ * buffer (ownership passes to the caller) and stores the original
+ * length in *len if non-NULL. On failure sets errno and returns NULL:
+ * g_byte_array_free() with free_segment=true frees the data and
+ * returns NULL, with false it returns the detached data segment.
+ */
+static void *xs_be_read(struct qemu_xs_handle *h, xs_transaction_t t,
+                        const char *path, unsigned int *len)
+{
+    GByteArray *data = g_byte_array_new();
+    bool free_segment = false;
+    int err;
+
+    err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
+    if (err) {
+        free_segment = true;
+        errno = err;
+    } else {
+        if (len) {
+            *len = data->len;
+        }
+        /* The xen-bus-helper code expects to get NUL terminated string! */
+        g_byte_array_append(data, (void *)"", 1);
+    }
+
+    return g_byte_array_free(data, free_segment);
+}
+
+/*
+ * Write 'len' bytes of 'data' to 'path' on behalf of QEMU (dom0).
+ * Returns false and sets errno on failure.
+ */
+static bool xs_be_write(struct qemu_xs_handle *h, xs_transaction_t t,
+                        const char *path, const void *data, unsigned int len)
+{
+    GByteArray *payload = g_byte_array_new();
+    int err;
+
+    g_byte_array_append(payload, data, len);
+    err = xs_impl_write(h->impl, DOMID_QEMU, t, path, payload);
+    g_byte_array_unref(payload);
+
+    if (err) {
+        errno = err;
+    }
+    return err == 0;
+}
+
+/*
+ * Ensure a node exists at 'path' and set its permissions: 'owner' gets
+ * the owner slot (first perm, XS_PERM_NONE letter) and 'domid' gets
+ * 'perms'. Returns false and sets errno on failure.
+ */
+static bool xs_be_create(struct qemu_xs_handle *h, xs_transaction_t t,
+                         unsigned int owner, unsigned int domid,
+                         unsigned int perms, const char *path)
+{
+    g_autoptr(GByteArray) data = g_byte_array_new();
+    GList *perms_list = NULL;
+    int err;
+
+    /* mkdir does this: only write the (empty) node if it doesn't exist */
+    err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
+    if (err == ENOENT) {
+        err = xs_impl_write(h->impl, DOMID_QEMU, t, path, data);
+    }
+    if (err) {
+        errno = err;
+        return false;
+    }
+
+    perms_list = g_list_append(perms_list,
+                               xs_perm_as_string(XS_PERM_NONE, owner));
+    perms_list = g_list_append(perms_list,
+                               xs_perm_as_string(perms, domid));
+
+    err = xs_impl_set_perms(h->impl, DOMID_QEMU, t, path, perms_list);
+    g_list_free_full(perms_list, g_free);
+    if (err) {
+        errno = err;
+        return false;
+    }
+    return true;
+}
+
+/* Remove the node at 'path'; returns false and sets errno on failure. */
+static bool xs_be_destroy(struct qemu_xs_handle *h, xs_transaction_t t,
+                          const char *path)
+{
+    int err = xs_impl_rm(h->impl, DOMID_QEMU, t, path);
+
+    if (!err) {
+        return true;
+    }
+    errno = err;
+    return false;
+}
+
+/*
+ * Bottom half: drain the queued events of every registered backend
+ * watch, invoking each watch's callback with the event's path.
+ */
+static void be_watch_bh(void *_h)
+{
+    struct qemu_xs_handle *h = _h;
+    GList *l;
+
+    for (l = h->watches; l; l = l->next) {
+        struct qemu_xs_watch *w = l->data;
+
+        while (w->events) {
+            struct watch_event *ev = w->events->data;
+
+            w->fn(w->opaque, ev->path);
+
+            w->events = g_list_remove(w->events, ev);
+            free_watch_event(ev);
+        }
+    }
+}
+
+/*
+ * Watch callback for QEMU's own (backend) watches: queue the path on
+ * the watch and schedule the bottom half rather than calling back
+ * synchronously from inside the xenstore implementation.
+ */
+static void xs_be_watch_cb(void *opaque, const char *path, const char *token)
+{
+    struct watch_event *ev = g_new0(struct watch_event, 1);
+    struct qemu_xs_watch *w = opaque;
+
+    /* We don't care about the token */
+    ev->path = g_strdup(path);
+    w->events = g_list_append(w->events, ev);
+
+    qemu_bh_schedule(w->h->watch_bh);
+}
+
+/*
+ * Register a watch on 'path' for QEMU itself. Returns the watch handle,
+ * or NULL (with errno set) on failure.
+ */
+static struct qemu_xs_watch *xs_be_watch(struct qemu_xs_handle *h,
+                                         const char *path, xs_watch_fn fn,
+                                         void *opaque)
+{
+    struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1);
+    int err;
+
+    w->h = h;
+    w->fn = fn;
+    w->opaque = opaque;
+
+    err = xs_impl_watch(h->impl, DOMID_QEMU, path, NULL, xs_be_watch_cb, w);
+    if (err) {
+        errno = err;
+        g_free(w);
+        return NULL;
+    }
+
+    w->path = g_strdup(path);
+    h->watches = g_list_append(h->watches, w);
+    return w;
+}
+
+/*
+ * Unregister a watch and free it along with any still-queued events.
+ * The return value of xs_impl_unwatch() is ignored; local state is
+ * torn down regardless.
+ */
+static void xs_be_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w)
+{
+    xs_impl_unwatch(h->impl, DOMID_QEMU, w->path, NULL, xs_be_watch_cb, w);
+
+    h->watches = g_list_remove(h->watches, w);
+    g_list_free_full(w->events, (GDestroyNotify)free_watch_event);
+    g_free(w->path);
+    g_free(w);
+}
+
+/*
+ * Begin a transaction on behalf of QEMU (dom0). Returns the new
+ * transaction ID, or XBT_NULL with errno set on failure.
+ */
+static xs_transaction_t xs_be_transaction_start(struct qemu_xs_handle *h)
+{
+    unsigned int tx = XBT_NULL;
+    int err = xs_impl_transaction_start(h->impl, DOMID_QEMU, &tx);
+
+    if (err) {
+        errno = err;
+        return XBT_NULL;
+    }
+    return tx;
+}
+
+/*
+ * Finish transaction 't': commit it unless 'abort' is set. Returns
+ * false and sets errno on failure.
+ */
+static bool xs_be_transaction_end(struct qemu_xs_handle *h, xs_transaction_t t,
+                                  bool abort)
+{
+    int err = xs_impl_transaction_end(h->impl, DOMID_QEMU, t, !abort);
+
+    if (!err) {
+        return true;
+    }
+    errno = err;
+    return false;
+}
+
+/*
+ * Open a handle onto the emulated XenStore for QEMU's own use.
+ * Fails with ENOSYS when the emulated XenStore device has not been
+ * instantiated or initialised.
+ *
+ * Note the '||' in the check below: the previous '&&' dereferenced a
+ * NULL 's' whenever the device did not exist, and errno must carry a
+ * positive error value, not -ENOSYS.
+ */
+static struct qemu_xs_handle *xs_be_open(void)
+{
+    XenXenstoreState *s = xen_xenstore_singleton;
+    struct qemu_xs_handle *h;
+
+    if (!s || !s->impl) {
+        errno = ENOSYS;
+        return NULL;
+    }
+
+    h = g_new0(struct qemu_xs_handle, 1);
+    h->impl = s->impl;
+
+    h->watch_bh = aio_bh_new(qemu_get_aio_context(), be_watch_bh, h);
+
+    return h;
+}
+
+/*
+ * Tear down a handle: unregister every outstanding watch, delete the
+ * delivery bottom half, and free the handle itself.
+ */
+static void xs_be_close(struct qemu_xs_handle *h)
+{
+    while (h->watches) {
+        struct qemu_xs_watch *w = h->watches->data;
+        xs_be_unwatch(h, w);
+    }
+
+    qemu_bh_delete(h->watch_bh);
+    g_free(h);
+}
+
+static struct xenstore_backend_ops emu_xenstore_backend_ops = {
+ .open = xs_be_open,
+ .close = xs_be_close,
+ .get_domain_path = xs_be_get_domain_path,
+ .directory = xs_be_directory,
+ .read = xs_be_read,
+ .write = xs_be_write,
+ .create = xs_be_create,
+ .destroy = xs_be_destroy,
+ .watch = xs_be_watch,
+ .unwatch = xs_be_unwatch,
+ .transaction_start = xs_be_transaction_start,
+ .transaction_end = xs_be_transaction_end,
+};
diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c
new file mode 100644
index 0000000000..305fe75519
--- /dev/null
+++ b/hw/i386/kvm/xenstore_impl.c
@@ -0,0 +1,1927 @@
+/*
+ * QEMU Xen emulation: The actual implementation of XenStore
+ *
+ * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>, Paul Durrant <paul@xen.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qom/object.h"
+
+#include "hw/xen/xen.h"
+
+#include "xen_xenstore.h"
+#include "xenstore_impl.h"
+
+#include "hw/xen/interface/io/xs_wire.h"
+
+#define XS_MAX_WATCHES 128
+#define XS_MAX_DOMAIN_NODES 1000
+#define XS_MAX_NODE_SIZE 2048
+#define XS_MAX_TRANSACTIONS 10
+#define XS_MAX_PERMS_PER_NODE 5
+
+#define XS_VALID_CHARS "abcdefghijklmnopqrstuvwxyz" \
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
+ "0123456789-/_"
+
+typedef struct XsNode {
+ uint32_t ref;
+ GByteArray *content;
+ GList *perms;
+ GHashTable *children;
+ uint64_t gencnt;
+ bool deleted_in_tx;
+ bool modified_in_tx;
+ unsigned int serialized_tx;
+#ifdef XS_NODE_UNIT_TEST
+ gchar *name; /* debug only */
+#endif
+} XsNode;
+
+typedef struct XsWatch {
+ struct XsWatch *next;
+ xs_impl_watch_fn *cb;
+ void *cb_opaque;
+ char *token;
+ unsigned int dom_id;
+ int rel_prefix;
+} XsWatch;
+
+typedef struct XsTransaction {
+ XsNode *root;
+ unsigned int nr_nodes;
+ unsigned int base_tx;
+ unsigned int tx_id;
+ unsigned int dom_id;
+} XsTransaction;
+
+struct XenstoreImplState {
+ XsNode *root;
+ unsigned int nr_nodes;
+ GHashTable *watches;
+ unsigned int nr_domu_watches;
+ GHashTable *transactions;
+ unsigned int nr_domu_transactions;
+ unsigned int root_tx;
+ unsigned int last_tx;
+ bool serialized;
+};
+
+
+/*
+ * GHFunc helper for next_tx(): if a live transaction was based on the
+ * transaction ID that is about to be reused, rebase it onto XBT_NULL so
+ * it cannot be mistaken for being based on the new incarnation.
+ */
+static void nobble_tx(gpointer key, gpointer value, gpointer user_data)
+{
+    unsigned int *new_tx_id = user_data;
+    XsTransaction *tx = value;
+
+    if (tx->base_tx == *new_tx_id) {
+        /* Transactions based on XBT_NULL will always fail */
+        tx->base_tx = XBT_NULL;
+    }
+}
+
+/*
+ * Allocate the next unused transaction ID, skipping XBT_NULL, the
+ * current root TX and any ID still present in the transactions table.
+ */
+static inline unsigned int next_tx(struct XenstoreImplState *s)
+{
+    unsigned int tx_id;
+
+    /* Find the next TX id which isn't either XBT_NULL or in use. */
+    do {
+        tx_id = ++s->last_tx;
+    } while (tx_id == XBT_NULL || tx_id == s->root_tx ||
+             g_hash_table_lookup(s->transactions, GINT_TO_POINTER(tx_id)));
+
+    /*
+     * It is vanishingly unlikely, but ensure that no outstanding transaction
+     * is based on the (previous incarnation of the) newly-allocated TX id.
+     */
+    g_hash_table_foreach(s->transactions, nobble_tx, &tx_id);
+
+    return tx_id;
+}
+
+/*
+ * Allocate a fresh node with a single reference and no content,
+ * permissions or children.
+ */
+static inline XsNode *xs_node_new(void)
+{
+    XsNode *n = g_new0(XsNode, 1);
+    n->ref = 1;
+
+#ifdef XS_NODE_UNIT_TEST
+    /* nr_xs_nodes/xs_node_list are presumably defined by the unit-test
+     * harness before including this file — TODO confirm */
+    nr_xs_nodes++;
+    xs_node_list = g_list_prepend(xs_node_list, n);
+#endif
+    return n;
+}
+
+/* Take an extra reference on a node, which must already be live (ref > 0). */
+static inline XsNode *xs_node_ref(XsNode *n)
+{
+    /* With just 10 transactions, it can never get anywhere near this. */
+    g_assert(n->ref < INT_MAX);
+
+    g_assert(n->ref);
+    n->ref++;
+    return n;
+}
+
+/*
+ * Drop a reference. On the last unref, release the content, the perms
+ * list, and — recursively, via the children table's destroy notifier —
+ * the whole subtree. NULL is tolerated as a no-op.
+ */
+static inline void xs_node_unref(XsNode *n)
+{
+    if (!n) {
+        return;
+    }
+    g_assert(n->ref);
+    if (--n->ref) {
+        return;
+    }
+
+    if (n->content) {
+        g_byte_array_unref(n->content);
+    }
+    if (n->perms) {
+        g_list_free_full(n->perms, g_free);
+    }
+    if (n->children) {
+        g_hash_table_unref(n->children);
+    }
+#ifdef XS_NODE_UNIT_TEST
+    g_free(n->name);
+    nr_xs_nodes--;
+    xs_node_list = g_list_remove(xs_node_list, n);
+#endif
+    g_free(n);
+}
+
+/*
+ * Render a permission as the wire-format string "<letter><domid>",
+ * e.g. "r0" or "b123". Unrecognised permission values (including
+ * XS_PERM_NONE) map to 'n'. Caller frees the result.
+ */
+char *xs_perm_as_string(unsigned int perm, unsigned int domid)
+{
+    char c;
+
+    if (perm == (XS_PERM_READ | XS_PERM_WRITE)) {
+        c = 'b';
+    } else if (perm == XS_PERM_READ) {
+        c = 'r';
+    } else if (perm == XS_PERM_WRITE) {
+        c = 'w';
+    } else {
+        c = 'n';
+    }
+
+    return g_strdup_printf("%c%u", c, domid);
+}
+
+static gpointer do_perm_copy(gconstpointer src, gpointer user_data)
+{
+ return g_strdup(src);
+}
+
+static XsNode *xs_node_create(const char *name, GList *perms)
+{
+ XsNode *n = xs_node_new();
+
+#ifdef XS_NODE_UNIT_TEST
+ if (name) {
+ n->name = g_strdup(name);
+ }
+#endif
+
+ n->perms = g_list_copy_deep(perms, do_perm_copy, NULL);
+
+ return n;
+}
+
+/* For copying from one hash table to another using g_hash_table_foreach() */
+static void do_child_insert(gpointer key, gpointer value, gpointer user_data)
+{
+ g_hash_table_insert(user_data, g_strdup(key), xs_node_ref(value));
+}
+
+/*
+ * Make a copy-on-write clone of 'old': same gencnt and perms, shared
+ * (ref'd) content, and a new children table holding references to the
+ * same child nodes.
+ *
+ * Fixes: assert(old) must come before the first dereference of 'old',
+ * and the unit-test branch previously tested n->name — always NULL
+ * straight out of g_new0() — instead of old->name, so the debug name
+ * was never copied.
+ */
+static XsNode *xs_node_copy(XsNode *old)
+{
+    XsNode *n;
+
+    assert(old);
+
+    n = xs_node_new();
+    n->gencnt = old->gencnt;
+
+#ifdef XS_NODE_UNIT_TEST
+    if (old->name) {
+        n->name = g_strdup(old->name);
+    }
+#endif
+
+    if (old->children) {
+        n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
+                                            (GDestroyNotify)xs_node_unref);
+        g_hash_table_foreach(old->children, do_child_insert, n->children);
+    }
+    if (old->perms) {
+        n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL);
+    }
+    if (old->content) {
+        n->content = g_byte_array_ref(old->content);
+    }
+    return n;
+}
+
+/* Returns true if it made a change to the hash table */
+static bool xs_node_add_child(XsNode *n, const char *path_elem, XsNode *child)
+{
+ assert(!strchr(path_elem, '/'));
+
+ if (!child) {
+ assert(n->children);
+ return g_hash_table_remove(n->children, path_elem);
+ }
+
+#ifdef XS_NODE_UNIT_TEST
+ g_free(child->name);
+ child->name = g_strdup(path_elem);
+#endif
+ if (!n->children) {
+ n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
+ (GDestroyNotify)xs_node_unref);
+ }
+
+ /*
+ * The documentation for g_hash_table_insert() says that it "returns a
+ * boolean value to indicate whether the newly added value was already
+ * in the hash table or not."
+ *
+ * It could perhaps be clearer that returning TRUE means it wasn't,
+ */
+ return g_hash_table_insert(n->children, g_strdup(path_elem), child);
+}
+
+struct walk_op {
+ struct XenstoreImplState *s;
+ char path[XENSTORE_ABS_PATH_MAX + 2]; /* Two NUL terminators */
+ int (*op_fn)(XsNode **n, struct walk_op *op);
+ void *op_opaque;
+ void *op_opaque2;
+
+ GList *watches;
+ unsigned int dom_id;
+ unsigned int tx_id;
+
+ /* The number of nodes which will exist in the tree if this op succeeds. */
+ unsigned int new_nr_nodes;
+
+ /*
+ * This is maintained on the way *down* the walk to indicate
+ * whether nodes can be modified in place or whether COW is
+ * required. It starts off being true, as we're always going to
+ * replace the root node. If we walk into a shared subtree it
+ * becomes false. If we start *creating* new nodes for a write,
+ * it becomes true again.
+ *
+ * Do not use it on the way back up.
+ */
+ bool inplace;
+ bool mutating;
+ bool create_dirs;
+ bool in_transaction;
+
+ /* Tracking during recursion so we know which is first. */
+ bool deleted_in_tx;
+};
+
+/*
+ * Fire the watches registered on the node currently named by op->path,
+ * and — if 'parents' is set — the ancestor watches collected in
+ * op->watches on the way down. Watches fire only for mutating
+ * operations and never from within a transaction.
+ *
+ * Each entry in the watches hash table is the head of a list of
+ * XsWatch structures chained via ->next.
+ */
+static void fire_watches(struct walk_op *op, bool parents)
+{
+    GList *l = NULL;
+    XsWatch *w;
+
+    if (!op->mutating || op->in_transaction) {
+        return;
+    }
+
+    if (parents) {
+        l = op->watches;
+    }
+
+    w = g_hash_table_lookup(op->s->watches, op->path);
+    while (w || l) {
+        if (!w) {
+            /* Fire the parent nodes from 'op' if asked to */
+            w = l->data;
+            l = l->next;
+            continue;
+        }
+
+        /* rel_prefix strips the watcher's home-path prefix for domU watches */
+        assert(strlen(op->path) > w->rel_prefix);
+        w->cb(w->cb_opaque, op->path + w->rel_prefix, w->token);
+
+        w = w->next;
+    }
+}
+
+static int xs_node_add_content(XsNode **n, struct walk_op *op)
+{
+ GByteArray *data = op->op_opaque;
+
+ if (op->dom_id) {
+ /*
+ * The real XenStored includes permissions and names of child nodes
+ * in the calculated datasize but life's too short. For a single
+ * tenant internal XenStore, we don't have to be quite as pedantic.
+ */
+ if (data->len > XS_MAX_NODE_SIZE) {
+ return E2BIG;
+ }
+ }
+ /* We *are* the node to be written. Either this or a copy. */
+ if (!op->inplace) {
+ XsNode *old = *n;
+ *n = xs_node_copy(old);
+ xs_node_unref(old);
+ }
+
+ if ((*n)->content) {
+ g_byte_array_unref((*n)->content);
+ }
+ (*n)->content = g_byte_array_ref(data);
+ if (op->tx_id != XBT_NULL) {
+ (*n)->modified_in_tx = true;
+ }
+ return 0;
+}
+
+/*
+ * Walk-op leaf function for reads: append the node's content (if any)
+ * to the caller's GByteArray in op->op_opaque. Read-only, so the walk
+ * must still be in-place.
+ */
+static int xs_node_get_content(XsNode **n, struct walk_op *op)
+{
+    GByteArray *out = op->op_opaque;
+    GByteArray *content;
+
+    assert(op->inplace);
+    assert(*n);
+
+    content = (*n)->content;
+    if (content) {
+        g_byte_array_append(out, content->data, content->len);
+    }
+
+    return 0;
+}
+
+static int node_rm_recurse(gpointer key, gpointer value, gpointer user_data)
+{
+ struct walk_op *op = user_data;
+ int path_len = strlen(op->path);
+ int key_len = strlen(key);
+ XsNode *n = value;
+ bool this_inplace = op->inplace;
+
+ if (n->ref != 1) {
+ op->inplace = 0;
+ }
+
+ assert(key_len + path_len + 2 <= sizeof(op->path));
+ op->path[path_len] = '/';
+ memcpy(op->path + path_len + 1, key, key_len + 1);
+
+ if (n->children) {
+ g_hash_table_foreach_remove(n->children, node_rm_recurse, op);
+ }
+ op->new_nr_nodes--;
+
+ /*
+ * Fire watches on *this* node but not the parents because they are
+ * going to be deleted too, so the watch will fire for them anyway.
+ */
+ fire_watches(op, false);
+ op->path[path_len] = '\0';
+
+ /*
+ * Actually deleting the child here is just an optimisation; if we
+ * don't then the final unref on the topmost victim will just have
+ * to cascade down again repeating all the g_hash_table_foreach()
+ * calls.
+ */
+ return this_inplace;
+}
+
+static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op);
+static void copy_deleted_recurse(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ struct walk_op *op = user_data;
+ GHashTable *siblings = op->op_opaque2;
+ XsNode *n = xs_node_copy_deleted(value, op);
+
+ /*
+ * Reinsert the deleted_in_tx copy of the node into the parent's
+ * 'children' hash table. Having stashed it from op->op_opaque2
+ * before the recursive call to xs_node_copy_deleted() scribbled
+ * over it.
+ */
+ g_hash_table_insert(siblings, g_strdup(key), n);
+}
+
+static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op)
+{
+ XsNode *n = xs_node_new();
+
+ n->gencnt = old->gencnt;
+
+#ifdef XS_NODE_UNIT_TEST
+ if (old->name) {
+ n->name = g_strdup(old->name);
+ }
+#endif
+
+ if (old->children) {
+ n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
+ (GDestroyNotify)xs_node_unref);
+ op->op_opaque2 = n->children;
+ g_hash_table_foreach(old->children, copy_deleted_recurse, op);
+ }
+ if (old->perms) {
+ n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL);
+ }
+ n->deleted_in_tx = true;
+ /* If it gets resurrected we only fire a watch if it lost its content */
+ if (old->content) {
+ n->modified_in_tx = true;
+ }
+ op->new_nr_nodes--;
+ return n;
+}
+
+static int xs_node_rm(XsNode **n, struct walk_op *op)
+{
+ bool this_inplace = op->inplace;
+
+ if (op->tx_id != XBT_NULL) {
+ /* It's not trivial to do inplace handling for this one */
+ XsNode *old = *n;
+ *n = xs_node_copy_deleted(old, op);
+ xs_node_unref(old);
+ return 0;
+ }
+
+ /* Fire watches for, and count, nodes in the subtree which get deleted */
+ if ((*n)->children) {
+ g_hash_table_foreach_remove((*n)->children, node_rm_recurse, op);
+ }
+ op->new_nr_nodes--;
+
+ if (this_inplace) {
+ xs_node_unref(*n);
+ }
+ *n = NULL;
+ return 0;
+}
+
+/*
+ * Walk-op leaf function for GET_PERMS: hand back a deep copy of the
+ * node's permission strings via the GList ** in op->op_opaque.
+ */
+static int xs_node_get_perms(XsNode **n, struct walk_op *op)
+{
+    GList **out = op->op_opaque;
+
+    assert(op->inplace);
+    assert(*n);
+
+    *out = g_list_copy_deep((*n)->perms, do_perm_copy, NULL);
+    return 0;
+}
+
+/*
+ * Split a permission string "<letter><domid>" (e.g. "r0", "b123") into
+ * its components.
+ *
+ * NOTE(review): this asserts instead of failing, so it assumes every
+ * stored perm string is well-formed — confirm that everything which
+ * writes node->perms validates the format first.
+ */
+static void parse_perm(const char *perm, char *letter, unsigned int *dom_id)
+{
+    unsigned int n = sscanf(perm, "%c%u", letter, dom_id);
+
+    assert(n == 2);
+}
+
+/*
+ * Check whether 'dom_id' has any of the access types in 'letters'
+ * (e.g. "rb", "wb") on a node with permission list 'perms'. Dom0 and
+ * the node's owner (first perm entry) always have full access; the
+ * first entry's letter is the default for everyone else, overridden by
+ * any later entry naming this domain.
+ *
+ * NOTE(review): if several entries name the same domid, the *last* one
+ * wins here — confirm this matches xenstored, which takes the first
+ * matching entry.
+ */
+static bool can_access(unsigned int dom_id, GList *perms, const char *letters)
+{
+    unsigned int i, n;
+    char perm_letter;
+    unsigned int perm_dom_id;
+    bool access;
+
+    if (dom_id == 0) {
+        return true;
+    }
+
+    n = g_list_length(perms);
+    assert(n >= 1);
+
+    /*
+     * The dom_id of the first perm is the owner, and the owner always has
+     * read-write access.
+     */
+    parse_perm(g_list_nth_data(perms, 0), &perm_letter, &perm_dom_id);
+    if (dom_id == perm_dom_id) {
+        return true;
+    }
+
+    /*
+     * The letter of the first perm specified the default access for all other
+     * domains.
+     */
+    access = !!strchr(letters, perm_letter);
+    for (i = 1; i < n; i++) {
+        parse_perm(g_list_nth_data(perms, i), &perm_letter, &perm_dom_id);
+        if (dom_id != perm_dom_id) {
+            continue;
+        }
+        access = !!strchr(letters, perm_letter);
+    }
+
+    return access;
+}
+
+static int xs_node_set_perms(XsNode **n, struct walk_op *op)
+{
+ GList *perms = op->op_opaque;
+
+ if (op->dom_id) {
+ unsigned int perm_dom_id;
+ char perm_letter;
+
+ /* A guest may not change permissions on nodes it does not own */
+ if (!can_access(op->dom_id, (*n)->perms, "")) {
+ return EPERM;
+ }
+
+ /* A guest may not change the owner of a node it owns. */
+ parse_perm(perms->data, &perm_letter, &perm_dom_id);
+ if (perm_dom_id != op->dom_id) {
+ return EPERM;
+ }
+
+ if (g_list_length(perms) > XS_MAX_PERMS_PER_NODE) {
+ return ENOSPC;
+ }
+ }
+
+ /* We *are* the node to be written. Either this or a copy. */
+ if (!op->inplace) {
+ XsNode *old = *n;
+ *n = xs_node_copy(old);
+ xs_node_unref(old);
+ }
+
+ if ((*n)->perms) {
+ g_list_free_full((*n)->perms, g_free);
+ }
+ (*n)->perms = g_list_copy_deep(perms, do_perm_copy, NULL);
+ if (op->tx_id != XBT_NULL) {
+ (*n)->modified_in_tx = true;
+ }
+ return 0;
+}
+
+/*
+ * Passed a full reference in *n which it may free if it needs to COW.
+ *
+ * When changing the tree, the op->inplace flag indicates whether this
+ * node may be modified in place (i.e. it and all its parents had a
+ * refcount of one). If walking down the tree we find a node whose
+ * refcount is higher, we must clear op->inplace and COW from there
+ * down. Unless we are creating new nodes as scaffolding for a write
+ * (which works like 'mkdir -p' does). In which case those newly
+ * created nodes can (and must) be modified in place again.
+ */
+static int xs_node_walk(XsNode **n, struct walk_op *op)
+{
+ char *child_name = NULL;
+ size_t namelen;
+ XsNode *old = *n, *child = NULL;
+ bool stole_child = false;
+ bool this_inplace;
+ XsWatch *watch;
+ int err;
+
+ namelen = strlen(op->path);
+ watch = g_hash_table_lookup(op->s->watches, op->path);
+
+ /* Is there a child, or do we hit the double-NUL termination? */
+ if (op->path[namelen + 1]) {
+ char *slash;
+ child_name = op->path + namelen + 1;
+ slash = strchr(child_name, '/');
+ if (slash) {
+ *slash = '\0';
+ }
+ op->path[namelen] = '/';
+ }
+
+ /* If we walk into a subtree which is shared, we must COW */
+ if (op->mutating && old->ref != 1) {
+ op->inplace = false;
+ }
+
+ if (!child_name) {
+ const char *letters = op->mutating ? "wb" : "rb";
+
+ if (!can_access(op->dom_id, old->perms, letters)) {
+ err = EACCES;
+ goto out;
+ }
+
+ /* This is the actual node on which the operation shall be performed */
+ err = op->op_fn(n, op);
+ if (!err) {
+ fire_watches(op, true);
+ }
+ goto out;
+ }
+
+ /* op->inplace will be further modified during the recursion */
+ this_inplace = op->inplace;
+
+ if (old && old->children) {
+ child = g_hash_table_lookup(old->children, child_name);
+ /* This is a *weak* reference to 'child', owned by the hash table */
+ }
+
+ if (child) {
+ if (child->deleted_in_tx) {
+ assert(child->ref == 1);
+ /* Cannot actually set child->deleted_in_tx = false until later */
+ }
+ xs_node_ref(child);
+ /*
+ * Now we own it too. But if we can modify inplace, that's going to
+ * foil the check and force it to COW. We want to be the *only* owner
+ * so that it can be modified in place, so remove it from the hash
+ * table in that case. We'll add it (or its replacement) back later.
+ */
+ if (op->mutating && this_inplace) {
+ g_hash_table_remove(old->children, child_name);
+ stole_child = true;
+ }
+ } else if (op->create_dirs) {
+ assert(op->mutating);
+
+ if (!can_access(op->dom_id, old->perms, "wb")) {
+ err = EACCES;
+ goto out;
+ }
+
+ if (op->dom_id && op->new_nr_nodes >= XS_MAX_DOMAIN_NODES) {
+ err = ENOSPC;
+ goto out;
+ }
+
+ child = xs_node_create(child_name, old->perms);
+ op->new_nr_nodes++;
+
+ /*
+ * If we're creating a new child, we can clearly modify it (and its
+ * children) in place from here on down.
+ */
+ op->inplace = true;
+ } else {
+ err = ENOENT;
+ goto out;
+ }
+
+ /*
+ * If there's a watch on this node, add it to the list to be fired
+ * (with the correct full pathname for the modified node) at the end.
+ */
+ if (watch) {
+ op->watches = g_list_append(op->watches, watch);
+ }
+
+ /*
+ * Except for the temporary child-stealing as noted, our node has not
+ * changed yet. We don't yet know the overall operation will complete.
+ */
+ err = xs_node_walk(&child, op);
+
+ if (watch) {
+ op->watches = g_list_remove(op->watches, watch);
+ }
+
+ if (err || !op->mutating) {
+ if (stole_child) {
+ /* Put it back as it was. */
+ g_hash_table_replace(old->children, g_strdup(child_name), child);
+ } else {
+ xs_node_unref(child);
+ }
+ goto out;
+ }
+
+ /*
+ * Now we know the operation has completed successfully and we're on
+ * the way back up. Make the change, substituting 'child' in the
+ * node at our level.
+ */
+ if (!this_inplace) {
+ *n = xs_node_copy(old);
+ xs_node_unref(old);
+ }
+
+ /*
+ * If we resurrected a deleted_in_tx node, we can mark it as no longer
+ * deleted now that we know the overall operation has succeeded.
+ */
+ if (op->create_dirs && child && child->deleted_in_tx) {
+ op->new_nr_nodes++;
+ child->deleted_in_tx = false;
+ }
+
+ /*
+ * The child may be NULL here, for a remove operation. Either way,
+ * xs_node_add_child() will do the right thing and return a value
+ * indicating whether it changed the parent's hash table or not.
+ *
+ * We bump the parent gencnt if it adds a child that we *didn't*
+ * steal from it in the first place, or if child==NULL and was
+ * thus removed (whether we stole it earlier and didn't put it
+ * back, or xs_node_add_child() actually removed it now).
+ */
+ if ((xs_node_add_child(*n, child_name, child) && !stole_child) || !child) {
+ (*n)->gencnt++;
+ }
+
+ out:
+ op->path[namelen] = '\0';
+ if (!namelen) {
+ assert(!op->watches);
+ /*
+ * On completing the recursion back up the path walk and reaching the
+ * top, assign the new node count if the operation was successful. If
+ * the main tree was changed, bump its tx ID so that outstanding
+ * transactions correctly fail. But don't bump it every time; only
+ * if it makes a difference.
+ */
+ if (!err && op->mutating) {
+ if (!op->in_transaction) {
+ if (op->s->root_tx != op->s->last_tx) {
+ op->s->root_tx = next_tx(op->s);
+ }
+ op->s->nr_nodes = op->new_nr_nodes;
+ } else {
+ XsTransaction *tx = g_hash_table_lookup(op->s->transactions,
+ GINT_TO_POINTER(op->tx_id));
+ assert(tx);
+ tx->nr_nodes = op->new_nr_nodes;
+ }
+ }
+ }
+ return err;
+}
+
+static void append_directory_item(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ GList **items = user_data;
+
+ *items = g_list_insert_sorted(*items, g_strdup(key), (GCompareFunc)strcmp);
+}
+
+/* Populates items with char * names which caller must free. */
+static int xs_node_directory(XsNode **n, struct walk_op *op)
+{
+ GList **items = op->op_opaque;
+
+ assert(op->inplace);
+ assert(*n);
+
+ if ((*n)->children) {
+ g_hash_table_foreach((*n)->children, append_directory_item, items);
+ }
+
+ if (op->op_opaque2) {
+ *(uint64_t *)op->op_opaque2 = (*n)->gencnt;
+ }
+
+ return 0;
+}
+
+/*
+ * Canonicalise and validate a (possibly guest-supplied) path into
+ * 'outpath', which must hold XENSTORE_ABS_PATH_MAX + 1 bytes. Relative
+ * paths are prefixed with the domain's home path. Returns 0 or errno.
+ *
+ * The trailing-slash check previously read userpath[pathlen], which is
+ * the NUL terminator, so it could never match and paths like "foo/"
+ * were accepted — the subsequent tree walk would then see an empty
+ * final path element. Check the last real character instead, while
+ * still permitting the root path "/" itself.
+ */
+static int validate_path(char *outpath, const char *userpath,
+                         unsigned int dom_id)
+{
+    size_t i, pathlen = strlen(userpath);
+
+    if (!pathlen || strstr(userpath, "//") ||
+        (pathlen != 1 && userpath[pathlen - 1] == '/')) {
+        return EINVAL;
+    }
+    for (i = 0; i < pathlen; i++) {
+        if (!strchr(XS_VALID_CHARS, userpath[i])) {
+            return EINVAL;
+        }
+    }
+    if (userpath[0] == '/') {
+        if (pathlen > XENSTORE_ABS_PATH_MAX) {
+            return E2BIG;
+        }
+        memcpy(outpath, userpath, pathlen + 1);
+    } else {
+        if (pathlen > XENSTORE_REL_PATH_MAX) {
+            return E2BIG;
+        }
+        snprintf(outpath, XENSTORE_ABS_PATH_MAX, "/local/domain/%u/%s", dom_id,
+                 userpath);
+    }
+    return 0;
+}
+
+
+/*
+ * Prepare a walk_op: validate/canonicalise the path, pick the tree to
+ * walk (main tree for XBT_NULL, otherwise the transaction's private
+ * root) and reset all per-walk state. Returns 0 or errno.
+ */
+static int init_walk_op(XenstoreImplState *s, struct walk_op *op,
+                        xs_transaction_t tx_id, unsigned int dom_id,
+                        const char *path, XsNode ***rootp)
+{
+    int ret = validate_path(op->path, path, dom_id);
+    if (ret) {
+        return ret;
+    }
+
+    /*
+     * We use *two* NUL terminators at the end of the path, as during the walk
+     * we will temporarily turn each '/' into a NUL to allow us to use that
+     * path element for the lookup.
+     */
+    op->path[strlen(op->path) + 1] = '\0';
+    op->watches = NULL;
+    /*
+     * Overwriting the leading '/' gives the root node an empty path
+     * element; xs_node_walk() then finds the first real element at
+     * op->path[1] and restores each '/' as it descends.
+     */
+    op->path[0] = '\0';
+    op->inplace = true;
+    op->mutating = false;
+    op->create_dirs = false;
+    op->in_transaction = false;
+    op->dom_id = dom_id;
+    op->tx_id = tx_id;
+    op->s = s;
+
+    if (tx_id == XBT_NULL) {
+        *rootp = &s->root;
+        op->new_nr_nodes = s->nr_nodes;
+    } else {
+        XsTransaction *tx = g_hash_table_lookup(s->transactions,
+                                                GINT_TO_POINTER(tx_id));
+        if (!tx) {
+            return ENOENT;
+        }
+        *rootp = &tx->root;
+        op->new_nr_nodes = tx->nr_nodes;
+        op->in_transaction = true;
+    }
+
+    return 0;
+}
+
+/*
+ * Read the content of 'path' (in tx_id's view of the tree) into the
+ * caller-owned GByteArray 'data'. Returns 0 or errno.
+ */
+int xs_impl_read(XenstoreImplState *s, unsigned int dom_id,
+                 xs_transaction_t tx_id, const char *path, GByteArray *data)
+{
+    /*
+     * The data GByteArray shall exist, and will be freed by caller.
+     * Just g_byte_array_append() to it.
+     */
+    struct walk_op op;
+    XsNode **n;
+    int ret;
+
+    ret = init_walk_op(s, &op, tx_id, dom_id, path, &n);
+    if (ret) {
+        return ret;
+    }
+    op.op_fn = xs_node_get_content;
+    op.op_opaque = data;
+    return xs_node_walk(n, &op);
+}
+
+/*
+ * Write 'data' as the content of 'path', creating intermediate nodes
+ * as needed (like 'mkdir -p'). Returns 0 or errno.
+ */
+int xs_impl_write(XenstoreImplState *s, unsigned int dom_id,
+                  xs_transaction_t tx_id, const char *path, GByteArray *data)
+{
+    /*
+     * The data GByteArray shall exist, will be freed by caller. You are
+     * free to use g_byte_array_steal() and keep the data. Or just ref it.
+     */
+    struct walk_op op;
+    XsNode **n;
+    int ret;
+
+    ret = init_walk_op(s, &op, tx_id, dom_id, path, &n);
+    if (ret) {
+        return ret;
+    }
+    op.op_fn = xs_node_add_content;
+    op.op_opaque = data;
+    op.mutating = true;
+    op.create_dirs = true;
+    return xs_node_walk(n, &op);
+}
+
+/*
+ * List the children of the node at 'path'. Child names are returned
+ * in *items and the node's generation count in *gencnt. Returns 0 or
+ * a positive errno.
+ */
+int xs_impl_directory(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path,
+ uint64_t *gencnt, GList **items)
+{
+ /*
+ * The items are (char *) to be freed by caller. Although it's consumed
+ * immediately so if you want to change it to (const char *) and keep
+ * them, go ahead and change the caller.
+ */
+ struct walk_op op;
+ XsNode **n;
+ int ret;
+
+ ret = init_walk_op(s, &op, tx_id, dom_id, path, &n);
+ if (ret) {
+ return ret;
+ }
+ op.op_fn = xs_node_directory;
+ op.op_opaque = items;
+ op.op_opaque2 = gencnt;
+ return xs_node_walk(n, &op);
+}
+
+/*
+ * Start a transaction: snapshot the live tree (by ref) into a new
+ * XsTransaction and return its id in *tx_id (which must be XBT_NULL on
+ * entry). Guest domains are capped at XS_MAX_TRANSACTIONS concurrent
+ * transactions (ENOSPC). Returns 0 or a positive errno.
+ */
+int xs_impl_transaction_start(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t *tx_id)
+{
+ XsTransaction *tx;
+
+ if (*tx_id != XBT_NULL) {
+ return EINVAL;
+ }
+
+ if (dom_id && s->nr_domu_transactions >= XS_MAX_TRANSACTIONS) {
+ return ENOSPC;
+ }
+
+ tx = g_new0(XsTransaction, 1);
+
+ tx->nr_nodes = s->nr_nodes;
+ tx->tx_id = next_tx(s);
+ /* Remember which root generation we branched from, for commit checks */
+ tx->base_tx = s->root_tx;
+ tx->root = xs_node_ref(s->root);
+ tx->dom_id = dom_id;
+
+ g_hash_table_insert(s->transactions, GINT_TO_POINTER(tx->tx_id), tx);
+ if (dom_id) {
+ s->nr_domu_transactions++;
+ }
+ *tx_id = tx->tx_id;
+ return 0;
+}
+
+/*
+ * Per-child callback for g_hash_table_foreach_remove() during commit.
+ * Recursively visits nodes with refcount 1 (i.e. unique to the
+ * committed transaction), fires watches for those really modified or
+ * deleted, and returns true to expunge deleted nodes from the tree.
+ * op->path is extended/restored in place as the walk descends.
+ */
+static gboolean tx_commit_walk(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ struct walk_op *op = user_data;
+ int path_len = strlen(op->path);
+ int key_len = strlen(key);
+ bool fire_parents = true;
+ XsWatch *watch;
+ XsNode *n = value;
+
+ /* Shared with another tree: nothing changed below here */
+ if (n->ref != 1) {
+ return false;
+ }
+
+ if (n->deleted_in_tx) {
+ /*
+ * We fire watches on our parents if we are the *first* node
+ * to be deleted (the topmost one). This matches the behaviour
+ * when deleting in the live tree.
+ */
+ fire_parents = !op->deleted_in_tx;
+
+ /* Only used on the way down so no need to clear it later */
+ op->deleted_in_tx = true;
+ }
+
+ /* Append "/<key>" to the walk path (two NULs fit, hence +2) */
+ assert(key_len + path_len + 2 <= sizeof(op->path));
+ op->path[path_len] = '/';
+ memcpy(op->path + path_len + 1, key, key_len + 1);
+
+ /* Keep this node's watch on the parent-watch list while descending */
+ watch = g_hash_table_lookup(op->s->watches, op->path);
+ if (watch) {
+ op->watches = g_list_append(op->watches, watch);
+ }
+
+ if (n->children) {
+ g_hash_table_foreach_remove(n->children, tx_commit_walk, op);
+ }
+
+ if (watch) {
+ op->watches = g_list_remove(op->watches, watch);
+ }
+
+ /*
+ * Don't fire watches if this node was only copied because a
+ * descendent was changed. The modified_in_tx flag indicates the
+ * ones which were really changed.
+ */
+ if (n->modified_in_tx || n->deleted_in_tx) {
+ fire_watches(op, fire_parents);
+ n->modified_in_tx = false;
+ }
+ /* Restore the path to what it was on entry */
+ op->path[path_len] = '\0';
+
+ /* Deleted nodes really do get expunged when we commit */
+ return n->deleted_in_tx;
+}
+
+/*
+ * Attempt to commit 'tx': fails with EAGAIN if the live tree has moved
+ * on since the transaction started (base_tx mismatch). On success the
+ * transaction's tree becomes the live tree and watches are fired for
+ * every node the transaction actually changed.
+ */
+static int transaction_commit(XenstoreImplState *s, XsTransaction *tx)
+{
+ struct walk_op op;
+ XsNode **n;
+
+ if (s->root_tx != tx->base_tx) {
+ return EAGAIN;
+ }
+ /* Swap the transaction's tree in as the live tree */
+ xs_node_unref(s->root);
+ s->root = tx->root;
+ tx->root = NULL;
+ s->root_tx = tx->tx_id;
+ s->nr_nodes = tx->nr_nodes;
+
+ /* Cannot fail for "/" with XBT_NULL; used only to set up op.path etc. */
+ init_walk_op(s, &op, XBT_NULL, tx->dom_id, "/", &n);
+ op.deleted_in_tx = false;
+ op.mutating = true;
+
+ /*
+ * Walk the new root and fire watches on any node which has a
+ * refcount of one (which is therefore unique to this transaction).
+ */
+ if (s->root->children) {
+ g_hash_table_foreach_remove(s->root->children, tx_commit_walk, &op);
+ }
+
+ return 0;
+}
+
+/*
+ * End a transaction, committing it if 'commit' is set (which may fail
+ * with EAGAIN) or simply discarding it. The transaction must belong to
+ * the calling domain (ENOENT otherwise). The transaction is destroyed
+ * either way.
+ */
+int xs_impl_transaction_end(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, bool commit)
+{
+ int ret = 0;
+ XsTransaction *tx = g_hash_table_lookup(s->transactions,
+ GINT_TO_POINTER(tx_id));
+
+ if (!tx || tx->dom_id != dom_id) {
+ return ENOENT;
+ }
+
+ if (commit) {
+ ret = transaction_commit(s, tx);
+ }
+
+ /* Value destructor (xs_tx_free) releases the tx and its tree */
+ g_hash_table_remove(s->transactions, GINT_TO_POINTER(tx_id));
+ if (dom_id) {
+ assert(s->nr_domu_transactions);
+ s->nr_domu_transactions--;
+ }
+ return ret;
+}
+
+/*
+ * Delete the node at 'path' (and, via xs_node_rm, its subtree).
+ * Returns 0 or a positive errno.
+ */
+int xs_impl_rm(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path)
+{
+ struct walk_op op;
+ XsNode **n;
+ int ret;
+
+ ret = init_walk_op(s, &op, tx_id, dom_id, path, &n);
+ if (ret) {
+ return ret;
+ }
+ op.op_fn = xs_node_rm;
+ op.mutating = true;
+ return xs_node_walk(n, &op);
+}
+
+/*
+ * Fetch the permission list of the node at 'path' into *perms.
+ * Returns 0 or a positive errno.
+ */
+int xs_impl_get_perms(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path, GList **perms)
+{
+ struct walk_op op;
+ XsNode **n;
+ int ret;
+
+ ret = init_walk_op(s, &op, tx_id, dom_id, path, &n);
+ if (ret) {
+ return ret;
+ }
+ op.op_fn = xs_node_get_perms;
+ op.op_opaque = perms;
+ return xs_node_walk(n, &op);
+}
+
+/*
+ * g_list_foreach() callback: validate one permission string of the
+ * form "<letter><domid>" where letter is one of n/r/w/b. Clears
+ * *valid on the first malformed entry and short-circuits thereafter.
+ */
+static void is_valid_perm(gpointer data, gpointer user_data)
+{
+ char *perm = data;
+ bool *valid = user_data;
+ char letter;
+ unsigned int dom_id;
+
+ /* A previous entry already failed; nothing more to check */
+ if (!*valid) {
+ return;
+ }
+
+ if (sscanf(perm, "%c%u", &letter, &dom_id) != 2) {
+ *valid = false;
+ return;
+ }
+
+ switch (letter) {
+ case 'n':
+ case 'r':
+ case 'w':
+ case 'b':
+ break;
+
+ default:
+ *valid = false;
+ break;
+ }
+}
+
+/*
+ * Replace the permission list of the node at 'path'. The list must be
+ * non-empty and every entry must pass is_valid_perm() (EINVAL
+ * otherwise). Returns 0 or a positive errno.
+ */
+int xs_impl_set_perms(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path, GList *perms)
+{
+ struct walk_op op;
+ XsNode **n;
+ bool valid = true;
+ int ret;
+
+ if (!g_list_length(perms)) {
+ return EINVAL;
+ }
+
+ g_list_foreach(perms, is_valid_perm, &valid);
+ if (!valid) {
+ return EINVAL;
+ }
+
+ ret = init_walk_op(s, &op, tx_id, dom_id, path, &n);
+ if (ret) {
+ return ret;
+ }
+ op.op_fn = xs_node_set_perms;
+ op.op_opaque = perms;
+ op.mutating = true;
+ return xs_node_walk(n, &op);
+}
+
+/*
+ * Register a watch without firing it. A watch is identified by the
+ * 4-tuple (token, fn, opaque, dom_id); an identical registration on
+ * the same path yields EEXIST. Guest domains are limited to
+ * XS_MAX_WATCHES (E2BIG). The watches hash table maps the absolute
+ * path to a singly-linked list of XsWatch.
+ */
+static int do_xs_impl_watch(XenstoreImplState *s, unsigned int dom_id,
+ const char *path, const char *token,
+ xs_impl_watch_fn fn, void *opaque)
+
+{
+ char abspath[XENSTORE_ABS_PATH_MAX + 1];
+ XsWatch *w, *l;
+ int ret;
+
+ ret = validate_path(abspath, path, dom_id);
+ if (ret) {
+ return ret;
+ }
+
+ /* Check for duplicates */
+ l = w = g_hash_table_lookup(s->watches, abspath);
+ while (w) {
+ if (!g_strcmp0(token, w->token) && opaque == w->cb_opaque &&
+ fn == w->cb && dom_id == w->dom_id) {
+ return EEXIST;
+ }
+ w = w->next;
+ }
+
+ if (dom_id && s->nr_domu_watches >= XS_MAX_WATCHES) {
+ return E2BIG;
+ }
+
+ w = g_new0(XsWatch, 1);
+ w->token = g_strdup(token);
+ w->cb = fn;
+ w->cb_opaque = opaque;
+ w->dom_id = dom_id;
+ /* Length of the "/local/domain/N" prefix added for relative paths */
+ w->rel_prefix = strlen(abspath) - strlen(path);
+
+ /* l was looked up above when checking for duplicates */
+ if (l) {
+ /* Existing list for this path: insert after its head */
+ w->next = l->next;
+ l->next = w;
+ } else {
+ g_hash_table_insert(s->watches, g_strdup(abspath), w);
+ }
+ if (dom_id) {
+ s->nr_domu_watches++;
+ }
+
+ return 0;
+}
+
+/*
+ * Public watch registration: register via do_xs_impl_watch() and, on
+ * success, fire the watch once immediately as the XenStore protocol
+ * requires for new watches.
+ */
+int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path,
+ const char *token, xs_impl_watch_fn fn, void *opaque)
+{
+ int ret = do_xs_impl_watch(s, dom_id, path, token, fn, opaque);
+
+ if (!ret) {
+ /* A new watch should fire immediately */
+ fn(opaque, path, token);
+ }
+
+ return ret;
+}
+
+/*
+ * Free one watch, adjusting the guest-watch quota, and return its
+ * 'next' pointer so callers can splice it out of the list.
+ */
+static XsWatch *free_watch(XenstoreImplState *s, XsWatch *w)
+{
+ XsWatch *next = w->next;
+
+ if (w->dom_id) {
+ assert(s->nr_domu_watches);
+ s->nr_domu_watches--;
+ }
+
+ g_free(w->token);
+ g_free(w);
+
+ return next;
+}
+
+/*
+ * Remove a watch previously registered with xs_impl_watch(). All four
+ * identifying fields (token, callback fn, opaque, dom_id) must match
+ * the registration, mirroring the duplicate test in do_xs_impl_watch().
+ * Returns 0 on success, ENOENT if no matching watch exists, or a
+ * validation error from validate_path().
+ */
+int xs_impl_unwatch(XenstoreImplState *s, unsigned int dom_id,
+ const char *path, const char *token,
+ xs_impl_watch_fn fn, void *opaque)
+{
+ char abspath[XENSTORE_ABS_PATH_MAX + 1];
+ XsWatch *w, **l;
+ int ret;
+
+ ret = validate_path(abspath, path, dom_id);
+ if (ret) {
+ return ret;
+ }
+
+ w = g_hash_table_lookup(s->watches, abspath);
+ if (!w) {
+ return ENOENT;
+ }
+
+ /*
+ * The hash table contains the first element of a list of
+ * watches. Removing the first element in the list is a
+ * special case because we have to update the hash table to
+ * point to the next (or remove it if there's nothing left).
+ */
+ if (!g_strcmp0(token, w->token) && fn == w->cb && opaque == w->cb_opaque &&
+ dom_id == w->dom_id) {
+ if (w->next) {
+ /* Insert the previous 'next' into the hash table */
+ g_hash_table_insert(s->watches, g_strdup(abspath), w->next);
+ } else {
+ /* Nothing left; remove from hash table */
+ g_hash_table_remove(s->watches, abspath);
+ }
+ free_watch(s, w);
+ return 0;
+ }
+
+ /*
+ * We're all done messing with the hash table because the element
+ * it points to has survived the cull. Now it's just a simple
+ * linked list removal operation.
+ */
+ for (l = &w->next; *l; l = &w->next) {
+ w = *l;
+
+ /*
+ * Bug fix: this must test opaque == w->cb_opaque, the same match
+ * used for the list head above and for duplicate detection in
+ * do_xs_impl_watch(). The previous '!=' could remove a *different*
+ * watch (same token/fn/dom_id, different opaque) and would never
+ * remove the watch actually requested.
+ */
+ if (!g_strcmp0(token, w->token) && fn == w->cb &&
+ opaque == w->cb_opaque && dom_id == w->dom_id) {
+ *l = free_watch(s, w);
+ return 0;
+ }
+ }
+
+ return ENOENT;
+}
+
+/*
+ * Drop every watch belonging to 'dom_id' (e.g. on guest reset or
+ * before deserialization). For each watched path, filter the per-path
+ * list; if the list head changes, the hash table entry is stolen and
+ * either re-inserted with the new head or freed entirely.
+ * Always returns 0.
+ */
+int xs_impl_reset_watches(XenstoreImplState *s, unsigned int dom_id)
+{
+ char **watch_paths;
+ guint nr_watch_paths;
+ guint i;
+
+ watch_paths = (char **)g_hash_table_get_keys_as_array(s->watches,
+ &nr_watch_paths);
+
+ for (i = 0; i < nr_watch_paths; i++) {
+ XsWatch *w1 = g_hash_table_lookup(s->watches, watch_paths[i]);
+ XsWatch *w2, *w, **l;
+
+ /*
+ * w1 is the original list. The hash table has this pointer.
+ * w2 is the head of our newly-filtered list.
+ * w and l are temporary for processing. w is somewhat redundant
+ * with *l but makes my eyes bleed less.
+ */
+
+ w = w2 = w1;
+ l = &w;
+ while (w) {
+ if (w->dom_id == dom_id) {
+ /* If we're freeing the head of the list, bump w2 */
+ if (w2 == w) {
+ w2 = w->next;
+ }
+ *l = free_watch(s, w);
+ } else {
+ l = &w->next;
+ }
+ w = *l;
+ }
+ /*
+ * If the head of the list survived the cull, we don't need to
+ * touch the hash table and we're done with this path. Else...
+ */
+ if (w1 != w2) {
+ g_hash_table_steal(s->watches, watch_paths[i]);
+
+ /*
+ * It was already freed. (Don't worry, this whole thing is
+ * single-threaded and nobody saw it in the meantime). And
+ * having *stolen* it, we now own the watch_paths[i] string
+ * so if we don't give it back to the hash table, we need
+ * to free it.
+ */
+ if (w2) {
+ g_hash_table_insert(s->watches, watch_paths[i], w2);
+ } else {
+ g_free(watch_paths[i]);
+ }
+ }
+ }
+ g_free(watch_paths);
+ return 0;
+}
+
+/*
+ * GDestroyNotify for s->transactions: release a transaction's private
+ * tree reference (if not already handed over by commit) and the
+ * transaction itself.
+ */
+static void xs_tx_free(void *_tx)
+{
+ XsTransaction *tx = _tx;
+ if (tx->root) {
+ xs_node_unref(tx->root);
+ }
+ g_free(tx);
+}
+
+/*
+ * Allocate a fresh XenStore state: empty watch and transaction tables
+ * and a root node with an XS_PERM_NONE permission for domain 0.
+ * Caller owns the returned state.
+ */
+XenstoreImplState *xs_impl_create(unsigned int dom_id)
+{
+ XenstoreImplState *s = g_new0(XenstoreImplState, 1);
+ GList *perms;
+
+ s->watches = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
+ s->transactions = g_hash_table_new_full(g_direct_hash, g_direct_equal,
+ NULL, xs_tx_free);
+
+ perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, 0));
+ s->root = xs_node_create("/", perms);
+ /* xs_node_create copied the perm strings; free our own list */
+ g_list_free_full(perms, g_free);
+ s->nr_nodes = 1;
+
+ s->root_tx = s->last_tx = 1;
+ return s;
+}
+
+
+/*
+ * Recursively reset serialized_tx on a subtree so that a subsequent
+ * serialization pass writes each node afresh instead of emitting a
+ * NODE_REF to a stale previous pass.
+ */
+static void clear_serialized_tx(gpointer key, gpointer value, gpointer opaque)
+{
+ XsNode *n = value;
+
+ n->serialized_tx = XBT_NULL;
+ if (n->children) {
+ g_hash_table_foreach(n->children, clear_serialized_tx, NULL);
+ }
+}
+
+/* As clear_serialized_tx(), applied to one transaction's private tree */
+static void clear_tx_serialized_tx(gpointer key, gpointer value,
+ gpointer opaque)
+{
+ XsTransaction *t = value;
+
+ clear_serialized_tx(NULL, t->root, NULL);
+}
+
+/* Append 'val' to the save stream as a big-endian 32-bit value */
+static void write_be32(GByteArray *save, uint32_t val)
+{
+ uint32_t be = htonl(val);
+ g_byte_array_append(save, (void *)&be, sizeof(be));
+}
+
+
+/* Context threaded through the save_* callbacks during serialization */
+struct save_state {
+ GByteArray *bytes; /* output stream */
+ unsigned int tx_id; /* id of the tree currently being written */
+};
+
+/* Per-node flag byte in the serialized stream (see xs_impl_serialize) */
+#define MODIFIED_IN_TX (1U << 0)
+#define DELETED_IN_TX (1U << 1)
+#define NODE_REF (1U << 2)
+
+/*
+ * Serialize one node (and recursively its children) to ss->bytes in
+ * the format documented in xs_impl_serialize(). Nodes shared between
+ * trees are written once; later occurrences become a NODE_REF to the
+ * tx_id of the tree that first wrote them.
+ */
+static void save_node(gpointer key, gpointer value, gpointer opaque)
+{
+ struct save_state *ss = opaque;
+ XsNode *n = value;
+ char *name = key;
+ uint8_t flag = 0;
+
+ /* Child nodes (i.e. anything but the root) have a name */
+ if (name) {
+ g_byte_array_append(ss->bytes, key, strlen(key) + 1);
+ }
+
+ /*
+ * If we already wrote this node, refer to the previous copy.
+ * There's no rename/move in XenStore, so all we need to find
+ * it is the tx_id of the transation in which it exists. Which
+ * may be the root tx.
+ */
+ if (n->serialized_tx != XBT_NULL) {
+ flag = NODE_REF;
+ g_byte_array_append(ss->bytes, &flag, 1);
+ write_be32(ss->bytes, n->serialized_tx);
+ } else {
+ GList *l;
+ n->serialized_tx = ss->tx_id;
+
+ if (n->modified_in_tx) {
+ flag |= MODIFIED_IN_TX;
+ }
+ if (n->deleted_in_tx) {
+ flag |= DELETED_IN_TX;
+ }
+ g_byte_array_append(ss->bytes, &flag, 1);
+
+ /* Content: be32 length then raw bytes (zero length if none) */
+ if (n->content) {
+ write_be32(ss->bytes, n->content->len);
+ g_byte_array_append(ss->bytes, n->content->data, n->content->len);
+ } else {
+ write_be32(ss->bytes, 0);
+ }
+
+ for (l = n->perms; l; l = l->next) {
+ g_byte_array_append(ss->bytes, l->data, strlen(l->data) + 1);
+ }
+ /* NUL termination after perms */
+ g_byte_array_append(ss->bytes, (void *)"", 1);
+
+ if (n->children) {
+ g_hash_table_foreach(n->children, save_node, ss);
+ }
+ /* NUL termination after children (child name is NUL) */
+ g_byte_array_append(ss->bytes, (void *)"", 1);
+ }
+}
+
+/* Serialize a whole tree: its tx_id followed by its (nameless) root */
+static void save_tree(struct save_state *ss, uint32_t tx_id, XsNode *root)
+{
+ write_be32(ss->bytes, tx_id);
+ ss->tx_id = tx_id;
+ save_node(NULL, root, ss);
+}
+
+/*
+ * g_hash_table_foreach() callback: serialize one open transaction as
+ * base_tx, dom_id, then its tree.
+ */
+static void save_tx(gpointer key, gpointer value, gpointer opaque)
+{
+ uint32_t tx_id = GPOINTER_TO_INT(key);
+ struct save_state *ss = opaque;
+ XsTransaction *n = value;
+
+ write_be32(ss->bytes, n->base_tx);
+ write_be32(ss->bytes, n->dom_id);
+
+ save_tree(ss, tx_id, n->root);
+}
+
+/*
+ * g_hash_table_foreach() callback: serialize one guest watch as the
+ * guest-relative path (key minus the rel_prefix added at registration)
+ * followed by its token, both NUL-terminated.
+ */
+static void save_watch(gpointer key, gpointer value, gpointer opaque)
+{
+ struct save_state *ss = opaque;
+ XsWatch *w = value;
+
+ /* We only save the *guest* watches. */
+ if (w->dom_id) {
+ gpointer relpath = key + w->rel_prefix;
+ g_byte_array_append(ss->bytes, relpath, strlen(relpath) + 1);
+ g_byte_array_append(ss->bytes, (void *)w->token, strlen(w->token) + 1);
+ }
+}
+
+/*
+ * Serialize the complete XenStore state (live tree, open transactions,
+ * guest watches) into a newly-allocated GByteArray owned by the
+ * caller. The stream grammar is documented inline below.
+ */
+GByteArray *xs_impl_serialize(XenstoreImplState *s)
+{
+ struct save_state ss;
+
+ ss.bytes = g_byte_array_new();
+
+ /*
+ * node = flags [ real_node / node_ref ]
+ * flags = uint8_t (MODIFIED_IN_TX | DELETED_IN_TX | NODE_REF)
+ * node_ref = tx_id (in which the original version of this node exists)
+ * real_node = content perms child* NUL
+ * content = len data
+ * len = uint32_t
+ * data = uint8_t{len}
+ * perms = perm* NUL
+ * perm = asciiz
+ * child = name node
+ * name = asciiz
+ *
+ * tree = tx_id node
+ * tx_id = uint32_t
+ *
+ * transaction = base_tx_id dom_id tree
+ * base_tx_id = uint32_t
+ * dom_id = uint32_t
+ *
+ * tx_list = tree transaction* XBT_NULL
+ *
+ * watch = path token
+ * path = asciiz
+ * token = asciiz
+ *
+ * watch_list = watch* NUL
+ *
+ * xs_serialize_stream = last_tx tx_list watch_list
+ * last_tx = uint32_t
+ */
+
+ /* Clear serialized_tx in every node. */
+ if (s->serialized) {
+ clear_serialized_tx(NULL, s->root, NULL);
+ g_hash_table_foreach(s->transactions, clear_tx_serialized_tx, NULL);
+ }
+
+ s->serialized = true;
+
+ write_be32(ss.bytes, s->last_tx);
+ save_tree(&ss, s->root_tx, s->root);
+ g_hash_table_foreach(s->transactions, save_tx, &ss);
+
+ /* XBT_NULL terminates the transaction list */
+ write_be32(ss.bytes, XBT_NULL);
+
+ g_hash_table_foreach(s->watches, save_watch, &ss);
+ /* Empty path terminates the watch list */
+ g_byte_array_append(ss.bytes, (void *)"", 1);
+
+ return ss.bytes;
+}
+
+/* Parser state threaded through the consume_* deserialization helpers */
+struct unsave_state {
+ char path[XENSTORE_ABS_PATH_MAX + 1]; /* path of node being parsed */
+ XenstoreImplState *s; /* state being reconstructed */
+ GByteArray *bytes; /* the full input stream */
+ uint8_t *d; /* read cursor */
+ size_t l; /* bytes remaining at cursor */
+ bool root_walk; /* parsing base tree: mark nodes modified */
+};
+
+/*
+ * Consume a big-endian 32-bit value from the stream into *val.
+ * Returns 0 or -EINVAL on truncated input.
+ */
+static int consume_be32(struct unsave_state *us, unsigned int *val)
+{
+ uint32_t d;
+
+ if (us->l < sizeof(d)) {
+ return -EINVAL;
+ }
+ /* memcpy: the cursor may be unaligned */
+ memcpy(&d, us->d, sizeof(d));
+ *val = ntohl(d);
+ us->d += sizeof(d);
+ us->l -= sizeof(d);
+ return 0;
+}
+
+/*
+ * Consume a NUL-terminated string from the stream. *str (optional)
+ * points into the stream buffer — not a copy — and *len (optional)
+ * receives its length. Returns -EINVAL if no NUL is found before the
+ * end of input.
+ */
+static int consume_string(struct unsave_state *us, char **str, size_t *len)
+{
+ size_t l;
+
+ if (!us->l) {
+ return -EINVAL;
+ }
+
+ l = strnlen((void *)us->d, us->l);
+ if (l == us->l) {
+ /* Unterminated: ran off the end of the buffer */
+ return -EINVAL;
+ }
+
+ if (str) {
+ *str = (void *)us->d;
+ }
+ if (len) {
+ *len = l;
+ }
+
+ us->d += l + 1;
+ us->l -= l + 1;
+ return 0;
+}
+
+/*
+ * Resolve a relative '/'-separated path (no leading slash) under node
+ * 'n', recursively. The path is temporarily modified in place (each
+ * '/' NULed for the child lookup) but restored before returning.
+ * Returns the node or NULL if any element is missing.
+ */
+static XsNode *lookup_node(XsNode *n, char *path)
+{
+ char *slash = strchr(path, '/');
+ XsNode *child;
+
+ /* Empty path resolves to the current node */
+ if (path[0] == '\0') {
+ return n;
+ }
+
+ if (slash) {
+ *slash = '\0';
+ }
+
+ if (!n->children) {
+ return NULL;
+ }
+ child = g_hash_table_lookup(n->children, path);
+ if (!slash) {
+ return child;
+ }
+
+ /* Restore the separator before recursing into the remainder */
+ *slash = '/';
+ if (!child) {
+ return NULL;
+ }
+ return lookup_node(child, slash + 1);
+}
+
+/*
+ * Resolve the current us->path (skipping its leading '/') in the tree
+ * identified by tx_id: the live tree for root_tx, else the matching
+ * transaction's tree. Used to satisfy NODE_REF entries in the stream.
+ */
+static XsNode *lookup_tx_node(struct unsave_state *us, unsigned int tx_id)
+{
+ XsTransaction *t;
+ if (tx_id == us->s->root_tx) {
+ return lookup_node(us->s->root, us->path + 1);
+ }
+
+ t = g_hash_table_lookup(us->s->transactions, GINT_TO_POINTER(tx_id));
+ if (!t) {
+ return NULL;
+ }
+ g_assert(t->root);
+ return lookup_node(t->root, us->path + 1);
+}
+
+/* Recursively add the size of a subtree to *nr_nodes (for NODE_REFs) */
+static void count_child_nodes(gpointer key, gpointer value, gpointer user_data)
+{
+ unsigned int *nr_nodes = user_data;
+ XsNode *n = value;
+
+ (*nr_nodes)++;
+
+ if (n->children) {
+ g_hash_table_foreach(n->children, count_child_nodes, nr_nodes);
+ }
+}
+
+/*
+ * Parse one serialized node (see the grammar in xs_impl_serialize())
+ * into *nodep, recursing for children and keeping *nr_nodes updated.
+ * A NODE_REF flag byte resolves to an existing node in an
+ * already-parsed tree (via lookup_tx_node) and takes a reference on
+ * it. Returns 0 or a negative errno; on error any partially-built
+ * node is released.
+ */
+static int consume_node(struct unsave_state *us, XsNode **nodep,
+ unsigned int *nr_nodes)
+{
+ XsNode *n = NULL;
+ uint8_t flags;
+ int ret;
+
+ if (us->l < 1) {
+ return -EINVAL;
+ }
+ flags = us->d[0];
+ us->d++;
+ us->l--;
+
+ if (flags == NODE_REF) {
+ unsigned int tx;
+
+ ret = consume_be32(us, &tx);
+ if (ret) {
+ return ret;
+ }
+
+ n = lookup_tx_node(us, tx);
+ if (!n) {
+ return -EINVAL;
+ }
+ n->ref++;
+ /* The shared subtree counts towards this tree's node total too */
+ if (n->children) {
+ g_hash_table_foreach(n->children, count_child_nodes, nr_nodes);
+ }
+ } else {
+ uint32_t datalen;
+
+ /* Only the two in-tx flags are valid on a real node */
+ if (flags & ~(DELETED_IN_TX | MODIFIED_IN_TX)) {
+ return -EINVAL;
+ }
+ n = xs_node_new();
+
+ if (flags & DELETED_IN_TX) {
+ n->deleted_in_tx = true;
+ }
+ if (flags & MODIFIED_IN_TX) {
+ n->modified_in_tx = true;
+ }
+ ret = consume_be32(us, &datalen);
+ if (ret) {
+ xs_node_unref(n);
+ return -EINVAL;
+ }
+ if (datalen) {
+ if (datalen > us->l) {
+ xs_node_unref(n);
+ return -EINVAL;
+ }
+
+ GByteArray *node_data = g_byte_array_new();
+ g_byte_array_append(node_data, us->d, datalen);
+ us->d += datalen;
+ us->l -= datalen;
+ n->content = node_data;
+
+ /* Base-tree nodes get watches fired on commit; mark them */
+ if (us->root_walk) {
+ n->modified_in_tx = true;
+ }
+ }
+ /* Permissions: asciiz strings terminated by an empty string */
+ while (1) {
+ char *perm = NULL;
+ size_t permlen = 0;
+
+ ret = consume_string(us, &perm, &permlen);
+ if (ret) {
+ xs_node_unref(n);
+ return ret;
+ }
+
+ if (!permlen) {
+ break;
+ }
+
+ n->perms = g_list_append(n->perms, g_strdup(perm));
+ }
+
+ /* Now children */
+ while (1) {
+ size_t childlen;
+ char *childname;
+ char *pathend;
+ XsNode *child = NULL;
+
+ ret = consume_string(us, &childname, &childlen);
+ if (ret) {
+ xs_node_unref(n);
+ return ret;
+ }
+
+ /* Empty name terminates the child list */
+ if (!childlen) {
+ break;
+ }
+
+ /* Extend us->path with "/<childname>" for the recursive parse */
+ pathend = us->path + strlen(us->path);
+ strncat(us->path, "/", sizeof(us->path) - 1);
+ strncat(us->path, childname, sizeof(us->path) - 1);
+
+ ret = consume_node(us, &child, nr_nodes);
+ *pathend = '\0';
+ if (ret) {
+ xs_node_unref(n);
+ return ret;
+ }
+ g_assert(child);
+ xs_node_add_child(n, childname, child);
+ }
+
+ /*
+ * If the node has no data and no children we still want to fire
+ * a watch on it.
+ */
+ if (us->root_walk && !n->children) {
+ n->modified_in_tx = true;
+ }
+ }
+
+ /* Deleted-in-tx nodes don't count towards the tree's node total */
+ if (!n->deleted_in_tx) {
+ (*nr_nodes)++;
+ }
+
+ *nodep = n;
+ return 0;
+}
+
+/*
+ * Parse one serialized tree (tx_id then root node) into transaction
+ * 't'. The tx_id must not exceed the already-parsed last_tx. Returns
+ * 0 or a negative errno.
+ */
+static int consume_tree(struct unsave_state *us, XsTransaction *t)
+{
+ int ret;
+
+ ret = consume_be32(us, &t->tx_id);
+ if (ret) {
+ return ret;
+ }
+
+ if (t->tx_id > us->s->last_tx) {
+ return -EINVAL;
+ }
+
+ /* Start the recursive parse with an empty current path */
+ us->path[0] = '\0';
+
+ return consume_node(us, &t->root, &t->nr_nodes);
+}
+
+/*
+ * Rebuild the XenStore state from a stream produced by
+ * xs_impl_serialize(): the existing tree, transactions and the
+ * guest's watches are dropped first; the base tree is replayed as a
+ * transaction commit so that watches fire for every recreated node;
+ * then open transactions and guest watches are restored. Returns 0
+ * or a negative errno on malformed input.
+ */
+int xs_impl_deserialize(XenstoreImplState *s, GByteArray *bytes,
+ unsigned int dom_id, xs_impl_watch_fn watch_fn,
+ void *watch_opaque)
+{
+ struct unsave_state us;
+ XsTransaction base_t = { 0 };
+ int ret;
+
+ us.s = s;
+ us.bytes = bytes;
+ us.d = bytes->data;
+ us.l = bytes->len;
+
+ /* Discard the old state before rebuilding */
+ xs_impl_reset_watches(s, dom_id);
+ g_hash_table_remove_all(s->transactions);
+
+ xs_node_unref(s->root);
+ s->root = NULL;
+ s->root_tx = s->last_tx = XBT_NULL;
+
+ ret = consume_be32(&us, &s->last_tx);
+ if (ret) {
+ return ret;
+ }
+
+ /*
+ * Consume the base tree into a transaction so that watches can be
+ * fired as we commit it. By setting us.root_walk we cause the nodes
+ * to be marked as 'modified_in_tx' as they are created, so that the
+ * watches are triggered on them.
+ */
+ base_t.dom_id = dom_id;
+ base_t.base_tx = XBT_NULL;
+ us.root_walk = true;
+ ret = consume_tree(&us, &base_t);
+ if (ret) {
+ return ret;
+ }
+ us.root_walk = false;
+
+ /*
+ * Commit the transaction now while the refcount on all nodes is 1.
+ * Note that we haven't yet reinstated the *guest* watches but that's
+ * OK because we don't want the guest to see any changes. Even any
+ * backend nodes which get recreated should be *precisely* as they
+ * were before the migration. Back ends may have been instantiated
+ * already, and will see the frontend magically blink into existence
+ * now (well, from the aio_bh which fires the watches). It's their
+ * responsibility to rebuild everything precisely as it was before.
+ */
+ ret = transaction_commit(s, &base_t);
+ if (ret) {
+ return ret;
+ }
+
+ /* Restore the open transactions, terminated by an XBT_NULL base_tx */
+ while (1) {
+ unsigned int base_tx;
+ XsTransaction *t;
+
+ ret = consume_be32(&us, &base_tx);
+ if (ret) {
+ return ret;
+ }
+ if (base_tx == XBT_NULL) {
+ break;
+ }
+
+ t = g_new0(XsTransaction, 1);
+ t->base_tx = base_tx;
+
+ ret = consume_be32(&us, &t->dom_id);
+ if (!ret) {
+ ret = consume_tree(&us, t);
+ }
+ if (ret) {
+ g_free(t);
+ return ret;
+ }
+ g_assert(t->root);
+ if (t->dom_id) {
+ s->nr_domu_transactions++;
+ }
+ g_hash_table_insert(s->transactions, GINT_TO_POINTER(t->tx_id), t);
+ }
+
+ /* Restore the guest watches, terminated by an empty path */
+ while (1) {
+ char *path, *token;
+ size_t pathlen, toklen;
+
+ ret = consume_string(&us, &path, &pathlen);
+ if (ret) {
+ return ret;
+ }
+ if (!pathlen) {
+ break;
+ }
+
+ ret = consume_string(&us, &token, &toklen);
+ if (ret) {
+ return ret;
+ }
+
+ if (!watch_fn) {
+ continue;
+ }
+
+ /* Re-register without firing (do_xs_impl_watch, not xs_impl_watch) */
+ ret = do_xs_impl_watch(s, dom_id, path, token, watch_fn, watch_opaque);
+ if (ret) {
+ return ret;
+ }
+ }
+
+ /* The stream must have been consumed exactly */
+ if (us.l) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
diff --git a/hw/i386/kvm/xenstore_impl.h b/hw/i386/kvm/xenstore_impl.h
new file mode 100644
index 0000000000..0df2a91aae
--- /dev/null
+++ b/hw/i386/kvm/xenstore_impl.h
@@ -0,0 +1,63 @@
+/*
+ * QEMU Xen emulation: The actual implementation of XenStore
+ *
+ * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_XENSTORE_IMPL_H
+#define QEMU_XENSTORE_IMPL_H
+
+#include "hw/xen/xen_backend_ops.h"
+
+typedef struct XenstoreImplState XenstoreImplState;
+
+XenstoreImplState *xs_impl_create(unsigned int dom_id);
+
+char *xs_perm_as_string(unsigned int perm, unsigned int domid);
+
+/*
+ * These functions return *positive* error numbers. This is a little
+ * unconventional but it helps to keep us honest because there is
+ * also a very limited set of error numbers that they are permitted
+ * to return (those in xsd_errors).
+ */
+
+int xs_impl_read(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path, GByteArray *data);
+int xs_impl_write(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path, GByteArray *data);
+int xs_impl_directory(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path,
+ uint64_t *gencnt, GList **items);
+int xs_impl_transaction_start(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t *tx_id);
+int xs_impl_transaction_end(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, bool commit);
+int xs_impl_rm(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path);
+int xs_impl_get_perms(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path, GList **perms);
+int xs_impl_set_perms(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path, GList *perms);
+
+/* This differs from xs_watch_fn because it has the token */
+typedef void(xs_impl_watch_fn)(void *opaque, const char *path,
+ const char *token);
+int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path,
+ const char *token, xs_impl_watch_fn fn, void *opaque);
+int xs_impl_unwatch(XenstoreImplState *s, unsigned int dom_id,
+ const char *path, const char *token, xs_impl_watch_fn fn,
+ void *opaque);
+int xs_impl_reset_watches(XenstoreImplState *s, unsigned int dom_id);
+
+GByteArray *xs_impl_serialize(XenstoreImplState *s);
+int xs_impl_deserialize(XenstoreImplState *s, GByteArray *bytes,
+ unsigned int dom_id, xs_impl_watch_fn watch_fn,
+ void *watch_opaque);
+
+#endif /* QEMU_XENSTORE_IMPL_H */
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7bebea57e3..1489abf010 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -102,6 +102,11 @@
#include "trace.h"
#include CONFIG_DEVICES
+#ifdef CONFIG_XEN_EMU
+#include "hw/xen/xen-legacy-backend.h"
+#include "hw/xen/xen-bus.h"
+#endif
+
/*
* Helper for setting model-id for CPU models that changed model-id
* depending on QEMU versions up to QEMU 2.4.
@@ -1318,6 +1323,8 @@ void pc_basic_device_init(struct PCMachineState *pcms,
if (pcms->bus) {
pci_create_simple(pcms->bus, -1, "xen-platform");
}
+ xen_bus_init();
+ xen_be_init();
}
#endif
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 4bf15f9c1f..30eedd62a3 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -47,8 +47,6 @@
#include "hw/kvm/clock.h"
#include "hw/sysbus.h"
#include "hw/i2c/smbus_eeprom.h"
-#include "hw/xen/xen-x86.h"
-#include "hw/xen/xen.h"
#include "exec/memory.h"
#include "hw/acpi/acpi.h"
#include "hw/acpi/piix4.h"
@@ -60,6 +58,8 @@
#include <xen/hvm/hvm_info_table.h>
#include "hw/xen/xen_pt.h"
#endif
+#include "hw/xen/xen-x86.h"
+#include "hw/xen/xen.h"
#include "migration/global_state.h"
#include "migration/misc.h"
#include "sysemu/numa.h"
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index e5a1dd19f4..56641a550e 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -18,7 +18,7 @@
#include "hw/irq.h"
#include "hw/hw.h"
#include "hw/i386/apic-msidef.h"
-#include "hw/xen/xen_common.h"
+#include "hw/xen/xen_native.h"
#include "hw/xen/xen-legacy-backend.h"
#include "hw/xen/xen-bus.h"
#include "hw/xen/xen-x86.h"
@@ -52,10 +52,11 @@ static bool xen_in_migration;
/* Compatibility with older version */
-/* This allows QEMU to build on a system that has Xen 4.5 or earlier
- * installed. This here (not in hw/xen/xen_common.h) because xen/hvm/ioreq.h
- * needs to be included before this block and hw/xen/xen_common.h needs to
- * be included before xen/hvm/ioreq.h
+/*
+ * This allows QEMU to build on a system that has Xen 4.5 or earlier installed.
+ * This is here (not in hw/xen/xen_native.h) because xen/hvm/ioreq.h needs to
+ * be included before this block and hw/xen/xen_native.h needs to be included
+ * before xen/hvm/ioreq.h
*/
#ifndef IOREQ_TYPE_VMWARE_PORT
#define IOREQ_TYPE_VMWARE_PORT 3
@@ -761,7 +762,7 @@ static ioreq_t *cpu_get_ioreq(XenIOState *state)
int i;
evtchn_port_t port;
- port = xenevtchn_pending(state->xce_handle);
+ port = qemu_xen_evtchn_pending(state->xce_handle);
if (port == state->bufioreq_local_port) {
timer_mod(state->buffered_io_timer,
BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
@@ -780,7 +781,7 @@ static ioreq_t *cpu_get_ioreq(XenIOState *state)
}
/* unmask the wanted port again */
- xenevtchn_unmask(state->xce_handle, port);
+ qemu_xen_evtchn_unmask(state->xce_handle, port);
/* get the io packet from shared memory */
state->send_vcpu = i;
@@ -1147,7 +1148,7 @@ static void handle_buffered_io(void *opaque)
BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
} else {
timer_del(state->buffered_io_timer);
- xenevtchn_unmask(state->xce_handle, state->bufioreq_local_port);
+ qemu_xen_evtchn_unmask(state->xce_handle, state->bufioreq_local_port);
}
}
@@ -1196,8 +1197,8 @@ static void cpu_handle_ioreq(void *opaque)
}
req->state = STATE_IORESP_READY;
- xenevtchn_notify(state->xce_handle,
- state->ioreq_local_port[state->send_vcpu]);
+ qemu_xen_evtchn_notify(state->xce_handle,
+ state->ioreq_local_port[state->send_vcpu]);
}
}
@@ -1206,7 +1207,7 @@ static void xen_main_loop_prepare(XenIOState *state)
int evtchn_fd = -1;
if (state->xce_handle != NULL) {
- evtchn_fd = xenevtchn_fd(state->xce_handle);
+ evtchn_fd = qemu_xen_evtchn_fd(state->xce_handle);
}
state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io,
@@ -1249,7 +1250,7 @@ static void xen_exit_notifier(Notifier *n, void *data)
xenforeignmemory_unmap_resource(xen_fmem, state->fres);
}
- xenevtchn_close(state->xce_handle);
+ qemu_xen_evtchn_close(state->xce_handle);
xs_daemon_close(state->xenstore);
}
@@ -1397,9 +1398,11 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory)
xen_pfn_t ioreq_pfn;
XenIOState *state;
+ setup_xen_backend_ops();
+
state = g_new0(XenIOState, 1);
- state->xce_handle = xenevtchn_open(NULL, 0);
+ state->xce_handle = qemu_xen_evtchn_open();
if (state->xce_handle == NULL) {
perror("xen: event channel open");
goto err;
@@ -1463,8 +1466,9 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory)
/* FIXME: how about if we overflow the page here? */
for (i = 0; i < max_cpus; i++) {
- rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
- xen_vcpu_eport(state->shared_page, i));
+ rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
+ xen_vcpu_eport(state->shared_page,
+ i));
if (rc == -1) {
error_report("shared evtchn %d bind error %d", i, errno);
goto err;
@@ -1472,8 +1476,8 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory)
state->ioreq_local_port[i] = rc;
}
- rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
- state->bufioreq_remote_port);
+ rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
+ state->bufioreq_remote_port);
if (rc == -1) {
error_report("buffered evtchn bind error %d", errno);
goto err;
diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c
index 1d0879d234..f7d974677d 100644
--- a/hw/i386/xen/xen-mapcache.c
+++ b/hw/i386/xen/xen-mapcache.c
@@ -14,7 +14,7 @@
#include <sys/resource.h>
-#include "hw/xen/xen-legacy-backend.h"
+#include "hw/xen/xen_native.h"
#include "qemu/bitmap.h"
#include "sysemu/runstate.h"
diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c
index 539f7da374..57f1d742c1 100644
--- a/hw/i386/xen/xen_platform.c
+++ b/hw/i386/xen/xen_platform.c
@@ -28,7 +28,6 @@
#include "hw/ide/pci.h"
#include "hw/pci/pci.h"
#include "migration/vmstate.h"
-#include "hw/xen/xen.h"
#include "net/net.h"
#include "trace.h"
#include "sysemu/xen.h"
@@ -38,10 +37,12 @@
#include "qom/object.h"
#ifdef CONFIG_XEN
-#include "hw/xen/xen_common.h"
-#include "hw/xen/xen-legacy-backend.h"
+#include "hw/xen/xen_native.h"
#endif
+/* The rule is that xen_native.h must come first */
+#include "hw/xen/xen.h"
+
//#define DEBUG_PLATFORM
#ifdef DEBUG_PLATFORM
diff --git a/hw/intc/i8259.c b/hw/intc/i8259.c
index 17910f3bcb..bbae2d87f4 100644
--- a/hw/intc/i8259.c
+++ b/hw/intc/i8259.c
@@ -133,7 +133,7 @@ static void pic_set_irq(void *opaque, int irq, int level)
}
#endif
- if (s->elcr & mask) {
+ if (s->ltim || (s->elcr & mask)) {
/* level triggered */
if (level) {
s->irr |= mask;
@@ -167,7 +167,7 @@ static void pic_intack(PICCommonState *s, int irq)
s->isr |= (1 << irq);
}
/* We don't clear a level sensitive interrupt here */
- if (!(s->elcr & (1 << irq))) {
+ if (!s->ltim && !(s->elcr & (1 << irq))) {
s->irr &= ~(1 << irq);
}
pic_update_irq(s);
@@ -224,6 +224,7 @@ static void pic_reset(DeviceState *dev)
PICCommonState *s = PIC_COMMON(dev);
s->elcr = 0;
+ s->ltim = 0;
pic_init_reset(s);
}
@@ -243,10 +244,7 @@ static void pic_ioport_write(void *opaque, hwaddr addr64,
s->init_state = 1;
s->init4 = val & 1;
s->single_mode = val & 2;
- if (val & 0x08) {
- qemu_log_mask(LOG_UNIMP,
- "i8259: level sensitive irq not supported\n");
- }
+ s->ltim = val & 8;
} else if (val & 0x08) {
if (val & 0x04) {
s->poll = 1;
diff --git a/hw/intc/i8259_common.c b/hw/intc/i8259_common.c
index af2e4a2241..c931dc2d07 100644
--- a/hw/intc/i8259_common.c
+++ b/hw/intc/i8259_common.c
@@ -51,7 +51,7 @@ void pic_reset_common(PICCommonState *s)
s->special_fully_nested_mode = 0;
s->init4 = 0;
s->single_mode = 0;
- /* Note: ELCR is not reset */
+ /* Note: ELCR and LTIM are not reset */
}
static int pic_dispatch_pre_save(void *opaque)
@@ -144,6 +144,24 @@ static void pic_print_info(InterruptStatsProvider *obj, Monitor *mon)
s->special_fully_nested_mode);
}
+static bool ltim_state_needed(void *opaque)
+{
+ PICCommonState *s = PIC_COMMON(opaque);
+
+ return !!s->ltim;
+}
+
+static const VMStateDescription vmstate_pic_ltim = {
+ .name = "i8259/ltim",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = ltim_state_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT8(ltim, PICCommonState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static const VMStateDescription vmstate_pic_common = {
.name = "i8259",
.version_id = 1,
@@ -168,6 +186,10 @@ static const VMStateDescription vmstate_pic_common = {
VMSTATE_UINT8(single_mode, PICCommonState),
VMSTATE_UINT8(elcr, PICCommonState),
VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_pic_ltim,
+ NULL
}
};
diff --git a/hw/intc/mips_gic.c b/hw/intc/mips_gic.c
index bda4549925..4bdc3b1bd1 100644
--- a/hw/intc/mips_gic.c
+++ b/hw/intc/mips_gic.c
@@ -439,8 +439,8 @@ static void mips_gic_realize(DeviceState *dev, Error **errp)
}
static Property mips_gic_properties[] = {
- DEFINE_PROP_INT32("num-vp", MIPSGICState, num_vps, 1),
- DEFINE_PROP_INT32("num-irq", MIPSGICState, num_irq, 256),
+ DEFINE_PROP_UINT32("num-vp", MIPSGICState, num_vps, 1),
+ DEFINE_PROP_UINT32("num-irq", MIPSGICState, num_irq, 256),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
index eee04643cb..b466a6abaf 100644
--- a/hw/intc/riscv_aclint.c
+++ b/hw/intc/riscv_aclint.c
@@ -130,7 +130,7 @@ static uint64_t riscv_aclint_mtimer_read(void *opaque, hwaddr addr,
addr < (mtimer->timecmp_base + (mtimer->num_harts << 3))) {
size_t hartid = mtimer->hartid_base +
((addr - mtimer->timecmp_base) >> 3);
- CPUState *cpu = qemu_get_cpu(hartid);
+ CPUState *cpu = cpu_by_arch_id(hartid);
CPURISCVState *env = cpu ? cpu->env_ptr : NULL;
if (!env) {
qemu_log_mask(LOG_GUEST_ERROR,
@@ -173,7 +173,7 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr,
addr < (mtimer->timecmp_base + (mtimer->num_harts << 3))) {
size_t hartid = mtimer->hartid_base +
((addr - mtimer->timecmp_base) >> 3);
- CPUState *cpu = qemu_get_cpu(hartid);
+ CPUState *cpu = cpu_by_arch_id(hartid);
CPURISCVState *env = cpu ? cpu->env_ptr : NULL;
if (!env) {
qemu_log_mask(LOG_GUEST_ERROR,
@@ -231,7 +231,7 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr,
/* Check if timer interrupt is triggered for each hart. */
for (i = 0; i < mtimer->num_harts; i++) {
- CPUState *cpu = qemu_get_cpu(mtimer->hartid_base + i);
+ CPUState *cpu = cpu_by_arch_id(mtimer->hartid_base + i);
CPURISCVState *env = cpu ? cpu->env_ptr : NULL;
if (!env) {
continue;
@@ -292,7 +292,7 @@ static void riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp)
s->timecmp = g_new0(uint64_t, s->num_harts);
/* Claim timer interrupt bits */
for (i = 0; i < s->num_harts; i++) {
- RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(s->hartid_base + i));
+ RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base + i));
if (riscv_cpu_claim_interrupts(cpu, MIP_MTIP) < 0) {
error_report("MTIP already claimed");
exit(1);
@@ -372,7 +372,7 @@ DeviceState *riscv_aclint_mtimer_create(hwaddr addr, hwaddr size,
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
for (i = 0; i < num_harts; i++) {
- CPUState *cpu = qemu_get_cpu(hartid_base + i);
+ CPUState *cpu = cpu_by_arch_id(hartid_base + i);
RISCVCPU *rvcpu = RISCV_CPU(cpu);
CPURISCVState *env = cpu ? cpu->env_ptr : NULL;
riscv_aclint_mtimer_callback *cb =
@@ -407,7 +407,7 @@ static uint64_t riscv_aclint_swi_read(void *opaque, hwaddr addr,
if (addr < (swi->num_harts << 2)) {
size_t hartid = swi->hartid_base + (addr >> 2);
- CPUState *cpu = qemu_get_cpu(hartid);
+ CPUState *cpu = cpu_by_arch_id(hartid);
CPURISCVState *env = cpu ? cpu->env_ptr : NULL;
if (!env) {
qemu_log_mask(LOG_GUEST_ERROR,
@@ -430,7 +430,7 @@ static void riscv_aclint_swi_write(void *opaque, hwaddr addr, uint64_t value,
if (addr < (swi->num_harts << 2)) {
size_t hartid = swi->hartid_base + (addr >> 2);
- CPUState *cpu = qemu_get_cpu(hartid);
+ CPUState *cpu = cpu_by_arch_id(hartid);
CPURISCVState *env = cpu ? cpu->env_ptr : NULL;
if (!env) {
qemu_log_mask(LOG_GUEST_ERROR,
@@ -545,7 +545,7 @@ DeviceState *riscv_aclint_swi_create(hwaddr addr, uint32_t hartid_base,
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
for (i = 0; i < num_harts; i++) {
- CPUState *cpu = qemu_get_cpu(hartid_base + i);
+ CPUState *cpu = cpu_by_arch_id(hartid_base + i);
RISCVCPU *rvcpu = RISCV_CPU(cpu);
qdev_connect_gpio_out(dev, i,
diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c
index cfd007e629..cd7efc4ad4 100644
--- a/hw/intc/riscv_aplic.c
+++ b/hw/intc/riscv_aplic.c
@@ -833,7 +833,7 @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp)
/* Claim the CPU interrupt to be triggered by this APLIC */
for (i = 0; i < aplic->num_harts; i++) {
- RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(aplic->hartid_base + i));
+ RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(aplic->hartid_base + i));
if (riscv_cpu_claim_interrupts(cpu,
(aplic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) {
error_report("%s already claimed",
@@ -966,7 +966,7 @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size,
if (!msimode) {
for (i = 0; i < num_harts; i++) {
- CPUState *cpu = qemu_get_cpu(hartid_base + i);
+ CPUState *cpu = cpu_by_arch_id(hartid_base + i);
qdev_connect_gpio_out_named(dev, NULL, i,
qdev_get_gpio_in(DEVICE(cpu),
diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c
index 4d4d5b50ca..fea3385b51 100644
--- a/hw/intc/riscv_imsic.c
+++ b/hw/intc/riscv_imsic.c
@@ -316,8 +316,8 @@ static const MemoryRegionOps riscv_imsic_ops = {
static void riscv_imsic_realize(DeviceState *dev, Error **errp)
{
RISCVIMSICState *imsic = RISCV_IMSIC(dev);
- RISCVCPU *rcpu = RISCV_CPU(qemu_get_cpu(imsic->hartid));
- CPUState *cpu = qemu_get_cpu(imsic->hartid);
+ RISCVCPU *rcpu = RISCV_CPU(cpu_by_arch_id(imsic->hartid));
+ CPUState *cpu = cpu_by_arch_id(imsic->hartid);
CPURISCVState *env = cpu ? cpu->env_ptr : NULL;
imsic->num_eistate = imsic->num_pages * imsic->num_irqs;
@@ -413,7 +413,7 @@ DeviceState *riscv_imsic_create(hwaddr addr, uint32_t hartid, bool mmode,
uint32_t num_pages, uint32_t num_ids)
{
DeviceState *dev = qdev_new(TYPE_RISCV_IMSIC);
- CPUState *cpu = qemu_get_cpu(hartid);
+ CPUState *cpu = cpu_by_arch_id(hartid);
uint32_t i;
assert(!(addr & (IMSIC_MMIO_PAGE_SZ - 1)));
diff --git a/hw/isa/i82378.c b/hw/isa/i82378.c
index 233059c6dc..5432ab5065 100644
--- a/hw/isa/i82378.c
+++ b/hw/isa/i82378.c
@@ -47,6 +47,12 @@ static const VMStateDescription vmstate_i82378 = {
},
};
+static void i82378_request_out0_irq(void *opaque, int irq, int level)
+{
+ I82378State *s = opaque;
+ qemu_set_irq(s->cpu_intr, level);
+}
+
static void i82378_request_pic_irq(void *opaque, int irq, int level)
{
DeviceState *dev = opaque;
@@ -88,7 +94,9 @@ static void i82378_realize(PCIDevice *pci, Error **errp)
*/
/* 2 82C59 (irq) */
- s->isa_irqs_in = i8259_init(isabus, s->cpu_intr);
+ s->isa_irqs_in = i8259_init(isabus,
+ qemu_allocate_irq(i82378_request_out0_irq,
+ s, 0));
isa_bus_register_input_irqs(isabus, s->isa_irqs_in);
/* 1 82C54 (pit) */
diff --git a/hw/isa/trace-events b/hw/isa/trace-events
index c4567a9b47..1816e8307a 100644
--- a/hw/isa/trace-events
+++ b/hw/isa/trace-events
@@ -16,6 +16,7 @@ apm_io_write(uint8_t addr, uint8_t val) "write addr=0x%x val=0x%02x"
# vt82c686.c
via_isa_write(uint32_t addr, uint32_t val, int len) "addr 0x%x val 0x%x len 0x%x"
+via_pm_read(uint32_t addr, uint32_t val, int len) "addr 0x%x val 0x%x len 0x%x"
via_pm_write(uint32_t addr, uint32_t val, int len) "addr 0x%x val 0x%x len 0x%x"
via_pm_io_read(uint32_t addr, uint32_t val, int len) "addr 0x%x val 0x%x len 0x%x"
via_pm_io_write(uint32_t addr, uint32_t val, int len) "addr 0x%x val 0x%x len 0x%x"
diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c
index f4c40965cd..ca89119ce0 100644
--- a/hw/isa/vt82c686.c
+++ b/hw/isa/vt82c686.c
@@ -554,7 +554,7 @@ struct ViaISAState {
PCIIDEState ide;
UHCIState uhci[2];
ViaPMState pm;
- PCIDevice ac97;
+ ViaAC97State ac97;
PCIDevice mc97;
};
@@ -598,15 +598,63 @@ void via_isa_set_irq(PCIDevice *d, int n, int level)
qemu_set_irq(s->isa_irqs_in[n], level);
}
+static void via_isa_request_i8259_irq(void *opaque, int irq, int level)
+{
+ ViaISAState *s = opaque;
+ qemu_set_irq(s->cpu_intr, level);
+}
+
+static int via_isa_get_pci_irq(const ViaISAState *s, int irq_num)
+{
+ switch (irq_num) {
+ case 0:
+ return s->dev.config[0x55] >> 4;
+ case 1:
+ return s->dev.config[0x56] & 0xf;
+ case 2:
+ return s->dev.config[0x56] >> 4;
+ case 3:
+ return s->dev.config[0x57] >> 4;
+ }
+ return 0;
+}
+
+static void via_isa_set_pci_irq(void *opaque, int irq_num, int level)
+{
+ ViaISAState *s = opaque;
+ PCIBus *bus = pci_get_bus(&s->dev);
+ int i, pic_level, pic_irq = via_isa_get_pci_irq(s, irq_num);
+
+ /* IRQ 0: disabled, IRQ 2,8,13: reserved */
+ if (!pic_irq) {
+ return;
+ }
+ if (unlikely(pic_irq == 2 || pic_irq == 8 || pic_irq == 13)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Invalid ISA IRQ routing");
+ }
+
+ /* The pic level is the logical OR of all the PCI irqs mapped to it. */
+ pic_level = 0;
+ for (i = 0; i < PCI_NUM_PINS; i++) {
+ if (pic_irq == via_isa_get_pci_irq(s, i)) {
+ pic_level |= pci_bus_get_irq_level(bus, i);
+ }
+ }
+ /* Now we change the pic irq level according to the via irq mappings. */
+ qemu_set_irq(s->isa_irqs_in[pic_irq], pic_level);
+}
+
static void via_isa_realize(PCIDevice *d, Error **errp)
{
ViaISAState *s = VIA_ISA(d);
DeviceState *dev = DEVICE(d);
PCIBus *pci_bus = pci_get_bus(d);
+ qemu_irq *isa_irq;
ISABus *isa_bus;
int i;
qdev_init_gpio_out(dev, &s->cpu_intr, 1);
+ isa_irq = qemu_allocate_irqs(via_isa_request_i8259_irq, s, 1);
isa_bus = isa_bus_new(dev, pci_address_space(d), pci_address_space_io(d),
errp);
@@ -614,11 +662,13 @@ static void via_isa_realize(PCIDevice *d, Error **errp)
return;
}
- s->isa_irqs_in = i8259_init(isa_bus, s->cpu_intr);
+ s->isa_irqs_in = i8259_init(isa_bus, *isa_irq);
isa_bus_register_input_irqs(isa_bus, s->isa_irqs_in);
i8254_pit_init(isa_bus, 0x40, 0, NULL);
i8257_dma_init(isa_bus, 0);
+ qdev_init_gpio_in_named(dev, via_isa_set_pci_irq, "pirq", PCI_NUM_PINS);
+
/* RTC */
qdev_prop_set_int32(DEVICE(&s->rtc), "base_year", 2000);
if (!qdev_realize(DEVICE(&s->rtc), BUS(isa_bus), errp)) {
diff --git a/hw/mips/boston.c b/hw/mips/boston.c
index a9d87f3437..21ad844519 100644
--- a/hw/mips/boston.c
+++ b/hw/mips/boston.c
@@ -702,7 +702,7 @@ static void boston_mach_init(MachineState *machine)
object_initialize_child(OBJECT(machine), "cps", &s->cps, TYPE_MIPS_CPS);
object_property_set_str(OBJECT(&s->cps), "cpu-type", machine->cpu_type,
&error_fatal);
- object_property_set_int(OBJECT(&s->cps), "num-vp", machine->smp.cpus,
+ object_property_set_uint(OBJECT(&s->cps), "num-vp", machine->smp.cpus,
&error_fatal);
qdev_connect_clock_in(DEVICE(&s->cps), "clk-in",
qdev_get_clock_out(dev, "cpu-refclk"));
diff --git a/hw/mips/cps.c b/hw/mips/cps.c
index 2b436700ce..2b5269ebf1 100644
--- a/hw/mips/cps.c
+++ b/hw/mips/cps.c
@@ -66,20 +66,17 @@ static bool cpu_mips_itu_supported(CPUMIPSState *env)
static void mips_cps_realize(DeviceState *dev, Error **errp)
{
MIPSCPSState *s = MIPS_CPS(dev);
- CPUMIPSState *env;
- MIPSCPU *cpu;
- int i;
target_ulong gcr_base;
bool itu_present = false;
- bool saar_present = false;
if (!clock_get(s->clock)) {
error_setg(errp, "CPS input clock is not connected to an output clock");
return;
}
- for (i = 0; i < s->num_vp; i++) {
- cpu = MIPS_CPU(object_new(s->cpu_type));
+ for (int i = 0; i < s->num_vp; i++) {
+ MIPSCPU *cpu = MIPS_CPU(object_new(s->cpu_type));
+ CPUMIPSState *env = &cpu->env;
/* All VPs are halted on reset. Leave powering up to CPC. */
if (!object_property_set_bool(OBJECT(cpu), "start-powered-off", true,
@@ -97,7 +94,6 @@ static void mips_cps_realize(DeviceState *dev, Error **errp)
cpu_mips_irq_init_cpu(cpu);
cpu_mips_clock_init(cpu);
- env = &cpu->env;
if (cpu_mips_itu_supported(env)) {
itu_present = true;
/* Attach ITC Tag to the VP */
@@ -107,22 +103,15 @@ static void mips_cps_realize(DeviceState *dev, Error **errp)
qemu_register_reset(main_cpu_reset, cpu);
}
- cpu = MIPS_CPU(first_cpu);
- env = &cpu->env;
- saar_present = (bool)env->saarp;
-
/* Inter-Thread Communication Unit */
if (itu_present) {
object_initialize_child(OBJECT(dev), "itu", &s->itu, TYPE_MIPS_ITU);
- object_property_set_int(OBJECT(&s->itu), "num-fifo", 16,
+ object_property_set_link(OBJECT(&s->itu), "cpu[0]",
+ OBJECT(first_cpu), &error_abort);
+ object_property_set_uint(OBJECT(&s->itu), "num-fifo", 16,
&error_abort);
- object_property_set_int(OBJECT(&s->itu), "num-semaphores", 16,
+ object_property_set_uint(OBJECT(&s->itu), "num-semaphores", 16,
&error_abort);
- object_property_set_bool(OBJECT(&s->itu), "saar-present", saar_present,
- &error_abort);
- if (saar_present) {
- s->itu.saar = &env->CP0_SAAR;
- }
if (!sysbus_realize(SYS_BUS_DEVICE(&s->itu), errp)) {
return;
}
@@ -133,7 +122,7 @@ static void mips_cps_realize(DeviceState *dev, Error **errp)
/* Cluster Power Controller */
object_initialize_child(OBJECT(dev), "cpc", &s->cpc, TYPE_MIPS_CPC);
- object_property_set_int(OBJECT(&s->cpc), "num-vp", s->num_vp,
+ object_property_set_uint(OBJECT(&s->cpc), "num-vp", s->num_vp,
&error_abort);
object_property_set_int(OBJECT(&s->cpc), "vp-start-running", 1,
&error_abort);
@@ -146,9 +135,9 @@ static void mips_cps_realize(DeviceState *dev, Error **errp)
/* Global Interrupt Controller */
object_initialize_child(OBJECT(dev), "gic", &s->gic, TYPE_MIPS_GIC);
- object_property_set_int(OBJECT(&s->gic), "num-vp", s->num_vp,
+ object_property_set_uint(OBJECT(&s->gic), "num-vp", s->num_vp,
&error_abort);
- object_property_set_int(OBJECT(&s->gic), "num-irq", 128,
+ object_property_set_uint(OBJECT(&s->gic), "num-irq", 128,
&error_abort);
if (!sysbus_realize(SYS_BUS_DEVICE(&s->gic), errp)) {
return;
@@ -158,10 +147,10 @@ static void mips_cps_realize(DeviceState *dev, Error **errp)
sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->gic), 0));
/* Global Configuration Registers */
- gcr_base = env->CP0_CMGCRBase << 4;
+ gcr_base = MIPS_CPU(first_cpu)->env.CP0_CMGCRBase << 4;
object_initialize_child(OBJECT(dev), "gcr", &s->gcr, TYPE_MIPS_GCR);
- object_property_set_int(OBJECT(&s->gcr), "num-vp", s->num_vp,
+ object_property_set_uint(OBJECT(&s->gcr), "num-vp", s->num_vp,
&error_abort);
object_property_set_int(OBJECT(&s->gcr), "gcr-rev", 0x800,
&error_abort);
diff --git a/hw/mips/malta.c b/hw/mips/malta.c
index ec172b111a..af9021316d 100644
--- a/hw/mips/malta.c
+++ b/hw/mips/malta.c
@@ -1066,7 +1066,7 @@ static void create_cps(MachineState *ms, MaltaState *s,
object_initialize_child(OBJECT(s), "cps", &s->cps, TYPE_MIPS_CPS);
object_property_set_str(OBJECT(&s->cps), "cpu-type", ms->cpu_type,
&error_fatal);
- object_property_set_int(OBJECT(&s->cps), "num-vp", ms->smp.cpus,
+ object_property_set_uint(OBJECT(&s->cps), "num-vp", ms->smp.cpus,
&error_fatal);
qdev_connect_clock_in(DEVICE(&s->cps), "clk-in", s->cpuclk);
sysbus_realize(SYS_BUS_DEVICE(&s->cps), &error_fatal);
diff --git a/hw/misc/edu.c b/hw/misc/edu.c
index e935c418d4..a1f8bc77e7 100644
--- a/hw/misc/edu.c
+++ b/hw/misc/edu.c
@@ -267,6 +267,8 @@ static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val,
case 0x20:
if (val & EDU_STATUS_IRQFACT) {
qatomic_or(&edu->status, EDU_STATUS_IRQFACT);
+ /* Order check of the COMPUTING flag after setting IRQFACT. */
+ smp_mb__after_rmw();
} else {
qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT);
}
@@ -349,6 +351,9 @@ static void *edu_fact_thread(void *opaque)
qemu_mutex_unlock(&edu->thr_mutex);
qatomic_and(&edu->status, ~EDU_STATUS_COMPUTING);
+ /* Clear COMPUTING flag before checking IRQFACT. */
+ smp_mb__after_rmw();
+
if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) {
qemu_mutex_lock_iothread();
edu_raise_irq(edu, FACT_IRQ);
diff --git a/hw/misc/mips_cmgcr.c b/hw/misc/mips_cmgcr.c
index 3c8b37f700..66eb11662c 100644
--- a/hw/misc/mips_cmgcr.c
+++ b/hw/misc/mips_cmgcr.c
@@ -212,7 +212,7 @@ static const VMStateDescription vmstate_mips_gcr = {
};
static Property mips_gcr_properties[] = {
- DEFINE_PROP_INT32("num-vp", MIPSGCRState, num_vps, 1),
+ DEFINE_PROP_UINT32("num-vp", MIPSGCRState, num_vps, 1),
DEFINE_PROP_INT32("gcr-rev", MIPSGCRState, gcr_rev, 0x800),
DEFINE_PROP_UINT64("gcr-base", MIPSGCRState, gcr_base, GCR_BASE_ADDR),
DEFINE_PROP_LINK("gic", MIPSGCRState, gic_mr, TYPE_MEMORY_REGION,
diff --git a/hw/misc/mips_itu.c b/hw/misc/mips_itu.c
index badef5c214..0eda302db4 100644
--- a/hw/misc/mips_itu.c
+++ b/hw/misc/mips_itu.c
@@ -93,10 +93,10 @@ void itc_reconfigure(MIPSITUState *tag)
uint64_t size = (1 * KiB) + (am[1] & ITC_AM1_ADDR_MASK_MASK);
bool is_enabled = (am[0] & ITC_AM0_EN_MASK) != 0;
- if (tag->saar_present) {
- address = ((*(uint64_t *) tag->saar) & 0xFFFFFFFFE000ULL) << 4;
- size = 1ULL << ((*(uint64_t *) tag->saar >> 1) & 0x1f);
- is_enabled = *(uint64_t *) tag->saar & 1;
+ if (tag->saar) {
+ address = (tag->saar[0] & 0xFFFFFFFFE000ULL) << 4;
+ size = 1ULL << ((tag->saar[0] >> 1) & 0x1f);
+ is_enabled = tag->saar[0] & 1;
}
memory_region_transaction_begin();
@@ -157,7 +157,7 @@ static inline ITCView get_itc_view(hwaddr addr)
static inline int get_cell_stride_shift(const MIPSITUState *s)
{
/* Minimum interval (for EntryGain = 0) is 128 B */
- if (s->saar_present) {
+ if (s->saar) {
return 7 + ((s->icr0 >> ITC_ICR0_BLK_GRAIN) &
ITC_ICR0_BLK_GRAIN_MASK);
} else {
@@ -515,6 +515,7 @@ static void mips_itu_init(Object *obj)
static void mips_itu_realize(DeviceState *dev, Error **errp)
{
MIPSITUState *s = MIPS_ITU(dev);
+ CPUMIPSState *env;
if (s->num_fifo > ITC_FIFO_NUM_MAX) {
error_setg(errp, "Exceed maximum number of FIFO cells: %d",
@@ -526,6 +527,15 @@ static void mips_itu_realize(DeviceState *dev, Error **errp)
s->num_semaphores);
return;
}
+ if (!s->cpu0) {
+ error_setg(errp, "Missing 'cpu[0]' property");
+ return;
+ }
+
+ env = &s->cpu0->env;
+ if (env->saarp) {
+ s->saar = env->CP0_SAAR;
+ }
s->cell = g_new(ITCStorageCell, get_num_cells(s));
}
@@ -534,8 +544,8 @@ static void mips_itu_reset(DeviceState *dev)
{
MIPSITUState *s = MIPS_ITU(dev);
- if (s->saar_present) {
- *(uint64_t *) s->saar = 0x11 << 1;
+ if (s->saar) {
+ s->saar[0] = 0x11 << 1;
s->icr0 = get_num_cells(s) << ITC_ICR0_CELL_NUM;
} else {
s->ITCAddressMap[0] = 0;
@@ -549,11 +559,11 @@ static void mips_itu_reset(DeviceState *dev)
}
static Property mips_itu_properties[] = {
- DEFINE_PROP_INT32("num-fifo", MIPSITUState, num_fifo,
+ DEFINE_PROP_UINT32("num-fifo", MIPSITUState, num_fifo,
ITC_FIFO_NUM_MAX),
- DEFINE_PROP_INT32("num-semaphores", MIPSITUState, num_semaphores,
+ DEFINE_PROP_UINT32("num-semaphores", MIPSITUState, num_semaphores,
ITC_SEMAPH_NUM_MAX),
- DEFINE_PROP_BOOL("saar-present", MIPSITUState, saar_present, false),
+ DEFINE_PROP_LINK("cpu[0]", MIPSITUState, cpu0, TYPE_MIPS_CPU, MIPSCPU *),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c
index 7d92c2d022..9bbf6599fc 100644
--- a/hw/net/xen_nic.c
+++ b/hw/net/xen_nic.c
@@ -145,7 +145,7 @@ static void net_tx_packets(struct XenNetDev *netdev)
continue;
}
- if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) {
+ if ((txreq.offset + txreq.size) > XEN_PAGE_SIZE) {
xen_pv_printf(&netdev->xendev, 0, "error: page crossing\n");
net_tx_error(netdev, &txreq, rc);
continue;
@@ -171,7 +171,7 @@ static void net_tx_packets(struct XenNetDev *netdev)
if (txreq.flags & NETTXF_csum_blank) {
/* have read-only mapping -> can't fill checksum in-place */
if (!tmpbuf) {
- tmpbuf = g_malloc(XC_PAGE_SIZE);
+ tmpbuf = g_malloc(XEN_PAGE_SIZE);
}
memcpy(tmpbuf, page + txreq.offset, txreq.size);
net_checksum_calculate(tmpbuf, txreq.size, CSUM_ALL);
@@ -181,7 +181,7 @@ static void net_tx_packets(struct XenNetDev *netdev)
qemu_send_packet(qemu_get_queue(netdev->nic),
page + txreq.offset, txreq.size);
}
- xen_be_unmap_grant_ref(&netdev->xendev, page);
+ xen_be_unmap_grant_ref(&netdev->xendev, page, txreq.gref);
net_tx_response(netdev, &txreq, NETIF_RSP_OKAY);
}
if (!netdev->tx_work) {
@@ -243,9 +243,9 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size
if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) {
return 0;
}
- if (size > XC_PAGE_SIZE - NET_IP_ALIGN) {
+ if (size > XEN_PAGE_SIZE - NET_IP_ALIGN) {
xen_pv_printf(&netdev->xendev, 0, "packet too big (%lu > %ld)",
- (unsigned long)size, XC_PAGE_SIZE - NET_IP_ALIGN);
+ (unsigned long)size, XEN_PAGE_SIZE - NET_IP_ALIGN);
return -1;
}
@@ -261,7 +261,7 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size
return -1;
}
memcpy(page + NET_IP_ALIGN, buf, size);
- xen_be_unmap_grant_ref(&netdev->xendev, page);
+ xen_be_unmap_grant_ref(&netdev->xendev, page, rxreq.gref);
net_rx_response(netdev, &rxreq, NETIF_RSP_OKAY, NET_IP_ALIGN, size, 0);
return size;
@@ -343,12 +343,13 @@ static int net_connect(struct XenLegacyDevice *xendev)
netdev->rx_ring_ref,
PROT_READ | PROT_WRITE);
if (!netdev->rxs) {
- xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs);
+ xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs,
+ netdev->tx_ring_ref);
netdev->txs = NULL;
return -1;
}
- BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XC_PAGE_SIZE);
- BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XC_PAGE_SIZE);
+ BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XEN_PAGE_SIZE);
+ BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XEN_PAGE_SIZE);
xen_be_bind_evtchn(&netdev->xendev);
@@ -368,11 +369,13 @@ static void net_disconnect(struct XenLegacyDevice *xendev)
xen_pv_unbind_evtchn(&netdev->xendev);
if (netdev->txs) {
- xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs);
+ xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs,
+ netdev->tx_ring_ref);
netdev->txs = NULL;
}
if (netdev->rxs) {
- xen_be_unmap_grant_ref(&netdev->xendev, netdev->rxs);
+ xen_be_unmap_grant_ref(&netdev->xendev, netdev->rxs,
+ netdev->rx_ring_ref);
netdev->rxs = NULL;
}
}
diff --git a/hw/pci-host/mv64361.c b/hw/pci-host/mv64361.c
index 298564f1f5..19e8031a3f 100644
--- a/hw/pci-host/mv64361.c
+++ b/hw/pci-host/mv64361.c
@@ -873,10 +873,6 @@ static void mv64361_realize(DeviceState *dev, Error **errp)
}
sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->cpu_irq);
qdev_init_gpio_in_named(dev, mv64361_gpp_irq, "gpp", 32);
- /* FIXME: PCI IRQ connections may be board specific */
- for (i = 0; i < PCI_NUM_PINS; i++) {
- s->pci[1].irq[i] = qdev_get_gpio_in_named(dev, "gpp", 12 + i);
- }
}
static void mv64361_reset(DeviceState *dev)
diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c
index 7cc375df05..f1650be5ee 100644
--- a/hw/ppc/pegasos2.c
+++ b/hw/ppc/pegasos2.c
@@ -73,6 +73,8 @@ struct Pegasos2MachineState {
MachineState parent_obj;
PowerPCCPU *cpu;
DeviceState *mv;
+ qemu_irq mv_pirq[PCI_NUM_PINS];
+ qemu_irq via_pirq[PCI_NUM_PINS];
Vof *vof;
void *fdt_blob;
uint64_t kernel_addr;
@@ -95,6 +97,15 @@ static void pegasos2_cpu_reset(void *opaque)
}
}
+static void pegasos2_pci_irq(void *opaque, int n, int level)
+{
+ Pegasos2MachineState *pm = opaque;
+
+ /* PCI interrupt lines are connected to both MV64361 and VT8231 */
+ qemu_set_irq(pm->mv_pirq[n], level);
+ qemu_set_irq(pm->via_pirq[n], level);
+}
+
static void pegasos2_init(MachineState *machine)
{
Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine);
@@ -106,7 +117,7 @@ static void pegasos2_init(MachineState *machine)
I2CBus *i2c_bus;
const char *fwname = machine->firmware ?: PROM_FILENAME;
char *filename;
- int sz;
+ int i, sz;
uint8_t *spd_data;
/* init CPU */
@@ -156,11 +167,18 @@ static void pegasos2_init(MachineState *machine)
/* Marvell Discovery II system controller */
pm->mv = DEVICE(sysbus_create_simple(TYPE_MV64361, -1,
qdev_get_gpio_in(DEVICE(pm->cpu), PPC6xx_INPUT_INT)));
+ for (i = 0; i < PCI_NUM_PINS; i++) {
+ pm->mv_pirq[i] = qdev_get_gpio_in_named(pm->mv, "gpp", 12 + i);
+ }
pci_bus = mv64361_get_pci_bus(pm->mv, 1);
+ pci_bus_irqs(pci_bus, pegasos2_pci_irq, pm, PCI_NUM_PINS);
/* VIA VT8231 South Bridge (multifunction PCI device) */
via = OBJECT(pci_create_simple_multifunction(pci_bus, PCI_DEVFN(12, 0),
true, TYPE_VT8231_ISA));
+ for (i = 0; i < PCI_NUM_PINS; i++) {
+ pm->via_pirq[i] = qdev_get_gpio_in_named(DEVICE(via), "pirq", i);
+ }
object_property_add_alias(OBJECT(machine), "rtc-time",
object_resolve_path_component(via, "rtc"),
"date");
@@ -267,6 +285,12 @@ static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason)
PCI_INTERRUPT_LINE, 2, 0x9);
pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) |
0x50, 1, 0x2);
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) |
+ 0x55, 1, 0x90);
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) |
+ 0x56, 1, 0x99);
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) |
+ 0x57, 1, 0x90);
pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) |
PCI_INTERRUPT_LINE, 2, 0x109);
diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index 4550b3b938..6528ebfa3a 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -44,6 +44,7 @@ config RISCV_VIRT
select VIRTIO_MMIO
select FW_CFG_DMA
select PLATFORM_BUS
+ select ACPI
config SHAKTI_C
bool
diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
index ab6cae57ea..2f7ee81be3 100644
--- a/hw/riscv/meson.build
+++ b/hw/riscv/meson.build
@@ -9,5 +9,6 @@ riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c'))
riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c'))
riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c'))
riscv_ss.add(when: 'CONFIG_MICROCHIP_PFSOC', if_true: files('microchip_pfsoc.c'))
+riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c'))
hw_arch += {'riscv': riscv_ss}
diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c
new file mode 100644
index 0000000000..82da0a238c
--- /dev/null
+++ b/hw/riscv/virt-acpi-build.c
@@ -0,0 +1,416 @@
+/*
+ * Support for generating ACPI tables and passing them to Guests
+ *
+ * RISC-V virt ACPI generation
+ *
+ * Copyright (C) 2008-2010 Kevin O'Connor <kevin@koconnor.net>
+ * Copyright (C) 2006 Fabrice Bellard
+ * Copyright (C) 2013 Red Hat Inc
+ * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD.
+ * Copyright (C) 2021-2023 Ventana Micro Systems Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/acpi/acpi-defs.h"
+#include "hw/acpi/acpi.h"
+#include "hw/acpi/aml-build.h"
+#include "hw/acpi/utils.h"
+#include "qapi/error.h"
+#include "sysemu/reset.h"
+#include "migration/vmstate.h"
+#include "hw/riscv/virt.h"
+#include "hw/riscv/numa.h"
+#include "hw/intc/riscv_aclint.h"
+
+#define ACPI_BUILD_TABLE_SIZE 0x20000
+
+typedef struct AcpiBuildState {
+ /* Copy of table in RAM (for patching) */
+ MemoryRegion *table_mr;
+ MemoryRegion *rsdp_mr;
+ MemoryRegion *linker_mr;
+ /* Is table patched? */
+ bool patched;
+} AcpiBuildState;
+
+static void acpi_align_size(GArray *blob, unsigned align)
+{
+ /*
+ * Align size to multiple of given size. This reduces the chance
+ * we need to change size in the future (breaking cross version migration).
+ */
+ g_array_set_size(blob, ROUND_UP(acpi_data_len(blob), align));
+}
+
+static void riscv_acpi_madt_add_rintc(uint32_t uid,
+ const CPUArchIdList *arch_ids,
+ GArray *entry)
+{
+ uint64_t hart_id = arch_ids->cpus[uid].arch_id;
+
+ build_append_int_noprefix(entry, 0x18, 1); /* Type */
+ build_append_int_noprefix(entry, 20, 1); /* Length */
+ build_append_int_noprefix(entry, 1, 1); /* Version */
+ build_append_int_noprefix(entry, 0, 1); /* Reserved */
+ build_append_int_noprefix(entry, 0x1, 4); /* Flags */
+ build_append_int_noprefix(entry, hart_id, 8); /* Hart ID */
+ build_append_int_noprefix(entry, uid, 4); /* ACPI Processor UID */
+}
+
+static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s)
+{
+ MachineClass *mc = MACHINE_GET_CLASS(s);
+ MachineState *ms = MACHINE(s);
+ const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms);
+
+ for (int i = 0; i < arch_ids->len; i++) {
+ Aml *dev;
+ GArray *madt_buf = g_array_new(0, 1, 1);
+
+ dev = aml_device("C%.03X", i);
+ aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007")));
+ aml_append(dev, aml_name_decl("_UID",
+ aml_int(arch_ids->cpus[i].arch_id)));
+
+ /* build _MAT object */
+ riscv_acpi_madt_add_rintc(i, arch_ids, madt_buf);
+ aml_append(dev, aml_name_decl("_MAT",
+ aml_buffer(madt_buf->len,
+ (uint8_t *)madt_buf->data)));
+ g_array_free(madt_buf, true);
+
+ aml_append(scope, dev);
+ }
+}
+
+static void acpi_dsdt_add_fw_cfg(Aml *scope, const MemMapEntry *fw_cfg_memmap)
+{
+ Aml *dev = aml_device("FWCF");
+ aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0002")));
+
+ /* device present, functioning, decoding, not shown in UI */
+ aml_append(dev, aml_name_decl("_STA", aml_int(0xB)));
+ aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
+
+ Aml *crs = aml_resource_template();
+ aml_append(crs, aml_memory32_fixed(fw_cfg_memmap->base,
+ fw_cfg_memmap->size, AML_READ_WRITE));
+ aml_append(dev, aml_name_decl("_CRS", crs));
+ aml_append(scope, dev);
+}
+
+/* RHCT Node[N] starts at offset 56 */
+#define RHCT_NODE_ARRAY_OFFSET 56
+
+/*
+ * ACPI spec, Revision 6.5+
+ * 5.2.36 RISC-V Hart Capabilities Table (RHCT)
+ * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/16
+ * https://drive.google.com/file/d/1nP3nFiH4jkPMp6COOxP6123DCZKR-tia/view
+ */
+static void build_rhct(GArray *table_data,
+ BIOSLinker *linker,
+ RISCVVirtState *s)
+{
+ MachineClass *mc = MACHINE_GET_CLASS(s);
+ MachineState *ms = MACHINE(s);
+ const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms);
+ size_t len, aligned_len;
+ uint32_t isa_offset, num_rhct_nodes;
+ RISCVCPU *cpu;
+ char *isa;
+
+ AcpiTable table = { .sig = "RHCT", .rev = 1, .oem_id = s->oem_id,
+ .oem_table_id = s->oem_table_id };
+
+ acpi_table_begin(&table, table_data);
+
+ build_append_int_noprefix(table_data, 0x0, 4); /* Reserved */
+
+ /* Time Base Frequency */
+ build_append_int_noprefix(table_data,
+ RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, 8);
+
+ /* ISA + N hart info */
+ num_rhct_nodes = 1 + ms->smp.cpus;
+
+ /* Number of RHCT nodes */
+ build_append_int_noprefix(table_data, num_rhct_nodes, 4);
+
+ /* Offset to the RHCT node array */
+ build_append_int_noprefix(table_data, RHCT_NODE_ARRAY_OFFSET, 4);
+
+ /* ISA String Node */
+ isa_offset = table_data->len - table.table_offset;
+ build_append_int_noprefix(table_data, 0, 2); /* Type 0 */
+
+ cpu = &s->soc[0].harts[0];
+ isa = riscv_isa_string(cpu);
+ len = 8 + strlen(isa) + 1;
+ aligned_len = (len % 2) ? (len + 1) : len;
+
+ build_append_int_noprefix(table_data, aligned_len, 2); /* Length */
+ build_append_int_noprefix(table_data, 0x1, 2); /* Revision */
+
+ /* ISA string length including NUL */
+ build_append_int_noprefix(table_data, strlen(isa) + 1, 2);
+ g_array_append_vals(table_data, isa, strlen(isa) + 1); /* ISA string */
+
+ if (aligned_len != len) {
+ build_append_int_noprefix(table_data, 0x0, 1); /* Optional Padding */
+ }
+
+ /* Hart Info Node */
+ for (int i = 0; i < arch_ids->len; i++) {
+ build_append_int_noprefix(table_data, 0xFFFF, 2); /* Type */
+ build_append_int_noprefix(table_data, 16, 2); /* Length */
+ build_append_int_noprefix(table_data, 0x1, 2); /* Revision */
+ build_append_int_noprefix(table_data, 1, 2); /* Number of offsets */
+ build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */
+ build_append_int_noprefix(table_data, isa_offset, 4); /* Offsets[0] */
+ }
+
+ acpi_table_end(linker, &table);
+}
+
+/* FADT */
+static void build_fadt_rev6(GArray *table_data,
+ BIOSLinker *linker,
+ RISCVVirtState *s,
+ unsigned dsdt_tbl_offset)
+{
+ AcpiFadtData fadt = {
+ .rev = 6,
+ .minor_ver = 5,
+ .flags = 1 << ACPI_FADT_F_HW_REDUCED_ACPI,
+ .xdsdt_tbl_offset = &dsdt_tbl_offset,
+ };
+
+ build_fadt(table_data, linker, &fadt, s->oem_id, s->oem_table_id);
+}
+
+/* DSDT */
+static void build_dsdt(GArray *table_data,
+ BIOSLinker *linker,
+ RISCVVirtState *s)
+{
+ Aml *scope, *dsdt;
+ const MemMapEntry *memmap = s->memmap;
+ AcpiTable table = { .sig = "DSDT", .rev = 2, .oem_id = s->oem_id,
+ .oem_table_id = s->oem_table_id };
+
+
+ acpi_table_begin(&table, table_data);
+ dsdt = init_aml_allocator();
+
+ /*
+ * When booting the VM with UEFI, UEFI takes ownership of the RTC hardware.
+ * While UEFI can use libfdt to disable the RTC device node in the DTB that
+ * it passes to the OS, it cannot modify AML. Therefore, we won't generate
+ * the RTC ACPI device at all when using UEFI.
+ */
+ scope = aml_scope("\\_SB");
+ acpi_dsdt_add_cpus(scope, s);
+
+ acpi_dsdt_add_fw_cfg(scope, &memmap[VIRT_FW_CFG]);
+
+ aml_append(dsdt, scope);
+
+ /* copy AML table into ACPI tables blob and patch header there */
+ g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len);
+
+ acpi_table_end(linker, &table);
+ free_aml_allocator();
+}
+
+/*
+ * ACPI spec, Revision 6.5+
+ * 5.2.12 Multiple APIC Description Table (MADT)
+ * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/15
+ * https://drive.google.com/file/d/1R6k4MshhN3WTT-hwqAquu5nX6xSEqK2l/view
+ */
+static void build_madt(GArray *table_data,
+ BIOSLinker *linker,
+ RISCVVirtState *s)
+{
+ MachineClass *mc = MACHINE_GET_CLASS(s);
+ MachineState *ms = MACHINE(s);
+ const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms);
+
+ AcpiTable table = { .sig = "APIC", .rev = 6, .oem_id = s->oem_id,
+ .oem_table_id = s->oem_table_id };
+
+ acpi_table_begin(&table, table_data);
+ /* Local Interrupt Controller Address */
+ build_append_int_noprefix(table_data, 0, 4);
+ build_append_int_noprefix(table_data, 0, 4); /* MADT Flags */
+
+ /* RISC-V Local INTC structures per HART */
+ for (int i = 0; i < arch_ids->len; i++) {
+ riscv_acpi_madt_add_rintc(i, arch_ids, table_data);
+ }
+
+ acpi_table_end(linker, &table);
+}
+
+static void virt_acpi_build(RISCVVirtState *s, AcpiBuildTables *tables)
+{
+ GArray *table_offsets;
+ unsigned dsdt, xsdt;
+ GArray *tables_blob = tables->table_data;
+
+ table_offsets = g_array_new(false, true,
+ sizeof(uint32_t));
+
+ bios_linker_loader_alloc(tables->linker,
+ ACPI_BUILD_TABLE_FILE, tables_blob,
+ 64, false);
+
+ /* DSDT is pointed to by FADT */
+ dsdt = tables_blob->len;
+ build_dsdt(tables_blob, tables->linker, s);
+
+ /* FADT and others pointed to by XSDT */
+ acpi_add_table(table_offsets, tables_blob);
+ build_fadt_rev6(tables_blob, tables->linker, s, dsdt);
+
+ acpi_add_table(table_offsets, tables_blob);
+ build_madt(tables_blob, tables->linker, s);
+
+ acpi_add_table(table_offsets, tables_blob);
+ build_rhct(tables_blob, tables->linker, s);
+
+ /* XSDT is pointed to by RSDP */
+ xsdt = tables_blob->len;
+ build_xsdt(tables_blob, tables->linker, table_offsets, s->oem_id,
+ s->oem_table_id);
+
+ /* RSDP is in FSEG memory, so allocate it separately */
+ {
+ AcpiRsdpData rsdp_data = {
+ .revision = 2,
+ .oem_id = s->oem_id,
+ .xsdt_tbl_offset = &xsdt,
+ .rsdt_tbl_offset = NULL,
+ };
+ build_rsdp(tables->rsdp, tables->linker, &rsdp_data);
+ }
+
+ /*
+ * The tables are aligned to ACPI_BUILD_TABLE_SIZE (128 KiB); warn when
+ * more than half (64 KiB) is used, so the alignment size can be enlarged.
+ */
+ if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) {
+ warn_report("ACPI table size %u exceeds %d bytes,"
+ " migration may not work",
+ tables_blob->len, ACPI_BUILD_TABLE_SIZE / 2);
+ error_printf("Try removing some objects.");
+ }
+
+ acpi_align_size(tables_blob, ACPI_BUILD_TABLE_SIZE);
+
+ /* Clean up memory that's no longer used */
+ g_array_free(table_offsets, true);
+}
+
+static void acpi_ram_update(MemoryRegion *mr, GArray *data)
+{
+ uint32_t size = acpi_data_len(data);
+
+ /*
+ * Make sure RAM size is correct - in case it got changed
+ * e.g. by migration
+ */
+ memory_region_ram_resize(mr, size, &error_abort);
+
+ memcpy(memory_region_get_ram_ptr(mr), data->data, size);
+ memory_region_set_dirty(mr, 0, size);
+}
+
+static void virt_acpi_build_update(void *build_opaque)
+{
+ AcpiBuildState *build_state = build_opaque;
+ AcpiBuildTables tables;
+
+ /* No state to update or already patched? Nothing to do. */
+ if (!build_state || build_state->patched) {
+ return;
+ }
+
+ build_state->patched = true;
+
+ acpi_build_tables_init(&tables);
+
+ virt_acpi_build(RISCV_VIRT_MACHINE(qdev_get_machine()), &tables);
+
+ acpi_ram_update(build_state->table_mr, tables.table_data);
+ acpi_ram_update(build_state->rsdp_mr, tables.rsdp);
+ acpi_ram_update(build_state->linker_mr, tables.linker->cmd_blob);
+
+ acpi_build_tables_cleanup(&tables, true);
+}
+
+static void virt_acpi_build_reset(void *build_opaque)
+{
+ AcpiBuildState *build_state = build_opaque;
+ build_state->patched = false;
+}
+
+static const VMStateDescription vmstate_virt_acpi_build = {
+ .name = "virt_acpi_build",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_BOOL(patched, AcpiBuildState),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+void virt_acpi_setup(RISCVVirtState *s)
+{
+ AcpiBuildTables tables;
+ AcpiBuildState *build_state;
+
+ build_state = g_malloc0(sizeof *build_state);
+
+ acpi_build_tables_init(&tables);
+ virt_acpi_build(s, &tables);
+
+ /* Now expose it all to Guest */
+ build_state->table_mr = acpi_add_rom_blob(virt_acpi_build_update,
+ build_state, tables.table_data,
+ ACPI_BUILD_TABLE_FILE);
+ assert(build_state->table_mr != NULL);
+
+ build_state->linker_mr = acpi_add_rom_blob(virt_acpi_build_update,
+ build_state,
+ tables.linker->cmd_blob,
+ ACPI_BUILD_LOADER_FILE);
+
+ build_state->rsdp_mr = acpi_add_rom_blob(virt_acpi_build_update,
+ build_state, tables.rsdp,
+ ACPI_BUILD_RSDP_FILE);
+
+ qemu_register_reset(virt_acpi_build_reset, build_state);
+ virt_acpi_build_reset(build_state);
+ vmstate_register(NULL, 0, &vmstate_virt_acpi_build, build_state);
+
+ /*
+ * Clean up tables but don't free the memory: we track it
+ * in build_state.
+ */
+ acpi_build_tables_cleanup(&tables, false);
+}
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 4f8191860b..4e3efbee16 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -49,6 +49,8 @@
#include "hw/pci/pci.h"
#include "hw/pci-host/gpex.h"
#include "hw/display/ramfb.h"
+#include "hw/acpi/aml-build.h"
+#include "qapi/qapi-visit-common.h"
/*
* The virt machine physical address space used by some of the devices
@@ -228,8 +230,9 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket,
int cpu;
uint32_t cpu_phandle;
MachineState *ms = MACHINE(s);
- char *name, *cpu_name, *core_name, *intc_name;
+ char *name, *cpu_name, *core_name, *intc_name, *sv_name;
bool is_32_bit = riscv_is_32bit(&s->soc[0]);
+ uint8_t satp_mode_max;
for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) {
RISCVCPU *cpu_ptr = &s->soc[socket].harts[cpu];
@@ -239,16 +242,29 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket,
cpu_name = g_strdup_printf("/cpus/cpu@%d",
s->soc[socket].hartid_base + cpu);
qemu_fdt_add_subnode(ms->fdt, cpu_name);
- if (cpu_ptr->cfg.mmu) {
- qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type",
- (is_32_bit) ? "riscv,sv32" : "riscv,sv48");
- } else {
- qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type",
- "riscv,none");
- }
+
+ satp_mode_max = satp_mode_max_from_map(
+ s->soc[socket].harts[cpu].cfg.satp_mode.map);
+ sv_name = g_strdup_printf("riscv,%s",
+ satp_mode_str(satp_mode_max, is_32_bit));
+ qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name);
+ g_free(sv_name);
+
+
name = riscv_isa_string(cpu_ptr);
qemu_fdt_setprop_string(ms->fdt, cpu_name, "riscv,isa", name);
g_free(name);
+
+ if (cpu_ptr->cfg.ext_icbom) {
+ qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cbom-block-size",
+ cpu_ptr->cfg.cbom_blocksize);
+ }
+
+ if (cpu_ptr->cfg.ext_icboz) {
+ qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cboz-block-size",
+ cpu_ptr->cfg.cboz_blocksize);
+ }
+
qemu_fdt_setprop_string(ms->fdt, cpu_name, "compatible", "riscv");
qemu_fdt_setprop_string(ms->fdt, cpu_name, "status", "okay");
qemu_fdt_setprop_cell(ms->fdt, cpu_name, "reg",
@@ -1307,6 +1323,10 @@ static void virt_machine_done(Notifier *notifier, void *data)
if (kvm_enabled()) {
riscv_setup_direct_kernel(kernel_entry, fdt_load_addr);
}
+
+ if (virt_is_acpi_enabled(s)) {
+ virt_acpi_setup(s);
+ }
}
static void virt_machine_init(MachineState *machine)
@@ -1442,6 +1462,8 @@ static void virt_machine_init(MachineState *machine)
ROUND_UP(virt_high_pcie_memmap.base, virt_high_pcie_memmap.size);
}
+ s->memmap = virt_memmap;
+
/* register system main memory (actual RAM) */
memory_region_add_subregion(system_memory, memmap[VIRT_DRAM].base,
machine->ram);
@@ -1514,6 +1536,11 @@ static void virt_machine_init(MachineState *machine)
static void virt_machine_instance_init(Object *obj)
{
+ RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
+
+ s->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
+ s->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
+ s->acpi = ON_OFF_AUTO_AUTO;
}
static char *virt_get_aia_guests(Object *obj, Error **errp)
@@ -1588,6 +1615,28 @@ static void virt_set_aclint(Object *obj, bool value, Error **errp)
s->have_aclint = value;
}
+bool virt_is_acpi_enabled(RISCVVirtState *s)
+{
+ return s->acpi != ON_OFF_AUTO_OFF;
+}
+
+static void virt_get_acpi(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
+ OnOffAuto acpi = s->acpi;
+
+ visit_type_OnOffAuto(v, name, &acpi, errp);
+}
+
+static void virt_set_acpi(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
+
+ visit_type_OnOffAuto(v, name, &s->acpi, errp);
+}
+
static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
DeviceState *dev)
{
@@ -1659,6 +1708,11 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
sprintf(str, "Set number of guest MMIO pages for AIA IMSIC. Valid value "
"should be between 0 and %d.", VIRT_IRQCHIP_MAX_GUESTS);
object_class_property_set_description(oc, "aia-guests", str);
+ object_class_property_add(oc, "acpi", "OnOffAuto",
+ virt_get_acpi, virt_set_acpi,
+ NULL, NULL);
+ object_class_property_set_description(oc, "acpi",
+ "Enable ACPI");
}
static const TypeInfo virt_machine_typeinfo = {
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 6f8b543243..88d2b4b13c 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -1410,6 +1410,18 @@ static void ohci_set_hub_status(OHCIState *ohci, uint32_t val)
}
}
+/* This is the one state transition the controller can do by itself */
+static bool ohci_resume(OHCIState *s)
+{
+ if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) {
+ trace_usb_ohci_remote_wakeup(s->name);
+ s->ctl &= ~OHCI_CTL_HCFS;
+ s->ctl |= OHCI_USB_RESUME;
+ return true;
+ }
+ return false;
+}
+
/*
* Sets a flag in a port status reg but only set it if the port is connected.
* If not set ConnectStatusChange flag. If flag is enabled return 1.
@@ -1426,7 +1438,10 @@ static int ohci_port_set_if_connected(OHCIState *ohci, int i, uint32_t val)
if (!(ohci->rhport[i].ctrl & OHCI_PORT_CCS)) {
ohci->rhport[i].ctrl |= OHCI_PORT_CSC;
if (ohci->rhstatus & OHCI_RHS_DRWE) {
- /* TODO: CSC is a wakeup event */
+ /* CSC is a wakeup event */
+ if (ohci_resume(ohci)) {
+ ohci_set_interrupt(ohci, OHCI_INTR_RD);
+ }
}
return 0;
}
@@ -1828,11 +1843,7 @@ static void ohci_wakeup(USBPort *port1)
intr = OHCI_INTR_RHSC;
}
/* Note that the controller can be suspended even if this port is not */
- if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) {
- trace_usb_ohci_remote_wakeup(s->name);
- /* This is the one state transition the controller can do by itself */
- s->ctl &= ~OHCI_CTL_HCFS;
- s->ctl |= OHCI_USB_RESUME;
+ if (ohci_resume(s)) {
/*
* In suspend mode only ResumeDetected is possible, not RHSC:
* see the OHCI spec 5.1.2.3.
diff --git a/hw/usb/meson.build b/hw/usb/meson.build
index bdf34cbd3e..599dc24f0d 100644
--- a/hw/usb/meson.build
+++ b/hw/usb/meson.build
@@ -84,6 +84,6 @@ if libusb.found()
hw_usb_modules += {'host': usbhost_ss}
endif
-softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XEN', libusb], if_true: files('xen-usb.c'))
+softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XEN_BUS', libusb], if_true: files('xen-usb.c'))
modules += { 'hw-usb': hw_usb_modules }
diff --git a/hw/usb/vt82c686-uhci-pci.c b/hw/usb/vt82c686-uhci-pci.c
index 46a901f56f..b4884c9011 100644
--- a/hw/usb/vt82c686-uhci-pci.c
+++ b/hw/usb/vt82c686-uhci-pci.c
@@ -1,17 +1,7 @@
#include "qemu/osdep.h"
-#include "hw/irq.h"
#include "hw/isa/vt82c686.h"
#include "hcd-uhci.h"
-static void uhci_isa_set_irq(void *opaque, int irq_num, int level)
-{
- UHCIState *s = opaque;
- uint8_t irq = pci_get_byte(s->dev.config + PCI_INTERRUPT_LINE);
- if (irq > 0 && irq < 15) {
- via_isa_set_irq(pci_get_function_0(&s->dev), irq, level);
- }
-}
-
static void usb_uhci_vt82c686b_realize(PCIDevice *dev, Error **errp)
{
UHCIState *s = UHCI(dev);
@@ -25,8 +15,6 @@ static void usb_uhci_vt82c686b_realize(PCIDevice *dev, Error **errp)
pci_set_long(pci_conf + 0xc0, 0x00002000);
usb_uhci_common_realize(dev, errp);
- object_unref(s->irq);
- s->irq = qemu_allocate_irq(uhci_isa_set_irq, s, 0);
}
static UHCIInfo uhci_info[] = {
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
index 0f7369e7ed..66cb3f7c24 100644
--- a/hw/usb/xen-usb.c
+++ b/hw/usb/xen-usb.c
@@ -101,6 +101,8 @@ struct usbback_hotplug {
struct usbback_info {
struct XenLegacyDevice xendev; /* must be first */
USBBus bus;
+ uint32_t urb_ring_ref;
+ uint32_t conn_ring_ref;
void *urb_sring;
void *conn_sring;
struct usbif_urb_back_ring urb_ring;
@@ -159,7 +161,7 @@ static int usbback_gnttab_map(struct usbback_req *usbback_req)
for (i = 0; i < nr_segs; i++) {
if ((unsigned)usbback_req->req.seg[i].offset +
- (unsigned)usbback_req->req.seg[i].length > XC_PAGE_SIZE) {
+ (unsigned)usbback_req->req.seg[i].length > XEN_PAGE_SIZE) {
xen_pv_printf(xendev, 0, "segment crosses page boundary\n");
return -EINVAL;
}
@@ -183,7 +185,7 @@ static int usbback_gnttab_map(struct usbback_req *usbback_req)
for (i = 0; i < usbback_req->nr_buffer_segs; i++) {
seg = usbback_req->req.seg + i;
- addr = usbback_req->buffer + i * XC_PAGE_SIZE + seg->offset;
+ addr = usbback_req->buffer + i * XEN_PAGE_SIZE + seg->offset;
qemu_iovec_add(&usbback_req->packet.iov, addr, seg->length);
}
}
@@ -277,10 +279,11 @@ static int usbback_init_packet(struct usbback_req *usbback_req)
static void usbback_do_response(struct usbback_req *usbback_req, int32_t status,
int32_t actual_length, int32_t error_count)
{
+ uint32_t ref[USBIF_MAX_SEGMENTS_PER_REQUEST];
struct usbback_info *usbif;
struct usbif_urb_response *res;
struct XenLegacyDevice *xendev;
- unsigned int notify;
+ unsigned int notify, i;
usbif = usbback_req->usbif;
xendev = &usbif->xendev;
@@ -293,13 +296,19 @@ static void usbback_do_response(struct usbback_req *usbback_req, int32_t status,
}
if (usbback_req->buffer) {
- xen_be_unmap_grant_refs(xendev, usbback_req->buffer,
+ for (i = 0; i < usbback_req->nr_buffer_segs; i++) {
+ ref[i] = usbback_req->req.seg[i].gref;
+ }
+ xen_be_unmap_grant_refs(xendev, usbback_req->buffer, ref,
usbback_req->nr_buffer_segs);
usbback_req->buffer = NULL;
}
if (usbback_req->isoc_buffer) {
- xen_be_unmap_grant_refs(xendev, usbback_req->isoc_buffer,
+ for (i = 0; i < usbback_req->nr_extra_segs; i++) {
+ ref[i] = usbback_req->req.seg[i + usbback_req->req.nr_buffer_segs].gref;
+ }
+ xen_be_unmap_grant_refs(xendev, usbback_req->isoc_buffer, ref,
usbback_req->nr_extra_segs);
usbback_req->isoc_buffer = NULL;
}
@@ -832,11 +841,11 @@ static void usbback_disconnect(struct XenLegacyDevice *xendev)
xen_pv_unbind_evtchn(xendev);
if (usbif->urb_sring) {
- xen_be_unmap_grant_ref(xendev, usbif->urb_sring);
+ xen_be_unmap_grant_ref(xendev, usbif->urb_sring, usbif->urb_ring_ref);
usbif->urb_sring = NULL;
}
if (usbif->conn_sring) {
- xen_be_unmap_grant_ref(xendev, usbif->conn_sring);
+ xen_be_unmap_grant_ref(xendev, usbif->conn_sring, usbif->conn_ring_ref);
usbif->conn_sring = NULL;
}
@@ -889,10 +898,12 @@ static int usbback_connect(struct XenLegacyDevice *xendev)
return -1;
}
+ usbif->urb_ring_ref = urb_ring_ref;
+ usbif->conn_ring_ref = conn_ring_ref;
urb_sring = usbif->urb_sring;
conn_sring = usbif->conn_sring;
- BACK_RING_INIT(&usbif->urb_ring, urb_sring, XC_PAGE_SIZE);
- BACK_RING_INIT(&usbif->conn_ring, conn_sring, XC_PAGE_SIZE);
+ BACK_RING_INIT(&usbif->urb_ring, urb_sring, XEN_PAGE_SIZE);
+ BACK_RING_INIT(&usbif->conn_ring, conn_sring, XEN_PAGE_SIZE);
xen_be_bind_evtchn(xendev);
diff --git a/hw/xen/meson.build b/hw/xen/meson.build
index ae0ace3046..19c6aabc7c 100644
--- a/hw/xen/meson.build
+++ b/hw/xen/meson.build
@@ -1,4 +1,4 @@
-softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files(
+softmmu_ss.add(when: ['CONFIG_XEN_BUS'], if_true: files(
'xen-backend.c',
'xen-bus-helper.c',
'xen-bus.c',
@@ -7,6 +7,10 @@ softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files(
'xen_pvdev.c',
))
+softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files(
+ 'xen-operations.c',
+))
+
xen_specific_ss = ss.source_set()
if have_xen_pci_passthrough
xen_specific_ss.add(files(
diff --git a/hw/xen/trace-events b/hw/xen/trace-events
index 3da3fd8348..55c9e1df68 100644
--- a/hw/xen/trace-events
+++ b/hw/xen/trace-events
@@ -1,6 +1,6 @@
# See docs/devel/tracing.rst for syntax documentation.
-# ../../include/hw/xen/xen_common.h
+# ../../include/hw/xen/xen_native.h
xen_default_ioreq_server(void) ""
xen_ioreq_server_create(uint32_t id) "id: %u"
xen_ioreq_server_destroy(uint32_t id) "id: %u"
diff --git a/hw/xen/xen-bus-helper.c b/hw/xen/xen-bus-helper.c
index 5a1e12b374..b2b2cc9c5d 100644
--- a/hw/xen/xen-bus-helper.c
+++ b/hw/xen/xen-bus-helper.c
@@ -10,6 +10,7 @@
#include "hw/xen/xen-bus.h"
#include "hw/xen/xen-bus-helper.h"
#include "qapi/error.h"
+#include "trace.h"
#include <glib/gprintf.h>
@@ -46,34 +47,28 @@ const char *xs_strstate(enum xenbus_state state)
return "INVALID";
}
-void xs_node_create(struct xs_handle *xsh, xs_transaction_t tid,
- const char *node, struct xs_permissions perms[],
- unsigned int nr_perms, Error **errp)
+void xs_node_create(struct qemu_xs_handle *h, xs_transaction_t tid,
+ const char *node, unsigned int owner, unsigned int domid,
+ unsigned int perms, Error **errp)
{
trace_xs_node_create(node);
- if (!xs_write(xsh, tid, node, "", 0)) {
+ if (!qemu_xen_xs_create(h, tid, owner, domid, perms, node)) {
error_setg_errno(errp, errno, "failed to create node '%s'", node);
- return;
- }
-
- if (!xs_set_permissions(xsh, tid, node, perms, nr_perms)) {
- error_setg_errno(errp, errno, "failed to set node '%s' permissions",
- node);
}
}
-void xs_node_destroy(struct xs_handle *xsh, xs_transaction_t tid,
+void xs_node_destroy(struct qemu_xs_handle *h, xs_transaction_t tid,
const char *node, Error **errp)
{
trace_xs_node_destroy(node);
- if (!xs_rm(xsh, tid, node)) {
+ if (!qemu_xen_xs_destroy(h, tid, node)) {
error_setg_errno(errp, errno, "failed to destroy node '%s'", node);
}
}
-void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid,
+void xs_node_vprintf(struct qemu_xs_handle *h, xs_transaction_t tid,
const char *node, const char *key, Error **errp,
const char *fmt, va_list ap)
{
@@ -86,7 +81,7 @@ void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid,
trace_xs_node_vprintf(path, value);
- if (!xs_write(xsh, tid, path, value, len)) {
+ if (!qemu_xen_xs_write(h, tid, path, value, len)) {
error_setg_errno(errp, errno, "failed to write '%s' to '%s'",
value, path);
}
@@ -95,18 +90,18 @@ void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid,
g_free(path);
}
-void xs_node_printf(struct xs_handle *xsh, xs_transaction_t tid,
+void xs_node_printf(struct qemu_xs_handle *h, xs_transaction_t tid,
const char *node, const char *key, Error **errp,
const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
- xs_node_vprintf(xsh, tid, node, key, errp, fmt, ap);
+ xs_node_vprintf(h, tid, node, key, errp, fmt, ap);
va_end(ap);
}
-int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid,
+int xs_node_vscanf(struct qemu_xs_handle *h, xs_transaction_t tid,
const char *node, const char *key, Error **errp,
const char *fmt, va_list ap)
{
@@ -115,7 +110,7 @@ int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid,
path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) :
g_strdup(key);
- value = xs_read(xsh, tid, path, NULL);
+ value = qemu_xen_xs_read(h, tid, path, NULL);
trace_xs_node_vscanf(path, value);
@@ -133,7 +128,7 @@ int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid,
return rc;
}
-int xs_node_scanf(struct xs_handle *xsh, xs_transaction_t tid,
+int xs_node_scanf(struct qemu_xs_handle *h, xs_transaction_t tid,
const char *node, const char *key, Error **errp,
const char *fmt, ...)
{
@@ -141,42 +136,35 @@ int xs_node_scanf(struct xs_handle *xsh, xs_transaction_t tid,
int rc;
va_start(ap, fmt);
- rc = xs_node_vscanf(xsh, tid, node, key, errp, fmt, ap);
+ rc = xs_node_vscanf(h, tid, node, key, errp, fmt, ap);
va_end(ap);
return rc;
}
-void xs_node_watch(struct xs_handle *xsh, const char *node, const char *key,
- char *token, Error **errp)
+struct qemu_xs_watch *xs_node_watch(struct qemu_xs_handle *h, const char *node,
+ const char *key, xs_watch_fn fn,
+ void *opaque, Error **errp)
{
char *path;
+ struct qemu_xs_watch *w;
path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) :
g_strdup(key);
trace_xs_node_watch(path);
- if (!xs_watch(xsh, path, token)) {
+ w = qemu_xen_xs_watch(h, path, fn, opaque);
+ if (!w) {
error_setg_errno(errp, errno, "failed to watch node '%s'", path);
}
g_free(path);
+
+ return w;
}
-void xs_node_unwatch(struct xs_handle *xsh, const char *node,
- const char *key, const char *token, Error **errp)
+void xs_node_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w)
{
- char *path;
-
- path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) :
- g_strdup(key);
-
- trace_xs_node_unwatch(path);
-
- if (!xs_unwatch(xsh, path, token)) {
- error_setg_errno(errp, errno, "failed to unwatch node '%s'", path);
- }
-
- g_free(path);
+ qemu_xen_xs_unwatch(h, w);
}
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index df3f6b9ae0..c59850b1de 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -62,7 +62,7 @@ static void xen_device_unplug(XenDevice *xendev, Error **errp)
/* Mimic the way the Xen toolstack does an unplug */
again:
- tid = xs_transaction_start(xenbus->xsh);
+ tid = qemu_xen_xs_transaction_start(xenbus->xsh);
if (tid == XBT_NULL) {
error_setg_errno(errp, errno, "failed xs_transaction_start");
return;
@@ -80,7 +80,7 @@ again:
goto abort;
}
- if (!xs_transaction_end(xenbus->xsh, tid, false)) {
+ if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, false)) {
if (errno == EAGAIN) {
goto again;
}
@@ -95,7 +95,7 @@ abort:
* We only abort if there is already a failure so ignore any error
* from ending the transaction.
*/
- xs_transaction_end(xenbus->xsh, tid, true);
+ qemu_xen_xs_transaction_end(xenbus->xsh, tid, true);
}
static void xen_bus_print_dev(Monitor *mon, DeviceState *dev, int indent)
@@ -111,143 +111,6 @@ static char *xen_bus_get_dev_path(DeviceState *dev)
return xen_device_get_backend_path(XEN_DEVICE(dev));
}
-struct XenWatch {
- char *node, *key;
- char *token;
- XenWatchHandler handler;
- void *opaque;
- Notifier notifier;
-};
-
-static void watch_notify(Notifier *n, void *data)
-{
- XenWatch *watch = container_of(n, XenWatch, notifier);
- const char *token = data;
-
- if (!strcmp(watch->token, token)) {
- watch->handler(watch->opaque);
- }
-}
-
-static XenWatch *new_watch(const char *node, const char *key,
- XenWatchHandler handler, void *opaque)
-{
- XenWatch *watch = g_new0(XenWatch, 1);
- QemuUUID uuid;
-
- qemu_uuid_generate(&uuid);
-
- watch->token = qemu_uuid_unparse_strdup(&uuid);
- watch->node = g_strdup(node);
- watch->key = g_strdup(key);
- watch->handler = handler;
- watch->opaque = opaque;
- watch->notifier.notify = watch_notify;
-
- return watch;
-}
-
-static void free_watch(XenWatch *watch)
-{
- g_free(watch->token);
- g_free(watch->key);
- g_free(watch->node);
-
- g_free(watch);
-}
-
-struct XenWatchList {
- struct xs_handle *xsh;
- NotifierList notifiers;
-};
-
-static void watch_list_event(void *opaque)
-{
- XenWatchList *watch_list = opaque;
- char **v;
- const char *token;
-
- v = xs_check_watch(watch_list->xsh);
- if (!v) {
- return;
- }
-
- token = v[XS_WATCH_TOKEN];
-
- notifier_list_notify(&watch_list->notifiers, (void *)token);
-
- free(v);
-}
-
-static XenWatchList *watch_list_create(struct xs_handle *xsh)
-{
- XenWatchList *watch_list = g_new0(XenWatchList, 1);
-
- g_assert(xsh);
-
- watch_list->xsh = xsh;
- notifier_list_init(&watch_list->notifiers);
- qemu_set_fd_handler(xs_fileno(watch_list->xsh), watch_list_event, NULL,
- watch_list);
-
- return watch_list;
-}
-
-static void watch_list_destroy(XenWatchList *watch_list)
-{
- g_assert(notifier_list_empty(&watch_list->notifiers));
- qemu_set_fd_handler(xs_fileno(watch_list->xsh), NULL, NULL, NULL);
- g_free(watch_list);
-}
-
-static XenWatch *watch_list_add(XenWatchList *watch_list, const char *node,
- const char *key, XenWatchHandler handler,
- void *opaque, Error **errp)
-{
- ERRP_GUARD();
- XenWatch *watch = new_watch(node, key, handler, opaque);
-
- notifier_list_add(&watch_list->notifiers, &watch->notifier);
-
- xs_node_watch(watch_list->xsh, node, key, watch->token, errp);
- if (*errp) {
- notifier_remove(&watch->notifier);
- free_watch(watch);
-
- return NULL;
- }
-
- return watch;
-}
-
-static void watch_list_remove(XenWatchList *watch_list, XenWatch *watch,
- Error **errp)
-{
- xs_node_unwatch(watch_list->xsh, watch->node, watch->key, watch->token,
- errp);
-
- notifier_remove(&watch->notifier);
- free_watch(watch);
-}
-
-static XenWatch *xen_bus_add_watch(XenBus *xenbus, const char *node,
- const char *key, XenWatchHandler handler,
- Error **errp)
-{
- trace_xen_bus_add_watch(node, key);
-
- return watch_list_add(xenbus->watch_list, node, key, handler, xenbus,
- errp);
-}
-
-static void xen_bus_remove_watch(XenBus *xenbus, XenWatch *watch,
- Error **errp)
-{
- trace_xen_bus_remove_watch(watch->node, watch->key);
-
- watch_list_remove(xenbus->watch_list, watch, errp);
-}
-
static void xen_bus_backend_create(XenBus *xenbus, const char *type,
const char *name, char *path,
Error **errp)
@@ -261,15 +124,15 @@ static void xen_bus_backend_create(XenBus *xenbus, const char *type,
trace_xen_bus_backend_create(type, path);
again:
- tid = xs_transaction_start(xenbus->xsh);
+ tid = qemu_xen_xs_transaction_start(xenbus->xsh);
if (tid == XBT_NULL) {
error_setg(errp, "failed xs_transaction_start");
return;
}
- key = xs_directory(xenbus->xsh, tid, path, &n);
+ key = qemu_xen_xs_directory(xenbus->xsh, tid, path, &n);
if (!key) {
- if (!xs_transaction_end(xenbus->xsh, tid, true)) {
+ if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, true)) {
error_setg_errno(errp, errno, "failed xs_transaction_end");
}
return;
@@ -300,7 +163,7 @@ again:
free(key);
- if (!xs_transaction_end(xenbus->xsh, tid, false)) {
+ if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, false)) {
qobject_unref(opts);
if (errno == EAGAIN) {
@@ -327,7 +190,7 @@ static void xen_bus_type_enumerate(XenBus *xenbus, const char *type)
trace_xen_bus_type_enumerate(type);
- backend = xs_directory(xenbus->xsh, XBT_NULL, domain_path, &n);
+ backend = qemu_xen_xs_directory(xenbus->xsh, XBT_NULL, domain_path, &n);
if (!backend) {
goto out;
}
@@ -372,7 +235,7 @@ static void xen_bus_enumerate(XenBus *xenbus)
trace_xen_bus_enumerate();
- type = xs_directory(xenbus->xsh, XBT_NULL, "backend", &n);
+ type = qemu_xen_xs_directory(xenbus->xsh, XBT_NULL, "backend", &n);
if (!type) {
return;
}
@@ -415,7 +278,7 @@ static void xen_bus_cleanup(XenBus *xenbus)
}
}
-static void xen_bus_backend_changed(void *opaque)
+static void xen_bus_backend_changed(void *opaque, const char *path)
{
XenBus *xenbus = opaque;
@@ -434,7 +297,7 @@ static void xen_bus_unrealize(BusState *bus)
for (i = 0; i < xenbus->backend_types; i++) {
if (xenbus->backend_watch[i]) {
- xen_bus_remove_watch(xenbus, xenbus->backend_watch[i], NULL);
+ xs_node_unwatch(xenbus->xsh, xenbus->backend_watch[i]);
}
}
@@ -442,13 +305,8 @@ static void xen_bus_unrealize(BusState *bus)
xenbus->backend_watch = NULL;
}
- if (xenbus->watch_list) {
- watch_list_destroy(xenbus->watch_list);
- xenbus->watch_list = NULL;
- }
-
if (xenbus->xsh) {
- xs_close(xenbus->xsh);
+ qemu_xen_xs_close(xenbus->xsh);
}
}
@@ -463,7 +321,7 @@ static void xen_bus_realize(BusState *bus, Error **errp)
trace_xen_bus_realize();
- xenbus->xsh = xs_open(0);
+ xenbus->xsh = qemu_xen_xs_open();
if (!xenbus->xsh) {
error_setg_errno(errp, errno, "failed xs_open");
goto fail;
@@ -476,19 +334,18 @@ static void xen_bus_realize(BusState *bus, Error **errp)
xenbus->backend_id = 0; /* Assume lack of node means dom0 */
}
- xenbus->watch_list = watch_list_create(xenbus->xsh);
-
module_call_init(MODULE_INIT_XEN_BACKEND);
type = xen_backend_get_types(&xenbus->backend_types);
- xenbus->backend_watch = g_new(XenWatch *, xenbus->backend_types);
+ xenbus->backend_watch = g_new(struct qemu_xs_watch *,
+ xenbus->backend_types);
for (i = 0; i < xenbus->backend_types; i++) {
char *node = g_strdup_printf("backend/%s", type[i]);
xenbus->backend_watch[i] =
- xen_bus_add_watch(xenbus, node, key, xen_bus_backend_changed,
- &local_err);
+ xs_node_watch(xenbus->xsh, node, key, xen_bus_backend_changed,
+ xenbus, &local_err);
if (local_err) {
/* This need not be treated as a hard error so don't propagate */
error_reportf_err(local_err,
@@ -631,7 +488,7 @@ static bool xen_device_frontend_is_active(XenDevice *xendev)
}
}
-static void xen_device_backend_changed(void *opaque)
+static void xen_device_backend_changed(void *opaque, const char *path)
{
XenDevice *xendev = opaque;
const char *type = object_get_typename(OBJECT(xendev));
@@ -685,66 +542,35 @@ static void xen_device_backend_changed(void *opaque)
}
}
-static XenWatch *xen_device_add_watch(XenDevice *xendev, const char *node,
- const char *key,
- XenWatchHandler handler,
- Error **errp)
-{
- const char *type = object_get_typename(OBJECT(xendev));
-
- trace_xen_device_add_watch(type, xendev->name, node, key);
-
- return watch_list_add(xendev->watch_list, node, key, handler, xendev,
- errp);
-}
-
-static void xen_device_remove_watch(XenDevice *xendev, XenWatch *watch,
- Error **errp)
-{
- const char *type = object_get_typename(OBJECT(xendev));
-
- trace_xen_device_remove_watch(type, xendev->name, watch->node,
- watch->key);
-
- watch_list_remove(xendev->watch_list, watch, errp);
-}
-
-
static void xen_device_backend_create(XenDevice *xendev, Error **errp)
{
ERRP_GUARD();
XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
- struct xs_permissions perms[2];
xendev->backend_path = xen_device_get_backend_path(xendev);
- perms[0].id = xenbus->backend_id;
- perms[0].perms = XS_PERM_NONE;
- perms[1].id = xendev->frontend_id;
- perms[1].perms = XS_PERM_READ;
-
g_assert(xenbus->xsh);
- xs_node_create(xenbus->xsh, XBT_NULL, xendev->backend_path, perms,
- ARRAY_SIZE(perms), errp);
+ xs_node_create(xenbus->xsh, XBT_NULL, xendev->backend_path,
+ xenbus->backend_id, xendev->frontend_id, XS_PERM_READ, errp);
if (*errp) {
error_prepend(errp, "failed to create backend: ");
return;
}
xendev->backend_state_watch =
- xen_device_add_watch(xendev, xendev->backend_path,
- "state", xen_device_backend_changed,
- errp);
+ xs_node_watch(xendev->xsh, xendev->backend_path,
+ "state", xen_device_backend_changed, xendev,
+ errp);
if (*errp) {
error_prepend(errp, "failed to watch backend state: ");
return;
}
xendev->backend_online_watch =
- xen_device_add_watch(xendev, xendev->backend_path,
- "online", xen_device_backend_changed,
- errp);
+ xs_node_watch(xendev->xsh, xendev->backend_path,
+ "online", xen_device_backend_changed, xendev,
+ errp);
if (*errp) {
error_prepend(errp, "failed to watch backend online: ");
return;
@@ -757,12 +583,12 @@ static void xen_device_backend_destroy(XenDevice *xendev)
Error *local_err = NULL;
if (xendev->backend_online_watch) {
- xen_device_remove_watch(xendev, xendev->backend_online_watch, NULL);
+ xs_node_unwatch(xendev->xsh, xendev->backend_online_watch);
xendev->backend_online_watch = NULL;
}
if (xendev->backend_state_watch) {
- xen_device_remove_watch(xendev, xendev->backend_state_watch, NULL);
+ xs_node_unwatch(xendev->xsh, xendev->backend_state_watch);
xendev->backend_state_watch = NULL;
}
@@ -837,7 +663,7 @@ static void xen_device_frontend_set_state(XenDevice *xendev,
}
}
-static void xen_device_frontend_changed(void *opaque)
+static void xen_device_frontend_changed(void *opaque, const char *path)
{
XenDevice *xendev = opaque;
XenDeviceClass *xendev_class = XEN_DEVICE_GET_CLASS(xendev);
@@ -885,7 +711,6 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp)
{
ERRP_GUARD();
XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
- struct xs_permissions perms[2];
xendev->frontend_path = xen_device_get_frontend_path(xendev);
@@ -894,15 +719,11 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp)
* toolstack.
*/
if (!xen_device_frontend_exists(xendev)) {
- perms[0].id = xendev->frontend_id;
- perms[0].perms = XS_PERM_NONE;
- perms[1].id = xenbus->backend_id;
- perms[1].perms = XS_PERM_READ | XS_PERM_WRITE;
-
g_assert(xenbus->xsh);
- xs_node_create(xenbus->xsh, XBT_NULL, xendev->frontend_path, perms,
- ARRAY_SIZE(perms), errp);
+ xs_node_create(xenbus->xsh, XBT_NULL, xendev->frontend_path,
+ xendev->frontend_id, xenbus->backend_id,
+ XS_PERM_READ | XS_PERM_WRITE, errp);
if (*errp) {
error_prepend(errp, "failed to create frontend: ");
return;
@@ -910,8 +731,8 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp)
}
xendev->frontend_state_watch =
- xen_device_add_watch(xendev, xendev->frontend_path, "state",
- xen_device_frontend_changed, errp);
+ xs_node_watch(xendev->xsh, xendev->frontend_path, "state",
+ xen_device_frontend_changed, xendev, errp);
if (*errp) {
error_prepend(errp, "failed to watch frontend state: ");
}
@@ -923,8 +744,7 @@ static void xen_device_frontend_destroy(XenDevice *xendev)
Error *local_err = NULL;
if (xendev->frontend_state_watch) {
- xen_device_remove_watch(xendev, xendev->frontend_state_watch,
- NULL);
+ xs_node_unwatch(xendev->xsh, xendev->frontend_state_watch);
xendev->frontend_state_watch = NULL;
}
@@ -947,7 +767,7 @@ static void xen_device_frontend_destroy(XenDevice *xendev)
void xen_device_set_max_grant_refs(XenDevice *xendev, unsigned int nr_refs,
Error **errp)
{
- if (xengnttab_set_max_grants(xendev->xgth, nr_refs)) {
+ if (qemu_xen_gnttab_set_max_grants(xendev->xgth, nr_refs)) {
error_setg_errno(errp, errno, "xengnttab_set_max_grants failed");
}
}
@@ -956,9 +776,8 @@ void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs,
unsigned int nr_refs, int prot,
Error **errp)
{
- void *map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_refs,
- xendev->frontend_id, refs,
- prot);
+ void *map = qemu_xen_gnttab_map_refs(xendev->xgth, nr_refs,
+ xendev->frontend_id, refs, prot);
if (!map) {
error_setg_errno(errp, errno,
@@ -968,112 +787,20 @@ void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs,
return map;
}
-void xen_device_unmap_grant_refs(XenDevice *xendev, void *map,
+void xen_device_unmap_grant_refs(XenDevice *xendev, void *map, uint32_t *refs,
unsigned int nr_refs, Error **errp)
{
- if (xengnttab_unmap(xendev->xgth, map, nr_refs)) {
+ if (qemu_xen_gnttab_unmap(xendev->xgth, map, refs, nr_refs)) {
error_setg_errno(errp, errno, "xengnttab_unmap failed");
}
}
-static void compat_copy_grant_refs(XenDevice *xendev, bool to_domain,
- XenDeviceGrantCopySegment segs[],
- unsigned int nr_segs, Error **errp)
-{
- uint32_t *refs = g_new(uint32_t, nr_segs);
- int prot = to_domain ? PROT_WRITE : PROT_READ;
- void *map;
- unsigned int i;
-
- for (i = 0; i < nr_segs; i++) {
- XenDeviceGrantCopySegment *seg = &segs[i];
-
- refs[i] = to_domain ? seg->dest.foreign.ref :
- seg->source.foreign.ref;
- }
-
- map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_segs,
- xendev->frontend_id, refs,
- prot);
- if (!map) {
- error_setg_errno(errp, errno,
- "xengnttab_map_domain_grant_refs failed");
- goto done;
- }
-
- for (i = 0; i < nr_segs; i++) {
- XenDeviceGrantCopySegment *seg = &segs[i];
- void *page = map + (i * XC_PAGE_SIZE);
-
- if (to_domain) {
- memcpy(page + seg->dest.foreign.offset, seg->source.virt,
- seg->len);
- } else {
- memcpy(seg->dest.virt, page + seg->source.foreign.offset,
- seg->len);
- }
- }
-
- if (xengnttab_unmap(xendev->xgth, map, nr_segs)) {
- error_setg_errno(errp, errno, "xengnttab_unmap failed");
- }
-
-done:
- g_free(refs);
-}
-
void xen_device_copy_grant_refs(XenDevice *xendev, bool to_domain,
XenDeviceGrantCopySegment segs[],
unsigned int nr_segs, Error **errp)
{
- xengnttab_grant_copy_segment_t *xengnttab_segs;
- unsigned int i;
-
- if (!xendev->feature_grant_copy) {
- compat_copy_grant_refs(xendev, to_domain, segs, nr_segs, errp);
- return;
- }
-
- xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs);
-
- for (i = 0; i < nr_segs; i++) {
- XenDeviceGrantCopySegment *seg = &segs[i];
- xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i];
-
- if (to_domain) {
- xengnttab_seg->flags = GNTCOPY_dest_gref;
- xengnttab_seg->dest.foreign.domid = xendev->frontend_id;
- xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref;
- xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset;
- xengnttab_seg->source.virt = seg->source.virt;
- } else {
- xengnttab_seg->flags = GNTCOPY_source_gref;
- xengnttab_seg->source.foreign.domid = xendev->frontend_id;
- xengnttab_seg->source.foreign.ref = seg->source.foreign.ref;
- xengnttab_seg->source.foreign.offset =
- seg->source.foreign.offset;
- xengnttab_seg->dest.virt = seg->dest.virt;
- }
-
- xengnttab_seg->len = seg->len;
- }
-
- if (xengnttab_grant_copy(xendev->xgth, nr_segs, xengnttab_segs)) {
- error_setg_errno(errp, errno, "xengnttab_grant_copy failed");
- goto done;
- }
-
- for (i = 0; i < nr_segs; i++) {
- xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i];
-
- if (xengnttab_seg->status != GNTST_okay) {
- error_setg(errp, "xengnttab_grant_copy seg[%u] failed", i);
- break;
- }
- }
-
-done:
- g_free(xengnttab_segs);
+ qemu_xen_gnttab_grant_copy(xendev->xgth, to_domain, xendev->frontend_id,
+ (XenGrantCopySegment *)segs, nr_segs, errp);
}
struct XenEventChannel {
@@ -1095,12 +822,12 @@ static bool xen_device_poll(void *opaque)
static void xen_device_event(void *opaque)
{
XenEventChannel *channel = opaque;
- unsigned long port = xenevtchn_pending(channel->xeh);
+ unsigned long port = qemu_xen_evtchn_pending(channel->xeh);
if (port == channel->local_port) {
xen_device_poll(channel);
- xenevtchn_unmask(channel->xeh, port);
+ qemu_xen_evtchn_unmask(channel->xeh, port);
}
}
@@ -1115,11 +842,11 @@ void xen_device_set_event_channel_context(XenDevice *xendev,
}
if (channel->ctx)
- aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true,
+ aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true,
NULL, NULL, NULL, NULL, NULL);
channel->ctx = ctx;
- aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true,
+ aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true,
xen_device_event, NULL, xen_device_poll, NULL, channel);
}
@@ -1131,13 +858,13 @@ XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
XenEventChannel *channel = g_new0(XenEventChannel, 1);
xenevtchn_port_or_error_t local_port;
- channel->xeh = xenevtchn_open(NULL, 0);
+ channel->xeh = qemu_xen_evtchn_open();
if (!channel->xeh) {
error_setg_errno(errp, errno, "failed xenevtchn_open");
goto fail;
}
- local_port = xenevtchn_bind_interdomain(channel->xeh,
+ local_port = qemu_xen_evtchn_bind_interdomain(channel->xeh,
xendev->frontend_id,
port);
if (local_port < 0) {
@@ -1160,7 +887,7 @@ XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
fail:
if (channel->xeh) {
- xenevtchn_close(channel->xeh);
+ qemu_xen_evtchn_close(channel->xeh);
}
g_free(channel);
@@ -1177,7 +904,7 @@ void xen_device_notify_event_channel(XenDevice *xendev,
return;
}
- if (xenevtchn_notify(channel->xeh, channel->local_port) < 0) {
+ if (qemu_xen_evtchn_notify(channel->xeh, channel->local_port) < 0) {
error_setg_errno(errp, errno, "xenevtchn_notify failed");
}
}
@@ -1193,14 +920,14 @@ void xen_device_unbind_event_channel(XenDevice *xendev,
QLIST_REMOVE(channel, list);
- aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true,
+ aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true,
NULL, NULL, NULL, NULL, NULL);
- if (xenevtchn_unbind(channel->xeh, channel->local_port) < 0) {
+ if (qemu_xen_evtchn_unbind(channel->xeh, channel->local_port) < 0) {
error_setg_errno(errp, errno, "xenevtchn_unbind failed");
}
- xenevtchn_close(channel->xeh);
+ qemu_xen_evtchn_close(channel->xeh);
g_free(channel);
}
@@ -1235,17 +962,12 @@ static void xen_device_unrealize(DeviceState *dev)
xen_device_backend_destroy(xendev);
if (xendev->xgth) {
- xengnttab_close(xendev->xgth);
+ qemu_xen_gnttab_close(xendev->xgth);
xendev->xgth = NULL;
}
- if (xendev->watch_list) {
- watch_list_destroy(xendev->watch_list);
- xendev->watch_list = NULL;
- }
-
if (xendev->xsh) {
- xs_close(xendev->xsh);
+ qemu_xen_xs_close(xendev->xsh);
xendev->xsh = NULL;
}
@@ -1290,23 +1012,18 @@ static void xen_device_realize(DeviceState *dev, Error **errp)
trace_xen_device_realize(type, xendev->name);
- xendev->xsh = xs_open(0);
+ xendev->xsh = qemu_xen_xs_open();
if (!xendev->xsh) {
error_setg_errno(errp, errno, "failed xs_open");
goto unrealize;
}
- xendev->watch_list = watch_list_create(xendev->xsh);
-
- xendev->xgth = xengnttab_open(NULL, 0);
+ xendev->xgth = qemu_xen_gnttab_open();
if (!xendev->xgth) {
error_setg_errno(errp, errno, "failed xengnttab_open");
goto unrealize;
}
- xendev->feature_grant_copy =
- (xengnttab_grant_copy(xendev->xgth, 0, NULL) == 0);
-
xen_device_backend_create(xendev, errp);
if (*errp) {
goto unrealize;
@@ -1317,13 +1034,6 @@ static void xen_device_realize(DeviceState *dev, Error **errp)
goto unrealize;
}
- if (xendev_class->realize) {
- xendev_class->realize(xendev, errp);
- if (*errp) {
- goto unrealize;
- }
- }
-
xen_device_backend_printf(xendev, "frontend", "%s",
xendev->frontend_path);
xen_device_backend_printf(xendev, "frontend-id", "%u",
@@ -1342,6 +1052,13 @@ static void xen_device_realize(DeviceState *dev, Error **errp)
xen_device_frontend_set_state(xendev, XenbusStateInitialising, true);
}
+ if (xendev_class->realize) {
+ xendev_class->realize(xendev, errp);
+ if (*errp) {
+ goto unrealize;
+ }
+ }
+
xendev->exit.notify = xen_device_exit;
qemu_add_exit_notifier(&xendev->exit);
return;
diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c
index afba71f6eb..4ded3cec23 100644
--- a/hw/xen/xen-legacy-backend.c
+++ b/hw/xen/xen-legacy-backend.c
@@ -39,11 +39,10 @@ BusState *xen_sysbus;
/* ------------------------------------------------------------- */
/* public */
-struct xs_handle *xenstore;
+struct qemu_xs_handle *xenstore;
const char *xen_protocol;
/* private */
-static bool xen_feature_grant_copy;
static int debug;
int xenstore_write_be_str(struct XenLegacyDevice *xendev, const char *node,
@@ -113,7 +112,7 @@ void xen_be_set_max_grant_refs(struct XenLegacyDevice *xendev,
{
assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV);
- if (xengnttab_set_max_grants(xendev->gnttabdev, nr_refs)) {
+ if (qemu_xen_gnttab_set_max_grants(xendev->gnttabdev, nr_refs)) {
xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n",
strerror(errno));
}
@@ -126,8 +125,8 @@ void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs,
assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV);
- ptr = xengnttab_map_domain_grant_refs(xendev->gnttabdev, nr_refs,
- xen_domid, refs, prot);
+ ptr = qemu_xen_gnttab_map_refs(xendev->gnttabdev, nr_refs, xen_domid, refs,
+ prot);
if (!ptr) {
xen_pv_printf(xendev, 0,
"xengnttab_map_domain_grant_refs failed: %s\n",
@@ -138,123 +137,31 @@ void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs,
}
void xen_be_unmap_grant_refs(struct XenLegacyDevice *xendev, void *ptr,
- unsigned int nr_refs)
+ uint32_t *refs, unsigned int nr_refs)
{
assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV);
- if (xengnttab_unmap(xendev->gnttabdev, ptr, nr_refs)) {
+ if (qemu_xen_gnttab_unmap(xendev->gnttabdev, ptr, refs, nr_refs)) {
xen_pv_printf(xendev, 0, "xengnttab_unmap failed: %s\n",
strerror(errno));
}
}
-static int compat_copy_grant_refs(struct XenLegacyDevice *xendev,
- bool to_domain,
- XenGrantCopySegment segs[],
- unsigned int nr_segs)
-{
- uint32_t *refs = g_new(uint32_t, nr_segs);
- int prot = to_domain ? PROT_WRITE : PROT_READ;
- void *pages;
- unsigned int i;
-
- for (i = 0; i < nr_segs; i++) {
- XenGrantCopySegment *seg = &segs[i];
-
- refs[i] = to_domain ?
- seg->dest.foreign.ref : seg->source.foreign.ref;
- }
-
- pages = xengnttab_map_domain_grant_refs(xendev->gnttabdev, nr_segs,
- xen_domid, refs, prot);
- if (!pages) {
- xen_pv_printf(xendev, 0,
- "xengnttab_map_domain_grant_refs failed: %s\n",
- strerror(errno));
- g_free(refs);
- return -1;
- }
-
- for (i = 0; i < nr_segs; i++) {
- XenGrantCopySegment *seg = &segs[i];
- void *page = pages + (i * XC_PAGE_SIZE);
-
- if (to_domain) {
- memcpy(page + seg->dest.foreign.offset, seg->source.virt,
- seg->len);
- } else {
- memcpy(seg->dest.virt, page + seg->source.foreign.offset,
- seg->len);
- }
- }
-
- if (xengnttab_unmap(xendev->gnttabdev, pages, nr_segs)) {
- xen_pv_printf(xendev, 0, "xengnttab_unmap failed: %s\n",
- strerror(errno));
- }
-
- g_free(refs);
- return 0;
-}
-
int xen_be_copy_grant_refs(struct XenLegacyDevice *xendev,
bool to_domain,
XenGrantCopySegment segs[],
unsigned int nr_segs)
{
- xengnttab_grant_copy_segment_t *xengnttab_segs;
- unsigned int i;
int rc;
assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV);
- if (!xen_feature_grant_copy) {
- return compat_copy_grant_refs(xendev, to_domain, segs, nr_segs);
- }
-
- xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs);
-
- for (i = 0; i < nr_segs; i++) {
- XenGrantCopySegment *seg = &segs[i];
- xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i];
-
- if (to_domain) {
- xengnttab_seg->flags = GNTCOPY_dest_gref;
- xengnttab_seg->dest.foreign.domid = xen_domid;
- xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref;
- xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset;
- xengnttab_seg->source.virt = seg->source.virt;
- } else {
- xengnttab_seg->flags = GNTCOPY_source_gref;
- xengnttab_seg->source.foreign.domid = xen_domid;
- xengnttab_seg->source.foreign.ref = seg->source.foreign.ref;
- xengnttab_seg->source.foreign.offset =
- seg->source.foreign.offset;
- xengnttab_seg->dest.virt = seg->dest.virt;
- }
-
- xengnttab_seg->len = seg->len;
- }
-
- rc = xengnttab_grant_copy(xendev->gnttabdev, nr_segs, xengnttab_segs);
-
+ rc = qemu_xen_gnttab_grant_copy(xendev->gnttabdev, to_domain, xen_domid,
+ segs, nr_segs, NULL);
if (rc) {
- xen_pv_printf(xendev, 0, "xengnttab_copy failed: %s\n",
- strerror(errno));
- }
-
- for (i = 0; i < nr_segs; i++) {
- xengnttab_grant_copy_segment_t *xengnttab_seg =
- &xengnttab_segs[i];
-
- if (xengnttab_seg->status != GNTST_okay) {
- xen_pv_printf(xendev, 0, "segment[%u] status: %d\n", i,
- xengnttab_seg->status);
- rc = -1;
- }
+ xen_pv_printf(xendev, 0, "xengnttab_grant_copy failed: %s\n",
+ strerror(-rc));
}
-
- g_free(xengnttab_segs);
return rc;
}
@@ -294,13 +201,13 @@ static struct XenLegacyDevice *xen_be_get_xendev(const char *type, int dom,
xendev->debug = debug;
xendev->local_port = -1;
- xendev->evtchndev = xenevtchn_open(NULL, 0);
+ xendev->evtchndev = qemu_xen_evtchn_open();
if (xendev->evtchndev == NULL) {
xen_pv_printf(NULL, 0, "can't open evtchn device\n");
qdev_unplug(DEVICE(xendev), NULL);
return NULL;
}
- qemu_set_cloexec(xenevtchn_fd(xendev->evtchndev));
+ qemu_set_cloexec(qemu_xen_evtchn_fd(xendev->evtchndev));
xen_pv_insert_xendev(xendev);
@@ -367,6 +274,25 @@ static void xen_be_frontend_changed(struct XenLegacyDevice *xendev,
}
}
+static void xenstore_update_fe(void *opaque, const char *watch)
+{
+ struct XenLegacyDevice *xendev = opaque;
+ const char *node;
+ unsigned int len;
+
+ len = strlen(xendev->fe);
+ if (strncmp(xendev->fe, watch, len) != 0) {
+ return;
+ }
+ if (watch[len] != '/') {
+ return;
+ }
+ node = watch + len + 1;
+
+ xen_be_frontend_changed(xendev, node);
+ xen_be_check_state(xendev);
+}
+
/* ------------------------------------------------------------- */
/* Check for possible state transitions and perform them. */
@@ -380,7 +306,6 @@ static void xen_be_frontend_changed(struct XenLegacyDevice *xendev,
*/
static int xen_be_try_setup(struct XenLegacyDevice *xendev)
{
- char token[XEN_BUFSIZE];
int be_state;
if (xenstore_read_be_int(xendev, "state", &be_state) == -1) {
@@ -401,8 +326,9 @@ static int xen_be_try_setup(struct XenLegacyDevice *xendev)
}
/* setup frontend watch */
- snprintf(token, sizeof(token), "fe:%p", xendev);
- if (!xs_watch(xenstore, xendev->fe, token)) {
+ xendev->watch = qemu_xen_xs_watch(xenstore, xendev->fe, xenstore_update_fe,
+ xendev);
+ if (!xendev->watch) {
xen_pv_printf(xendev, 0, "watching frontend path (%s) failed\n",
xendev->fe);
return -1;
@@ -466,7 +392,7 @@ static int xen_be_try_initialise(struct XenLegacyDevice *xendev)
}
if (xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV) {
- xendev->gnttabdev = xengnttab_open(NULL, 0);
+ xendev->gnttabdev = qemu_xen_gnttab_open();
if (xendev->gnttabdev == NULL) {
xen_pv_printf(NULL, 0, "can't open gnttab device\n");
return -1;
@@ -524,7 +450,7 @@ static void xen_be_disconnect(struct XenLegacyDevice *xendev,
xendev->ops->disconnect(xendev);
}
if (xendev->gnttabdev) {
- xengnttab_close(xendev->gnttabdev);
+ qemu_xen_gnttab_close(xendev->gnttabdev);
xendev->gnttabdev = NULL;
}
if (xendev->be_state != state) {
@@ -591,46 +517,20 @@ void xen_be_check_state(struct XenLegacyDevice *xendev)
/* ------------------------------------------------------------- */
-static int xenstore_scan(const char *type, int dom, struct XenDevOps *ops)
-{
- struct XenLegacyDevice *xendev;
- char path[XEN_BUFSIZE], token[XEN_BUFSIZE];
- char **dev = NULL;
- unsigned int cdev, j;
-
- /* setup watch */
- snprintf(token, sizeof(token), "be:%p:%d:%p", type, dom, ops);
- snprintf(path, sizeof(path), "backend/%s/%d", type, dom);
- if (!xs_watch(xenstore, path, token)) {
- xen_pv_printf(NULL, 0, "xen be: watching backend path (%s) failed\n",
- path);
- return -1;
- }
-
- /* look for backends */
- dev = xs_directory(xenstore, 0, path, &cdev);
- if (!dev) {
- return 0;
- }
- for (j = 0; j < cdev; j++) {
- xendev = xen_be_get_xendev(type, dom, atoi(dev[j]), ops);
- if (xendev == NULL) {
- continue;
- }
- xen_be_check_state(xendev);
- }
- free(dev);
- return 0;
-}
+struct xenstore_be {
+ const char *type;
+ int dom;
+ struct XenDevOps *ops;
+};
-void xenstore_update_be(char *watch, char *type, int dom,
- struct XenDevOps *ops)
+static void xenstore_update_be(void *opaque, const char *watch)
{
+ struct xenstore_be *be = opaque;
struct XenLegacyDevice *xendev;
char path[XEN_BUFSIZE], *bepath;
unsigned int len, dev;
- len = snprintf(path, sizeof(path), "backend/%s/%d", type, dom);
+ len = snprintf(path, sizeof(path), "backend/%s/%d", be->type, be->dom);
if (strncmp(path, watch, len) != 0) {
return;
}
@@ -644,9 +544,9 @@ void xenstore_update_be(char *watch, char *type, int dom,
return;
}
- xendev = xen_be_get_xendev(type, dom, dev, ops);
+ xendev = xen_be_get_xendev(be->type, be->dom, dev, be->ops);
if (xendev != NULL) {
- bepath = xs_read(xenstore, 0, xendev->be, &len);
+ bepath = qemu_xen_xs_read(xenstore, 0, xendev->be, &len);
if (bepath == NULL) {
xen_pv_del_xendev(xendev);
} else {
@@ -657,23 +557,41 @@ void xenstore_update_be(char *watch, char *type, int dom,
}
}
-void xenstore_update_fe(char *watch, struct XenLegacyDevice *xendev)
+static int xenstore_scan(const char *type, int dom, struct XenDevOps *ops)
{
- char *node;
- unsigned int len;
+ struct XenLegacyDevice *xendev;
+ char path[XEN_BUFSIZE];
+ struct xenstore_be *be = g_new0(struct xenstore_be, 1);
+ char **dev = NULL;
+ unsigned int cdev, j;
- len = strlen(xendev->fe);
- if (strncmp(xendev->fe, watch, len) != 0) {
- return;
- }
- if (watch[len] != '/') {
- return;
+ /* setup watch */
+ be->type = type;
+ be->dom = dom;
+ be->ops = ops;
+ snprintf(path, sizeof(path), "backend/%s/%d", type, dom);
+ if (!qemu_xen_xs_watch(xenstore, path, xenstore_update_be, be)) {
+ xen_pv_printf(NULL, 0, "xen be: watching backend path (%s) failed\n",
+ path);
+ return -1;
}
- node = watch + len + 1;
- xen_be_frontend_changed(xendev, node);
- xen_be_check_state(xendev);
+ /* look for backends */
+ dev = qemu_xen_xs_directory(xenstore, 0, path, &cdev);
+ if (!dev) {
+ return 0;
+ }
+ for (j = 0; j < cdev; j++) {
+ xendev = xen_be_get_xendev(type, dom, atoi(dev[j]), ops);
+ if (xendev == NULL) {
+ continue;
+ }
+ xen_be_check_state(xendev);
+ }
+ free(dev);
+ return 0;
}
+
/* -------------------------------------------------------------------- */
static void xen_set_dynamic_sysbus(void)
@@ -687,29 +605,17 @@ static void xen_set_dynamic_sysbus(void)
void xen_be_init(void)
{
- xengnttab_handle *gnttabdev;
-
- xenstore = xs_daemon_open();
+ xenstore = qemu_xen_xs_open();
if (!xenstore) {
xen_pv_printf(NULL, 0, "can't connect to xenstored\n");
exit(1);
}
- qemu_set_fd_handler(xs_fileno(xenstore), xenstore_update, NULL, NULL);
-
- if (xen_xc == NULL || xen_fmem == NULL) {
+ if (xen_evtchn_ops == NULL || xen_gnttab_ops == NULL) {
xen_pv_printf(NULL, 0, "Xen operations not set up\n");
exit(1);
}
- gnttabdev = xengnttab_open(NULL, 0);
- if (gnttabdev != NULL) {
- if (xengnttab_grant_copy(gnttabdev, 0, NULL) == 0) {
- xen_feature_grant_copy = true;
- }
- xengnttab_close(gnttabdev);
- }
-
xen_sysdev = qdev_new(TYPE_XENSYSDEV);
sysbus_realize_and_unref(SYS_BUS_DEVICE(xen_sysdev), &error_fatal);
xen_sysbus = qbus_new(TYPE_XENSYSBUS, xen_sysdev, "xen-sysbus");
@@ -751,14 +657,14 @@ int xen_be_bind_evtchn(struct XenLegacyDevice *xendev)
if (xendev->local_port != -1) {
return 0;
}
- xendev->local_port = xenevtchn_bind_interdomain
+ xendev->local_port = qemu_xen_evtchn_bind_interdomain
(xendev->evtchndev, xendev->dom, xendev->remote_port);
if (xendev->local_port == -1) {
xen_pv_printf(xendev, 0, "xenevtchn_bind_interdomain failed\n");
return -1;
}
xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port);
- qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev),
+ qemu_set_fd_handler(qemu_xen_evtchn_fd(xendev->evtchndev),
xen_pv_evtchn_event, NULL, xendev);
return 0;
}
diff --git a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c
new file mode 100644
index 0000000000..4b78fbf4bd
--- /dev/null
+++ b/hw/xen/xen-operations.c
@@ -0,0 +1,478 @@
+/*
+ * QEMU Xen backend support: Operations for true Xen
+ *
+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/uuid.h"
+#include "qapi/error.h"
+
+#include "hw/xen/xen_native.h"
+#include "hw/xen/xen_backend_ops.h"
+
+/*
+ * If we have new enough libxenctrl then we do not want/need these compat
+ * interfaces, despite what the user supplied cflags might say. They
+ * must be undefined before including xenctrl.h
+ */
+#undef XC_WANT_COMPAT_EVTCHN_API
+#undef XC_WANT_COMPAT_GNTTAB_API
+#undef XC_WANT_COMPAT_MAP_FOREIGN_API
+
+#include <xenctrl.h>
+
+/*
+ * We don't support Xen prior to 4.2.0.
+ */
+
+/* Xen 4.2 through 4.6 */
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701
+
+typedef xc_evtchn xenevtchn_handle;
+typedef evtchn_port_or_error_t xenevtchn_port_or_error_t;
+
+#define xenevtchn_open(l, f) xc_evtchn_open(l, f)
+#define xenevtchn_close(h) xc_evtchn_close(h)
+#define xenevtchn_fd(h) xc_evtchn_fd(h)
+#define xenevtchn_pending(h) xc_evtchn_pending(h)
+#define xenevtchn_notify(h, p) xc_evtchn_notify(h, p)
+#define xenevtchn_bind_interdomain(h, d, p) xc_evtchn_bind_interdomain(h, d, p)
+#define xenevtchn_unmask(h, p) xc_evtchn_unmask(h, p)
+#define xenevtchn_unbind(h, p) xc_evtchn_unbind(h, p)
+
+typedef xc_gnttab xengnttab_handle;
+
+#define xengnttab_open(l, f) xc_gnttab_open(l, f)
+#define xengnttab_close(h) xc_gnttab_close(h)
+#define xengnttab_set_max_grants(h, n) xc_gnttab_set_max_grants(h, n)
+#define xengnttab_map_grant_ref(h, d, r, p) xc_gnttab_map_grant_ref(h, d, r, p)
+#define xengnttab_unmap(h, a, n) xc_gnttab_munmap(h, a, n)
+#define xengnttab_map_grant_refs(h, c, d, r, p) \
+ xc_gnttab_map_grant_refs(h, c, d, r, p)
+#define xengnttab_map_domain_grant_refs(h, c, d, r, p) \
+ xc_gnttab_map_domain_grant_refs(h, c, d, r, p)
+
+typedef xc_interface xenforeignmemory_handle;
+
+#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */
+
+#include <xenevtchn.h>
+#include <xengnttab.h>
+#include <xenforeignmemory.h>
+
+#endif
+
+/* Xen before 4.8 */
+
+static int libxengnttab_fallback_grant_copy(xengnttab_handle *xgt,
+ bool to_domain, uint32_t domid,
+ XenGrantCopySegment segs[],
+ unsigned int nr_segs, Error **errp)
+{
+ uint32_t *refs = g_new(uint32_t, nr_segs);
+ int prot = to_domain ? PROT_WRITE : PROT_READ;
+ void *map;
+ unsigned int i;
+ int rc = 0;
+
+ for (i = 0; i < nr_segs; i++) {
+ XenGrantCopySegment *seg = &segs[i];
+
+ refs[i] = to_domain ? seg->dest.foreign.ref :
+ seg->source.foreign.ref;
+ }
+ map = xengnttab_map_domain_grant_refs(xgt, nr_segs, domid, refs, prot);
+ if (!map) {
+ if (errp) {
+ error_setg_errno(errp, errno,
+ "xengnttab_map_domain_grant_refs failed");
+ }
+ rc = -errno;
+ goto done;
+ }
+
+ for (i = 0; i < nr_segs; i++) {
+ XenGrantCopySegment *seg = &segs[i];
+ void *page = map + (i * XEN_PAGE_SIZE);
+
+ if (to_domain) {
+ memcpy(page + seg->dest.foreign.offset, seg->source.virt,
+ seg->len);
+ } else {
+ memcpy(seg->dest.virt, page + seg->source.foreign.offset,
+ seg->len);
+ }
+ }
+
+ if (xengnttab_unmap(xgt, map, nr_segs)) {
+ if (errp) {
+ error_setg_errno(errp, errno, "xengnttab_unmap failed");
+ }
+ rc = -errno;
+ }
+
+done:
+ g_free(refs);
+ return rc;
+}
+
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40800
+
+static int libxengnttab_backend_grant_copy(xengnttab_handle *xgt,
+ bool to_domain, uint32_t domid,
+ XenGrantCopySegment *segs,
+ uint32_t nr_segs, Error **errp)
+{
+ xengnttab_grant_copy_segment_t *xengnttab_segs;
+ unsigned int i;
+ int rc;
+
+ xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs);
+
+ for (i = 0; i < nr_segs; i++) {
+ XenGrantCopySegment *seg = &segs[i];
+ xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i];
+
+ if (to_domain) {
+ xengnttab_seg->flags = GNTCOPY_dest_gref;
+ xengnttab_seg->dest.foreign.domid = domid;
+ xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref;
+ xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset;
+ xengnttab_seg->source.virt = seg->source.virt;
+ } else {
+ xengnttab_seg->flags = GNTCOPY_source_gref;
+ xengnttab_seg->source.foreign.domid = domid;
+ xengnttab_seg->source.foreign.ref = seg->source.foreign.ref;
+ xengnttab_seg->source.foreign.offset =
+ seg->source.foreign.offset;
+ xengnttab_seg->dest.virt = seg->dest.virt;
+ }
+
+ xengnttab_seg->len = seg->len;
+ }
+
+ if (xengnttab_grant_copy(xgt, nr_segs, xengnttab_segs)) {
+ if (errp) {
+ error_setg_errno(errp, errno, "xengnttab_grant_copy failed");
+ }
+ rc = -errno;
+ goto done;
+ }
+
+ rc = 0;
+ for (i = 0; i < nr_segs; i++) {
+ xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i];
+
+ if (xengnttab_seg->status != GNTST_okay) {
+ if (errp) {
+ error_setg(errp, "xengnttab_grant_copy seg[%u] failed", i);
+ }
+ rc = -EIO;
+ break;
+ }
+ }
+
+done:
+ g_free(xengnttab_segs);
+ return rc;
+}
+#endif
+
+static xenevtchn_handle *libxenevtchn_backend_open(void)
+{
+ return xenevtchn_open(NULL, 0);
+}
+
+struct evtchn_backend_ops libxenevtchn_backend_ops = {
+ .open = libxenevtchn_backend_open,
+ .close = xenevtchn_close,
+ .bind_interdomain = xenevtchn_bind_interdomain,
+ .unbind = xenevtchn_unbind,
+ .get_fd = xenevtchn_fd,
+ .notify = xenevtchn_notify,
+ .unmask = xenevtchn_unmask,
+ .pending = xenevtchn_pending,
+};
+
+static xengnttab_handle *libxengnttab_backend_open(void)
+{
+ return xengnttab_open(NULL, 0);
+}
+
+static int libxengnttab_backend_unmap(xengnttab_handle *xgt,
+ void *start_address, uint32_t *refs,
+ uint32_t count)
+{
+ return xengnttab_unmap(xgt, start_address, count);
+}
+
+
+static struct gnttab_backend_ops libxengnttab_backend_ops = {
+ .features = XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE,
+ .open = libxengnttab_backend_open,
+ .close = xengnttab_close,
+ .grant_copy = libxengnttab_fallback_grant_copy,
+ .set_max_grants = xengnttab_set_max_grants,
+ .map_refs = xengnttab_map_domain_grant_refs,
+ .unmap = libxengnttab_backend_unmap,
+};
+
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701
+
+static void *libxenforeignmem_backend_map(uint32_t dom, void *addr, int prot,
+ size_t pages, xen_pfn_t *pfns,
+ int *errs)
+{
+ if (errs) {
+ return xc_map_foreign_bulk(xen_xc, dom, prot, pfns, errs, pages);
+ } else {
+ return xc_map_foreign_pages(xen_xc, dom, prot, pfns, pages);
+ }
+}
+
+static int libxenforeignmem_backend_unmap(void *addr, size_t pages)
+{
+ return munmap(addr, pages * XC_PAGE_SIZE);
+}
+
+#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */
+
+static void *libxenforeignmem_backend_map(uint32_t dom, void *addr, int prot,
+ size_t pages, xen_pfn_t *pfns,
+ int *errs)
+{
+ return xenforeignmemory_map2(xen_fmem, dom, addr, prot, 0, pages, pfns,
+ errs);
+}
+
+static int libxenforeignmem_backend_unmap(void *addr, size_t pages)
+{
+ return xenforeignmemory_unmap(xen_fmem, addr, pages);
+}
+
+#endif
+
+struct foreignmem_backend_ops libxenforeignmem_backend_ops = {
+ .map = libxenforeignmem_backend_map,
+ .unmap = libxenforeignmem_backend_unmap,
+};
+
+struct qemu_xs_handle {
+ struct xs_handle *xsh;
+ NotifierList notifiers;
+};
+
+static void watch_event(void *opaque)
+{
+ struct qemu_xs_handle *h = opaque;
+
+ for (;;) {
+ char **v = xs_check_watch(h->xsh);
+
+ if (!v) {
+ break;
+ }
+
+ notifier_list_notify(&h->notifiers, v);
+ free(v);
+ }
+}
+
+static struct qemu_xs_handle *libxenstore_open(void)
+{
+ struct xs_handle *xsh = xs_open(0);
+ struct qemu_xs_handle *h;
+
+ if (!xsh) {
+ return NULL;
+ }
+
+ h = g_new0(struct qemu_xs_handle, 1);
+ h->xsh = xsh;
+
+ notifier_list_init(&h->notifiers);
+ qemu_set_fd_handler(xs_fileno(h->xsh), watch_event, NULL, h);
+
+ return h;
+}
+
+static void libxenstore_close(struct qemu_xs_handle *h)
+{
+ g_assert(notifier_list_empty(&h->notifiers));
+ qemu_set_fd_handler(xs_fileno(h->xsh), NULL, NULL, NULL);
+ xs_close(h->xsh);
+ g_free(h);
+}
+
+static char *libxenstore_get_domain_path(struct qemu_xs_handle *h,
+ unsigned int domid)
+{
+ return xs_get_domain_path(h->xsh, domid);
+}
+
+static char **libxenstore_directory(struct qemu_xs_handle *h,
+ xs_transaction_t t, const char *path,
+ unsigned int *num)
+{
+ return xs_directory(h->xsh, t, path, num);
+}
+
+static void *libxenstore_read(struct qemu_xs_handle *h, xs_transaction_t t,
+ const char *path, unsigned int *len)
+{
+ return xs_read(h->xsh, t, path, len);
+}
+
+static bool libxenstore_write(struct qemu_xs_handle *h, xs_transaction_t t,
+ const char *path, const void *data,
+ unsigned int len)
+{
+ return xs_write(h->xsh, t, path, data, len);
+}
+
+static bool libxenstore_create(struct qemu_xs_handle *h, xs_transaction_t t,
+ unsigned int owner, unsigned int domid,
+ unsigned int perms, const char *path)
+{
+ struct xs_permissions perms_list[] = {
+ {
+ .id = owner,
+ .perms = XS_PERM_NONE,
+ },
+ {
+ .id = domid,
+ .perms = perms,
+ },
+ };
+
+ if (!xs_mkdir(h->xsh, t, path)) {
+ return false;
+ }
+
+ return xs_set_permissions(h->xsh, t, path, perms_list,
+ ARRAY_SIZE(perms_list));
+}
+
+static bool libxenstore_destroy(struct qemu_xs_handle *h, xs_transaction_t t,
+ const char *path)
+{
+ return xs_rm(h->xsh, t, path);
+}
+
+struct qemu_xs_watch {
+ char *path;
+ char *token;
+ xs_watch_fn fn;
+ void *opaque;
+ Notifier notifier;
+};
+
+static void watch_notify(Notifier *n, void *data)
+{
+ struct qemu_xs_watch *w = container_of(n, struct qemu_xs_watch, notifier);
+ const char **v = data;
+
+ if (!strcmp(w->token, v[XS_WATCH_TOKEN])) {
+ w->fn(w->opaque, v[XS_WATCH_PATH]);
+ }
+}
+
+static struct qemu_xs_watch *new_watch(const char *path, xs_watch_fn fn,
+ void *opaque)
+{
+ struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1);
+ QemuUUID uuid;
+
+ qemu_uuid_generate(&uuid);
+
+ w->token = qemu_uuid_unparse_strdup(&uuid);
+ w->path = g_strdup(path);
+ w->fn = fn;
+ w->opaque = opaque;
+ w->notifier.notify = watch_notify;
+
+ return w;
+}
+
+static void free_watch(struct qemu_xs_watch *w)
+{
+ g_free(w->token);
+ g_free(w->path);
+
+ g_free(w);
+}
+
+static struct qemu_xs_watch *libxenstore_watch(struct qemu_xs_handle *h,
+ const char *path, xs_watch_fn fn,
+ void *opaque)
+{
+ struct qemu_xs_watch *w = new_watch(path, fn, opaque);
+
+ notifier_list_add(&h->notifiers, &w->notifier);
+
+ if (!xs_watch(h->xsh, path, w->token)) {
+ notifier_remove(&w->notifier);
+ free_watch(w);
+ return NULL;
+ }
+
+ return w;
+}
+
+static void libxenstore_unwatch(struct qemu_xs_handle *h,
+ struct qemu_xs_watch *w)
+{
+ xs_unwatch(h->xsh, w->path, w->token);
+ notifier_remove(&w->notifier);
+ free_watch(w);
+}
+
+static xs_transaction_t libxenstore_transaction_start(struct qemu_xs_handle *h)
+{
+ return xs_transaction_start(h->xsh);
+}
+
+static bool libxenstore_transaction_end(struct qemu_xs_handle *h,
+ xs_transaction_t t, bool abort)
+{
+ return xs_transaction_end(h->xsh, t, abort);
+}
+
+struct xenstore_backend_ops libxenstore_backend_ops = {
+ .open = libxenstore_open,
+ .close = libxenstore_close,
+ .get_domain_path = libxenstore_get_domain_path,
+ .directory = libxenstore_directory,
+ .read = libxenstore_read,
+ .write = libxenstore_write,
+ .create = libxenstore_create,
+ .destroy = libxenstore_destroy,
+ .watch = libxenstore_watch,
+ .unwatch = libxenstore_unwatch,
+ .transaction_start = libxenstore_transaction_start,
+ .transaction_end = libxenstore_transaction_end,
+};
+
+void setup_xen_backend_ops(void)
+{
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40800
+ xengnttab_handle *xgt = xengnttab_open(NULL, 0);
+
+ if (xgt) {
+ if (xengnttab_grant_copy(xgt, 0, NULL) == 0) {
+ libxengnttab_backend_ops.grant_copy = libxengnttab_backend_grant_copy;
+ }
+ xengnttab_close(xgt);
+ }
+#endif
+ xen_evtchn_ops = &libxenevtchn_backend_ops;
+ xen_gnttab_ops = &libxengnttab_backend_ops;
+ xen_foreignmem_ops = &libxenforeignmem_backend_ops;
+ xen_xenstore_ops = &libxenstore_backend_ops;
+}
diff --git a/hw/xen/xen_devconfig.c b/hw/xen/xen_devconfig.c
index 46ee4a7f02..9b7304e544 100644
--- a/hw/xen/xen_devconfig.c
+++ b/hw/xen/xen_devconfig.c
@@ -11,11 +11,11 @@ static int xen_config_dev_dirs(const char *ftype, const char *btype, int vdev,
{
char *dom;
- dom = xs_get_domain_path(xenstore, xen_domid);
+ dom = qemu_xen_xs_get_domain_path(xenstore, xen_domid);
snprintf(fe, len, "%s/device/%s/%d", dom, ftype, vdev);
free(dom);
- dom = xs_get_domain_path(xenstore, 0);
+ dom = qemu_xen_xs_get_domain_path(xenstore, 0);
snprintf(be, len, "%s/backend/%s/%d/%d", dom, btype, xen_domid, vdev);
free(dom);
diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index 85c93cffcf..2d33d178ad 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -60,9 +60,9 @@
#include "hw/pci/pci_bus.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
+#include "xen_pt.h"
#include "hw/xen/xen.h"
#include "hw/xen/xen-legacy-backend.h"
-#include "xen_pt.h"
#include "qemu/range.h"
static bool has_igd_gfx_passthru;
diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h
index e184699740..b20744f7c7 100644
--- a/hw/xen/xen_pt.h
+++ b/hw/xen/xen_pt.h
@@ -1,7 +1,7 @@
#ifndef XEN_PT_H
#define XEN_PT_H
-#include "hw/xen/xen_common.h"
+#include "hw/xen/xen_native.h"
#include "xen-host-pci-device.h"
#include "qom/object.h"
diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index 8b9b554352..2b8680b112 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -15,8 +15,8 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/timer.h"
-#include "hw/xen/xen-legacy-backend.h"
#include "xen_pt.h"
+#include "hw/xen/xen-legacy-backend.h"
#define XEN_PT_MERGE_VALUE(value, data, val_mask) \
(((value) & (val_mask)) | ((data) & ~(val_mask)))
diff --git a/hw/xen/xen_pt_graphics.c b/hw/xen/xen_pt_graphics.c
index f303f67c9c..0aed3bb6fd 100644
--- a/hw/xen/xen_pt_graphics.c
+++ b/hw/xen/xen_pt_graphics.c
@@ -5,7 +5,6 @@
#include "qapi/error.h"
#include "xen_pt.h"
#include "xen-host-pci-device.h"
-#include "hw/xen/xen-legacy-backend.h"
static unsigned long igd_guest_opregion;
static unsigned long igd_host_opregion;
diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c
index b71563f98a..09cca4eecb 100644
--- a/hw/xen/xen_pt_msi.c
+++ b/hw/xen/xen_pt_msi.c
@@ -11,9 +11,9 @@
#include "qemu/osdep.h"
-#include "hw/xen/xen-legacy-backend.h"
-#include "xen_pt.h"
#include "hw/i386/apic-msidef.h"
+#include "xen_pt.h"
+#include "hw/xen/xen-legacy-backend.h"
#define XEN_PT_AUTO_ASSIGN -1
diff --git a/hw/xen/xen_pvdev.c b/hw/xen/xen_pvdev.c
index 1a5177b354..be1504b82c 100644
--- a/hw/xen/xen_pvdev.c
+++ b/hw/xen/xen_pvdev.c
@@ -54,31 +54,17 @@ void xen_config_cleanup(void)
struct xs_dirs *d;
QTAILQ_FOREACH(d, &xs_cleanup, list) {
- xs_rm(xenstore, 0, d->xs_dir);
+ qemu_xen_xs_destroy(xenstore, 0, d->xs_dir);
}
}
int xenstore_mkdir(char *path, int p)
{
- struct xs_permissions perms[2] = {
- {
- .id = 0, /* set owner: dom0 */
- }, {
- .id = xen_domid,
- .perms = p,
- }
- };
-
- if (!xs_mkdir(xenstore, 0, path)) {
+ if (!qemu_xen_xs_create(xenstore, 0, 0, xen_domid, p, path)) {
xen_pv_printf(NULL, 0, "xs_mkdir %s: failed\n", path);
return -1;
}
xenstore_cleanup_dir(g_strdup(path));
-
- if (!xs_set_permissions(xenstore, 0, path, perms, 2)) {
- xen_pv_printf(NULL, 0, "xs_set_permissions %s: failed\n", path);
- return -1;
- }
return 0;
}
@@ -87,7 +73,7 @@ int xenstore_write_str(const char *base, const char *node, const char *val)
char abspath[XEN_BUFSIZE];
snprintf(abspath, sizeof(abspath), "%s/%s", base, node);
- if (!xs_write(xenstore, 0, abspath, val, strlen(val))) {
+ if (!qemu_xen_xs_write(xenstore, 0, abspath, val, strlen(val))) {
return -1;
}
return 0;
@@ -100,7 +86,7 @@ char *xenstore_read_str(const char *base, const char *node)
char *str, *ret = NULL;
snprintf(abspath, sizeof(abspath), "%s/%s", base, node);
- str = xs_read(xenstore, 0, abspath, &len);
+ str = qemu_xen_xs_read(xenstore, 0, abspath, &len);
if (str != NULL) {
/* move to qemu-allocated memory to make sure
* callers can savely g_free() stuff. */
@@ -152,29 +138,6 @@ int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval)
return rc;
}
-void xenstore_update(void *unused)
-{
- char **vec = NULL;
- intptr_t type, ops, ptr;
- unsigned int dom, count;
-
- vec = xs_read_watch(xenstore, &count);
- if (vec == NULL) {
- goto cleanup;
- }
-
- if (sscanf(vec[XS_WATCH_TOKEN], "be:%" PRIxPTR ":%d:%" PRIxPTR,
- &type, &dom, &ops) == 3) {
- xenstore_update_be(vec[XS_WATCH_PATH], (void *)type, dom, (void*)ops);
- }
- if (sscanf(vec[XS_WATCH_TOKEN], "fe:%" PRIxPTR, &ptr) == 1) {
- xenstore_update_fe(vec[XS_WATCH_PATH], (void *)ptr);
- }
-
-cleanup:
- free(vec);
-}
-
const char *xenbus_strstate(enum xenbus_state state)
{
static const char *const name[] = {
@@ -238,14 +201,14 @@ void xen_pv_evtchn_event(void *opaque)
struct XenLegacyDevice *xendev = opaque;
evtchn_port_t port;
- port = xenevtchn_pending(xendev->evtchndev);
+ port = qemu_xen_evtchn_pending(xendev->evtchndev);
if (port != xendev->local_port) {
xen_pv_printf(xendev, 0,
"xenevtchn_pending returned %d (expected %d)\n",
port, xendev->local_port);
return;
}
- xenevtchn_unmask(xendev->evtchndev, port);
+ qemu_xen_evtchn_unmask(xendev->evtchndev, port);
if (xendev->ops->event) {
xendev->ops->event(xendev);
@@ -257,15 +220,15 @@ void xen_pv_unbind_evtchn(struct XenLegacyDevice *xendev)
if (xendev->local_port == -1) {
return;
}
- qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev), NULL, NULL, NULL);
- xenevtchn_unbind(xendev->evtchndev, xendev->local_port);
+ qemu_set_fd_handler(qemu_xen_evtchn_fd(xendev->evtchndev), NULL, NULL, NULL);
+ qemu_xen_evtchn_unbind(xendev->evtchndev, xendev->local_port);
xen_pv_printf(xendev, 2, "unbind evtchn port %d\n", xendev->local_port);
xendev->local_port = -1;
}
int xen_pv_send_notify(struct XenLegacyDevice *xendev)
{
- return xenevtchn_notify(xendev->evtchndev, xendev->local_port);
+ return qemu_xen_evtchn_notify(xendev->evtchndev, xendev->local_port);
}
/* ------------------------------------------------------------- */
@@ -299,17 +262,15 @@ void xen_pv_del_xendev(struct XenLegacyDevice *xendev)
}
if (xendev->fe) {
- char token[XEN_BUFSIZE];
- snprintf(token, sizeof(token), "fe:%p", xendev);
- xs_unwatch(xenstore, xendev->fe, token);
+ qemu_xen_xs_unwatch(xenstore, xendev->watch);
g_free(xendev->fe);
}
if (xendev->evtchndev != NULL) {
- xenevtchn_close(xendev->evtchndev);
+ qemu_xen_evtchn_close(xendev->evtchndev);
}
if (xendev->gnttabdev != NULL) {
- xengnttab_close(xendev->gnttabdev);
+ qemu_xen_gnttab_close(xendev->gnttabdev);
}
QTAILQ_REMOVE(&xendevs, xendev, next);
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index dd9a7f6461..da13357bb8 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -85,7 +85,7 @@ extern AioWait global_aio_wait;
/* Increment wait_->num_waiters before evaluating cond. */ \
qatomic_inc(&wait_->num_waiters); \
/* Paired with smp_mb in aio_wait_kick(). */ \
- smp_mb(); \
+ smp_mb__after_rmw(); \
if (ctx_ && in_aio_context_home_thread(ctx_)) { \
while ((cond)) { \
aio_poll(ctx_, true); \
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index 0b337a036c..da19ae1546 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -18,10 +18,8 @@
#define HW_I386_X86_H
#include "exec/hwaddr.h"
-#include "qemu/notify.h"
#include "hw/boards.h"
-#include "hw/nmi.h"
#include "hw/intc/ioapic.h"
#include "hw/isa/isa.h"
#include "qom/object.h"
diff --git a/include/hw/intc/mips_gic.h b/include/hw/intc/mips_gic.h
index eeb136e261..5e4c71edd4 100644
--- a/include/hw/intc/mips_gic.h
+++ b/include/hw/intc/mips_gic.h
@@ -211,8 +211,8 @@ struct MIPSGICState {
/* GIC VP Timer */
MIPSGICTimerState *gic_timer;
- int32_t num_vps;
- int32_t num_irq;
+ uint32_t num_vps;
+ uint32_t num_irq;
};
#endif /* MIPS_GIC_H */
diff --git a/include/hw/isa/i8259_internal.h b/include/hw/isa/i8259_internal.h
index 155b098452..f9dcc4163e 100644
--- a/include/hw/isa/i8259_internal.h
+++ b/include/hw/isa/i8259_internal.h
@@ -61,6 +61,7 @@ struct PICCommonState {
uint8_t single_mode; /* true if slave pic is not initialized */
uint8_t elcr; /* PIIX edge/trigger selection*/
uint8_t elcr_mask;
+ uint8_t ltim; /* Edge/Level Bank Select (pre-PIIX, chip-wide) */
qemu_irq int_out[1];
uint32_t master; /* reflects /SP input pin */
uint32_t iobase;
diff --git a/include/hw/isa/vt82c686.h b/include/hw/isa/vt82c686.h
index e273cd38dc..da1722daf2 100644
--- a/include/hw/isa/vt82c686.h
+++ b/include/hw/isa/vt82c686.h
@@ -1,6 +1,8 @@
#ifndef HW_VT82C686_H
#define HW_VT82C686_H
+#include "hw/pci/pci_device.h"
+#include "audio/audio.h"
#define TYPE_VT82C686B_ISA "vt82c686b-isa"
#define TYPE_VT82C686B_USB_UHCI "vt82c686b-usb-uhci"
@@ -9,6 +11,29 @@
#define TYPE_VIA_IDE "via-ide"
#define TYPE_VIA_MC97 "via-mc97"
+typedef struct {
+ uint8_t stat;
+ uint8_t type;
+ uint32_t base;
+ uint32_t curr;
+ uint32_t addr;
+ uint32_t clen;
+} ViaAC97SGDChannel;
+
+OBJECT_DECLARE_SIMPLE_TYPE(ViaAC97State, VIA_AC97);
+
+struct ViaAC97State {
+ PCIDevice dev;
+ QEMUSoundCard card;
+ MemoryRegion sgd;
+ MemoryRegion fm;
+ MemoryRegion midi;
+ SWVoiceOut *vo;
+ ViaAC97SGDChannel aur;
+ uint16_t codec_regs[128];
+ uint32_t ac97_cmd;
+};
+
void via_isa_set_irq(PCIDevice *d, int n, int level);
#endif
diff --git a/include/hw/misc/mips_cmgcr.h b/include/hw/misc/mips_cmgcr.h
index 9fa58942d7..db4bf5f449 100644
--- a/include/hw/misc/mips_cmgcr.h
+++ b/include/hw/misc/mips_cmgcr.h
@@ -75,7 +75,7 @@ struct MIPSGCRState {
SysBusDevice parent_obj;
int32_t gcr_rev;
- int32_t num_vps;
+ uint32_t num_vps;
hwaddr gcr_base;
MemoryRegion iomem;
MemoryRegion *cpc_mr;
diff --git a/include/hw/misc/mips_itu.h b/include/hw/misc/mips_itu.h
index 50d961106d..35218b2d14 100644
--- a/include/hw/misc/mips_itu.h
+++ b/include/hw/misc/mips_itu.h
@@ -57,8 +57,8 @@ struct MIPSITUState {
SysBusDevice parent_obj;
/*< public >*/
- int32_t num_fifo;
- int32_t num_semaphores;
+ uint32_t num_fifo;
+ uint32_t num_semaphores;
/* ITC Storage */
ITCStorageCell *cell;
@@ -72,9 +72,8 @@ struct MIPSITUState {
uint64_t icr0;
/* SAAR */
- bool saar_present;
- void *saar;
-
+ uint64_t *saar;
+ MIPSCPU *cpu0;
};
/* Get ITC Configuration Tag memory region. */
diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h
index b3d26135c0..e5c474b26e 100644
--- a/include/hw/riscv/virt.h
+++ b/include/hw/riscv/virt.h
@@ -56,6 +56,10 @@ struct RISCVVirtState {
bool have_aclint;
RISCVVirtAIAType aia_type;
int aia_guests;
+ char *oem_id;
+ char *oem_table_id;
+ OnOffAuto acpi;
+ const MemMapEntry *memmap;
};
enum {
@@ -121,4 +125,6 @@ enum {
#define FDT_APLIC_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \
1 + FDT_APLIC_INT_CELLS)
+bool virt_is_acpi_enabled(RISCVVirtState *s);
+void virt_acpi_setup(RISCVVirtState *vms);
#endif
diff --git a/include/hw/xen/xen-bus-helper.h b/include/hw/xen/xen-bus-helper.h
index 8782f30550..d8dcc2f010 100644
--- a/include/hw/xen/xen-bus-helper.h
+++ b/include/hw/xen/xen-bus-helper.h
@@ -8,40 +8,40 @@
#ifndef HW_XEN_BUS_HELPER_H
#define HW_XEN_BUS_HELPER_H
-#include "hw/xen/xen_common.h"
+#include "hw/xen/xen_backend_ops.h"
const char *xs_strstate(enum xenbus_state state);
-void xs_node_create(struct xs_handle *xsh, xs_transaction_t tid,
- const char *node, struct xs_permissions perms[],
- unsigned int nr_perms, Error **errp);
-void xs_node_destroy(struct xs_handle *xsh, xs_transaction_t tid,
+void xs_node_create(struct qemu_xs_handle *h, xs_transaction_t tid,
+ const char *node, unsigned int owner, unsigned int domid,
+ unsigned int perms, Error **errp);
+void xs_node_destroy(struct qemu_xs_handle *h, xs_transaction_t tid,
const char *node, Error **errp);
/* Write to node/key unless node is empty, in which case write to key */
-void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid,
+void xs_node_vprintf(struct qemu_xs_handle *h, xs_transaction_t tid,
const char *node, const char *key, Error **errp,
const char *fmt, va_list ap)
G_GNUC_PRINTF(6, 0);
-void xs_node_printf(struct xs_handle *xsh, xs_transaction_t tid,
+void xs_node_printf(struct qemu_xs_handle *h, xs_transaction_t tid,
const char *node, const char *key, Error **errp,
const char *fmt, ...)
G_GNUC_PRINTF(6, 7);
/* Read from node/key unless node is empty, in which case read from key */
-int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid,
+int xs_node_vscanf(struct qemu_xs_handle *h, xs_transaction_t tid,
const char *node, const char *key, Error **errp,
const char *fmt, va_list ap)
G_GNUC_SCANF(6, 0);
-int xs_node_scanf(struct xs_handle *xsh, xs_transaction_t tid,
+int xs_node_scanf(struct qemu_xs_handle *h, xs_transaction_t tid,
const char *node, const char *key, Error **errp,
const char *fmt, ...)
G_GNUC_SCANF(6, 7);
/* Watch node/key unless node is empty, in which case watch key */
-void xs_node_watch(struct xs_handle *xsh, const char *node, const char *key,
- char *token, Error **errp);
-void xs_node_unwatch(struct xs_handle *xsh, const char *node, const char *key,
- const char *token, Error **errp);
+struct qemu_xs_watch *xs_node_watch(struct qemu_xs_handle *h, const char *node,
+ const char *key, xs_watch_fn fn,
+ void *opaque, Error **errp);
+void xs_node_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w);
#endif /* HW_XEN_BUS_HELPER_H */
diff --git a/include/hw/xen/xen-bus.h b/include/hw/xen/xen-bus.h
index 4d966a2dbb..f435898164 100644
--- a/include/hw/xen/xen-bus.h
+++ b/include/hw/xen/xen-bus.h
@@ -8,31 +8,25 @@
#ifndef HW_XEN_BUS_H
#define HW_XEN_BUS_H
-#include "hw/xen/xen_common.h"
+#include "hw/xen/xen_backend_ops.h"
#include "hw/sysbus.h"
#include "qemu/notify.h"
#include "qom/object.h"
-typedef void (*XenWatchHandler)(void *opaque);
-
-typedef struct XenWatchList XenWatchList;
-typedef struct XenWatch XenWatch;
typedef struct XenEventChannel XenEventChannel;
struct XenDevice {
DeviceState qdev;
domid_t frontend_id;
char *name;
- struct xs_handle *xsh;
- XenWatchList *watch_list;
+ struct qemu_xs_handle *xsh;
char *backend_path, *frontend_path;
enum xenbus_state backend_state, frontend_state;
Notifier exit;
- XenWatch *backend_state_watch, *frontend_state_watch;
+ struct qemu_xs_watch *backend_state_watch, *frontend_state_watch;
bool backend_online;
- XenWatch *backend_online_watch;
+ struct qemu_xs_watch *backend_online_watch;
xengnttab_handle *xgth;
- bool feature_grant_copy;
bool inactive;
QLIST_HEAD(, XenEventChannel) event_channels;
QLIST_ENTRY(XenDevice) list;
@@ -64,10 +58,9 @@ OBJECT_DECLARE_TYPE(XenDevice, XenDeviceClass, XEN_DEVICE)
struct XenBus {
BusState qbus;
domid_t backend_id;
- struct xs_handle *xsh;
- XenWatchList *watch_list;
+ struct qemu_xs_handle *xsh;
unsigned int backend_types;
- XenWatch **backend_watch;
+ struct qemu_xs_watch **backend_watch;
QLIST_HEAD(, XenDevice) inactive_devices;
};
@@ -102,7 +95,7 @@ void xen_device_set_max_grant_refs(XenDevice *xendev, unsigned int nr_refs,
void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs,
unsigned int nr_refs, int prot,
Error **errp);
-void xen_device_unmap_grant_refs(XenDevice *xendev, void *map,
+void xen_device_unmap_grant_refs(XenDevice *xendev, void *map, uint32_t *refs,
unsigned int nr_refs, Error **errp);
typedef struct XenDeviceGrantCopySegment {
diff --git a/include/hw/xen/xen-legacy-backend.h b/include/hw/xen/xen-legacy-backend.h
index e31cd3a068..6c307c5f2c 100644
--- a/include/hw/xen/xen-legacy-backend.h
+++ b/include/hw/xen/xen-legacy-backend.h
@@ -1,7 +1,7 @@
#ifndef HW_XEN_LEGACY_BACKEND_H
#define HW_XEN_LEGACY_BACKEND_H
-#include "hw/xen/xen_common.h"
+#include "hw/xen/xen_backend_ops.h"
#include "hw/xen/xen_pvdev.h"
#include "net/net.h"
#include "qom/object.h"
@@ -15,7 +15,7 @@ DECLARE_INSTANCE_CHECKER(XenLegacyDevice, XENBACKEND,
TYPE_XENBACKEND)
/* variables */
-extern struct xs_handle *xenstore;
+extern struct qemu_xs_handle *xenstore;
extern const char *xen_protocol;
extern DeviceState *xen_sysdev;
extern BusState *xen_sysbus;
@@ -30,9 +30,6 @@ int xenstore_write_be_int64(struct XenLegacyDevice *xendev, const char *node,
char *xenstore_read_be_str(struct XenLegacyDevice *xendev, const char *node);
int xenstore_read_be_int(struct XenLegacyDevice *xendev, const char *node,
int *ival);
-void xenstore_update_fe(char *watch, struct XenLegacyDevice *xendev);
-void xenstore_update_be(char *watch, char *type, int dom,
- struct XenDevOps *ops);
char *xenstore_read_fe_str(struct XenLegacyDevice *xendev, const char *node);
int xenstore_read_fe_int(struct XenLegacyDevice *xendev, const char *node,
int *ival);
@@ -51,18 +48,7 @@ void xen_be_set_max_grant_refs(struct XenLegacyDevice *xendev,
void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs,
unsigned int nr_refs, int prot);
void xen_be_unmap_grant_refs(struct XenLegacyDevice *xendev, void *ptr,
- unsigned int nr_refs);
-
-typedef struct XenGrantCopySegment {
- union {
- void *virt;
- struct {
- uint32_t ref;
- off_t offset;
- } foreign;
- } source, dest;
- size_t len;
-} XenGrantCopySegment;
+ uint32_t *refs, unsigned int nr_refs);
int xen_be_copy_grant_refs(struct XenLegacyDevice *xendev,
bool to_domain, XenGrantCopySegment segs[],
@@ -75,9 +61,9 @@ static inline void *xen_be_map_grant_ref(struct XenLegacyDevice *xendev,
}
static inline void xen_be_unmap_grant_ref(struct XenLegacyDevice *xendev,
- void *ptr)
+ void *ptr, uint32_t ref)
{
- return xen_be_unmap_grant_refs(xendev, ptr, 1);
+ return xen_be_unmap_grant_refs(xendev, ptr, &ref, 1);
}
/* actual backend drivers */
diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h
index 03983939f9..2bd8ec742d 100644
--- a/include/hw/xen/xen.h
+++ b/include/hw/xen/xen.h
@@ -8,15 +8,21 @@
#define QEMU_HW_XEN_H
/*
- * As a temporary measure while the headers are being untangled, define
- * __XEN_TOOLS__ here before any Xen headers are included. Otherwise, if
- * the Xen toolstack library headers are later included, they will find
- * some of the "internal" definitions missing and the build will fail. In
- * later commits, we'll end up with a rule that the native libraries have
- * to be included first, which will ensure that the libraries get the
- * version of Xen libraries that they expect.
+ * C files using Xen toolstack libraries will have included those headers
+ * already via xen_native.h, and having __XEN_TOOLS__ defined will have
+ * automatically set __XEN_INTERFACE_VERSION__ to the latest supported
+ * by the *system* Xen headers which were transitively included.
+ *
+ * C files which are part of the internal emulation, and which did not
+ * include xen_native.h, may need this defined so that the Xen headers
+ * imported to include/hw/xen/interface/ will expose the appropriate API
+ * version.
+ *
+ * This is why there's a rule that xen_native.h must be included first.
*/
-#define __XEN_TOOLS__ 1
+#ifndef __XEN_INTERFACE_VERSION__
+#define __XEN_INTERFACE_VERSION__ 0x00040e00
+#endif
#include "exec/cpu-common.h"
@@ -39,8 +45,6 @@ int xen_is_pirq_msi(uint32_t msi_data);
qemu_irq *xen_interrupt_controller_init(void);
-void xenstore_store_pv_console_info(int i, Chardev *chr);
-
void xen_register_framebuffer(struct MemoryRegion *mr);
#endif /* QEMU_HW_XEN_H */
diff --git a/include/hw/xen/xen_backend_ops.h b/include/hw/xen/xen_backend_ops.h
new file mode 100644
index 0000000000..90cca85f52
--- /dev/null
+++ b/include/hw/xen/xen_backend_ops.h
@@ -0,0 +1,408 @@
+/*
+ * QEMU Xen backend support
+ *
+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_XEN_BACKEND_OPS_H
+#define QEMU_XEN_BACKEND_OPS_H
+
+#include "hw/xen/xen.h"
+#include "hw/xen/interface/xen.h"
+#include "hw/xen/interface/io/xenbus.h"
+
+/*
+ * For the time being, these operations map fairly closely to the API of
+ * the actual Xen libraries, e.g. libxenevtchn. As we complete the migration
+ * from XenLegacyDevice back ends to the new XenDevice model, they may
+ * evolve to slightly higher-level APIs.
+ *
+ * The internal emulations do not emulate the Xen APIs entirely faithfully;
+ * only enough to be used by the Xen backend devices. For example, only one
+ * event channel can be bound to each handle, since that's sufficient for
+ * the device support (only the true Xen HVM backend uses more). And the
+ * behaviour of unmask() and pending() is different too because the device
+ * backends don't care.
+ */
+
+typedef struct xenevtchn_handle xenevtchn_handle;
+typedef int xenevtchn_port_or_error_t;
+typedef uint32_t evtchn_port_t;
+typedef uint16_t domid_t;
+typedef uint32_t grant_ref_t;
+
+#define XEN_PAGE_SHIFT 12
+#define XEN_PAGE_SIZE (1UL << XEN_PAGE_SHIFT)
+#define XEN_PAGE_MASK (~(XEN_PAGE_SIZE - 1))
+
+#ifndef xen_rmb
+#define xen_rmb() smp_rmb()
+#endif
+#ifndef xen_wmb
+#define xen_wmb() smp_wmb()
+#endif
+#ifndef xen_mb
+#define xen_mb() smp_mb()
+#endif
+
+struct evtchn_backend_ops {
+ xenevtchn_handle *(*open)(void);
+ int (*bind_interdomain)(xenevtchn_handle *xc, uint32_t domid,
+ evtchn_port_t guest_port);
+ int (*unbind)(xenevtchn_handle *xc, evtchn_port_t port);
+ int (*close)(struct xenevtchn_handle *xc);
+ int (*get_fd)(struct xenevtchn_handle *xc);
+ int (*notify)(struct xenevtchn_handle *xc, evtchn_port_t port);
+ int (*unmask)(struct xenevtchn_handle *xc, evtchn_port_t port);
+ int (*pending)(struct xenevtchn_handle *xc);
+};
+
+extern struct evtchn_backend_ops *xen_evtchn_ops;
+
+static inline xenevtchn_handle *qemu_xen_evtchn_open(void)
+{
+ if (!xen_evtchn_ops) {
+ return NULL;
+ }
+ return xen_evtchn_ops->open();
+}
+
+static inline int qemu_xen_evtchn_bind_interdomain(xenevtchn_handle *xc,
+ uint32_t domid,
+ evtchn_port_t guest_port)
+{
+ if (!xen_evtchn_ops) {
+ return -ENOSYS;
+ }
+ return xen_evtchn_ops->bind_interdomain(xc, domid, guest_port);
+}
+
+static inline int qemu_xen_evtchn_unbind(xenevtchn_handle *xc,
+ evtchn_port_t port)
+{
+ if (!xen_evtchn_ops) {
+ return -ENOSYS;
+ }
+ return xen_evtchn_ops->unbind(xc, port);
+}
+
+static inline int qemu_xen_evtchn_close(xenevtchn_handle *xc)
+{
+ if (!xen_evtchn_ops) {
+ return -ENOSYS;
+ }
+ return xen_evtchn_ops->close(xc);
+}
+
+static inline int qemu_xen_evtchn_fd(xenevtchn_handle *xc)
+{
+ if (!xen_evtchn_ops) {
+ return -ENOSYS;
+ }
+ return xen_evtchn_ops->get_fd(xc);
+}
+
+static inline int qemu_xen_evtchn_notify(xenevtchn_handle *xc,
+ evtchn_port_t port)
+{
+ if (!xen_evtchn_ops) {
+ return -ENOSYS;
+ }
+ return xen_evtchn_ops->notify(xc, port);
+}
+
+static inline int qemu_xen_evtchn_unmask(xenevtchn_handle *xc,
+ evtchn_port_t port)
+{
+ if (!xen_evtchn_ops) {
+ return -ENOSYS;
+ }
+ return xen_evtchn_ops->unmask(xc, port);
+}
+
+static inline int qemu_xen_evtchn_pending(xenevtchn_handle *xc)
+{
+ if (!xen_evtchn_ops) {
+ return -ENOSYS;
+ }
+ return xen_evtchn_ops->pending(xc);
+}
+
+typedef struct xengntdev_handle xengnttab_handle;
+
+typedef struct XenGrantCopySegment {
+ union {
+ void *virt;
+ struct {
+ uint32_t ref;
+ off_t offset;
+ } foreign;
+ } source, dest;
+ size_t len;
+} XenGrantCopySegment;
+
+#define XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE (1U << 0)
+
+struct gnttab_backend_ops {
+ uint32_t features;
+ xengnttab_handle *(*open)(void);
+ int (*close)(xengnttab_handle *xgt);
+ int (*grant_copy)(xengnttab_handle *xgt, bool to_domain, uint32_t domid,
+ XenGrantCopySegment *segs, uint32_t nr_segs,
+ Error **errp);
+ int (*set_max_grants)(xengnttab_handle *xgt, uint32_t nr_grants);
+ void *(*map_refs)(xengnttab_handle *xgt, uint32_t count, uint32_t domid,
+ uint32_t *refs, int prot);
+ int (*unmap)(xengnttab_handle *xgt, void *start_address, uint32_t *refs,
+ uint32_t count);
+};
+
+extern struct gnttab_backend_ops *xen_gnttab_ops;
+
+static inline bool qemu_xen_gnttab_can_map_multi(void)
+{
+ return xen_gnttab_ops &&
+ !!(xen_gnttab_ops->features & XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE);
+}
+
+static inline xengnttab_handle *qemu_xen_gnttab_open(void)
+{
+ if (!xen_gnttab_ops) {
+ return NULL;
+ }
+ return xen_gnttab_ops->open();
+}
+
+static inline int qemu_xen_gnttab_close(xengnttab_handle *xgt)
+{
+ if (!xen_gnttab_ops) {
+ return -ENOSYS;
+ }
+ return xen_gnttab_ops->close(xgt);
+}
+
+static inline int qemu_xen_gnttab_grant_copy(xengnttab_handle *xgt,
+ bool to_domain, uint32_t domid,
+ XenGrantCopySegment *segs,
+ uint32_t nr_segs, Error **errp)
+{
+ if (!xen_gnttab_ops) {
+ return -ENOSYS;
+ }
+
+ return xen_gnttab_ops->grant_copy(xgt, to_domain, domid, segs, nr_segs,
+ errp);
+}
+
+static inline int qemu_xen_gnttab_set_max_grants(xengnttab_handle *xgt,
+ uint32_t nr_grants)
+{
+ if (!xen_gnttab_ops) {
+ return -ENOSYS;
+ }
+ return xen_gnttab_ops->set_max_grants(xgt, nr_grants);
+}
+
+static inline void *qemu_xen_gnttab_map_refs(xengnttab_handle *xgt,
+ uint32_t count, uint32_t domid,
+ uint32_t *refs, int prot)
+{
+ if (!xen_gnttab_ops) {
+ return NULL;
+ }
+ return xen_gnttab_ops->map_refs(xgt, count, domid, refs, prot);
+}
+
+static inline int qemu_xen_gnttab_unmap(xengnttab_handle *xgt,
+ void *start_address, uint32_t *refs,
+ uint32_t count)
+{
+ if (!xen_gnttab_ops) {
+ return -ENOSYS;
+ }
+ return xen_gnttab_ops->unmap(xgt, start_address, refs, count);
+}
+
+struct foreignmem_backend_ops {
+ void *(*map)(uint32_t dom, void *addr, int prot, size_t pages,
+ xen_pfn_t *pfns, int *errs);
+ int (*unmap)(void *addr, size_t pages);
+};
+
+extern struct foreignmem_backend_ops *xen_foreignmem_ops;
+
+static inline void *qemu_xen_foreignmem_map(uint32_t dom, void *addr, int prot,
+ size_t pages, xen_pfn_t *pfns,
+ int *errs)
+{
+ if (!xen_foreignmem_ops) {
+ return NULL;
+ }
+ return xen_foreignmem_ops->map(dom, addr, prot, pages, pfns, errs);
+}
+
+static inline int qemu_xen_foreignmem_unmap(void *addr, size_t pages)
+{
+ if (!xen_foreignmem_ops) {
+ return -ENOSYS;
+ }
+ return xen_foreignmem_ops->unmap(addr, pages);
+}
+
+typedef void (*xs_watch_fn)(void *opaque, const char *path);
+
+struct qemu_xs_handle;
+struct qemu_xs_watch;
+typedef uint32_t xs_transaction_t;
+
+#define XBT_NULL 0
+
+#define XS_PERM_NONE 0x00
+#define XS_PERM_READ 0x01
+#define XS_PERM_WRITE 0x02
+
+struct xenstore_backend_ops {
+ struct qemu_xs_handle *(*open)(void);
+ void (*close)(struct qemu_xs_handle *h);
+ char *(*get_domain_path)(struct qemu_xs_handle *h, unsigned int domid);
+ char **(*directory)(struct qemu_xs_handle *h, xs_transaction_t t,
+ const char *path, unsigned int *num);
+ void *(*read)(struct qemu_xs_handle *h, xs_transaction_t t,
+ const char *path, unsigned int *len);
+ bool (*write)(struct qemu_xs_handle *h, xs_transaction_t t,
+ const char *path, const void *data, unsigned int len);
+ bool (*create)(struct qemu_xs_handle *h, xs_transaction_t t,
+ unsigned int owner, unsigned int domid,
+ unsigned int perms, const char *path);
+ bool (*destroy)(struct qemu_xs_handle *h, xs_transaction_t t,
+ const char *path);
+ struct qemu_xs_watch *(*watch)(struct qemu_xs_handle *h, const char *path,
+ xs_watch_fn fn, void *opaque);
+ void (*unwatch)(struct qemu_xs_handle *h, struct qemu_xs_watch *w);
+ xs_transaction_t (*transaction_start)(struct qemu_xs_handle *h);
+ bool (*transaction_end)(struct qemu_xs_handle *h, xs_transaction_t t,
+ bool abort);
+};
+
+extern struct xenstore_backend_ops *xen_xenstore_ops;
+
+static inline struct qemu_xs_handle *qemu_xen_xs_open(void)
+{
+ if (!xen_xenstore_ops) {
+ return NULL;
+ }
+ return xen_xenstore_ops->open();
+}
+
+static inline void qemu_xen_xs_close(struct qemu_xs_handle *h)
+{
+ if (!xen_xenstore_ops) {
+ return;
+ }
+ xen_xenstore_ops->close(h);
+}
+
+static inline char *qemu_xen_xs_get_domain_path(struct qemu_xs_handle *h,
+ unsigned int domid)
+{
+ if (!xen_xenstore_ops) {
+ return NULL;
+ }
+ return xen_xenstore_ops->get_domain_path(h, domid);
+}
+
+static inline char **qemu_xen_xs_directory(struct qemu_xs_handle *h,
+ xs_transaction_t t, const char *path,
+ unsigned int *num)
+{
+ if (!xen_xenstore_ops) {
+ return NULL;
+ }
+ return xen_xenstore_ops->directory(h, t, path, num);
+}
+
+static inline void *qemu_xen_xs_read(struct qemu_xs_handle *h,
+ xs_transaction_t t, const char *path,
+ unsigned int *len)
+{
+ if (!xen_xenstore_ops) {
+ return NULL;
+ }
+ return xen_xenstore_ops->read(h, t, path, len);
+}
+
+static inline bool qemu_xen_xs_write(struct qemu_xs_handle *h,
+ xs_transaction_t t, const char *path,
+ const void *data, unsigned int len)
+{
+ if (!xen_xenstore_ops) {
+ return false;
+ }
+ return xen_xenstore_ops->write(h, t, path, data, len);
+}
+
+static inline bool qemu_xen_xs_create(struct qemu_xs_handle *h,
+ xs_transaction_t t, unsigned int owner,
+ unsigned int domid, unsigned int perms,
+ const char *path)
+{
+ if (!xen_xenstore_ops) {
+ return false;
+ }
+ return xen_xenstore_ops->create(h, t, owner, domid, perms, path);
+}
+
+static inline bool qemu_xen_xs_destroy(struct qemu_xs_handle *h,
+ xs_transaction_t t, const char *path)
+{
+ if (!xen_xenstore_ops) {
+ return false;
+ }
+ return xen_xenstore_ops->destroy(h, t, path);
+}
+
+static inline struct qemu_xs_watch *qemu_xen_xs_watch(struct qemu_xs_handle *h,
+ const char *path,
+ xs_watch_fn fn,
+ void *opaque)
+{
+ if (!xen_xenstore_ops) {
+ return NULL;
+ }
+ return xen_xenstore_ops->watch(h, path, fn, opaque);
+}
+
+static inline void qemu_xen_xs_unwatch(struct qemu_xs_handle *h,
+ struct qemu_xs_watch *w)
+{
+ if (!xen_xenstore_ops) {
+ return;
+ }
+ xen_xenstore_ops->unwatch(h, w);
+}
+
+static inline xs_transaction_t qemu_xen_xs_transaction_start(struct qemu_xs_handle *h)
+{
+ if (!xen_xenstore_ops) {
+ return XBT_NULL;
+ }
+ return xen_xenstore_ops->transaction_start(h);
+}
+
+static inline bool qemu_xen_xs_transaction_end(struct qemu_xs_handle *h,
+ xs_transaction_t t, bool abort)
+{
+ if (!xen_xenstore_ops) {
+ return false;
+ }
+ return xen_xenstore_ops->transaction_end(h, t, abort);
+}
+
+void setup_xen_backend_ops(void);
+
+#endif /* QEMU_XEN_BACKEND_OPS_H */
diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_native.h
index 9a13a756ae..6bcc83baf9 100644
--- a/include/hw/xen/xen_common.h
+++ b/include/hw/xen/xen_native.h
@@ -1,5 +1,9 @@
-#ifndef QEMU_HW_XEN_COMMON_H
-#define QEMU_HW_XEN_COMMON_H
+#ifndef QEMU_HW_XEN_NATIVE_H
+#define QEMU_HW_XEN_NATIVE_H
+
+#ifdef __XEN_INTERFACE_VERSION__
+#error In Xen native files, include xen_native.h before other Xen headers
+#endif
/*
* If we have new enough libxenctrl then we do not want/need these compat
@@ -12,7 +16,6 @@
#include <xenctrl.h>
#include <xenstore.h>
-#include "hw/xen/interface/io/xenbus.h"
#include "hw/xen/xen.h"
#include "hw/pci/pci_device.h"
@@ -28,49 +31,12 @@ extern xc_interface *xen_xc;
#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701
typedef xc_interface xenforeignmemory_handle;
-typedef xc_evtchn xenevtchn_handle;
-typedef xc_gnttab xengnttab_handle;
-typedef evtchn_port_or_error_t xenevtchn_port_or_error_t;
-
-#define xenevtchn_open(l, f) xc_evtchn_open(l, f);
-#define xenevtchn_close(h) xc_evtchn_close(h)
-#define xenevtchn_fd(h) xc_evtchn_fd(h)
-#define xenevtchn_pending(h) xc_evtchn_pending(h)
-#define xenevtchn_notify(h, p) xc_evtchn_notify(h, p)
-#define xenevtchn_bind_interdomain(h, d, p) xc_evtchn_bind_interdomain(h, d, p)
-#define xenevtchn_unmask(h, p) xc_evtchn_unmask(h, p)
-#define xenevtchn_unbind(h, p) xc_evtchn_unbind(h, p)
-
-#define xengnttab_open(l, f) xc_gnttab_open(l, f)
-#define xengnttab_close(h) xc_gnttab_close(h)
-#define xengnttab_set_max_grants(h, n) xc_gnttab_set_max_grants(h, n)
-#define xengnttab_map_grant_ref(h, d, r, p) xc_gnttab_map_grant_ref(h, d, r, p)
-#define xengnttab_unmap(h, a, n) xc_gnttab_munmap(h, a, n)
-#define xengnttab_map_grant_refs(h, c, d, r, p) \
- xc_gnttab_map_grant_refs(h, c, d, r, p)
-#define xengnttab_map_domain_grant_refs(h, c, d, r, p) \
- xc_gnttab_map_domain_grant_refs(h, c, d, r, p)
#define xenforeignmemory_open(l, f) xen_xc
#define xenforeignmemory_close(h)
-static inline void *xenforeignmemory_map(xc_interface *h, uint32_t dom,
- int prot, size_t pages,
- const xen_pfn_t arr[/*pages*/],
- int err[/*pages*/])
-{
- if (err)
- return xc_map_foreign_bulk(h, dom, prot, arr, err, pages);
- else
- return xc_map_foreign_pages(h, dom, prot, arr, pages);
-}
-
-#define xenforeignmemory_unmap(h, p, s) munmap(p, s * XC_PAGE_SIZE)
-
#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */
-#include <xenevtchn.h>
-#include <xengnttab.h>
#include <xenforeignmemory.h>
#endif
@@ -660,31 +626,4 @@ static inline int xen_set_ioreq_server_state(domid_t dom,
#endif
-/* Xen before 4.8 */
-
-#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40800
-
-struct xengnttab_grant_copy_segment {
- union xengnttab_copy_ptr {
- void *virt;
- struct {
- uint32_t ref;
- uint16_t offset;
- uint16_t domid;
- } foreign;
- } source, dest;
- uint16_t len;
- uint16_t flags;
- int16_t status;
-};
-
-typedef struct xengnttab_grant_copy_segment xengnttab_grant_copy_segment_t;
-
-static inline int xengnttab_grant_copy(xengnttab_handle *xgt, uint32_t count,
- xengnttab_grant_copy_segment_t *segs)
-{
- return -ENOSYS;
-}
-#endif
-
-#endif /* QEMU_HW_XEN_COMMON_H */
+#endif /* QEMU_HW_XEN_NATIVE_H */
diff --git a/include/hw/xen/xen_pvdev.h b/include/hw/xen/xen_pvdev.h
index 7cd4bc2b82..ddad4b9f36 100644
--- a/include/hw/xen/xen_pvdev.h
+++ b/include/hw/xen/xen_pvdev.h
@@ -1,7 +1,9 @@
#ifndef QEMU_HW_XEN_PVDEV_H
#define QEMU_HW_XEN_PVDEV_H
-#include "hw/xen/xen_common.h"
+#include "hw/qdev-core.h"
+#include "hw/xen/xen_backend_ops.h"
+
/* ------------------------------------------------------------- */
#define XEN_BUFSIZE 1024
@@ -38,6 +40,7 @@ struct XenLegacyDevice {
char name[64];
int debug;
+ struct qemu_xs_watch *watch;
enum xenbus_state be_state;
enum xenbus_state fe_state;
int online;
@@ -63,7 +66,6 @@ int xenstore_write_int64(const char *base, const char *node, int64_t ival);
char *xenstore_read_str(const char *base, const char *node);
int xenstore_read_int(const char *base, const char *node, int *ival);
int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval);
-void xenstore_update(void *unused);
const char *xenbus_strstate(enum xenbus_state state);
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index 874134fd19..f85834ee8b 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -245,6 +245,20 @@
#define smp_wmb() smp_mb_release()
#define smp_rmb() smp_mb_acquire()
+/*
+ * SEQ_CST is weaker than the older __sync_* builtins and Linux
+ * kernel read-modify-write atomics. Provide a macro to obtain
+ * the same semantics.
+ */
+#if !defined(QEMU_SANITIZE_THREAD) && \
+ (defined(__i386__) || defined(__x86_64__) || defined(__s390x__))
+# define smp_mb__before_rmw() signal_barrier()
+# define smp_mb__after_rmw() signal_barrier()
+#else
+# define smp_mb__before_rmw() smp_mb()
+# define smp_mb__after_rmw() smp_mb()
+#endif
+
/* qatomic_mb_read/set semantics map Java volatile variables. They are
* less expensive on some platforms (notably POWER) than fully
* sequentially consistent operations.
@@ -259,7 +273,8 @@
#if !defined(QEMU_SANITIZE_THREAD) && \
(defined(__i386__) || defined(__x86_64__) || defined(__s390x__))
/* This is more efficient than a store plus a fence. */
-# define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i))
+# define qatomic_mb_set(ptr, i) \
+ ({ (void)qatomic_xchg(ptr, i); smp_mb__after_rmw(); })
#else
# define qatomic_mb_set(ptr, i) \
({ qatomic_store_release(ptr, i); smp_mb(); })
diff --git a/pc-bios/opensbi-riscv32-generic-fw_dynamic.bin b/pc-bios/opensbi-riscv32-generic-fw_dynamic.bin
index 81bab1adc9..6a8425885c 100644
--- a/pc-bios/opensbi-riscv32-generic-fw_dynamic.bin
+++ b/pc-bios/opensbi-riscv32-generic-fw_dynamic.bin
Binary files differ
diff --git a/pc-bios/opensbi-riscv64-generic-fw_dynamic.bin b/pc-bios/opensbi-riscv64-generic-fw_dynamic.bin
index 5eb0a74326..80bdbf2170 100644
--- a/pc-bios/opensbi-riscv64-generic-fw_dynamic.bin
+++ b/pc-bios/opensbi-riscv64-generic-fw_dynamic.bin
Binary files differ
diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img
index 554fcbd1b7..c9a5a21c50 100644
--- a/pc-bios/s390-ccw.img
+++ b/pc-bios/s390-ccw.img
Binary files differ
diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
index 994e59c0b0..a2137449dc 100644
--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
@@ -72,42 +72,74 @@ static inline void verify_boot_info(BootInfo *bip)
"Bad block size in zIPL section of the 1st record.");
}
-static block_number_t eckd_block_num(EckdCHS *chs)
+static void eckd_format_chs(ExtEckdBlockPtr *ptr, bool ldipl,
+ uint64_t *c,
+ uint64_t *h,
+ uint64_t *s)
+{
+ if (ldipl) {
+ *c = ptr->ldptr.chs.cylinder;
+ *h = ptr->ldptr.chs.head;
+ *s = ptr->ldptr.chs.sector;
+ } else {
+ *c = ptr->bptr.chs.cylinder;
+ *h = ptr->bptr.chs.head;
+ *s = ptr->bptr.chs.sector;
+ }
+}
+
+static block_number_t eckd_chs_to_block(uint64_t c, uint64_t h, uint64_t s)
{
const uint64_t sectors = virtio_get_sectors();
const uint64_t heads = virtio_get_heads();
- const uint64_t cylinder = chs->cylinder
- + ((chs->head & 0xfff0) << 12);
- const uint64_t head = chs->head & 0x000f;
+ const uint64_t cylinder = c + ((h & 0xfff0) << 12);
+ const uint64_t head = h & 0x000f;
const block_number_t block = sectors * heads * cylinder
+ sectors * head
- + chs->sector
- - 1; /* block nr starts with zero */
+ + s - 1; /* block nr starts with zero */
return block;
}
-static bool eckd_valid_address(BootMapPointer *p)
+static block_number_t eckd_block_num(EckdCHS *chs)
{
- const uint64_t head = p->eckd.chs.head & 0x000f;
+ return eckd_chs_to_block(chs->cylinder, chs->head, chs->sector);
+}
+
+static block_number_t gen_eckd_block_num(ExtEckdBlockPtr *ptr, bool ldipl)
+{
+ uint64_t cyl, head, sec;
+ eckd_format_chs(ptr, ldipl, &cyl, &head, &sec);
+ return eckd_chs_to_block(cyl, head, sec);
+}
+static bool eckd_valid_chs(uint64_t cyl, uint64_t head, uint64_t sector)
+{
if (head >= virtio_get_heads()
- || p->eckd.chs.sector > virtio_get_sectors()
- || p->eckd.chs.sector <= 0) {
+ || sector > virtio_get_sectors()
+ || sector <= 0) {
return false;
}
if (!virtio_guessed_disk_nature() &&
- eckd_block_num(&p->eckd.chs) >= virtio_get_blocks()) {
+ eckd_chs_to_block(cyl, head, sector) >= virtio_get_blocks()) {
return false;
}
return true;
}
-static block_number_t load_eckd_segments(block_number_t blk, uint64_t *address)
+static bool eckd_valid_address(ExtEckdBlockPtr *ptr, bool ldipl)
+{
+ uint64_t cyl, head, sec;
+ eckd_format_chs(ptr, ldipl, &cyl, &head, &sec);
+ return eckd_valid_chs(cyl, head, sec);
+}
+
+static block_number_t load_eckd_segments(block_number_t blk, bool ldipl,
+ uint64_t *address)
{
block_number_t block_nr;
- int j, rc;
+ int j, rc, count;
BootMapPointer *bprs = (void *)_bprs;
bool more_data;
@@ -117,7 +149,7 @@ static block_number_t load_eckd_segments(block_number_t blk, uint64_t *address)
do {
more_data = false;
for (j = 0;; j++) {
- block_nr = eckd_block_num(&bprs[j].xeckd.bptr.chs);
+ block_nr = gen_eckd_block_num(&bprs[j].xeckd, ldipl);
if (is_null_block_number(block_nr)) { /* end of chunk */
break;
}
@@ -129,11 +161,26 @@ static block_number_t load_eckd_segments(block_number_t blk, uint64_t *address)
break;
}
- IPL_assert(block_size_ok(bprs[j].xeckd.bptr.size),
+ /* List directed pointer does not store block size */
+ IPL_assert(ldipl || block_size_ok(bprs[j].xeckd.bptr.size),
"bad chunk block size");
- IPL_assert(eckd_valid_address(&bprs[j]), "bad chunk ECKD addr");
- if ((bprs[j].xeckd.bptr.count == 0) && unused_space(&(bprs[j+1]),
+ if (!eckd_valid_address(&bprs[j].xeckd, ldipl)) {
+ /*
+ * If an invalid address is found during LD-IPL then break and
+ * retry as CCW
+ */
+ IPL_assert(ldipl, "bad chunk ECKD addr");
+ break;
+ }
+
+ if (ldipl) {
+ count = bprs[j].xeckd.ldptr.count;
+ } else {
+ count = bprs[j].xeckd.bptr.count;
+ }
+
+ if (count == 0 && unused_space(&bprs[j + 1],
sizeof(EckdBlockPtr))) {
/* This is a "continue" pointer.
* This ptr should be the last one in the current
@@ -149,11 +196,10 @@ static block_number_t load_eckd_segments(block_number_t blk, uint64_t *address)
/* Load (count+1) blocks of code at (block_nr)
* to memory (address).
*/
- rc = virtio_read_many(block_nr, (void *)(*address),
- bprs[j].xeckd.bptr.count+1);
+ rc = virtio_read_many(block_nr, (void *)(*address), count + 1);
IPL_assert(rc == 0, "code chunk read failed");
- *address += (bprs[j].xeckd.bptr.count+1) * virtio_get_block_size();
+ *address += (count + 1) * virtio_get_block_size();
}
} while (more_data);
return block_nr;
@@ -237,8 +283,10 @@ static void run_eckd_boot_script(block_number_t bmt_block_nr,
uint64_t address;
BootMapTable *bmt = (void *)sec;
BootMapScript *bms = (void *)sec;
+ /* The S1B block number is NULL_BLOCK_NR if and only if it's an LD-IPL */
+ bool ldipl = (s1b_block_nr == NULL_BLOCK_NR);
- if (menu_is_enabled_zipl()) {
+ if (menu_is_enabled_zipl() && !ldipl) {
loadparm = eckd_get_boot_menu_index(s1b_block_nr);
}
@@ -249,7 +297,7 @@ static void run_eckd_boot_script(block_number_t bmt_block_nr,
memset(sec, FREE_SPACE_FILLER, sizeof(sec));
read_block(bmt_block_nr, sec, "Cannot read Boot Map Table");
- block_nr = eckd_block_num(&bmt->entry[loadparm].xeckd.bptr.chs);
+ block_nr = gen_eckd_block_num(&bmt->entry[loadparm].xeckd, ldipl);
IPL_assert(block_nr != -1, "Cannot find Boot Map Table Entry");
memset(sec, FREE_SPACE_FILLER, sizeof(sec));
@@ -264,13 +312,18 @@ static void run_eckd_boot_script(block_number_t bmt_block_nr,
}
address = bms->entry[i].address.load_address;
- block_nr = eckd_block_num(&bms->entry[i].blkptr.xeckd.bptr.chs);
+ block_nr = gen_eckd_block_num(&bms->entry[i].blkptr.xeckd, ldipl);
do {
- block_nr = load_eckd_segments(block_nr, &address);
+ block_nr = load_eckd_segments(block_nr, ldipl, &address);
} while (block_nr != -1);
}
+ if (ldipl && bms->entry[i].type != BOOT_SCRIPT_EXEC) {
+ /* Abort LD-IPL and retry as CCW-IPL */
+ return;
+ }
+
IPL_assert(bms->entry[i].type == BOOT_SCRIPT_EXEC,
"Unknown script entry type");
write_reset_psw(bms->entry[i].address.load_address); /* no return */
@@ -380,6 +433,23 @@ static void ipl_eckd_ldl(ECKD_IPL_mode_t mode)
/* no return */
}
+static block_number_t eckd_find_bmt(ExtEckdBlockPtr *ptr)
+{
+ block_number_t blockno;
+ uint8_t tmp_sec[MAX_SECTOR_SIZE];
+ BootRecord *br;
+
+ blockno = gen_eckd_block_num(ptr, 0);
+ read_block(blockno, tmp_sec, "Cannot read boot record");
+ br = (BootRecord *)tmp_sec;
+ if (!magic_match(br->magic, ZIPL_MAGIC)) {
+ /* If the boot record is invalid, return and try CCW-IPL instead */
+ return NULL_BLOCK_NR;
+ }
+
+ return gen_eckd_block_num(&br->pgt.xeckd, 1);
+}
+
static void print_eckd_msg(void)
{
char msg[] = "Using ECKD scheme (block size *****), ";
@@ -401,28 +471,43 @@ static void print_eckd_msg(void)
static void ipl_eckd(void)
{
- XEckdMbr *mbr = (void *)sec;
- LDL_VTOC *vlbl = (void *)sec;
+ IplVolumeLabel *vlbl = (void *)sec;
+ LDL_VTOC *vtoc = (void *)sec;
+ block_number_t ldipl_bmt; /* Boot Map Table for List-Directed IPL */
print_eckd_msg();
- /* Grab the MBR again */
+ /* Block 2 can contain either the CDL VOL1 label or the LDL VTOC */
memset(sec, FREE_SPACE_FILLER, sizeof(sec));
- read_block(0, mbr, "Cannot read block 0 on DASD");
+ read_block(2, vlbl, "Cannot read block 2");
- if (magic_match(mbr->magic, IPL1_MAGIC)) {
- ipl_eckd_cdl(); /* only returns in case of error */
- return;
+ /*
+ * First check for a list-directed-format pointer which would
+ * supersede the CCW pointer.
+ */
+ if (eckd_valid_address((ExtEckdBlockPtr *)&vlbl->f.br, 0)) {
+ ldipl_bmt = eckd_find_bmt((ExtEckdBlockPtr *)&vlbl->f.br);
+ if (ldipl_bmt) {
+ sclp_print("List-Directed\n");
+ /* LD-IPL does not use the S1B block, just make it NULL */
+ run_eckd_boot_script(ldipl_bmt, NULL_BLOCK_NR);
+ /* Only returns on error, retry as CCW-IPL */
+ sclp_print("Retrying IPL ");
+ print_eckd_msg();
+ }
+ memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+ read_block(2, vtoc, "Cannot read block 2");
}
- /* LDL/CMS? */
- memset(sec, FREE_SPACE_FILLER, sizeof(sec));
- read_block(2, vlbl, "Cannot read block 2");
+ /* Not list-directed */
+ if (magic_match(vtoc->magic, VOL1_MAGIC)) {
+ ipl_eckd_cdl(); /* may return on error */
+ }
- if (magic_match(vlbl->magic, CMS1_MAGIC)) {
+ if (magic_match(vtoc->magic, CMS1_MAGIC)) {
ipl_eckd_ldl(ECKD_CMS); /* no return */
}
- if (magic_match(vlbl->magic, LNX1_MAGIC)) {
+ if (magic_match(vtoc->magic, LNX1_MAGIC)) {
ipl_eckd_ldl(ECKD_LDL); /* no return */
}
diff --git a/pc-bios/s390-ccw/bootmap.h b/pc-bios/s390-ccw/bootmap.h
index 3946aa3f8d..d4690a88c2 100644
--- a/pc-bios/s390-ccw/bootmap.h
+++ b/pc-bios/s390-ccw/bootmap.h
@@ -45,9 +45,23 @@ typedef struct EckdBlockPtr {
* it's 0 for TablePtr, ScriptPtr, and SectionPtr */
} __attribute__ ((packed)) EckdBlockPtr;
-typedef struct ExtEckdBlockPtr {
+typedef struct LdEckdCHS {
+ uint32_t cylinder;
+ uint8_t head;
+ uint8_t sector;
+} __attribute__ ((packed)) LdEckdCHS;
+
+typedef struct LdEckdBlockPtr {
+ LdEckdCHS chs; /* cylinder/head/sector is an address of the block */
+ uint8_t reserved[4];
+ uint16_t count;
+ uint32_t pad;
+} __attribute__ ((packed)) LdEckdBlockPtr;
+
+/* bptr is used for CCW type IPL, while ldptr is for list-directed IPL */
+typedef union ExtEckdBlockPtr {
EckdBlockPtr bptr;
- uint8_t reserved[8];
+ LdEckdBlockPtr ldptr;
} __attribute__ ((packed)) ExtEckdBlockPtr;
typedef union BootMapPointer {
@@ -57,6 +71,15 @@ typedef union BootMapPointer {
ExtEckdBlockPtr xeckd;
} __attribute__ ((packed)) BootMapPointer;
+typedef struct BootRecord {
+ uint8_t magic[4];
+ uint32_t version;
+ uint64_t res1;
+ BootMapPointer pgt;
+ uint8_t reserved[510 - 32];
+ uint16_t os_id;
+} __attribute__ ((packed)) BootRecord;
+
/* aka Program Table */
typedef struct BootMapTable {
uint8_t magic[4];
@@ -292,7 +315,8 @@ typedef struct IplVolumeLabel {
struct {
unsigned char key[4]; /* == "VOL1" */
unsigned char volser[6];
- unsigned char reserved[6];
+ unsigned char reserved[64];
+ EckdCHS br; /* Location of Boot Record for list-directed IPL */
} f;
};
} __attribute__((packed)) IplVolumeLabel;
diff --git a/roms/opensbi b/roms/opensbi
-Subproject 4489876e933d8ba0d8bc6c64bae71e295d45faa
+Subproject 6b5188ca14e59ce7bf71afe4e7d3d557c3d31bf
diff --git a/softmmu/globals.c b/softmmu/globals.c
index 0a4405614e..39678aa8c5 100644
--- a/softmmu/globals.c
+++ b/softmmu/globals.c
@@ -65,3 +65,7 @@ bool qemu_uuid_set;
uint32_t xen_domid;
enum xen_mode xen_mode = XEN_DISABLED;
bool xen_domid_restrict;
+struct evtchn_backend_ops *xen_evtchn_ops;
+struct gnttab_backend_ops *xen_gnttab_ops;
+struct foreignmem_backend_ops *xen_foreignmem_ops;
+struct xenstore_backend_ops *xen_xenstore_ops;
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 47143edb4f..fb412a56e1 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -1126,15 +1126,21 @@ GString *ram_block_format(void)
GString *buf = g_string_new("");
RCU_READ_LOCK_GUARD();
- g_string_append_printf(buf, "%24s %8s %18s %18s %18s\n",
- "Block Name", "PSize", "Offset", "Used", "Total");
+ g_string_append_printf(buf, "%24s %8s %18s %18s %18s %18s %3s\n",
+ "Block Name", "PSize", "Offset", "Used", "Total",
+ "HVA", "RO");
+
RAMBLOCK_FOREACH(block) {
psize = size_to_str(block->page_size);
g_string_append_printf(buf, "%24s %8s 0x%016" PRIx64 " 0x%016" PRIx64
- " 0x%016" PRIx64 "\n", block->idstr, psize,
+ " 0x%016" PRIx64 " 0x%016" PRIx64 " %3s\n",
+ block->idstr, psize,
(uint64_t)block->offset,
(uint64_t)block->used_length,
- (uint64_t)block->max_length);
+ (uint64_t)block->max_length,
+ (uint64_t)(uintptr_t)block->host,
+ block->mr->readonly ? "ro" : "rw");
+
g_free(psize);
}
@@ -2927,6 +2933,8 @@ void cpu_register_map_client(QEMUBH *bh)
qemu_mutex_lock(&map_client_list_lock);
client->bh = bh;
QLIST_INSERT_HEAD(&map_client_list, client, link);
+ /* Write map_client_list before reading in_use. */
+ smp_mb();
if (!qatomic_read(&bounce.in_use)) {
cpu_notify_map_clients_locked();
}
@@ -3116,6 +3124,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
qemu_vfree(bounce.buffer);
bounce.buffer = NULL;
memory_region_unref(bounce.mr);
+ /* Clear in_use before reading map_client_list. */
qatomic_mb_set(&bounce.in_use, false);
cpu_notify_map_clients();
}
diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build
index c9d31d095c..42b03c81e6 100644
--- a/target/hexagon/meson.build
+++ b/target/hexagon/meson.build
@@ -183,7 +183,7 @@ if idef_parser_enabled and 'hexagon-linux-user' in target_dirs
)
bison = generator(
- find_program('bison'),
+ find_program('bison', version: '>=3.0'),
output: ['@BASENAME@.tab.c', '@BASENAME@.tab.h'],
arguments: ['@INPUT@', '--defines=@OUTPUT1@', '--output=@OUTPUT0@']
)
diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c
index bad3131d08..0bb6c601c9 100644
--- a/target/i386/kvm/xen-emu.c
+++ b/target/i386/kvm/xen-emu.c
@@ -1406,6 +1406,11 @@ int kvm_xen_soft_reset(void)
return err;
}
+ err = xen_gnttab_reset();
+ if (err) {
+ return err;
+ }
+
err = xen_xenstore_reset();
if (err) {
return err;
diff --git a/target/mips/cpu-defs.c.inc b/target/mips/cpu-defs.c.inc
index 480e60aeec..d45f245a67 100644
--- a/target/mips/cpu-defs.c.inc
+++ b/target/mips/cpu-defs.c.inc
@@ -332,7 +332,11 @@ const mips_def_t mips_defs[] =
(0x1 << CP0C0_AR) | (MMU_TYPE_FMT << CP0C0_MT),
.CP0_Config1 = MIPS_CONFIG1,
.CP0_Config2 = MIPS_CONFIG2,
- .CP0_Config3 = MIPS_CONFIG3 | (0x2 << CP0C3_ISA) | (1 << CP0C3_VInt),
+ .CP0_Config3 = MIPS_CONFIG3 | (0x2 << CP0C3_ISA) | (1 << CP0C3_VInt) |
+ (1 << CP0C3_M),
+ .CP0_Config4 = MIPS_CONFIG4 | (1 << CP0C4_M),
+ .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_NFExists),
+ .CP0_Config7 = 1 << CP0C7_WII,
.CP0_LLAddr_rw_bitmask = 0,
.CP0_LLAddr_shift = 4,
.SYNCI_Step = 32,
@@ -353,7 +357,11 @@ const mips_def_t mips_defs[] =
(0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
(0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA),
.CP0_Config2 = MIPS_CONFIG2,
- .CP0_Config3 = MIPS_CONFIG3 | (0x2 << CP0C3_ISA) | (0 << CP0C3_VInt),
+ .CP0_Config3 = MIPS_CONFIG3 | (0x2 << CP0C3_ISA) | (0 << CP0C3_VInt) |
+ (1 << CP0C3_M),
+ .CP0_Config4 = MIPS_CONFIG4 | (1 << CP0C4_M),
+ .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_NFExists),
+ .CP0_Config7 = 1 << CP0C7_WII,
.CP0_LLAddr_rw_bitmask = 0,
.CP0_LLAddr_shift = 4,
.SYNCI_Step = 32,
@@ -392,6 +400,7 @@ const mips_def_t mips_defs[] =
.CP0_Config5_rw_bitmask = (1 << CP0C5_K) | (1 << CP0C5_CV) |
(1 << CP0C5_MSAEn) | (1 << CP0C5_UFE) |
(1 << CP0C5_FRE) | (1 << CP0C5_UFR),
+ .CP0_Config7 = 1 << CP0C7_WII,
.CP0_LLAddr_rw_bitmask = 0,
.CP0_LLAddr_shift = 0,
.SYNCI_Step = 32,
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index 05caf54999..543da911e3 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -143,11 +143,13 @@ static bool mips_cpu_has_work(CPUState *cs)
/*
* Prior to MIPS Release 6 it is implementation dependent if non-enabled
* interrupts wake-up the CPU, however most of the implementations only
- * check for interrupts that can be taken.
+ * check for interrupts that can be taken. For pre-release 6 CPUs,
+ * check for CP0 Config7 'Wait IE ignore' bit.
*/
if ((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
cpu_mips_hw_interrupts_pending(env)) {
if (cpu_mips_hw_interrupts_enabled(env) ||
+ (env->CP0_Config7 & (1 << CP0C7_WII)) ||
(env->insn_flags & ISA_MIPS_R6)) {
has_work = true;
}
diff --git a/target/mips/cpu.h b/target/mips/cpu.h
index caf2b06911..142c55af47 100644
--- a/target/mips/cpu.h
+++ b/target/mips/cpu.h
@@ -980,6 +980,7 @@ typedef struct CPUArchState {
#define CP0C6_DATAPREF 0
int32_t CP0_Config7;
int64_t CP0_Config7_rw_bitmask;
+#define CP0C7_WII 31
#define CP0C7_NAPCGEN 2
#define CP0C7_UNIMUEN 1
#define CP0C7_VFPUCGEN 0
diff --git a/target/mips/sysemu/physaddr.c b/target/mips/sysemu/physaddr.c
index 2970df8a09..05990aa5bb 100644
--- a/target/mips/sysemu/physaddr.c
+++ b/target/mips/sysemu/physaddr.c
@@ -70,8 +70,7 @@ static int is_seg_am_mapped(unsigned int am, bool eu, int mmu_idx)
/* is this AM mapped in current execution mode */
return ((adetlb_mask << am) < 0);
default:
- assert(0);
- return TLBRET_BADADDR;
+ g_assert_not_reached();
};
}
diff --git a/target/mips/tcg/ldst_helper.c b/target/mips/tcg/ldst_helper.c
index d0bd0267b2..c1a8380e34 100644
--- a/target/mips/tcg/ldst_helper.c
+++ b/target/mips/tcg/ldst_helper.c
@@ -248,14 +248,14 @@ void helper_swm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
target_ulong i;
for (i = 0; i < base_reglist; i++) {
- cpu_stw_mmuidx_ra(env, addr, env->active_tc.gpr[multiple_regs[i]],
+ cpu_stl_mmuidx_ra(env, addr, env->active_tc.gpr[multiple_regs[i]],
mem_idx, GETPC());
addr += 4;
}
}
if (do_r31) {
- cpu_stw_mmuidx_ra(env, addr, env->active_tc.gpr[31], mem_idx, GETPC());
+ cpu_stl_mmuidx_ra(env, addr, env->active_tc.gpr[31], mem_idx, GETPC());
}
}
diff --git a/target/mips/tcg/msa_helper.c b/target/mips/tcg/msa_helper.c
index 736283e2af..29b31d70fe 100644
--- a/target/mips/tcg/msa_helper.c
+++ b/target/mips/tcg/msa_helper.c
@@ -5333,7 +5333,7 @@ void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
msa_move_v(pwd, pwx);
}
@@ -5368,7 +5368,7 @@ void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, \
} \
break; \
default: \
- assert(0); \
+ g_assert_not_reached(); \
} \
}
@@ -5413,7 +5413,7 @@ void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
}
@@ -5461,7 +5461,7 @@ void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \
} \
break; \
default: \
- assert(0); \
+ g_assert_not_reached(); \
} \
}
@@ -5511,7 +5511,7 @@ void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, \
} \
break; \
default: \
- assert(0); \
+ g_assert_not_reached(); \
} \
}
@@ -5557,7 +5557,7 @@ static inline void msa_sld_df(uint32_t df, wr_t *pwd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
}
@@ -5632,7 +5632,7 @@ void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, \
pwd->d[1] = msa_ ## func ## _df(df, pws->d[1], pwt->d[1]); \
break; \
default: \
- assert(0); \
+ g_assert_not_reached(); \
} \
}
@@ -5771,7 +5771,7 @@ void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \
pwd->d[1] = msa_ ## func ## _df(df, pwd->d[1], pws->d[1], pwt->d[1]); \
break; \
default: \
- assert(0); \
+ g_assert_not_reached(); \
} \
}
@@ -5811,7 +5811,7 @@ static inline void msa_splat_df(uint32_t df, wr_t *pwd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
}
@@ -5869,7 +5869,7 @@ void helper_msa_##FUNC(CPUMIPSState *env, uint32_t df, uint32_t wd, \
MSA_LOOP_D; \
break; \
default: \
- assert(0); \
+ g_assert_not_reached(); \
} \
msa_move_v(pwd, pwx); \
}
@@ -6090,7 +6090,7 @@ void helper_msa_insve_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
pwd->d[n] = (int64_t)pws->d[0];
break;
default:
- assert(0);
+ g_assert_not_reached();
}
}
@@ -6150,7 +6150,7 @@ void helper_msa_fill_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
}
@@ -6565,7 +6565,7 @@ static inline void compare_af(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, retaddr);
@@ -6596,7 +6596,7 @@ static inline void compare_un(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, retaddr);
@@ -6625,7 +6625,7 @@ static inline void compare_eq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, retaddr);
@@ -6654,7 +6654,7 @@ static inline void compare_ueq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, retaddr);
@@ -6683,7 +6683,7 @@ static inline void compare_lt(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, retaddr);
@@ -6712,7 +6712,7 @@ static inline void compare_ult(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, retaddr);
@@ -6741,7 +6741,7 @@ static inline void compare_le(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, retaddr);
@@ -6770,7 +6770,7 @@ static inline void compare_ule(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, retaddr);
@@ -6799,7 +6799,7 @@ static inline void compare_or(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, retaddr);
@@ -6828,7 +6828,7 @@ static inline void compare_une(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, retaddr);
@@ -6857,7 +6857,7 @@ static inline void compare_ne(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, retaddr);
@@ -7107,7 +7107,7 @@ void helper_msa_fadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7137,7 +7137,7 @@ void helper_msa_fsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7167,7 +7167,7 @@ void helper_msa_fmul_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7198,7 +7198,7 @@ void helper_msa_fdiv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7245,7 +7245,7 @@ void helper_msa_fmadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7280,7 +7280,7 @@ void helper_msa_fmsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7317,7 +7317,7 @@ void helper_msa_fexp2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7371,7 +7371,7 @@ void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7417,7 +7417,7 @@ void helper_msa_ftq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7526,7 +7526,7 @@ void helper_msa_fmin_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
} else {
- assert(0);
+ g_assert_not_reached();
}
@@ -7555,7 +7555,7 @@ void helper_msa_fmin_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
FMAXMIN_A(min, max, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
FMAXMIN_A(min, max, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
} else {
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7628,7 +7628,7 @@ void helper_msa_fmax_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
} else {
- assert(0);
+ g_assert_not_reached();
}
@@ -7657,7 +7657,7 @@ void helper_msa_fmax_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
FMAXMIN_A(max, min, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
FMAXMIN_A(max, min, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
} else {
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7681,7 +7681,7 @@ void helper_msa_fclass_df(CPUMIPSState *env, uint32_t df,
pwd->d[0] = float_class_d(pws->d[0], status);
pwd->d[1] = float_class_d(pws->d[1], status);
} else {
- assert(0);
+ g_assert_not_reached();
}
}
@@ -7723,7 +7723,7 @@ void helper_msa_ftrunc_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7753,7 +7753,7 @@ void helper_msa_ftrunc_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7783,7 +7783,7 @@ void helper_msa_fsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7832,7 +7832,7 @@ void helper_msa_frsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7862,7 +7862,7 @@ void helper_msa_frcp_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7892,7 +7892,7 @@ void helper_msa_frint_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7946,7 +7946,7 @@ void helper_msa_flog2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -7983,7 +7983,7 @@ void helper_msa_fexupl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -8019,7 +8019,7 @@ void helper_msa_fexupr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -8046,7 +8046,7 @@ void helper_msa_ffql_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
msa_move_v(pwd, pwx);
@@ -8072,7 +8072,7 @@ void helper_msa_ffqr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
msa_move_v(pwd, pwx);
@@ -8100,7 +8100,7 @@ void helper_msa_ftint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -8130,7 +8130,7 @@ void helper_msa_ftint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -8166,7 +8166,7 @@ void helper_msa_ffint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
@@ -8196,7 +8196,7 @@ void helper_msa_ffint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
check_msacsr_cause(env, GETPC());
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
index 8cad3d15a0..24993bc97d 100644
--- a/target/mips/tcg/translate.c
+++ b/target/mips/tcg/translate.c
@@ -4887,6 +4887,14 @@ static void gen_compute_branch(DisasContext *ctx, uint32_t opc,
break;
case OPC_J:
case OPC_JAL:
+ {
+ /* Jump to immediate */
+ int jal_mask = ctx->hflags & MIPS_HFLAG_M16 ? 0xF8000000
+ : 0xF0000000;
+ btgt = ((ctx->base.pc_next + insn_bytes) & jal_mask)
+ | (uint32_t)offset;
+ break;
+ }
case OPC_JALX:
/* Jump to immediate */
btgt = ((ctx->base.pc_next + insn_bytes) & (int32_t)0xF0000000) |
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 5bc0005cc7..1e97473af2 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -28,6 +28,7 @@
#include "time_helper.h"
#include "exec/exec-all.h"
#include "qapi/error.h"
+#include "qapi/visitor.h"
#include "qemu/error-report.h"
#include "hw/qdev-properties.h"
#include "migration/vmstate.h"
@@ -75,6 +76,8 @@ struct isa_ext_data {
static const struct isa_ext_data isa_edata_arr[] = {
ISA_EXT_DATA_ENTRY(h, false, PRIV_VERSION_1_12_0, ext_h),
ISA_EXT_DATA_ENTRY(v, false, PRIV_VERSION_1_10_0, ext_v),
+ ISA_EXT_DATA_ENTRY(zicbom, true, PRIV_VERSION_1_12_0, ext_icbom),
+ ISA_EXT_DATA_ENTRY(zicboz, true, PRIV_VERSION_1_12_0, ext_icboz),
ISA_EXT_DATA_ENTRY(zicond, true, PRIV_VERSION_1_12_0, ext_zicond),
ISA_EXT_DATA_ENTRY(zicsr, true, PRIV_VERSION_1_10_0, ext_icsr),
ISA_EXT_DATA_ENTRY(zifencei, true, PRIV_VERSION_1_10_0, ext_ifencei),
@@ -218,7 +221,7 @@ static const char * const riscv_intr_names[] = {
"reserved"
};
-static void register_cpu_props(DeviceState *dev);
+static void register_cpu_props(Object *obj);
const char *riscv_cpu_get_trap_name(target_ulong cause, bool async)
{
@@ -247,6 +250,89 @@ static void set_vext_version(CPURISCVState *env, int vext_ver)
env->vext_ver = vext_ver;
}
+#ifndef CONFIG_USER_ONLY
+static uint8_t satp_mode_from_str(const char *satp_mode_str)
+{
+ if (!strncmp(satp_mode_str, "mbare", 5)) {
+ return VM_1_10_MBARE;
+ }
+
+ if (!strncmp(satp_mode_str, "sv32", 4)) {
+ return VM_1_10_SV32;
+ }
+
+ if (!strncmp(satp_mode_str, "sv39", 4)) {
+ return VM_1_10_SV39;
+ }
+
+ if (!strncmp(satp_mode_str, "sv48", 4)) {
+ return VM_1_10_SV48;
+ }
+
+ if (!strncmp(satp_mode_str, "sv57", 4)) {
+ return VM_1_10_SV57;
+ }
+
+ if (!strncmp(satp_mode_str, "sv64", 4)) {
+ return VM_1_10_SV64;
+ }
+
+ g_assert_not_reached();
+}
+
+uint8_t satp_mode_max_from_map(uint32_t map)
+{
+ /* map here has at least one bit set, so no problem with clz */
+ return 31 - __builtin_clz(map);
+}
+
+const char *satp_mode_str(uint8_t satp_mode, bool is_32_bit)
+{
+ if (is_32_bit) {
+ switch (satp_mode) {
+ case VM_1_10_SV32:
+ return "sv32";
+ case VM_1_10_MBARE:
+ return "none";
+ }
+ } else {
+ switch (satp_mode) {
+ case VM_1_10_SV64:
+ return "sv64";
+ case VM_1_10_SV57:
+ return "sv57";
+ case VM_1_10_SV48:
+ return "sv48";
+ case VM_1_10_SV39:
+ return "sv39";
+ case VM_1_10_MBARE:
+ return "none";
+ }
+ }
+
+ g_assert_not_reached();
+}
+
+static void set_satp_mode_max_supported(RISCVCPU *cpu,
+ uint8_t satp_mode)
+{
+ bool rv32 = riscv_cpu_mxl(&cpu->env) == MXL_RV32;
+ const bool *valid_vm = rv32 ? valid_vm_1_10_32 : valid_vm_1_10_64;
+
+ for (int i = 0; i <= satp_mode; ++i) {
+ if (valid_vm[i]) {
+ cpu->cfg.satp_mode.supported |= (1 << i);
+ }
+ }
+}
+
+/* Set the satp mode to the max supported */
+static void set_satp_mode_default_map(RISCVCPU *cpu)
+{
+ cpu->cfg.satp_mode.map = cpu->cfg.satp_mode.supported;
+}
+#endif
+
static void riscv_any_cpu_init(Object *obj)
{
CPURISCVState *env = &RISCV_CPU(obj)->env;
@@ -255,8 +341,15 @@ static void riscv_any_cpu_init(Object *obj)
#elif defined(TARGET_RISCV64)
set_misa(env, MXL_RV64, RVI | RVM | RVA | RVF | RVD | RVC | RVU);
#endif
+
+#ifndef CONFIG_USER_ONLY
+ set_satp_mode_max_supported(RISCV_CPU(obj),
+ riscv_cpu_mxl(&RISCV_CPU(obj)->env) == MXL_RV32 ?
+ VM_1_10_SV32 : VM_1_10_SV57);
+#endif
+
set_priv_version(env, PRIV_VERSION_1_12_0);
- register_cpu_props(DEVICE(obj));
+ register_cpu_props(obj);
}
#if defined(TARGET_RISCV64)
@@ -265,17 +358,23 @@ static void rv64_base_cpu_init(Object *obj)
CPURISCVState *env = &RISCV_CPU(obj)->env;
/* We set this in the realise function */
set_misa(env, MXL_RV64, 0);
- register_cpu_props(DEVICE(obj));
+ register_cpu_props(obj);
/* Set latest version of privileged specification */
set_priv_version(env, PRIV_VERSION_1_12_0);
+#ifndef CONFIG_USER_ONLY
+ set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV57);
+#endif
}
static void rv64_sifive_u_cpu_init(Object *obj)
{
CPURISCVState *env = &RISCV_CPU(obj)->env;
set_misa(env, MXL_RV64, RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU);
- register_cpu_props(DEVICE(obj));
+ register_cpu_props(obj);
set_priv_version(env, PRIV_VERSION_1_10_0);
+#ifndef CONFIG_USER_ONLY
+ set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV39);
+#endif
}
static void rv64_sifive_e_cpu_init(Object *obj)
@@ -284,9 +383,12 @@ static void rv64_sifive_e_cpu_init(Object *obj)
RISCVCPU *cpu = RISCV_CPU(obj);
set_misa(env, MXL_RV64, RVI | RVM | RVA | RVC | RVU);
- register_cpu_props(DEVICE(obj));
+ register_cpu_props(obj);
set_priv_version(env, PRIV_VERSION_1_10_0);
cpu->cfg.mmu = false;
+#ifndef CONFIG_USER_ONLY
+ set_satp_mode_max_supported(cpu, VM_1_10_MBARE);
+#endif
}
static void rv64_thead_c906_cpu_init(Object *obj)
@@ -316,6 +418,9 @@ static void rv64_thead_c906_cpu_init(Object *obj)
cpu->cfg.ext_xtheadsync = true;
cpu->cfg.mvendorid = THEAD_VENDOR_ID;
+#ifndef CONFIG_USER_ONLY
+ set_satp_mode_max_supported(cpu, VM_1_10_SV39);
+#endif
}
static void rv128_base_cpu_init(Object *obj)
@@ -329,9 +434,12 @@ static void rv128_base_cpu_init(Object *obj)
CPURISCVState *env = &RISCV_CPU(obj)->env;
/* We set this in the realise function */
set_misa(env, MXL_RV128, 0);
- register_cpu_props(DEVICE(obj));
+ register_cpu_props(obj);
/* Set latest version of privileged specification */
set_priv_version(env, PRIV_VERSION_1_12_0);
+#ifndef CONFIG_USER_ONLY
+ set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV57);
+#endif
}
#else
static void rv32_base_cpu_init(Object *obj)
@@ -339,17 +447,23 @@ static void rv32_base_cpu_init(Object *obj)
CPURISCVState *env = &RISCV_CPU(obj)->env;
/* We set this in the realise function */
set_misa(env, MXL_RV32, 0);
- register_cpu_props(DEVICE(obj));
+ register_cpu_props(obj);
/* Set latest version of privileged specification */
set_priv_version(env, PRIV_VERSION_1_12_0);
+#ifndef CONFIG_USER_ONLY
+ set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV32);
+#endif
}
static void rv32_sifive_u_cpu_init(Object *obj)
{
CPURISCVState *env = &RISCV_CPU(obj)->env;
set_misa(env, MXL_RV32, RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU);
- register_cpu_props(DEVICE(obj));
+ register_cpu_props(obj);
set_priv_version(env, PRIV_VERSION_1_10_0);
+#ifndef CONFIG_USER_ONLY
+ set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV32);
+#endif
}
static void rv32_sifive_e_cpu_init(Object *obj)
@@ -358,9 +472,12 @@ static void rv32_sifive_e_cpu_init(Object *obj)
RISCVCPU *cpu = RISCV_CPU(obj);
set_misa(env, MXL_RV32, RVI | RVM | RVA | RVC | RVU);
- register_cpu_props(DEVICE(obj));
+ register_cpu_props(obj);
set_priv_version(env, PRIV_VERSION_1_10_0);
cpu->cfg.mmu = false;
+#ifndef CONFIG_USER_ONLY
+ set_satp_mode_max_supported(cpu, VM_1_10_MBARE);
+#endif
}
static void rv32_ibex_cpu_init(Object *obj)
@@ -369,9 +486,12 @@ static void rv32_ibex_cpu_init(Object *obj)
RISCVCPU *cpu = RISCV_CPU(obj);
set_misa(env, MXL_RV32, RVI | RVM | RVC | RVU);
- register_cpu_props(DEVICE(obj));
+ register_cpu_props(obj);
set_priv_version(env, PRIV_VERSION_1_11_0);
cpu->cfg.mmu = false;
+#ifndef CONFIG_USER_ONLY
+ set_satp_mode_max_supported(cpu, VM_1_10_MBARE);
+#endif
cpu->cfg.epmp = true;
}
@@ -381,9 +501,12 @@ static void rv32_imafcu_nommu_cpu_init(Object *obj)
RISCVCPU *cpu = RISCV_CPU(obj);
set_misa(env, MXL_RV32, RVI | RVM | RVA | RVF | RVC | RVU);
- register_cpu_props(DEVICE(obj));
+ register_cpu_props(obj);
set_priv_version(env, PRIV_VERSION_1_10_0);
cpu->cfg.mmu = false;
+#ifndef CONFIG_USER_ONLY
+ set_satp_mode_max_supported(cpu, VM_1_10_MBARE);
+#endif
}
#endif
@@ -396,7 +519,7 @@ static void riscv_host_cpu_init(Object *obj)
#elif defined(TARGET_RISCV64)
set_misa(env, MXL_RV64, 0);
#endif
- register_cpu_props(DEVICE(obj));
+ register_cpu_props(obj);
}
#endif
@@ -916,6 +1039,88 @@ static void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
set_misa(env, env->misa_mxl, ext);
}
+#ifndef CONFIG_USER_ONLY
+static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp)
+{
+ bool rv32 = riscv_cpu_mxl(&cpu->env) == MXL_RV32;
+ uint8_t satp_mode_map_max;
+ uint8_t satp_mode_supported_max =
+ satp_mode_max_from_map(cpu->cfg.satp_mode.supported);
+
+ if (cpu->cfg.satp_mode.map == 0) {
+ if (cpu->cfg.satp_mode.init == 0) {
+ /* If unset by the user, we fallback to the default satp mode. */
+ set_satp_mode_default_map(cpu);
+ } else {
+ /*
+ * Find the lowest level that was disabled and then enable the
+ * first valid level below which can be found in
+ * valid_vm_1_10_32/64.
+ */
+ for (int i = 1; i < 16; ++i) {
+ if ((cpu->cfg.satp_mode.init & (1 << i)) &&
+ (cpu->cfg.satp_mode.supported & (1 << i))) {
+ for (int j = i - 1; j >= 0; --j) {
+ if (cpu->cfg.satp_mode.supported & (1 << j)) {
+ cpu->cfg.satp_mode.map |= (1 << j);
+ break;
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ satp_mode_map_max = satp_mode_max_from_map(cpu->cfg.satp_mode.map);
+
+ /* Make sure the user asked for a supported configuration (HW and qemu) */
+ if (satp_mode_map_max > satp_mode_supported_max) {
+ error_setg(errp, "satp_mode %s is higher than hw max capability %s",
+ satp_mode_str(satp_mode_map_max, rv32),
+ satp_mode_str(satp_mode_supported_max, rv32));
+ return;
+ }
+
+ /*
+ * Make sure the user did not ask for an invalid configuration as per
+ * the specification.
+ */
+ if (!rv32) {
+ for (int i = satp_mode_map_max - 1; i >= 0; --i) {
+ if (!(cpu->cfg.satp_mode.map & (1 << i)) &&
+ (cpu->cfg.satp_mode.init & (1 << i)) &&
+ (cpu->cfg.satp_mode.supported & (1 << i))) {
+ error_setg(errp, "cannot disable %s satp mode if %s "
+ "is enabled", satp_mode_str(i, false),
+ satp_mode_str(satp_mode_map_max, false));
+ return;
+ }
+ }
+ }
+
+ /* Finally expand the map so that all valid modes are set */
+ for (int i = satp_mode_map_max - 1; i >= 0; --i) {
+ if (cpu->cfg.satp_mode.supported & (1 << i)) {
+ cpu->cfg.satp_mode.map |= (1 << i);
+ }
+ }
+}
+#endif
+
+static void riscv_cpu_finalize_features(RISCVCPU *cpu, Error **errp)
+{
+#ifndef CONFIG_USER_ONLY
+ Error *local_err = NULL;
+
+ riscv_cpu_satp_mode_finalize(cpu, &local_err);
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ return;
+ }
+#endif
+}
+
static void riscv_cpu_realize(DeviceState *dev, Error **errp)
{
CPUState *cs = CPU(dev);
@@ -1015,6 +1220,12 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
}
#endif
+ riscv_cpu_finalize_features(cpu, &local_err);
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
riscv_cpu_register_gdb_regs_for_features(cs);
qemu_init_vcpu(cs);
@@ -1024,6 +1235,52 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
}
#ifndef CONFIG_USER_ONLY
+static void cpu_riscv_get_satp(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ RISCVSATPMap *satp_map = opaque;
+ uint8_t satp = satp_mode_from_str(name);
+ bool value;
+
+ value = satp_map->map & (1 << satp);
+
+ visit_type_bool(v, name, &value, errp);
+}
+
+static void cpu_riscv_set_satp(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ RISCVSATPMap *satp_map = opaque;
+ uint8_t satp = satp_mode_from_str(name);
+ bool value;
+
+ if (!visit_type_bool(v, name, &value, errp)) {
+ return;
+ }
+
+ satp_map->map = deposit32(satp_map->map, satp, 1, value);
+ satp_map->init |= 1 << satp;
+}
+
+static void riscv_add_satp_mode_properties(Object *obj)
+{
+ RISCVCPU *cpu = RISCV_CPU(obj);
+
+ if (cpu->env.misa_mxl == MXL_RV32) {
+ object_property_add(obj, "sv32", "bool", cpu_riscv_get_satp,
+ cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode);
+ } else {
+ object_property_add(obj, "sv39", "bool", cpu_riscv_get_satp,
+ cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode);
+ object_property_add(obj, "sv48", "bool", cpu_riscv_get_satp,
+ cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode);
+ object_property_add(obj, "sv57", "bool", cpu_riscv_get_satp,
+ cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode);
+ object_property_add(obj, "sv64", "bool", cpu_riscv_get_satp,
+ cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode);
+ }
+}
+
static void riscv_cpu_set_irq(void *opaque, int irq, int level)
{
RISCVCPU *cpu = RISCV_CPU(opaque);
@@ -1167,6 +1424,11 @@ static Property riscv_cpu_extensions[] = {
DEFINE_PROP_BOOL("zhinx", RISCVCPU, cfg.ext_zhinx, false),
DEFINE_PROP_BOOL("zhinxmin", RISCVCPU, cfg.ext_zhinxmin, false),
+ DEFINE_PROP_BOOL("zicbom", RISCVCPU, cfg.ext_icbom, true),
+ DEFINE_PROP_UINT16("cbom_blocksize", RISCVCPU, cfg.cbom_blocksize, 64),
+ DEFINE_PROP_BOOL("zicboz", RISCVCPU, cfg.ext_icboz, true),
+ DEFINE_PROP_UINT16("cboz_blocksize", RISCVCPU, cfg.cboz_blocksize, 64),
+
DEFINE_PROP_BOOL("zmmul", RISCVCPU, cfg.ext_zmmul, false),
/* Vendor-specific custom extensions */
@@ -1203,11 +1465,12 @@ static Property riscv_cpu_extensions[] = {
* properties and leave. env.misa_ext = 0 means that we want
* all the default properties to be registered.
*/
-static void register_cpu_props(DeviceState *dev)
+static void register_cpu_props(Object *obj)
{
- RISCVCPU *cpu = RISCV_CPU(OBJECT(dev));
+ RISCVCPU *cpu = RISCV_CPU(obj);
uint32_t misa_ext = cpu->env.misa_ext;
Property *prop;
+ DeviceState *dev = DEVICE(obj);
/*
* If misa_ext is not zero, set cfg properties now to
@@ -1238,6 +1501,10 @@ static void register_cpu_props(DeviceState *dev)
for (prop = riscv_cpu_extensions; prop && prop->name; prop++) {
qdev_property_add_static(dev, prop);
}
+
+#ifndef CONFIG_USER_ONLY
+ riscv_add_satp_mode_properties(obj);
+#endif
}
static Property riscv_cpu_properties[] = {
@@ -1294,6 +1561,13 @@ static const char *riscv_gdb_get_dynamic_xml(CPUState *cs, const char *xmlname)
}
#ifndef CONFIG_USER_ONLY
+static int64_t riscv_get_arch_id(CPUState *cs)
+{
+ RISCVCPU *cpu = RISCV_CPU(cs);
+
+ return cpu->env.mhartid;
+}
+
#include "hw/core/sysemu-cpu-ops.h"
static const struct SysemuCPUOps riscv_sysemu_ops = {
@@ -1348,6 +1622,7 @@ static void riscv_cpu_class_init(ObjectClass *c, void *data)
cc->disas_set_info = riscv_cpu_disas_set_info;
#ifndef CONFIG_USER_ONLY
cc->sysemu_ops = &riscv_sysemu_ops;
+ cc->get_arch_id = riscv_get_arch_id;
#endif
cc->gdb_arch_name = riscv_gdb_arch_name;
cc->gdb_get_dynamic_xml = riscv_gdb_get_dynamic_xml;
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 665b4c60b0..638e47c75a 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -27,6 +27,7 @@
#include "qom/object.h"
#include "qemu/int128.h"
#include "cpu_bits.h"
+#include "qapi/qapi-types-common.h"
#define TCG_GUEST_DEFAULT_MO 0
@@ -401,6 +402,21 @@ struct RISCVCPUClass {
ResettablePhases parent_phases;
};
+/*
+ * map is a 16-bit bitmap: the most significant set bit in map is the maximum
+ * satp mode that is supported. It may be chosen by the user and must respect
+ * what qemu implements (valid_1_10_32/64) and what the hw is capable of
+ * (supported bitmap below).
+ *
+ * init is a 16-bit bitmap used to make sure the user selected a correct
+ * configuration as per the specification.
+ *
+ * supported is a 16-bit bitmap used to reflect the hw capabilities.
+ */
+typedef struct {
+ uint16_t map, init, supported;
+} RISCVSATPMap;
+
struct RISCVCPUConfig {
bool ext_i;
bool ext_e;
@@ -434,6 +450,8 @@ struct RISCVCPUConfig {
bool ext_zkt;
bool ext_ifencei;
bool ext_icsr;
+ bool ext_icbom;
+ bool ext_icboz;
bool ext_zicond;
bool ext_zihintpause;
bool ext_smstateen;
@@ -486,6 +504,8 @@ struct RISCVCPUConfig {
char *vext_spec;
uint16_t vlen;
uint16_t elen;
+ uint16_t cbom_blocksize;
+ uint16_t cboz_blocksize;
bool mmu;
bool pmp;
bool epmp;
@@ -493,6 +513,10 @@ struct RISCVCPUConfig {
bool misa_w;
bool short_isa_string;
+
+#ifndef CONFIG_USER_ONLY
+ RISCVSATPMap satp_mode;
+#endif
};
typedef struct RISCVCPUConfig RISCVCPUConfig;
@@ -794,9 +818,14 @@ enum riscv_pmu_event_idx {
/* CSR function table */
extern riscv_csr_operations csr_ops[CSR_TABLE_SIZE];
+extern const bool valid_vm_1_10_32[], valid_vm_1_10_64[];
+
void riscv_get_csr_ops(int csrno, riscv_csr_operations *ops);
void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops);
void riscv_cpu_register_gdb_regs_for_features(CPUState *cs);
+uint8_t satp_mode_max_from_map(uint32_t map);
+const char *satp_mode_str(uint8_t satp_mode, bool is_32_bit);
+
#endif /* RISCV_CPU_H */
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 3106f96212..ab566639e5 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -1141,16 +1141,16 @@ static const target_ulong hip_writable_mask = MIP_VSSIP;
static const target_ulong hvip_writable_mask = MIP_VSSIP | MIP_VSTIP | MIP_VSEIP;
static const target_ulong vsip_writable_mask = MIP_VSSIP;
-static const char valid_vm_1_10_32[16] = {
- [VM_1_10_MBARE] = 1,
- [VM_1_10_SV32] = 1
+const bool valid_vm_1_10_32[16] = {
+ [VM_1_10_MBARE] = true,
+ [VM_1_10_SV32] = true
};
-static const char valid_vm_1_10_64[16] = {
- [VM_1_10_MBARE] = 1,
- [VM_1_10_SV39] = 1,
- [VM_1_10_SV48] = 1,
- [VM_1_10_SV57] = 1
+const bool valid_vm_1_10_64[16] = {
+ [VM_1_10_MBARE] = true,
+ [VM_1_10_SV39] = true,
+ [VM_1_10_SV48] = true,
+ [VM_1_10_SV57] = true
};
/* Machine Information Registers */
@@ -1230,13 +1230,11 @@ static RISCVException read_mstatus(CPURISCVState *env, int csrno,
return RISCV_EXCP_NONE;
}
-static int validate_vm(CPURISCVState *env, target_ulong vm)
+static bool validate_vm(CPURISCVState *env, target_ulong vm)
{
- if (riscv_cpu_mxl(env) == MXL_RV32) {
- return valid_vm_1_10_32[vm & 0xf];
- } else {
- return valid_vm_1_10_64[vm & 0xf];
- }
+ RISCVCPU *cpu = RISCV_CPU(env_cpu(env));
+
+ return (vm & 0xf) <= satp_mode_max_from_map(cpu->cfg.satp_mode.map);
}
static RISCVException write_mstatus(CPURISCVState *env, int csrno,
@@ -2669,7 +2667,8 @@ static RISCVException read_satp(CPURISCVState *env, int csrno,
static RISCVException write_satp(CPURISCVState *env, int csrno,
target_ulong val)
{
- target_ulong vm, mask;
+ target_ulong mask;
+ bool vm;
if (!riscv_cpu_cfg(env)->mmu) {
return RISCV_EXCP_NONE;
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 0497370afd..37b54e0991 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -97,6 +97,11 @@ DEF_HELPER_FLAGS_2(fcvt_h_l, TCG_CALL_NO_RWG, i64, env, tl)
DEF_HELPER_FLAGS_2(fcvt_h_lu, TCG_CALL_NO_RWG, i64, env, tl)
DEF_HELPER_FLAGS_2(fclass_h, TCG_CALL_NO_RWG_SE, tl, env, i64)
+/* Cache-block operations */
+DEF_HELPER_2(cbo_clean_flush, void, env, tl)
+DEF_HELPER_2(cbo_inval, void, env, tl)
+DEF_HELPER_2(cbo_zero, void, env, tl)
+
/* Special functions */
DEF_HELPER_2(csrr, tl, env, int)
DEF_HELPER_3(csrw, void, env, int, tl)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index fb537e922e..73d5d1b045 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -134,6 +134,7 @@ addi ............ ..... 000 ..... 0010011 @i
slti ............ ..... 010 ..... 0010011 @i
sltiu ............ ..... 011 ..... 0010011 @i
xori ............ ..... 100 ..... 0010011 @i
+# cbo.prefetch_{i,r,m} instructions are ori with rd=x0 and not decoded.
ori ............ ..... 110 ..... 0010011 @i
andi ............ ..... 111 ..... 0010011 @i
slli 00000. ...... ..... 001 ..... 0010011 @sh
@@ -179,7 +180,20 @@ sraw 0100000 ..... ..... 101 ..... 0111011 @r
# *** RV128I Base Instruction Set (in addition to RV64I) ***
ldu ............ ..... 111 ..... 0000011 @i
-lq ............ ..... 010 ..... 0001111 @i
+{
+ [
+ # *** RV32 Zicbom Standard Extension ***
+ cbo_clean 0000000 00001 ..... 010 00000 0001111 @sfence_vm
+ cbo_flush 0000000 00010 ..... 010 00000 0001111 @sfence_vm
+ cbo_inval 0000000 00000 ..... 010 00000 0001111 @sfence_vm
+
+ # *** RV32 Zicboz Standard Extension ***
+ cbo_zero 0000000 00100 ..... 010 00000 0001111 @sfence_vm
+ ]
+
+ # *** RVI128 lq ***
+ lq ............ ..... 010 ..... 0001111 @i
+}
sq ............ ..... 100 ..... 0100011 @s
addid ............ ..... 000 ..... 1011011 @i
sllid 000000 ...... ..... 001 ..... 1011011 @sh6
diff --git a/target/riscv/insn_trans/trans_rvzicbo.c.inc b/target/riscv/insn_trans/trans_rvzicbo.c.inc
new file mode 100644
index 0000000000..7df9c30b58
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvzicbo.c.inc
@@ -0,0 +1,57 @@
+/*
+ * RISC-V translation routines for the RISC-V CBO Extension.
+ *
+ * Copyright (c) 2021 Philipp Tomsich, philipp.tomsich@vrull.eu
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define REQUIRE_ZICBOM(ctx) do { \
+ if (!ctx->cfg_ptr->ext_icbom) { \
+ return false; \
+ } \
+} while (0)
+
+#define REQUIRE_ZICBOZ(ctx) do { \
+ if (!ctx->cfg_ptr->ext_icboz) { \
+ return false; \
+ } \
+} while (0)
+
+static bool trans_cbo_clean(DisasContext *ctx, arg_cbo_clean *a)
+{
+ REQUIRE_ZICBOM(ctx);
+ gen_helper_cbo_clean_flush(cpu_env, cpu_gpr[a->rs1]);
+ return true;
+}
+
+static bool trans_cbo_flush(DisasContext *ctx, arg_cbo_flush *a)
+{
+ REQUIRE_ZICBOM(ctx);
+ gen_helper_cbo_clean_flush(cpu_env, cpu_gpr[a->rs1]);
+ return true;
+}
+
+static bool trans_cbo_inval(DisasContext *ctx, arg_cbo_inval *a)
+{
+ REQUIRE_ZICBOM(ctx);
+ gen_helper_cbo_inval(cpu_env, cpu_gpr[a->rs1]);
+ return true;
+}
+
+static bool trans_cbo_zero(DisasContext *ctx, arg_cbo_zero *a)
+{
+ REQUIRE_ZICBOZ(ctx);
+ gen_helper_cbo_zero(cpu_env, cpu_gpr[a->rs1]);
+ return true;
+}
diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
index 9c0b91c88f..84ee018f7d 100644
--- a/target/riscv/op_helper.c
+++ b/target/riscv/op_helper.c
@@ -3,6 +3,7 @@
*
* Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
* Copyright (c) 2017-2018 SiFive, Inc.
+ * Copyright (c) 2022 VRULL GmbH
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -123,6 +124,140 @@ target_ulong helper_csrrw_i128(CPURISCVState *env, int csr,
return int128_getlo(rv);
}
+
+/*
+ * check_zicbo_envcfg
+ *
+ * Raise virtual exceptions and illegal instruction exceptions for
+ * Zicbo[mz] instructions based on the settings of [mhs]envcfg as
+ * specified in section 2.5.1 of the CMO specification.
+ */
+static void check_zicbo_envcfg(CPURISCVState *env, target_ulong envbits,
+ uintptr_t ra)
+{
+#ifndef CONFIG_USER_ONLY
+ if ((env->priv < PRV_M) && !get_field(env->menvcfg, envbits)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, ra);
+ }
+
+ if (riscv_cpu_virt_enabled(env) &&
+ (((env->priv < PRV_H) && !get_field(env->henvcfg, envbits)) ||
+ ((env->priv < PRV_S) && !get_field(env->senvcfg, envbits)))) {
+ riscv_raise_exception(env, RISCV_EXCP_VIRT_INSTRUCTION_FAULT, ra);
+ }
+
+ if ((env->priv < PRV_S) && !get_field(env->senvcfg, envbits)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, ra);
+ }
+#endif
+}
+
+void helper_cbo_zero(CPURISCVState *env, target_ulong address)
+{
+ RISCVCPU *cpu = env_archcpu(env);
+ uint16_t cbozlen = cpu->cfg.cboz_blocksize;
+ int mmu_idx = cpu_mmu_index(env, false);
+ uintptr_t ra = GETPC();
+ void *mem;
+
+ check_zicbo_envcfg(env, MENVCFG_CBZE, ra);
+
+ /* Mask off low-bits to align-down to the cache-block. */
+ address &= ~(cbozlen - 1);
+
+ /*
+ * cbo.zero requires MMU_DATA_STORE access. Do a probe_write()
+ * to raise any exceptions, including PMP.
+ */
+ mem = probe_write(env, address, cbozlen, mmu_idx, ra);
+
+ if (likely(mem)) {
+ memset(mem, 0, cbozlen);
+ } else {
+ /*
+ * This means that we're dealing with an I/O page. Section 4.2
+ * of cmobase v1.0.1 says:
+ *
+ * "Cache-block zero instructions store zeros independently
+ * of whether data from the underlying memory locations are
+ * cacheable."
+ *
+ * Write zeros in address + cbozlen regardless of not being
+ * a RAM page.
+ */
+ for (int i = 0; i < cbozlen; i++) {
+ cpu_stb_mmuidx_ra(env, address + i, 0, mmu_idx, ra);
+ }
+ }
+}
+
+/*
+ * check_zicbom_access
+ *
+ * Check access permissions (LOAD, STORE or FETCH as specified in
+ * section 2.5.2 of the CMO specification) for Zicbom, raising
+ * either store page-fault (non-virtualized) or store guest-page
+ * fault (virtualized).
+ */
+static void check_zicbom_access(CPURISCVState *env,
+ target_ulong address,
+ uintptr_t ra)
+{
+ RISCVCPU *cpu = env_archcpu(env);
+ int mmu_idx = cpu_mmu_index(env, false);
+ uint16_t cbomlen = cpu->cfg.cbom_blocksize;
+ void *phost;
+ int ret;
+
+ /* Mask off low-bits to align-down to the cache-block. */
+ address &= ~(cbomlen - 1);
+
+ /*
+ * Section 2.5.2 of cmobase v1.0.1:
+ *
+ * "A cache-block management instruction is permitted to
+ * access the specified cache block whenever a load instruction
+ * or store instruction is permitted to access the corresponding
+ * physical addresses. If neither a load instruction nor store
+ * instruction is permitted to access the physical addresses,
+ * but an instruction fetch is permitted to access the physical
+ * addresses, whether a cache-block management instruction is
+ * permitted to access the cache block is UNSPECIFIED."
+ */
+ ret = probe_access_flags(env, address, cbomlen, MMU_DATA_LOAD,
+ mmu_idx, true, &phost, ra);
+ if (ret != TLB_INVALID_MASK) {
+ /* Success: readable */
+ return;
+ }
+
+ /*
+ * Since not readable, must be writable. On failure, store
+ * fault/store guest amo fault will be raised by
+ * riscv_cpu_tlb_fill(). PMP exceptions will be caught
+ * there as well.
+ */
+ probe_write(env, address, cbomlen, mmu_idx, ra);
+}
+
+void helper_cbo_clean_flush(CPURISCVState *env, target_ulong address)
+{
+ uintptr_t ra = GETPC();
+ check_zicbo_envcfg(env, MENVCFG_CBCFE, ra);
+ check_zicbom_access(env, address, ra);
+
+ /* We don't emulate the cache-hierarchy, so we're done. */
+}
+
+void helper_cbo_inval(CPURISCVState *env, target_ulong address)
+{
+ uintptr_t ra = GETPC();
+ check_zicbo_envcfg(env, MENVCFG_CBIE, ra);
+ check_zicbom_access(env, address, ra);
+
+ /* We don't emulate the cache-hierarchy, so we're done. */
+}
+
#ifndef CONFIG_USER_ONLY
target_ulong helper_sret(CPURISCVState *env)
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 93909207d2..0ee8ee147d 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -1080,6 +1080,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc)
#include "insn_trans/trans_rvb.c.inc"
#include "insn_trans/trans_rvzicond.c.inc"
#include "insn_trans/trans_rvzawrs.c.inc"
+#include "insn_trans/trans_rvzicbo.c.inc"
#include "insn_trans/trans_rvzfh.c.inc"
#include "insn_trans/trans_rvk.c.inc"
#include "insn_trans/trans_privileged.c.inc"
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 29a4efb4c2..62eecf2edf 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -309,10 +309,12 @@ qtests = {
'netdev-socket': files('netdev-socket.c', '../unit/socket-helpers.c'),
}
-gvnc = dependency('gvnc-1.0', required: false)
-if gvnc.found()
- qtests += {'vnc-display-test': [gvnc]}
- qtests_generic += [ 'vnc-display-test' ]
+if vnc.found()
+ gvnc = dependency('gvnc-1.0', required: false)
+ if gvnc.found()
+ qtests += {'vnc-display-test': [gvnc]}
+ qtests_generic += [ 'vnc-display-test' ]
+ endif
endif
if dbus_display
diff --git a/tests/qtest/readconfig-test.c b/tests/qtest/readconfig-test.c
index 9ef870643d..2160603880 100644
--- a/tests/qtest/readconfig-test.c
+++ b/tests/qtest/readconfig-test.c
@@ -124,13 +124,15 @@ static void test_spice(void)
}
#endif
-static void test_object_rng_resp(QObject *res)
+static void test_object_available(QObject *res, const char *name,
+ const char *type)
{
Visitor *v;
g_autoptr(ObjectPropertyInfoList) objs = NULL;
ObjectPropertyInfoList *tmp;
ObjectPropertyInfo *obj;
- bool seen_rng = false;
+ bool object_available = false;
+ g_autofree char *childtype = g_strdup_printf("child<%s>", type);
g_assert(res);
v = qobject_input_visitor_new(res);
@@ -142,16 +144,15 @@ static void test_object_rng_resp(QObject *res)
g_assert(tmp->value);
obj = tmp->value;
- if (g_str_equal(obj->name, "rng0") &&
- g_str_equal(obj->type, "child<rng-builtin>")) {
- seen_rng = true;
+ if (g_str_equal(obj->name, name) && g_str_equal(obj->type, childtype)) {
+ object_available = true;
break;
}
tmp = tmp->next;
}
- g_assert(seen_rng);
+ g_assert(object_available);
visit_free(v);
}
@@ -170,7 +171,27 @@ static void test_object_rng(void)
resp = qtest_qmp(qts,
"{ 'execute': 'qom-list',"
" 'arguments': {'path': '/objects' }}");
- test_object_rng_resp(qdict_get(resp, "return"));
+ test_object_available(qdict_get(resp, "return"), "rng0", "rng-builtin");
+ qobject_unref(resp);
+
+ qtest_quit(qts);
+}
+
+static void test_docs_config_ich9(void)
+{
+ QTestState *qts;
+ QDict *resp;
+ QObject *qobj;
+
+ qts = qtest_initf("-nodefaults -readconfig docs/config/ich9-ehci-uhci.cfg");
+
+ resp = qtest_qmp(qts, "{ 'execute': 'qom-list',"
+ " 'arguments': {'path': '/machine/peripheral' }}");
+ qobj = qdict_get(resp, "return");
+ test_object_available(qobj, "ehci", "ich9-usb-ehci1");
+ test_object_available(qobj, "uhci-1", "ich9-usb-uhci1");
+ test_object_available(qobj, "uhci-2", "ich9-usb-uhci2");
+ test_object_available(qobj, "uhci-3", "ich9-usb-uhci3");
qobject_unref(resp);
qtest_quit(qts);
@@ -186,6 +207,7 @@ int main(int argc, char *argv[])
if (g_str_equal(arch, "i386") ||
g_str_equal(arch, "x86_64")) {
qtest_add_func("readconfig/x86/memdev", test_x86_memdev);
+ qtest_add_func("readconfig/x86/ich9-ehci-uhci", test_docs_config_ich9);
}
#ifdef CONFIG_SPICE
qtest_add_func("readconfig/spice", test_spice);
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index 51f453e6c4..d9c0a7eae6 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -47,6 +47,7 @@ tests = {
'ptimer-test': ['ptimer-test-stubs.c', meson.project_source_root() / 'hw/core/ptimer.c'],
'test-qapi-util': [],
'test-interval-tree': [],
+ 'test-xs-node': [qom],
}
if have_system or have_tools
diff --git a/tests/unit/test-xs-node.c b/tests/unit/test-xs-node.c
new file mode 100644
index 0000000000..b80d10ff98
--- /dev/null
+++ b/tests/unit/test-xs-node.c
@@ -0,0 +1,871 @@
+/*
+ * QEMU XenStore XsNode testing
+ *
+ * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/module.h"
+
+static int nr_xs_nodes;
+static GList *xs_node_list;
+
+#define XS_NODE_UNIT_TEST
+
+/*
+ * We don't need the core Xen definitions. And we *do* want to be able
+ * to run the unit tests even on architectures that Xen doesn't support
+ * (because life's too short to bother doing otherwise, and test coverage
+ * doesn't hurt).
+ */
+#define __XEN_PUBLIC_XEN_H__
+typedef unsigned long xen_pfn_t;
+
+#include "hw/i386/kvm/xenstore_impl.c"
+
+#define DOMID_QEMU 0
+#define DOMID_GUEST 1
+
+static void dump_ref(const char *name, XsNode *n, int indent)
+{
+ int i;
+
+ if (!indent && name) {
+ printf("%s:\n", name);
+ }
+
+ for (i = 0; i < indent; i++) {
+ printf(" ");
+ }
+
+ printf("->%p(%d, '%s'): '%.*s'%s%s\n", n, n->ref, n->name,
+ (int)(n->content ? n->content->len : strlen("<empty>")),
+ n->content ? (char *)n->content->data : "<empty>",
+ n->modified_in_tx ? " MODIFIED" : "",
+ n->deleted_in_tx ? " DELETED" : "");
+
+ if (n->children) {
+ g_hash_table_foreach(n->children, (void *)dump_ref,
+ GINT_TO_POINTER(indent + 2));
+ }
+}
+
+/* This doesn't happen in qemu but we want to make valgrind happy */
+static void xs_impl_delete(XenstoreImplState *s, bool last)
+{
+ int err;
+
+ xs_impl_reset_watches(s, DOMID_GUEST);
+ g_assert(!s->nr_domu_watches);
+
+ err = xs_impl_rm(s, DOMID_QEMU, XBT_NULL, "/local");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 1);
+
+ g_hash_table_unref(s->watches);
+ g_hash_table_unref(s->transactions);
+ xs_node_unref(s->root);
+ g_free(s);
+
+ if (!last) {
+ return;
+ }
+
+ if (xs_node_list) {
+ GList *l;
+ for (l = xs_node_list; l; l = l->next) {
+ XsNode *n = l->data;
+ printf("Remaining node at %p name %s ref %u\n", n, n->name,
+ n->ref);
+ }
+ }
+ g_assert(!nr_xs_nodes);
+}
+
+struct compare_walk {
+ char path[XENSTORE_ABS_PATH_MAX + 1];
+ XsNode *parent_2;
+ bool compare_ok;
+};
+
+
+static bool compare_perms(GList *p1, GList *p2)
+{
+ while (p1) {
+ if (!p2 || g_strcmp0(p1->data, p2->data)) {
+ return false;
+ }
+ p1 = p1->next;
+ p2 = p2->next;
+ }
+ return (p2 == NULL);
+}
+
+static bool compare_content(GByteArray *c1, GByteArray *c2)
+{
+ size_t len1 = 0, len2 = 0;
+
+ if (c1) {
+ len1 = c1->len;
+ }
+ if (c2) {
+ len2 = c2->len;
+ }
+ if (len1 != len2) {
+ return false;
+ }
+
+ if (!len1) {
+ return true;
+ }
+
+ return !memcmp(c1->data, c2->data, len1);
+}
+
+static void compare_child(gpointer, gpointer, gpointer);
+
+static void compare_nodes(struct compare_walk *cw, XsNode *n1, XsNode *n2)
+{
+ int nr_children1 = 0, nr_children2 = 0;
+
+ if (n1->children) {
+ nr_children1 = g_hash_table_size(n1->children);
+ }
+ if (n2->children) {
+ nr_children2 = g_hash_table_size(n2->children);
+ }
+
+ if (n1->ref != n2->ref ||
+ n1->deleted_in_tx != n2->deleted_in_tx ||
+ n1->modified_in_tx != n2->modified_in_tx ||
+ !compare_perms(n1->perms, n2->perms) ||
+ !compare_content(n1->content, n2->content) ||
+ nr_children1 != nr_children2) {
+ cw->compare_ok = false;
+ printf("Compare failure on '%s'\n", cw->path);
+ }
+
+ if (nr_children1) {
+ XsNode *oldparent = cw->parent_2;
+ cw->parent_2 = n2;
+ g_hash_table_foreach(n1->children, compare_child, cw);
+
+ cw->parent_2 = oldparent;
+ }
+}
+
+static void compare_child(gpointer key, gpointer val, gpointer opaque)
+{
+ struct compare_walk *cw = opaque;
+ char *childname = key;
+ XsNode *child1 = val;
+ XsNode *child2 = g_hash_table_lookup(cw->parent_2->children, childname);
+ int pathlen = strlen(cw->path);
+
+ if (!child2) {
+ cw->compare_ok = false;
+ printf("Child '%s' does not exist under '%s'\n", childname, cw->path);
+ return;
+ }
+
+ strncat(cw->path, "/", sizeof(cw->path) - 1);
+ strncat(cw->path, childname, sizeof(cw->path) - 1);
+
+ compare_nodes(cw, child1, child2);
+ cw->path[pathlen] = '\0';
+}
+
+static bool compare_trees(XsNode *n1, XsNode *n2)
+{
+ struct compare_walk cw;
+
+ cw.path[0] = '\0';
+ cw.parent_2 = n2;
+ cw.compare_ok = true;
+
+ if (!n1 || !n2) {
+ return false;
+ }
+
+ compare_nodes(&cw, n1, n2);
+ return cw.compare_ok;
+}
+
+static void compare_tx(gpointer key, gpointer val, gpointer opaque)
+{
+ XenstoreImplState *s2 = opaque;
+ XsTransaction *t1 = val, *t2;
+ unsigned int tx_id = GPOINTER_TO_INT(key);
+
+ t2 = g_hash_table_lookup(s2->transactions, key);
+ g_assert(t2);
+
+ g_assert(t1->tx_id == tx_id);
+ g_assert(t2->tx_id == tx_id);
+ g_assert(t1->base_tx == t2->base_tx);
+ g_assert(t1->dom_id == t2->dom_id);
+ if (!compare_trees(t1->root, t2->root)) {
+ printf("Comparison failure in TX %u after serdes:\n", tx_id);
+ dump_ref("Original", t1->root, 0);
+ dump_ref("Deserialised", t2->root, 0);
+ g_assert(0);
+ }
+ g_assert(t1->nr_nodes == t2->nr_nodes);
+}
+
+static int write_str(XenstoreImplState *s, unsigned int dom_id,
+ unsigned int tx_id, const char *path,
+ const char *content)
+{
+ GByteArray *d = g_byte_array_new();
+ int err;
+
+ g_byte_array_append(d, (void *)content, strlen(content));
+ err = xs_impl_write(s, dom_id, tx_id, path, d);
+ g_byte_array_unref(d);
+ return err;
+}
+
+static void watch_cb(void *_str, const char *path, const char *token)
+{
+ GString *str = _str;
+
+ g_string_append(str, path);
+ g_string_append(str, token);
+}
+
+static void check_serdes(XenstoreImplState *s)
+{
+ XenstoreImplState *s2 = xs_impl_create(DOMID_GUEST);
+ GByteArray *bytes = xs_impl_serialize(s);
+ int nr_transactions1, nr_transactions2;
+ int ret;
+
+ ret = xs_impl_deserialize(s2, bytes, DOMID_GUEST, watch_cb, NULL);
+ g_assert(!ret);
+
+ g_byte_array_unref(bytes);
+
+ g_assert(s->last_tx == s2->last_tx);
+ g_assert(s->root_tx == s2->root_tx);
+
+ if (!compare_trees(s->root, s2->root)) {
+ printf("Comparison failure in main tree after serdes:\n");
+ dump_ref("Original", s->root, 0);
+ dump_ref("Deserialised", s2->root, 0);
+ g_assert(0);
+ }
+
+ nr_transactions1 = g_hash_table_size(s->transactions);
+ nr_transactions2 = g_hash_table_size(s2->transactions);
+ g_assert(nr_transactions1 == nr_transactions2);
+
+ g_hash_table_foreach(s->transactions, compare_tx, s2);
+
+ g_assert(s->nr_domu_watches == s2->nr_domu_watches);
+ g_assert(s->nr_domu_transactions == s2->nr_domu_transactions);
+ g_assert(s->nr_nodes == s2->nr_nodes);
+ xs_impl_delete(s2, false);
+}
+
+static XenstoreImplState *setup(void)
+{
+ XenstoreImplState *s = xs_impl_create(DOMID_GUEST);
+ char *abspath;
+ GList *perms;
+ int err;
+
+ abspath = g_strdup_printf("/local/domain/%u", DOMID_GUEST);
+
+ err = write_str(s, DOMID_QEMU, XBT_NULL, abspath, "");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 4);
+
+ perms = g_list_append(NULL, g_strdup_printf("n%u", DOMID_QEMU));
+ perms = g_list_append(perms, g_strdup_printf("r%u", DOMID_GUEST));
+
+ err = xs_impl_set_perms(s, DOMID_QEMU, XBT_NULL, abspath, perms);
+ g_assert(!err);
+
+ g_list_free_full(perms, g_free);
+ g_free(abspath);
+
+ abspath = g_strdup_printf("/local/domain/%u/some", DOMID_GUEST);
+
+ err = write_str(s, DOMID_QEMU, XBT_NULL, abspath, "");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 5);
+
+ perms = g_list_append(NULL, g_strdup_printf("n%u", DOMID_GUEST));
+
+ err = xs_impl_set_perms(s, DOMID_QEMU, XBT_NULL, abspath, perms);
+ g_assert(!err);
+
+ g_list_free_full(perms, g_free);
+ g_free(abspath);
+
+ return s;
+}
+
+static void test_xs_node_simple(void)
+{
+ GByteArray *data = g_byte_array_new();
+ XenstoreImplState *s = setup();
+ GString *guest_watches = g_string_new(NULL);
+ GString *qemu_watches = g_string_new(NULL);
+ GList *items = NULL;
+ XsNode *old_root;
+ uint64_t gencnt;
+ int err;
+
+ g_assert(s);
+
+ err = xs_impl_watch(s, DOMID_GUEST, "some", "guestwatch",
+ watch_cb, guest_watches);
+ g_assert(!err);
+ g_assert(guest_watches->len == strlen("someguestwatch"));
+ g_assert(!strcmp(guest_watches->str, "someguestwatch"));
+ g_string_truncate(guest_watches, 0);
+
+ err = xs_impl_watch(s, 0, "/local/domain/1/some", "qemuwatch",
+ watch_cb, qemu_watches);
+ g_assert(!err);
+ g_assert(qemu_watches->len == strlen("/local/domain/1/someqemuwatch"));
+ g_assert(!strcmp(qemu_watches->str, "/local/domain/1/someqemuwatch"));
+ g_string_truncate(qemu_watches, 0);
+
+ /* Read gives ENOENT when it should */
+ err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "foo", data);
+ g_assert(err == ENOENT);
+
+ /* Write works */
+ err = write_str(s, DOMID_GUEST, XBT_NULL, "some/relative/path",
+ "something");
+ g_assert(s->nr_nodes == 7);
+ g_assert(!err);
+ g_assert(!strcmp(guest_watches->str,
+ "some/relative/pathguestwatch"));
+ g_assert(!strcmp(qemu_watches->str,
+ "/local/domain/1/some/relative/pathqemuwatch"));
+
+ g_string_truncate(qemu_watches, 0);
+ g_string_truncate(guest_watches, 0);
+ xs_impl_reset_watches(s, 0);
+
+ /* Read gives back what we wrote */
+ err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/relative/path", data);
+ g_assert(!err);
+ g_assert(data->len == strlen("something"));
+ g_assert(!memcmp(data->data, "something", data->len));
+
+    /* Even if we use an absolute path */
+ g_byte_array_set_size(data, 0);
+ err = xs_impl_read(s, DOMID_GUEST, XBT_NULL,
+ "/local/domain/1/some/relative/path", data);
+ g_assert(!err);
+ g_assert(data->len == strlen("something"));
+
+ g_assert(!qemu_watches->len);
+ g_assert(!guest_watches->len);
+ /* Keep a copy, to force COW mode */
+ old_root = xs_node_ref(s->root);
+
+ /* Write somewhere we aren't allowed, in COW mode */
+ err = write_str(s, DOMID_GUEST, XBT_NULL, "/local/domain/badplace",
+ "moredata");
+ g_assert(err == EACCES);
+ g_assert(s->nr_nodes == 7);
+
+ /* Write works again */
+ err = write_str(s, DOMID_GUEST, XBT_NULL,
+ "/local/domain/1/some/relative/path2",
+ "something else");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 8);
+ g_assert(!qemu_watches->len);
+ g_assert(!strcmp(guest_watches->str, "some/relative/path2guestwatch"));
+ g_string_truncate(guest_watches, 0);
+
+ /* Overwrite an existing node */
+ err = write_str(s, DOMID_GUEST, XBT_NULL, "some/relative/path",
+ "another thing");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 8);
+ g_assert(!qemu_watches->len);
+ g_assert(!strcmp(guest_watches->str, "some/relative/pathguestwatch"));
+ g_string_truncate(guest_watches, 0);
+
+ /* We can list the two files we wrote */
+ err = xs_impl_directory(s, DOMID_GUEST, XBT_NULL, "some/relative", &gencnt,
+ &items);
+ g_assert(!err);
+ g_assert(items);
+ g_assert(gencnt == 2);
+ g_assert(!strcmp(items->data, "path"));
+ g_assert(items->next);
+ g_assert(!strcmp(items->next->data, "path2"));
+ g_assert(!items->next->next);
+ g_list_free_full(items, g_free);
+
+ err = xs_impl_unwatch(s, DOMID_GUEST, "some", "guestwatch",
+ watch_cb, guest_watches);
+ g_assert(!err);
+
+ err = xs_impl_unwatch(s, DOMID_GUEST, "some", "guestwatch",
+ watch_cb, guest_watches);
+ g_assert(err == ENOENT);
+
+ err = xs_impl_watch(s, DOMID_GUEST, "some/relative/path2", "watchp2",
+ watch_cb, guest_watches);
+ g_assert(!err);
+ g_assert(guest_watches->len == strlen("some/relative/path2watchp2"));
+ g_assert(!strcmp(guest_watches->str, "some/relative/path2watchp2"));
+ g_string_truncate(guest_watches, 0);
+
+ err = xs_impl_watch(s, DOMID_GUEST, "/local/domain/1/some/relative",
+ "watchrel", watch_cb, guest_watches);
+ g_assert(!err);
+ g_assert(guest_watches->len ==
+ strlen("/local/domain/1/some/relativewatchrel"));
+ g_assert(!strcmp(guest_watches->str,
+ "/local/domain/1/some/relativewatchrel"));
+ g_string_truncate(guest_watches, 0);
+
+ /* Write somewhere else which already existed */
+ err = write_str(s, DOMID_GUEST, XBT_NULL, "some/relative", "moredata");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 8);
+
+ /* Write somewhere we aren't allowed */
+ err = write_str(s, DOMID_GUEST, XBT_NULL, "/local/domain/badplace",
+ "moredata");
+ g_assert(err == EACCES);
+
+ g_assert(!strcmp(guest_watches->str,
+ "/local/domain/1/some/relativewatchrel"));
+ g_string_truncate(guest_watches, 0);
+
+ g_byte_array_set_size(data, 0);
+ err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/relative", data);
+ g_assert(!err);
+ g_assert(data->len == strlen("moredata"));
+ g_assert(!memcmp(data->data, "moredata", data->len));
+
+ /* Overwrite existing data */
+ err = write_str(s, DOMID_GUEST, XBT_NULL, "some/relative", "otherdata");
+ g_assert(!err);
+ g_string_truncate(guest_watches, 0);
+
+ g_byte_array_set_size(data, 0);
+ err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/relative", data);
+ g_assert(!err);
+ g_assert(data->len == strlen("otherdata"));
+ g_assert(!memcmp(data->data, "otherdata", data->len));
+
+ /* Remove the subtree */
+ err = xs_impl_rm(s, DOMID_GUEST, XBT_NULL, "some/relative");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 5);
+
+ /* Each watch fires with the least specific relevant path */
+ g_assert(strstr(guest_watches->str,
+ "some/relative/path2watchp2"));
+ g_assert(strstr(guest_watches->str,
+ "/local/domain/1/some/relativewatchrel"));
+ g_string_truncate(guest_watches, 0);
+
+ g_byte_array_set_size(data, 0);
+ err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/relative", data);
+ g_assert(err == ENOENT);
+ g_byte_array_unref(data);
+
+ xs_impl_reset_watches(s, DOMID_GUEST);
+ g_string_free(qemu_watches, true);
+ g_string_free(guest_watches, true);
+ xs_node_unref(old_root);
+ xs_impl_delete(s, true);
+}
+
+
+static void do_test_xs_node_tx(bool fail, bool commit)
+{
+ XenstoreImplState *s = setup();
+ GString *watches = g_string_new(NULL);
+ GByteArray *data = g_byte_array_new();
+ unsigned int tx_id = XBT_NULL;
+ int err;
+
+ g_assert(s);
+
+ /* Set a watch */
+ err = xs_impl_watch(s, DOMID_GUEST, "some", "watch",
+ watch_cb, watches);
+ g_assert(!err);
+ g_assert(watches->len == strlen("somewatch"));
+ g_assert(!strcmp(watches->str, "somewatch"));
+ g_string_truncate(watches, 0);
+
+ /* Write something */
+ err = write_str(s, DOMID_GUEST, XBT_NULL, "some/relative/path",
+ "something");
+ g_assert(s->nr_nodes == 7);
+ g_assert(!err);
+ g_assert(!strcmp(watches->str,
+ "some/relative/pathwatch"));
+ g_string_truncate(watches, 0);
+
+ /* Create a transaction */
+ err = xs_impl_transaction_start(s, DOMID_GUEST, &tx_id);
+ g_assert(!err);
+
+ if (fail) {
+ /* Write something else in the root */
+ err = write_str(s, DOMID_GUEST, XBT_NULL, "some/relative/path",
+ "another thing");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 7);
+ g_assert(!strcmp(watches->str,
+ "some/relative/pathwatch"));
+ g_string_truncate(watches, 0);
+ }
+
+ g_assert(!watches->len);
+
+ /* Perform a write in the transaction */
+ err = write_str(s, DOMID_GUEST, tx_id, "some/relative/path",
+ "something else");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 7);
+ g_assert(!watches->len);
+
+ err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/relative/path", data);
+ g_assert(!err);
+ if (fail) {
+ g_assert(data->len == strlen("another thing"));
+ g_assert(!memcmp(data->data, "another thing", data->len));
+ } else {
+ g_assert(data->len == strlen("something"));
+ g_assert(!memcmp(data->data, "something", data->len));
+ }
+ g_byte_array_set_size(data, 0);
+
+ err = xs_impl_read(s, DOMID_GUEST, tx_id, "some/relative/path", data);
+ g_assert(!err);
+ g_assert(data->len == strlen("something else"));
+ g_assert(!memcmp(data->data, "something else", data->len));
+ g_byte_array_set_size(data, 0);
+
+ check_serdes(s);
+
+ /* Attempt to commit the transaction */
+ err = xs_impl_transaction_end(s, DOMID_GUEST, tx_id, commit);
+ if (commit && fail) {
+ g_assert(err == EAGAIN);
+ } else {
+ g_assert(!err);
+ }
+ if (commit && !fail) {
+ g_assert(!strcmp(watches->str,
+ "some/relative/pathwatch"));
+ g_string_truncate(watches, 0);
+ } else {
+ g_assert(!watches->len);
+ }
+ g_assert(s->nr_nodes == 7);
+
+ check_serdes(s);
+
+ err = xs_impl_unwatch(s, DOMID_GUEST, "some", "watch",
+ watch_cb, watches);
+ g_assert(!err);
+
+ err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/relative/path", data);
+ g_assert(!err);
+ if (fail) {
+ g_assert(data->len == strlen("another thing"));
+ g_assert(!memcmp(data->data, "another thing", data->len));
+ } else if (commit) {
+ g_assert(data->len == strlen("something else"));
+ g_assert(!memcmp(data->data, "something else", data->len));
+ } else {
+ g_assert(data->len == strlen("something"));
+ g_assert(!memcmp(data->data, "something", data->len));
+ }
+ g_byte_array_unref(data);
+ g_string_free(watches, true);
+ xs_impl_delete(s, true);
+}
+
+static void test_xs_node_tx_fail(void)
+{
+ do_test_xs_node_tx(true, true);
+}
+
+static void test_xs_node_tx_abort(void)
+{
+ do_test_xs_node_tx(false, false);
+ do_test_xs_node_tx(true, false);
+}
+static void test_xs_node_tx_succeed(void)
+{
+ do_test_xs_node_tx(false, true);
+}
+
+static void test_xs_node_tx_rm(void)
+{
+ XenstoreImplState *s = setup();
+ GString *watches = g_string_new(NULL);
+ GByteArray *data = g_byte_array_new();
+ unsigned int tx_id = XBT_NULL;
+ int err;
+
+ g_assert(s);
+
+ /* Set a watch */
+ err = xs_impl_watch(s, DOMID_GUEST, "some", "watch",
+ watch_cb, watches);
+ g_assert(!err);
+ g_assert(watches->len == strlen("somewatch"));
+ g_assert(!strcmp(watches->str, "somewatch"));
+ g_string_truncate(watches, 0);
+
+ /* Write something */
+ err = write_str(s, DOMID_GUEST, XBT_NULL, "some/deep/dark/relative/path",
+ "something");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 9);
+ g_assert(!strcmp(watches->str,
+ "some/deep/dark/relative/pathwatch"));
+ g_string_truncate(watches, 0);
+
+ /* Create a transaction */
+ err = xs_impl_transaction_start(s, DOMID_GUEST, &tx_id);
+ g_assert(!err);
+
+ /* Delete the tree in the transaction */
+ err = xs_impl_rm(s, DOMID_GUEST, tx_id, "some/deep/dark");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 9);
+ g_assert(!watches->len);
+
+ err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/deep/dark/relative/path",
+ data);
+ g_assert(!err);
+ g_assert(data->len == strlen("something"));
+ g_assert(!memcmp(data->data, "something", data->len));
+ g_byte_array_set_size(data, 0);
+
+ check_serdes(s);
+
+ /* Commit the transaction */
+ err = xs_impl_transaction_end(s, DOMID_GUEST, tx_id, true);
+ g_assert(!err);
+ g_assert(s->nr_nodes == 6);
+
+ g_assert(!strcmp(watches->str, "some/deep/darkwatch"));
+ g_string_truncate(watches, 0);
+
+ /* Now the node is gone */
+ err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/deep/dark/relative/path",
+ data);
+ g_assert(err == ENOENT);
+ g_byte_array_unref(data);
+
+ err = xs_impl_unwatch(s, DOMID_GUEST, "some", "watch",
+ watch_cb, watches);
+ g_assert(!err);
+
+ g_string_free(watches, true);
+ xs_impl_delete(s, true);
+}
+
+static void test_xs_node_tx_resurrect(void)
+{
+ XenstoreImplState *s = setup();
+ GString *watches = g_string_new(NULL);
+ GByteArray *data = g_byte_array_new();
+ unsigned int tx_id = XBT_NULL;
+ int err;
+
+ g_assert(s);
+
+ /* Write something */
+ err = write_str(s, DOMID_GUEST, XBT_NULL, "some/deep/dark/relative/path",
+ "something");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 9);
+
+ /* Another node to remain shared */
+ err = write_str(s, DOMID_GUEST, XBT_NULL, "some/place/safe", "keepme");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 11);
+
+ /* This node will be wiped and resurrected */
+ err = write_str(s, DOMID_GUEST, XBT_NULL, "some/deep/dark",
+ "foo");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 11);
+
+ /* Set a watch */
+ err = xs_impl_watch(s, DOMID_GUEST, "some", "watch",
+ watch_cb, watches);
+ g_assert(!err);
+ g_assert(watches->len == strlen("somewatch"));
+ g_assert(!strcmp(watches->str, "somewatch"));
+ g_string_truncate(watches, 0);
+
+ /* Create a transaction */
+ err = xs_impl_transaction_start(s, DOMID_GUEST, &tx_id);
+ g_assert(!err);
+
+ /* Delete the tree in the transaction */
+ err = xs_impl_rm(s, DOMID_GUEST, tx_id, "some/deep");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 11);
+ g_assert(!watches->len);
+
+ /* Resurrect part of it */
+ err = write_str(s, DOMID_GUEST, tx_id, "some/deep/dark/different/path",
+ "something");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 11);
+
+ check_serdes(s);
+
+ /* Commit the transaction */
+ err = xs_impl_transaction_end(s, DOMID_GUEST, tx_id, true);
+ g_assert(!err);
+ g_assert(s->nr_nodes == 11);
+
+ check_serdes(s);
+
+ /* lost data */
+ g_assert(strstr(watches->str, "some/deep/dark/different/pathwatch"));
+ /* topmost deleted */
+ g_assert(strstr(watches->str, "some/deep/dark/relativewatch"));
+ /* lost data */
+ g_assert(strstr(watches->str, "some/deep/darkwatch"));
+
+ g_string_truncate(watches, 0);
+
+ /* Now the node is gone */
+ err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/deep/dark/relative/path",
+ data);
+ g_assert(err == ENOENT);
+ g_byte_array_unref(data);
+
+ check_serdes(s);
+
+ err = xs_impl_unwatch(s, DOMID_GUEST, "some", "watch",
+ watch_cb, watches);
+ g_assert(!err);
+
+ g_string_free(watches, true);
+ xs_impl_delete(s, true);
+}
+
+static void test_xs_node_tx_resurrect2(void)
+{
+ XenstoreImplState *s = setup();
+ GString *watches = g_string_new(NULL);
+ GByteArray *data = g_byte_array_new();
+ unsigned int tx_id = XBT_NULL;
+ int err;
+
+ g_assert(s);
+
+ /* Write something */
+ err = write_str(s, DOMID_GUEST, XBT_NULL, "some/deep/dark/relative/path",
+ "something");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 9);
+
+ /* Another node to remain shared */
+ err = write_str(s, DOMID_GUEST, XBT_NULL, "some/place/safe", "keepme");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 11);
+
+ /* This node will be wiped and resurrected */
+ err = write_str(s, DOMID_GUEST, XBT_NULL, "some/deep/dark",
+ "foo");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 11);
+
+ /* Set a watch */
+ err = xs_impl_watch(s, DOMID_GUEST, "some", "watch",
+ watch_cb, watches);
+ g_assert(!err);
+ g_assert(watches->len == strlen("somewatch"));
+ g_assert(!strcmp(watches->str, "somewatch"));
+ g_string_truncate(watches, 0);
+
+ /* Create a transaction */
+ err = xs_impl_transaction_start(s, DOMID_GUEST, &tx_id);
+ g_assert(!err);
+
+ /* Delete the tree in the transaction */
+ err = xs_impl_rm(s, DOMID_GUEST, tx_id, "some/deep");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 11);
+ g_assert(!watches->len);
+
+ /* Resurrect part of it */
+ err = write_str(s, DOMID_GUEST, tx_id, "some/deep/dark/relative/path",
+ "something");
+ g_assert(!err);
+ g_assert(s->nr_nodes == 11);
+
+ check_serdes(s);
+
+ /* Commit the transaction */
+ err = xs_impl_transaction_end(s, DOMID_GUEST, tx_id, true);
+ g_assert(!err);
+ g_assert(s->nr_nodes == 11);
+
+ check_serdes(s);
+
+ /* lost data */
+ g_assert(strstr(watches->str, "some/deep/dark/relative/pathwatch"));
+ /* lost data */
+ g_assert(strstr(watches->str, "some/deep/darkwatch"));
+
+ g_string_truncate(watches, 0);
+
+    /* The resurrected node should still hold the data we wrote */
+ err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/deep/dark/relative/path",
+ data);
+ g_assert(!err);
+ g_assert(data->len == strlen("something"));
+ g_assert(!memcmp(data->data, "something", data->len));
+
+ g_byte_array_unref(data);
+
+ check_serdes(s);
+
+ err = xs_impl_unwatch(s, DOMID_GUEST, "some", "watch",
+ watch_cb, watches);
+ g_assert(!err);
+
+ g_string_free(watches, true);
+ xs_impl_delete(s, true);
+}
+
+int main(int argc, char **argv)
+{
+ g_test_init(&argc, &argv, NULL);
+ module_call_init(MODULE_INIT_QOM);
+
+ g_test_add_func("/xs_node/simple", test_xs_node_simple);
+ g_test_add_func("/xs_node/tx_abort", test_xs_node_tx_abort);
+ g_test_add_func("/xs_node/tx_fail", test_xs_node_tx_fail);
+ g_test_add_func("/xs_node/tx_succeed", test_xs_node_tx_succeed);
+ g_test_add_func("/xs_node/tx_rm", test_xs_node_tx_rm);
+ g_test_add_func("/xs_node/tx_resurrect", test_xs_node_tx_resurrect);
+ g_test_add_func("/xs_node/tx_resurrect2", test_xs_node_tx_resurrect2);
+
+ return g_test_run();
+}
diff --git a/ui/cocoa.m b/ui/cocoa.m
index 289a2b193e..985a0f5069 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -1330,10 +1330,15 @@ static CGEventRef handleTapEvent(CGEventTapProxy proxy, CGEventType type, CGEven
return NO;
}
-/* Called when QEMU goes into the background */
-- (void) applicationWillResignActive: (NSNotification *)aNotification
+/*
+ * Called when QEMU goes into the background. Note that
+ * [-NSWindowDelegate windowDidResignKey:] is used here instead of
+ * [-NSApplicationDelegate applicationWillResignActive:] because it cannot
+ * detect that the window loses focus when the Dock is clicked on macOS 13.2.1.
+ */
+- (void) windowDidResignKey: (NSNotification *)aNotification
{
- COCOA_DEBUG("QemuCocoaAppController: applicationWillResignActive\n");
+ COCOA_DEBUG("%s\n", __func__);
[cocoaView ungrabMouse];
[cocoaView raiseAllKeys];
}
diff --git a/util/async.c b/util/async.c
index 0657b75397..21016a1ac7 100644
--- a/util/async.c
+++ b/util/async.c
@@ -74,14 +74,21 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
unsigned old_flags;
/*
- * The memory barrier implicit in qatomic_fetch_or makes sure that:
- * 1. idle & any writes needed by the callback are done before the
- * locations are read in the aio_bh_poll.
- * 2. ctx is loaded before the callback has a chance to execute and bh
- * could be freed.
+     * Synchronizes with qatomic_fetch_and() in aio_bh_dequeue(), ensuring that
+ * insertion starts after BH_PENDING is set.
*/
old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
+
if (!(old_flags & BH_PENDING)) {
+ /*
+ * At this point the bottom half becomes visible to aio_bh_poll().
+ * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in
+ * aio_bh_poll(), ensuring that:
+ * 1. any writes needed by the callback are visible from the callback
+ * after aio_bh_dequeue() returns bh.
+ * 2. ctx is loaded before the callback has a chance to execute and bh
+ * could be freed.
+ */
QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
}
@@ -107,11 +114,8 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
QSLIST_REMOVE_HEAD(head, next);
/*
- * The qatomic_and is paired with aio_bh_enqueue(). The implicit memory
- * barrier ensures that the callback sees all writes done by the scheduling
- * thread. It also ensures that the scheduling thread sees the cleared
- * flag before bh->cb has run, and thus will call aio_notify again if
- * necessary.
+ * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that
+ * the removal finishes before BH_PENDING is reset.
*/
*flags = qatomic_fetch_and(&bh->flags,
~(BH_PENDING | BH_SCHEDULED | BH_IDLE));
@@ -158,6 +162,7 @@ int aio_bh_poll(AioContext *ctx)
BHListSlice *s;
int ret = 0;
+ /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */
QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
@@ -448,15 +453,15 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx)
void aio_notify(AioContext *ctx)
{
/*
- * Write e.g. bh->flags before writing ctx->notified. Pairs with smp_mb in
- * aio_notify_accept.
+ * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with
+ * smp_mb() in aio_notify_accept().
*/
smp_wmb();
qatomic_set(&ctx->notified, true);
/*
- * Write ctx->notified before reading ctx->notify_me. Pairs
- * with smp_mb in aio_ctx_prepare or aio_poll.
+ * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me.
+ * Pairs with smp_mb() in aio_ctx_prepare or aio_poll.
*/
smp_mb();
if (qatomic_read(&ctx->notify_me)) {
@@ -469,8 +474,9 @@ void aio_notify_accept(AioContext *ctx)
qatomic_set(&ctx->notified, false);
/*
- * Write ctx->notified before reading e.g. bh->flags. Pairs with smp_wmb
- * in aio_notify.
+ * Order reads of ctx->notified (in aio_context_notifier_poll()) and the
+ * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs
+ * with smp_wmb() in aio_notify.
*/
smp_mb();
}
@@ -493,6 +499,11 @@ static bool aio_context_notifier_poll(void *opaque)
EventNotifier *e = opaque;
AioContext *ctx = container_of(e, AioContext, notifier);
+ /*
+ * No need for load-acquire because we just want to kick the
+ * event loop. aio_notify_accept() takes care of synchronizing
+ * the event loop with the producers.
+ */
return qatomic_read(&ctx->notified);
}
diff --git a/util/log.c b/util/log.c
index 7837ff9917..53b4f6c58e 100644
--- a/util/log.c
+++ b/util/log.c
@@ -489,7 +489,7 @@ const QEMULogItem qemu_log_items[] = {
"do not chain compiled TBs so that \"exec\" and \"cpu\" show\n"
"complete traces" },
#ifdef CONFIG_PLUGIN
- { CPU_LOG_PLUGIN, "plugin", "output from TCG plugins\n"},
+ { CPU_LOG_PLUGIN, "plugin", "output from TCG plugins"},
#endif
{ LOG_STRACE, "strace",
"log every user-mode syscall, its input, and its result" },
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
index 58f3f77181..84a50a9e91 100644
--- a/util/qemu-coroutine-lock.c
+++ b/util/qemu-coroutine-lock.c
@@ -201,10 +201,16 @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx,
trace_qemu_co_mutex_lock_entry(mutex, self);
push_waiter(mutex, &w);
+ /*
+ * Add waiter before reading mutex->handoff. Pairs with qatomic_mb_set
+ * in qemu_co_mutex_unlock.
+ */
+ smp_mb__after_rmw();
+
/* This is the "Responsibility Hand-Off" protocol; a lock() picks from
* a concurrent unlock() the responsibility of waking somebody up.
*/
- old_handoff = qatomic_mb_read(&mutex->handoff);
+ old_handoff = qatomic_read(&mutex->handoff);
if (old_handoff &&
has_waiters(mutex) &&
qatomic_cmpxchg(&mutex->handoff, old_handoff, 0) == old_handoff) {
@@ -303,6 +309,7 @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
}
our_handoff = mutex->sequence;
+ /* Set handoff before checking for waiters. */
qatomic_mb_set(&mutex->handoff, our_handoff);
if (!has_waiters(mutex)) {
/* The concurrent lock has not added itself yet, so it
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index 93d2505797..b2e26e2120 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -384,13 +384,21 @@ void qemu_event_destroy(QemuEvent *ev)
void qemu_event_set(QemuEvent *ev)
{
- /* qemu_event_set has release semantics, but because it *loads*
+ assert(ev->initialized);
+
+ /*
+ * Pairs with both qemu_event_reset() and qemu_event_wait().
+ *
+ * qemu_event_set has release semantics, but because it *loads*
* ev->value we need a full memory barrier here.
*/
- assert(ev->initialized);
smp_mb();
if (qatomic_read(&ev->value) != EV_SET) {
- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+ int old = qatomic_xchg(&ev->value, EV_SET);
+
+ /* Pairs with memory barrier in kernel futex_wait system call. */
+ smp_mb__after_rmw();
+ if (old == EV_BUSY) {
/* There were waiters, wake them up. */
qemu_futex_wake(ev, INT_MAX);
}
@@ -399,18 +407,19 @@ void qemu_event_set(QemuEvent *ev)
void qemu_event_reset(QemuEvent *ev)
{
- unsigned value;
-
assert(ev->initialized);
- value = qatomic_read(&ev->value);
- smp_mb_acquire();
- if (value == EV_SET) {
- /*
- * If there was a concurrent reset (or even reset+wait),
- * do nothing. Otherwise change EV_SET->EV_FREE.
- */
- qatomic_or(&ev->value, EV_FREE);
- }
+
+ /*
+ * If there was a concurrent reset (or even reset+wait),
+ * do nothing. Otherwise change EV_SET->EV_FREE.
+ */
+ qatomic_or(&ev->value, EV_FREE);
+
+ /*
+ * Order reset before checking the condition in the caller.
+ * Pairs with the first memory barrier in qemu_event_set().
+ */
+ smp_mb__after_rmw();
}
void qemu_event_wait(QemuEvent *ev)
@@ -418,20 +427,40 @@ void qemu_event_wait(QemuEvent *ev)
unsigned value;
assert(ev->initialized);
- value = qatomic_read(&ev->value);
- smp_mb_acquire();
+
+ /*
+ * qemu_event_wait must synchronize with qemu_event_set even if it does
+ * not go down the slow path, so this load-acquire is needed that
+ * synchronizes with the first memory barrier in qemu_event_set().
+ *
+ * If we do go down the slow path, there is no requirement at all: we
+ * might miss a qemu_event_set() here but ultimately the memory barrier in
+ * qemu_futex_wait() will ensure the check is done correctly.
+ */
+ value = qatomic_load_acquire(&ev->value);
if (value != EV_SET) {
if (value == EV_FREE) {
/*
- * Leave the event reset and tell qemu_event_set that there
- * are waiters. No need to retry, because there cannot be
- * a concurrent busy->free transition. After the CAS, the
- * event will be either set or busy.
+ * Leave the event reset and tell qemu_event_set that there are
+ * waiters. No need to retry, because there cannot be a concurrent
+ * busy->free transition. After the CAS, the event will be either
+ * set or busy.
+ *
+ * This cmpxchg doesn't have particular ordering requirements if it
+ * succeeds (moving the store earlier can only cause qemu_event_set()
+ * to issue _more_ wakeups), the failing case needs acquire semantics
+ * like the load above.
*/
if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
return;
}
}
+
+ /*
+ * This is the final check for a concurrent set, so it does need
+ * a smp_mb() pairing with the second barrier of qemu_event_set().
+ * The barrier is inside the FUTEX_WAIT system call.
+ */
qemu_futex_wait(ev, EV_BUSY);
}
}
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index 69db254ac7..a7fe3cc345 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -272,12 +272,20 @@ void qemu_event_destroy(QemuEvent *ev)
void qemu_event_set(QemuEvent *ev)
{
assert(ev->initialized);
- /* qemu_event_set has release semantics, but because it *loads*
+
+ /*
+ * Pairs with both qemu_event_reset() and qemu_event_wait().
+ *
+ * qemu_event_set has release semantics, but because it *loads*
* ev->value we need a full memory barrier here.
*/
smp_mb();
if (qatomic_read(&ev->value) != EV_SET) {
- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+ int old = qatomic_xchg(&ev->value, EV_SET);
+
+ /* Pairs with memory barrier after ResetEvent. */
+ smp_mb__after_rmw();
+ if (old == EV_BUSY) {
/* There were waiters, wake them up. */
SetEvent(ev->event);
}
@@ -286,17 +294,19 @@ void qemu_event_set(QemuEvent *ev)
void qemu_event_reset(QemuEvent *ev)
{
- unsigned value;
-
assert(ev->initialized);
- value = qatomic_read(&ev->value);
- smp_mb_acquire();
- if (value == EV_SET) {
- /* If there was a concurrent reset (or even reset+wait),
- * do nothing. Otherwise change EV_SET->EV_FREE.
- */
- qatomic_or(&ev->value, EV_FREE);
- }
+
+ /*
+ * If there was a concurrent reset (or even reset+wait),
+ * do nothing. Otherwise change EV_SET->EV_FREE.
+ */
+ qatomic_or(&ev->value, EV_FREE);
+
+ /*
+ * Order reset before checking the condition in the caller.
+ * Pairs with the first memory barrier in qemu_event_set().
+ */
+ smp_mb__after_rmw();
}
void qemu_event_wait(QemuEvent *ev)
@@ -304,29 +314,49 @@ void qemu_event_wait(QemuEvent *ev)
unsigned value;
assert(ev->initialized);
- value = qatomic_read(&ev->value);
- smp_mb_acquire();
+
+ /*
+ * qemu_event_wait must synchronize with qemu_event_set even if it does
+ * not go down the slow path, so this load-acquire is needed that
+ * synchronizes with the first memory barrier in qemu_event_set().
+ *
+ * If we do go down the slow path, there is no requirement at all: we
+ * might miss a qemu_event_set() here but ultimately the memory barrier in
+ * qemu_futex_wait() will ensure the check is done correctly.
+ */
+ value = qatomic_load_acquire(&ev->value);
if (value != EV_SET) {
if (value == EV_FREE) {
- /* qemu_event_set is not yet going to call SetEvent, but we are
- * going to do another check for EV_SET below when setting EV_BUSY.
- * At that point it is safe to call WaitForSingleObject.
+ /*
+ * Here the underlying kernel event is reset, but qemu_event_set is
+ * not yet going to call SetEvent. However, there will be another
+ * check for EV_SET below when setting EV_BUSY. At that point it
+ * is safe to call WaitForSingleObject.
*/
ResetEvent(ev->event);
- /* Tell qemu_event_set that there are waiters. No need to retry
- * because there cannot be a concurrent busy->free transition.
- * After the CAS, the event will be either set or busy.
+ /*
+ * It is not clear whether ResetEvent provides this barrier; kernel
+ * APIs (KeResetEvent/KeClearEvent) do not. Better safe than sorry!
+ */
+ smp_mb();
+
+ /*
+ * Leave the event reset and tell qemu_event_set that there are
+ * waiters. No need to retry, because there cannot be a concurrent
+ * busy->free transition. After the CAS, the event will be either
+ * set or busy.
*/
if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
- value = EV_SET;
- } else {
- value = EV_BUSY;
+ return;
}
}
- if (value == EV_BUSY) {
- WaitForSingleObject(ev->event, INFINITE);
- }
+
+ /*
+ * ev->value is now EV_BUSY. Since we didn't observe EV_SET,
+ * qemu_event_set() must observe EV_BUSY and call SetEvent().
+ */
+ WaitForSingleObject(ev->event, INFINITE);
}
}