diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2023-03-09 13:22:05 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2023-03-09 13:22:05 +0000 |
commit | 15002921e878e6cf485f655d580733b5319ea015 (patch) | |
tree | 404375b7acc31ee6b5583905fe817dc4f7799d48 | |
parent | ba44caac0741482d2c1921f7f974c205d2d4222b (diff) | |
parent | 154eac37190c4d80d29b09c226abd899e397530f (diff) |
Merge tag 'xenfv-2' of git://git.infradead.org/users/dwmw2/qemu into staging
Enable PV backends with Xen/KVM emulation
This is phase 2, following on from the basic platform support which was
already merged.
• Add a simple single-tenant internal XenStore implementation
• Indirect Xen gnttab/evtchn/foreignmem/xenstore through operations table
• Provide emulated back ends for Xen operations
• Header cleanups to allow PV back ends to build without Xen itself
• Enable PV back ends in emulated mode
• Documentation update
Tested-by: Paul Durrant <paul@xen.org>
... on real Xen (master branch, 4.18) with a Debian guest.
# -----BEGIN PGP SIGNATURE-----
#
# iQJGBAABCgAwFiEEMUsIrNDeSBEzpfKGm+mA/QrAFUQFAmQHu3wSHGR3bXdAYW1h
# em9uLmNvLnVrAAoJEJvpgP0KwBVE5LYP/0VodDsQdP7Z4L+/IzgBSgEec7qmyQFB
# KlBZS/PmvCZKb0DHLI3GhXIyzD+/fnLtGSRl0rYObnKP7im+MpEDGmn97f6nIITk
# AzkdsVhNEBQFXCkLgQ9y8kTrTmsod9O4sqn0+naa2TX4FPcRN0MaNmpuLEubvaRS
# +JuyHmwy9ZeeAnsU31uJ0nx4F1hW9IDaatNoDeFcFnKCXQp36rtdZUViMowUJvwu
# Q+Xyg6dybusznaoiXd485tTPrTt+FK/wEARse3q2gRh9QblLu0r5BFb0rOfhYCTQ
# jw+5lBsOX+UlffmB9IDakRpVe4RKhvvRQSkRvYkPCshsqud9zMGhaquKg1vKBgca
# I31XSN0LCcon/ahHGtmVAxyZUpWdEnfzO1TbTNpz9oacROklgVgEYdw5Vwca71VD
# SURl6uCt9Jb9WmsR4twus4i4qDjQIDOtOF0hcxpl7HGktkxlGxUVI4qVLXARtVCS
# OTB6N0LlhJ2woj2wYK5BRTiOj03T2MkJEWaYhDdIrQREKWe2Sn4xTOH5kGbQQnOr
# km93odjBZFRHsAUnzXHXW3+yHjMefH7KrHePbmvsO4foGF77bBxosuC2ehFfvNJ0
# VM/H04NDtPYCBwdAr545PSN/q+WzEPQaquLZ0UuTBuPpMMOYd+Ff8YvQWJPyCM18
# 1mq9v6Xe9RQZ
# =JGLX
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 07 Mar 2023 22:32:28 GMT
# gpg: using RSA key 314B08ACD0DE481133A5F2869BE980FD0AC01544
# gpg: issuer "dwmw@amazon.co.uk"
# gpg: Good signature from "David Woodhouse <dwmw@amazon.co.uk>" [unknown]
# gpg: aka "David Woodhouse <dwmw@amazon.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 314B 08AC D0DE 4811 33A5 F286 9BE9 80FD 0AC0 1544
* tag 'xenfv-2' of git://git.infradead.org/users/dwmw2/qemu: (27 commits)
docs: Update Xen-on-KVM documentation for PV disk support
MAINTAINERS: Add entry for Xen on KVM emulation
i386/xen: Initialize Xen backends from pc_basic_device_init() for emulation
hw/xen: Implement soft reset for emulated gnttab
hw/xen: Map guest XENSTORE_PFN grant in emulated Xenstore
hw/xen: Add emulated implementation of XenStore operations
hw/xen: Add emulated implementation of grant table operations
hw/xen: Hook up emulated implementation for event channel operations
hw/xen: Only advertise ring-page-order for xen-block if gnttab supports it
hw/xen: Avoid crash when backend watch fires too early
hw/xen: Build PV backend drivers for CONFIG_XEN_BUS
hw/xen: Rename xen_common.h to xen_native.h
hw/xen: Use XEN_PAGE_SIZE in PV backend drivers
hw/xen: Move xenstore_store_pv_console_info to xen_console.c
hw/xen: Add xenstore operations to allow redirection to internal emulation
hw/xen: Add foreignmem operations to allow redirection to internal emulation
hw/xen: Pass grant ref to gnttab unmap operation
hw/xen: Add gnttab operations to allow redirection to internal emulation
hw/xen: Add evtchn operations to allow redirection to internal emulation
hw/xen: Create initial XenStore nodes
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
53 files changed, 5804 insertions, 927 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 5340de0515..640deb2895 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -443,6 +443,15 @@ F: target/i386/kvm/ F: target/i386/sev* F: scripts/kvm/vmxcap +Xen emulation on X86 KVM CPUs +M: David Woodhouse <dwmw2@infradead.org> +M: Paul Durrant <paul@xen.org> +S: Supported +F: include/sysemu/kvm_xen.h +F: target/i386/kvm/xen* +F: hw/i386/kvm/xen* +F: tests/avocado/xen_guest.py + Guest CPU Cores (other accelerators) ------------------------------------ Overall diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c index e85e4aeba5..00221e23c5 100644 --- a/accel/xen/xen-all.c +++ b/accel/xen/xen-all.c @@ -12,6 +12,7 @@ #include "qemu/error-report.h" #include "qemu/module.h" #include "qapi/error.h" +#include "hw/xen/xen_native.h" #include "hw/xen/xen-legacy-backend.h" #include "hw/xen/xen_pt.h" #include "chardev/char.h" @@ -29,73 +30,15 @@ xc_interface *xen_xc; xenforeignmemory_handle *xen_fmem; xendevicemodel_handle *xen_dmod; -static int store_dev_info(int domid, Chardev *cs, const char *string) +static void xenstore_record_dm_state(const char *state) { - struct xs_handle *xs = NULL; - char *path = NULL; - char *newpath = NULL; - char *pts = NULL; - int ret = -1; - - /* Only continue if we're talking to a pty. */ - if (!CHARDEV_IS_PTY(cs)) { - return 0; - } - pts = cs->filename + 4; + struct xs_handle *xs; + char path[50]; /* We now have everything we need to set the xenstore entry. */ xs = xs_open(0); if (xs == NULL) { fprintf(stderr, "Could not contact XenStore\n"); - goto out; - } - - path = xs_get_domain_path(xs, domid); - if (path == NULL) { - fprintf(stderr, "xs_get_domain_path() error\n"); - goto out; - } - newpath = realloc(path, (strlen(path) + strlen(string) + - strlen("/tty") + 1)); - if (newpath == NULL) { - fprintf(stderr, "realloc error\n"); - goto out; - } - path = newpath; - - strcat(path, string); - strcat(path, "/tty"); - if (!xs_write(xs, XBT_NULL, path, pts, strlen(pts))) { - fprintf(stderr, "xs_write for '%s' fail", string); - goto out; - } - ret = 0; - -out: - free(path); - xs_close(xs); - - return ret; -} - -void xenstore_store_pv_console_info(int i, Chardev *chr) -{ - if (i == 0) { - store_dev_info(xen_domid, chr, "/console"); - } else { - char buf[32]; - snprintf(buf, sizeof(buf), "/device/console/%d", i); - store_dev_info(xen_domid, chr, buf); - } -} - - -static void xenstore_record_dm_state(struct xs_handle *xs, const char *state) -{ - char path[50]; - - if (xs == NULL) { - error_report("xenstore connection not initialized"); exit(1); } @@ -109,6 +52,8 @@ static void xenstore_record_dm_state(struct xs_handle *xs, const char *state) error_report("error recording dm state"); exit(1); } + + xs_close(xs); } @@ -117,7 +62,7 @@ static void xen_change_state_handler(void *opaque, bool running, { if (running) { /* record state running */ - xenstore_record_dm_state(xenstore, "running"); + xenstore_record_dm_state("running"); } } diff --git a/docs/system/i386/xen.rst b/docs/system/i386/xen.rst index a00523b492..f06765e88c 100644 --- a/docs/system/i386/xen.rst +++ b/docs/system/i386/xen.rst @@ -9,6 +9,8 @@ KVM has support for hosting Xen guests, intercepting Xen hypercalls and event channel (Xen PV interrupt) delivery. This allows guests which expect to be run under Xen to be hosted in QEMU under Linux/KVM instead. +Using the split irqchip is mandatory for Xen support. + Setup ----- @@ -17,14 +19,14 @@ accelerator, for example for Xen 4.10: .. parsed-literal:: - |qemu_system| --accel kvm,xen-version=0x4000a + |qemu_system| --accel kvm,xen-version=0x4000a,kernel-irqchip=split Additionally, virtual APIC support can be advertised to the guest through the ``xen-vapic`` CPU flag: .. parsed-literal:: - |qemu_system| --accel kvm,xen-version=0x4000a --cpu host,+xen_vapic + |qemu_system| --accel kvm,xen-version=0x4000a,kernel-irqchip=split --cpu host,+xen_vapic When Xen support is enabled, QEMU changes hypervisor identification (CPUID 0x40000000..0x4000000A) to Xen. The KVM identification and features are not @@ -33,11 +35,25 @@ moves to leaves 0x40000100..0x4000010A. The Xen platform device is enabled automatically for a Xen guest. This allows a guest to unplug all emulated devices, in order to use Xen PV block and network -drivers instead. Note that until the Xen PV device back ends are enabled to work -with Xen mode in QEMU, that is unlikely to cause significant joy. Linux guests -can be dissuaded from this by adding 'xen_emul_unplug=never' on their command -line, and it can also be noted that AHCI disk controllers are exempt from being -unplugged, as are passthrough VFIO PCI devices. +drivers instead. Under Xen, the boot disk is typically available both via IDE +emulation, and as a PV block device. Guest bootloaders typically use IDE to load +the guest kernel, which then unplugs the IDE and continues with the Xen PV block +device. + +This configuration can be achieved as follows + +.. parsed-literal:: + + |qemu_system| -M pc --accel kvm,xen-version=0x4000a,kernel-irqchip=split \\ + -drive file=${GUEST_IMAGE},if=none,id=disk,file.locking=off -device xen-disk,drive=disk,vdev=xvda \\ + -drive file=${GUEST_IMAGE},index=2,media=disk,file.locking=off,if=ide + +It is necessary to use the pc machine type, as the q35 machine uses AHCI instead +of legacy IDE, and AHCI disks are not unplugged through the Xen PV unplug +mechanism. + +VirtIO devices can also be used; Linux guests may need to be dissuaded from +umplugging them by adding 'xen_emul_unplug=never' on their command line. Properties ---------- diff --git a/hw/9pfs/meson.build b/hw/9pfs/meson.build index 12443b6ad5..fd37b7a02d 100644 --- a/hw/9pfs/meson.build +++ b/hw/9pfs/meson.build @@ -15,7 +15,7 @@ fs_ss.add(files( )) fs_ss.add(when: 'CONFIG_LINUX', if_true: files('9p-util-linux.c')) fs_ss.add(when: 'CONFIG_DARWIN', if_true: files('9p-util-darwin.c')) -fs_ss.add(when: 'CONFIG_XEN', if_true: files('xen-9p-backend.c')) +fs_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-9p-backend.c')) softmmu_ss.add_all(when: 'CONFIG_FSDEV_9P', if_true: fs_ss) specific_ss.add(when: 'CONFIG_VIRTIO_9P', if_true: files('virtio-9p-device.c')) diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c index 65c4979c3c..74f3a05f88 100644 --- a/hw/9pfs/xen-9p-backend.c +++ b/hw/9pfs/xen-9p-backend.c @@ -22,6 +22,7 @@ #include "qemu/config-file.h" #include "qemu/main-loop.h" #include "qemu/option.h" +#include "qemu/iov.h" #include "fsdev/qemu-fsdev.h" #define VERSIONS "1" @@ -241,7 +242,7 @@ static void xen_9pfs_push_and_notify(V9fsPDU *pdu) xen_wmb(); ring->inprogress = false; - xenevtchn_notify(ring->evtchndev, ring->local_port); + qemu_xen_evtchn_notify(ring->evtchndev, ring->local_port); qemu_bh_schedule(ring->bh); } @@ -324,8 +325,8 @@ static void xen_9pfs_evtchn_event(void *opaque) Xen9pfsRing *ring = opaque; evtchn_port_t port; - port = xenevtchn_pending(ring->evtchndev); - xenevtchn_unmask(ring->evtchndev, port); + port = qemu_xen_evtchn_pending(ring->evtchndev); + qemu_xen_evtchn_unmask(ring->evtchndev, port); qemu_bh_schedule(ring->bh); } @@ -337,10 +338,10 @@ static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev) for (i = 0; i < xen_9pdev->num_rings; i++) { if (xen_9pdev->rings[i].evtchndev != NULL) { - qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev), - NULL, NULL, NULL); - xenevtchn_unbind(xen_9pdev->rings[i].evtchndev, - xen_9pdev->rings[i].local_port); + qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev), + NULL, NULL, NULL); + qemu_xen_evtchn_unbind(xen_9pdev->rings[i].evtchndev, + xen_9pdev->rings[i].local_port); xen_9pdev->rings[i].evtchndev = NULL; } } @@ -359,12 +360,13 @@ static int xen_9pfs_free(struct XenLegacyDevice *xendev) if (xen_9pdev->rings[i].data != NULL) { xen_be_unmap_grant_refs(&xen_9pdev->xendev, xen_9pdev->rings[i].data, + xen_9pdev->rings[i].intf->ref, (1 << xen_9pdev->rings[i].ring_order)); } if (xen_9pdev->rings[i].intf != NULL) { - xen_be_unmap_grant_refs(&xen_9pdev->xendev, - xen_9pdev->rings[i].intf, - 1); + xen_be_unmap_grant_ref(&xen_9pdev->xendev, + xen_9pdev->rings[i].intf, + xen_9pdev->rings[i].ref); } if (xen_9pdev->rings[i].bh != NULL) { qemu_bh_delete(xen_9pdev->rings[i].bh); @@ -447,12 +449,12 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev) xen_9pdev->rings[i].inprogress = false; - xen_9pdev->rings[i].evtchndev = xenevtchn_open(NULL, 0); + xen_9pdev->rings[i].evtchndev = qemu_xen_evtchn_open(); if (xen_9pdev->rings[i].evtchndev == NULL) { goto out; } - qemu_set_cloexec(xenevtchn_fd(xen_9pdev->rings[i].evtchndev)); - xen_9pdev->rings[i].local_port = xenevtchn_bind_interdomain + qemu_set_cloexec(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev)); + xen_9pdev->rings[i].local_port = qemu_xen_evtchn_bind_interdomain (xen_9pdev->rings[i].evtchndev, xendev->dom, xen_9pdev->rings[i].evtchn); @@ -463,8 +465,8 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev) goto out; } xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port); - qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev), - xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]); + qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev), + xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]); } xen_9pdev->security_model = xenstore_read_be_str(xendev, "security_model"); diff --git a/hw/block/dataplane/meson.build b/hw/block/dataplane/meson.build index 12c6a264f1..78d7ac1a11 100644 --- a/hw/block/dataplane/meson.build +++ b/hw/block/dataplane/meson.build @@ -1,2 +1,2 @@ specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) -specific_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c')) +specific_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c')) diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c index 2785b9e849..734da42ea7 100644 --- a/hw/block/dataplane/xen-block.c +++ b/hw/block/dataplane/xen-block.c @@ -23,8 +23,9 @@ #include "qemu/main-loop.h" #include "qemu/memalign.h" #include "qapi/error.h" -#include "hw/xen/xen_common.h" +#include "hw/xen/xen.h" #include "hw/block/xen_blkif.h" +#include "hw/xen/interface/io/ring.h" #include "sysemu/block-backend.h" #include "sysemu/iothread.h" #include "xen-block.h" @@ -101,9 +102,9 @@ static XenBlockRequest *xen_block_start_request(XenBlockDataPlane *dataplane) * re-use requests, allocate the memory once here. It will be freed * xen_block_dataplane_destroy() when the request list is freed. */ - request->buf = qemu_memalign(XC_PAGE_SIZE, + request->buf = qemu_memalign(XEN_PAGE_SIZE, BLKIF_MAX_SEGMENTS_PER_REQUEST * - XC_PAGE_SIZE); + XEN_PAGE_SIZE); dataplane->requests_total++; qemu_iovec_init(&request->v, 1); } else { @@ -185,7 +186,7 @@ static int xen_block_parse_request(XenBlockRequest *request) goto err; } if (request->req.seg[i].last_sect * dataplane->sector_size >= - XC_PAGE_SIZE) { + XEN_PAGE_SIZE) { error_report("error: page crossing"); goto err; } @@ -705,6 +706,7 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane) Error *local_err = NULL; xen_device_unmap_grant_refs(xendev, dataplane->sring, + dataplane->ring_ref, dataplane->nr_ring_ref, &local_err); dataplane->sring = NULL; @@ -739,7 +741,7 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, dataplane->protocol = protocol; - ring_size = XC_PAGE_SIZE * dataplane->nr_ring_ref; + ring_size = XEN_PAGE_SIZE * dataplane->nr_ring_ref; switch (dataplane->protocol) { case BLKIF_PROTOCOL_NATIVE: { diff --git a/hw/block/meson.build b/hw/block/meson.build index b434d5654c..cc2a75cc50 100644 --- a/hw/block/meson.build +++ b/hw/block/meson.build @@ -14,7 +14,7 @@ softmmu_ss.add(when: 'CONFIG_PFLASH_CFI02', if_true: files('pflash_cfi02.c')) softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c')) softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80_sfdp.c')) softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c')) -softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c')) +softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c')) softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c')) specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c', 'virtio-blk-common.c')) diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c index 345b284d70..f5a744589d 100644 --- a/hw/block/xen-block.c +++ b/hw/block/xen-block.c @@ -19,7 +19,6 @@ #include "qapi/qmp/qdict.h" #include "qapi/qmp/qstring.h" #include "qom/object_interfaces.h" -#include "hw/xen/xen_common.h" #include "hw/block/xen_blkif.h" #include "hw/qdev-properties.h" #include "hw/xen/xen-block.h" @@ -84,7 +83,8 @@ static void xen_block_connect(XenDevice *xendev, Error **errp) g_free(ring_ref); return; } - } else if (order <= blockdev->props.max_ring_page_order) { + } else if (qemu_xen_gnttab_can_map_multi() && + order <= blockdev->props.max_ring_page_order) { unsigned int i; nr_ring_ref = 1 << order; @@ -256,8 +256,12 @@ static void xen_block_realize(XenDevice *xendev, Error **errp) } xen_device_backend_printf(xendev, "feature-flush-cache", "%u", 1); - xen_device_backend_printf(xendev, "max-ring-page-order", "%u", - blockdev->props.max_ring_page_order); + + if (qemu_xen_gnttab_can_map_multi()) { + xen_device_backend_printf(xendev, "max-ring-page-order", "%u", + blockdev->props.max_ring_page_order); + } + xen_device_backend_printf(xendev, "info", "%u", blockdev->info); xen_device_frontend_printf(xendev, "virtual-device", "%lu", diff --git a/hw/char/meson.build b/hw/char/meson.build index 7b594f51b8..e02c60dd54 100644 --- a/hw/char/meson.build +++ b/hw/char/meson.build @@ -18,7 +18,7 @@ softmmu_ss.add(when: 'CONFIG_SERIAL_PCI', if_true: files('serial-pci.c')) softmmu_ss.add(when: 'CONFIG_SERIAL_PCI_MULTI', if_true: files('serial-pci-multi.c')) softmmu_ss.add(when: 'CONFIG_SHAKTI_UART', if_true: files('shakti_uart.c')) softmmu_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-console.c')) -softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen_console.c')) +softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen_console.c')) softmmu_ss.add(when: 'CONFIG_XILINX', if_true: files('xilinx_uartlite.c')) softmmu_ss.add(when: 'CONFIG_AVR_USART', if_true: files('avr_usart.c')) diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c index 63153dfde4..c7a19c0e7c 100644 --- a/hw/char/xen_console.c +++ b/hw/char/xen_console.c @@ -173,6 +173,48 @@ static void xencons_send(struct XenConsole *con) /* -------------------------------------------------------------------- */ +static int store_con_info(struct XenConsole *con) +{ + Chardev *cs = qemu_chr_fe_get_driver(&con->chr); + char *pts = NULL; + char *dom_path; + GString *path; + int ret = -1; + + /* Only continue if we're talking to a pty. */ + if (!CHARDEV_IS_PTY(cs)) { + return 0; + } + pts = cs->filename + 4; + + dom_path = qemu_xen_xs_get_domain_path(xenstore, xen_domid); + if (!dom_path) { + return 0; + } + + path = g_string_new(dom_path); + free(dom_path); + + if (con->xendev.dev) { + g_string_append_printf(path, "/device/console/%d", con->xendev.dev); + } else { + g_string_append(path, "/console"); + } + g_string_append(path, "/tty"); + + if (xenstore_write_str(con->console, path->str, pts)) { + fprintf(stderr, "xenstore_write_str for '%s' fail", path->str); + goto out; + } + ret = 0; + +out: + g_string_free(path, true); + free(path); + + return ret; +} + static int con_init(struct XenLegacyDevice *xendev) { struct XenConsole *con = container_of(xendev, struct XenConsole, xendev); @@ -181,7 +223,7 @@ static int con_init(struct XenLegacyDevice *xendev) const char *output; /* setup */ - dom = xs_get_domain_path(xenstore, con->xendev.dom); + dom = qemu_xen_xs_get_domain_path(xenstore, con->xendev.dom); if (!xendev->dev) { snprintf(con->console, sizeof(con->console), "%s/console", dom); } else { @@ -215,8 +257,7 @@ static int con_init(struct XenLegacyDevice *xendev) &error_abort); } - xenstore_store_pv_console_info(con->xendev.dev, - qemu_chr_fe_get_driver(&con->chr)); + store_con_info(con); out: g_free(type); @@ -237,9 +278,9 @@ static int con_initialise(struct XenLegacyDevice *xendev) if (!xendev->dev) { xen_pfn_t mfn = con->ring_ref; - con->sring = xenforeignmemory_map(xen_fmem, con->xendev.dom, - PROT_READ | PROT_WRITE, - 1, &mfn, NULL); + con->sring = qemu_xen_foreignmem_map(con->xendev.dom, NULL, + PROT_READ | PROT_WRITE, + 1, &mfn, NULL); } else { con->sring = xen_be_map_grant_ref(xendev, con->ring_ref, PROT_READ | PROT_WRITE); @@ -269,9 +310,9 @@ static void con_disconnect(struct XenLegacyDevice *xendev) if (con->sring) { if (!xendev->dev) { - xenforeignmemory_unmap(xen_fmem, con->sring, 1); + qemu_xen_foreignmem_unmap(con->sring, 1); } else { - xen_be_unmap_grant_ref(xendev, con->sring); + xen_be_unmap_grant_ref(xendev, con->sring, con->ring_ref); } con->sring = NULL; } diff --git a/hw/display/meson.build b/hw/display/meson.build index f470179122..4191694380 100644 --- a/hw/display/meson.build +++ b/hw/display/meson.build @@ -14,7 +14,7 @@ softmmu_ss.add(when: 'CONFIG_PL110', if_true: files('pl110.c')) softmmu_ss.add(when: 'CONFIG_SII9022', if_true: files('sii9022.c')) softmmu_ss.add(when: 'CONFIG_SSD0303', if_true: files('ssd0303.c')) softmmu_ss.add(when: 'CONFIG_SSD0323', if_true: files('ssd0323.c')) -softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xenfb.c')) +softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xenfb.c')) softmmu_ss.add(when: 'CONFIG_VGA_PCI', if_true: files('vga-pci.c')) softmmu_ss.add(when: 'CONFIG_VGA_ISA', if_true: files('vga-isa.c')) diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c index 260eb38a76..0074a9b6f8 100644 --- a/hw/display/xenfb.c +++ b/hw/display/xenfb.c @@ -98,8 +98,9 @@ static int common_bind(struct common *c) if (xenstore_read_fe_int(&c->xendev, "event-channel", &c->xendev.remote_port) == -1) return -1; - c->page = xenforeignmemory_map(xen_fmem, c->xendev.dom, - PROT_READ | PROT_WRITE, 1, &mfn, NULL); + c->page = qemu_xen_foreignmem_map(c->xendev.dom, NULL, + PROT_READ | PROT_WRITE, 1, &mfn, + NULL); if (c->page == NULL) return -1; @@ -115,7 +116,7 @@ static void common_unbind(struct common *c) { xen_pv_unbind_evtchn(&c->xendev); if (c->page) { - xenforeignmemory_unmap(xen_fmem, c->page, 1); + qemu_xen_foreignmem_unmap(c->page, 1); c->page = NULL; } } @@ -488,27 +489,28 @@ static int xenfb_map_fb(struct XenFB *xenfb) } if (xenfb->pixels) { - munmap(xenfb->pixels, xenfb->fbpages * XC_PAGE_SIZE); + munmap(xenfb->pixels, xenfb->fbpages * XEN_PAGE_SIZE); xenfb->pixels = NULL; } - xenfb->fbpages = DIV_ROUND_UP(xenfb->fb_len, XC_PAGE_SIZE); + xenfb->fbpages = DIV_ROUND_UP(xenfb->fb_len, XEN_PAGE_SIZE); n_fbdirs = xenfb->fbpages * mode / 8; - n_fbdirs = DIV_ROUND_UP(n_fbdirs, XC_PAGE_SIZE); + n_fbdirs = DIV_ROUND_UP(n_fbdirs, XEN_PAGE_SIZE); pgmfns = g_new0(xen_pfn_t, n_fbdirs); fbmfns = g_new0(xen_pfn_t, xenfb->fbpages); xenfb_copy_mfns(mode, n_fbdirs, pgmfns, pd); - map = xenforeignmemory_map(xen_fmem, xenfb->c.xendev.dom, - PROT_READ, n_fbdirs, pgmfns, NULL); + map = qemu_xen_foreignmem_map(xenfb->c.xendev.dom, NULL, PROT_READ, + n_fbdirs, pgmfns, NULL); if (map == NULL) goto out; xenfb_copy_mfns(mode, xenfb->fbpages, fbmfns, map); - xenforeignmemory_unmap(xen_fmem, map, n_fbdirs); + qemu_xen_foreignmem_unmap(map, n_fbdirs); - xenfb->pixels = xenforeignmemory_map(xen_fmem, xenfb->c.xendev.dom, - PROT_READ, xenfb->fbpages, fbmfns, NULL); + xenfb->pixels = qemu_xen_foreignmem_map(xenfb->c.xendev.dom, NULL, + PROT_READ, xenfb->fbpages, + fbmfns, NULL); if (xenfb->pixels == NULL) goto out; @@ -526,8 +528,8 @@ static int xenfb_configure_fb(struct XenFB *xenfb, size_t fb_len_lim, { size_t mfn_sz = sizeof_field(struct xenfb_page, pd[0]); size_t pd_len = sizeof_field(struct xenfb_page, pd) / mfn_sz; - size_t fb_pages = pd_len * XC_PAGE_SIZE / mfn_sz; - size_t fb_len_max = fb_pages * XC_PAGE_SIZE; + size_t fb_pages = pd_len * XEN_PAGE_SIZE / mfn_sz; + size_t fb_len_max = fb_pages * XEN_PAGE_SIZE; int max_width, max_height; if (fb_len_lim > fb_len_max) { @@ -927,8 +929,8 @@ static void fb_disconnect(struct XenLegacyDevice *xendev) * Replacing the framebuffer with anonymous shared memory * instead. This releases the guest pages and keeps qemu happy. */ - xenforeignmemory_unmap(xen_fmem, fb->pixels, fb->fbpages); - fb->pixels = mmap(fb->pixels, fb->fbpages * XC_PAGE_SIZE, + qemu_xen_foreignmem_unmap(fb->pixels, fb->fbpages); + fb->pixels = mmap(fb->pixels, fb->fbpages * XEN_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); if (fb->pixels == MAP_FAILED) { diff --git a/hw/i386/kvm/meson.build b/hw/i386/kvm/meson.build index 82dd6ae7c6..6621ba5cd7 100644 --- a/hw/i386/kvm/meson.build +++ b/hw/i386/kvm/meson.build @@ -9,6 +9,7 @@ i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files( 'xen_evtchn.c', 'xen_gnttab.c', 'xen_xenstore.c', + 'xenstore_impl.c', )) i386_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) diff --git a/hw/i386/kvm/trace-events b/hw/i386/kvm/trace-events index b83c3eb965..e4c82de6f3 100644 --- a/hw/i386/kvm/trace-events +++ b/hw/i386/kvm/trace-events @@ -3,3 +3,18 @@ kvm_xen_unmap_pirq(int pirq, int gsi) "pirq %d gsi %d" kvm_xen_get_free_pirq(int pirq, int type) "pirq %d type %d" kvm_xen_bind_pirq(int pirq, int port) "pirq %d port %d" kvm_xen_unmask_pirq(int pirq, char *dev, int vector) "pirq %d dev %s vector %d" +xenstore_error(unsigned int id, unsigned int tx_id, const char *err) "req %u tx %u err %s" +xenstore_read(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_write(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_mkdir(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_directory(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_directory_part(unsigned int tx_id, const char *path, unsigned int offset) "tx %u path %s offset %u" +xenstore_transaction_start(unsigned int new_tx) "new_tx %u" +xenstore_transaction_end(unsigned int tx_id, bool commit) "tx %u commit %d" +xenstore_rm(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_get_perms(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_set_perms(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_watch(const char *path, const char *token) "path %s token %s" +xenstore_unwatch(const char *path, const char *token) "path %s token %s" +xenstore_reset_watches(void) "" +xenstore_watch_event(const char *path, const char *token) "path %s token %s" diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c index 886fbf6b3b..98a7b85047 100644 --- a/hw/i386/kvm/xen_evtchn.c +++ b/hw/i386/kvm/xen_evtchn.c @@ -34,6 +34,7 @@ #include "hw/pci/msi.h" #include "hw/pci/msix.h" #include "hw/irq.h" +#include "hw/xen/xen_backend_ops.h" #include "xen_evtchn.h" #include "xen_overlay.h" @@ -278,6 +279,17 @@ static const TypeInfo xen_evtchn_info = { .class_init = xen_evtchn_class_init, }; +static struct evtchn_backend_ops emu_evtchn_backend_ops = { + .open = xen_be_evtchn_open, + .bind_interdomain = xen_be_evtchn_bind_interdomain, + .unbind = xen_be_evtchn_unbind, + .close = xen_be_evtchn_close, + .get_fd = xen_be_evtchn_fd, + .notify = xen_be_evtchn_notify, + .unmask = xen_be_evtchn_unmask, + .pending = xen_be_evtchn_pending, +}; + static void gsi_assert_bh(void *opaque) { struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0); @@ -318,6 +330,9 @@ void xen_evtchn_create(void) s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64); s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words); s->pirq = g_new0(struct pirq_info, s->nr_pirqs); + + /* Set event channel functions for backend drivers to use */ + xen_evtchn_ops = &emu_evtchn_backend_ops; } void xen_evtchn_connect_gsis(qemu_irq *system_gsis) diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c index 1e691ded32..21c30e3659 100644 --- a/hw/i386/kvm/xen_gnttab.c +++ b/hw/i386/kvm/xen_gnttab.c @@ -22,6 +22,7 @@ #include "hw/sysbus.h" #include "hw/xen/xen.h" +#include "hw/xen/xen_backend_ops.h" #include "xen_overlay.h" #include "xen_gnttab.h" @@ -34,11 +35,10 @@ #define TYPE_XEN_GNTTAB "xen-gnttab" OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB) -#define XEN_PAGE_SHIFT 12 -#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT) - #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t)) +static struct gnttab_backend_ops emu_gnttab_backend_ops; + struct XenGnttabState { /*< private >*/ SysBusDevice busdev; @@ -57,6 +57,8 @@ struct XenGnttabState { MemoryRegion gnt_frames; MemoryRegion *gnt_aliases; uint64_t *gnt_frame_gpas; + + uint8_t *map_track; }; struct XenGnttabState *xen_gnttab_singleton; @@ -70,13 +72,11 @@ static void xen_gnttab_realize(DeviceState *dev, Error **errp) error_setg(errp, "Xen grant table support is for Xen emulation"); return; } - s->nr_frames = 0; s->max_frames = kvm_xen_get_gnttab_max_frames(); memory_region_init_ram(&s->gnt_frames, OBJECT(dev), "xen:grant_table", XEN_PAGE_SIZE * s->max_frames, &error_abort); memory_region_set_enabled(&s->gnt_frames, true); s->entries.v1 = memory_region_get_ram_ptr(&s->gnt_frames); - memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames); /* Create individual page-sizes aliases for overlays */ s->gnt_aliases = (void *)g_new0(MemoryRegion, s->max_frames); @@ -88,9 +88,18 @@ static void xen_gnttab_realize(DeviceState *dev, Error **errp) s->gnt_frame_gpas[i] = INVALID_GPA; } + s->nr_frames = 0; + memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames); + s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access; + s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE); + qemu_mutex_init(&s->gnt_lock); xen_gnttab_singleton = s; + + s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1); + + xen_gnttab_ops = &emu_gnttab_backend_ops; } static int xen_gnttab_post_load(void *opaque, int version_id) @@ -230,3 +239,309 @@ int xen_gnttab_query_size_op(struct gnttab_query_size *size) size->max_nr_frames = s->max_frames; return 0; } + +/* Track per-open refs, to allow close() to clean up. */ +struct active_ref { + MemoryRegionSection mrs; + void *virtaddr; + uint32_t refcnt; + int prot; +}; + +static void gnt_unref(XenGnttabState *s, grant_ref_t ref, + MemoryRegionSection *mrs, int prot) +{ + if (mrs && mrs->mr) { + if (prot & PROT_WRITE) { + memory_region_set_dirty(mrs->mr, mrs->offset_within_region, + XEN_PAGE_SIZE); + } + memory_region_unref(mrs->mr); + mrs->mr = NULL; + } + assert(s->map_track[ref] != 0); + + if (--s->map_track[ref] == 0) { + grant_entry_v1_t *gnt_p = &s->entries.v1[ref]; + qatomic_and(&gnt_p->flags, (uint16_t)~(GTF_reading | GTF_writing)); + } +} + +static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot) +{ + uint16_t mask = GTF_type_mask | GTF_sub_page; + grant_entry_v1_t gnt, *gnt_p; + int retries = 0; + + if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 || + s->map_track[ref] == UINT8_MAX) { + return INVALID_GPA; + } + + if (prot & PROT_WRITE) { + mask |= GTF_readonly; + } + + gnt_p = &s->entries.v1[ref]; + + /* + * The guest can legitimately be changing the GTF_readonly flag. Allow + * that, but don't let a malicious guest cause a livelock. + */ + for (retries = 0; retries < 5; retries++) { + uint16_t new_flags; + + /* Read the entry before an atomic operation on its flags */ + gnt = *(volatile grant_entry_v1_t *)gnt_p; + + if ((gnt.flags & mask) != GTF_permit_access || + gnt.domid != DOMID_QEMU) { + return INVALID_GPA; + } + + new_flags = gnt.flags | GTF_reading; + if (prot & PROT_WRITE) { + new_flags |= GTF_writing; + } + + if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) { + return (uint64_t)gnt.frame << XEN_PAGE_SHIFT; + } + } + + return INVALID_GPA; +} + +struct xengntdev_handle { + GHashTable *active_maps; +}; + +static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt, + uint32_t nr_grants) +{ + return 0; +} + +static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt, + uint32_t count, uint32_t domid, + uint32_t *refs, int prot) +{ + XenGnttabState *s = xen_gnttab_singleton; + struct active_ref *act; + + if (!s) { + errno = ENOTSUP; + return NULL; + } + + if (domid != xen_domid) { + errno = EINVAL; + return NULL; + } + + if (!count || count > 4096) { + errno = EINVAL; + return NULL; + } + + /* + * Making a contiguous mapping from potentially discontiguous grant + * references would be... distinctly non-trivial. We don't support it. + * Even changing the API to return an array of pointers, one per page, + * wouldn't be simple to use in PV backends because some structures + * actually cross page boundaries (e.g. 32-bit blkif_response ring + * entries are 12 bytes). + */ + if (count != 1) { + errno = EINVAL; + return NULL; + } + + QEMU_LOCK_GUARD(&s->gnt_lock); + + act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0])); + if (act) { + if ((prot & PROT_WRITE) && !(act->prot & PROT_WRITE)) { + if (gnt_ref(s, refs[0], prot) == INVALID_GPA) { + return NULL; + } + act->prot |= PROT_WRITE; + } + act->refcnt++; + } else { + uint64_t gpa = gnt_ref(s, refs[0], prot); + if (gpa == INVALID_GPA) { + errno = EINVAL; + return NULL; + } + + act = g_new0(struct active_ref, 1); + act->prot = prot; + act->refcnt = 1; + act->mrs = memory_region_find(get_system_memory(), gpa, XEN_PAGE_SIZE); + + if (act->mrs.mr && + !int128_lt(act->mrs.size, int128_make64(XEN_PAGE_SIZE)) && + memory_region_get_ram_addr(act->mrs.mr) != RAM_ADDR_INVALID) { + act->virtaddr = qemu_map_ram_ptr(act->mrs.mr->ram_block, + act->mrs.offset_within_region); + } + if (!act->virtaddr) { + gnt_unref(s, refs[0], &act->mrs, 0); + g_free(act); + errno = EINVAL; + return NULL; + } + + s->map_track[refs[0]]++; + g_hash_table_insert(xgt->active_maps, GINT_TO_POINTER(refs[0]), act); + } + + return act->virtaddr; +} + +static gboolean do_unmap(gpointer key, gpointer value, gpointer user_data) +{ + XenGnttabState *s = user_data; + grant_ref_t gref = GPOINTER_TO_INT(key); + struct active_ref *act = value; + + gnt_unref(s, gref, &act->mrs, act->prot); + g_free(act); + return true; +} + +static int xen_be_gnttab_unmap(struct xengntdev_handle *xgt, + void *start_address, uint32_t *refs, + uint32_t count) +{ + XenGnttabState *s = xen_gnttab_singleton; + struct active_ref *act; + + if (!s) { + return -ENOTSUP; + } + + if (count != 1) { + return -EINVAL; + } + + QEMU_LOCK_GUARD(&s->gnt_lock); + + act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0])); + if (!act) { + return -ENOENT; + } + + if (act->virtaddr != start_address) { + return -EINVAL; + } + + if (!--act->refcnt) { + do_unmap(GINT_TO_POINTER(refs[0]), act, s); + g_hash_table_remove(xgt->active_maps, GINT_TO_POINTER(refs[0])); + } + + return 0; +} + +/* + * This looks a bit like the one for true Xen in xen-operations.c but + * in emulation we don't support multi-page mappings. And under Xen we + * *want* the multi-page mappings so we have fewer bounces through the + * kernel and the hypervisor. So the code paths end up being similar, + * but different. + */ +static int xen_be_gnttab_copy(struct xengntdev_handle *xgt, bool to_domain, + uint32_t domid, XenGrantCopySegment *segs, + uint32_t nr_segs, Error **errp) +{ + int prot = to_domain ? PROT_WRITE : PROT_READ; + unsigned int i; + + for (i = 0; i < nr_segs; i++) { + XenGrantCopySegment *seg = &segs[i]; + void *page; + uint32_t ref = to_domain ? seg->dest.foreign.ref : + seg->source.foreign.ref; + + page = xen_be_gnttab_map_refs(xgt, 1, domid, &ref, prot); + if (!page) { + if (errp) { + error_setg_errno(errp, errno, + "xen_be_gnttab_map_refs failed"); + } + return -errno; + } + + if (to_domain) { + memcpy(page + seg->dest.foreign.offset, seg->source.virt, + seg->len); + } else { + memcpy(seg->dest.virt, page + seg->source.foreign.offset, + seg->len); + } + + if (xen_be_gnttab_unmap(xgt, page, &ref, 1)) { + if (errp) { + error_setg_errno(errp, errno, "xen_be_gnttab_unmap failed"); + } + return -errno; + } + } + + return 0; +} + +static struct xengntdev_handle *xen_be_gnttab_open(void) +{ + struct xengntdev_handle *xgt = g_new0(struct xengntdev_handle, 1); + + xgt->active_maps = g_hash_table_new(g_direct_hash, g_direct_equal); + return xgt; +} + +static int xen_be_gnttab_close(struct xengntdev_handle *xgt) +{ + XenGnttabState *s = xen_gnttab_singleton; + + if (!s) { + return -ENOTSUP; + } + + g_hash_table_foreach_remove(xgt->active_maps, do_unmap, s); + g_hash_table_destroy(xgt->active_maps); + g_free(xgt); + return 0; +} + +static struct gnttab_backend_ops emu_gnttab_backend_ops = { + .open = xen_be_gnttab_open, + .close = xen_be_gnttab_close, + .grant_copy = xen_be_gnttab_copy, + .set_max_grants = xen_be_gnttab_set_max_grants, + .map_refs = xen_be_gnttab_map_refs, + .unmap = xen_be_gnttab_unmap, +}; + +int xen_gnttab_reset(void) +{ + XenGnttabState *s = xen_gnttab_singleton; + + if (!s) { + return -ENOTSUP; + } + + QEMU_LOCK_GUARD(&s->gnt_lock); + + s->nr_frames = 0; + + memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames); + + s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access; + s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE); + + memset(s->map_track, 0, s->max_frames * ENTRIES_PER_FRAME_V1); + + return 0; +} diff --git a/hw/i386/kvm/xen_gnttab.h b/hw/i386/kvm/xen_gnttab.h index 3bdbe96191..ee215239b0 100644 --- a/hw/i386/kvm/xen_gnttab.h +++ b/hw/i386/kvm/xen_gnttab.h @@ -13,6 +13,7 @@ #define QEMU_XEN_GNTTAB_H void xen_gnttab_create(void); +int xen_gnttab_reset(void); int xen_gnttab_map_page(uint64_t idx, uint64_t gfn); struct gnttab_set_version; diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c index 14193ef3f9..2cadafd56a 100644 --- a/hw/i386/kvm/xen_xenstore.c +++ b/hw/i386/kvm/xen_xenstore.c @@ -21,6 +21,7 @@ #include "hw/sysbus.h" #include "hw/xen/xen.h" +#include "hw/xen/xen_backend_ops.h" #include "xen_overlay.h" #include "xen_evtchn.h" #include "xen_xenstore.h" @@ -28,15 +29,17 @@ #include "sysemu/kvm.h" #include "sysemu/kvm_xen.h" +#include "trace.h" + +#include "xenstore_impl.h" + #include "hw/xen/interface/io/xs_wire.h" #include "hw/xen/interface/event_channel.h" +#include "hw/xen/interface/grant_table.h" #define TYPE_XEN_XENSTORE "xen-xenstore" OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE) -#define XEN_PAGE_SHIFT 12 -#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT) - #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t)) #define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t)) @@ -47,6 +50,9 @@ struct XenXenstoreState { SysBusDevice busdev; /*< public >*/ + XenstoreImplState *impl; + GList *watch_events; /* for the guest */ + MemoryRegion xenstore_page; struct xenstore_domain_interface *xs; uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX]; @@ -59,15 +65,54 @@ struct XenXenstoreState { evtchn_port_t guest_port; evtchn_port_t be_port; struct xenevtchn_handle *eh; + + uint8_t *impl_state; + uint32_t impl_state_size; + + struct xengntdev_handle *gt; + void *granted_xs; }; struct XenXenstoreState *xen_xenstore_singleton; static void xen_xenstore_event(void *opaque); +static void fire_watch_cb(void *opaque, const char *path, const char *token); + +static struct xenstore_backend_ops emu_xenstore_backend_ops; + +static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s, + GList *perms, + const char *relpath, + const char *fmt, ...) +{ + gchar *abspath; + gchar *value; + va_list args; + GByteArray *data; + int err; + + abspath = g_strdup_printf("/local/domain/%u/%s", xen_domid, relpath); + va_start(args, fmt); + value = g_strdup_vprintf(fmt, args); + va_end(args); + + data = g_byte_array_new_take((void *)value, strlen(value)); + + err = xs_impl_write(s->impl, DOMID_QEMU, XBT_NULL, abspath, data); + assert(!err); + + g_byte_array_unref(data); + + err = xs_impl_set_perms(s->impl, DOMID_QEMU, XBT_NULL, abspath, perms); + assert(!err); + + g_free(abspath); +} static void xen_xenstore_realize(DeviceState *dev, Error **errp) { XenXenstoreState *s = XEN_XENSTORE(dev); + GList *perms; if (xen_mode != XEN_EMULATE) { error_setg(errp, "Xen xenstore support is for Xen emulation"); @@ -89,6 +134,50 @@ static void xen_xenstore_realize(DeviceState *dev, Error **errp) } aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true, xen_xenstore_event, NULL, NULL, NULL, s); + + s->impl = xs_impl_create(xen_domid); + + /* Populate the default nodes */ + + /* Nodes owned by 'dom0' but readable by the guest */ + perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU)); + perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid)); + + relpath_printf(s, perms, "", "%s", ""); + + relpath_printf(s, perms, "domid", "%u", xen_domid); + + relpath_printf(s, perms, "control/platform-feature-xs_reset_watches", "%u", 1); + relpath_printf(s, perms, "control/platform-feature-multiprocessor-suspend", "%u", 1); + + relpath_printf(s, perms, "platform/acpi", "%u", 1); + relpath_printf(s, perms, "platform/acpi_s3", "%u", 1); + relpath_printf(s, perms, "platform/acpi_s4", "%u", 1); + relpath_printf(s, perms, "platform/acpi_laptop_slate", "%u", 0); + + g_list_free_full(perms, g_free); + + /* Nodes owned by the guest */ + perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, xen_domid)); + + relpath_printf(s, perms, "attr", "%s", ""); + + relpath_printf(s, perms, "control/shutdown", "%s", ""); + relpath_printf(s, perms, "control/feature-poweroff", "%u", 1); + relpath_printf(s, perms, "control/feature-reboot", "%u", 1); + relpath_printf(s, perms, "control/feature-suspend", "%u", 1); + relpath_printf(s, perms, "control/feature-s3", "%u", 1); + relpath_printf(s, perms, "control/feature-s4", "%u", 1); + + relpath_printf(s, perms, "data", "%s", ""); + relpath_printf(s, perms, "device", "%s", ""); + relpath_printf(s, perms, "drivers", "%s", ""); + relpath_printf(s, perms, "error", "%s", ""); + relpath_printf(s, perms, "feature", "%s", ""); + + g_list_free_full(perms, g_free); + + xen_xenstore_ops = &emu_xenstore_backend_ops; } static bool xen_xenstore_is_needed(void *opaque) @@ -99,16 +188,26 @@ static bool xen_xenstore_is_needed(void *opaque) static int xen_xenstore_pre_save(void *opaque) { XenXenstoreState *s = opaque; + GByteArray *save; if (s->eh) { s->guest_port = xen_be_evtchn_get_guest_port(s->eh); } + + g_free(s->impl_state); + save = xs_impl_serialize(s->impl); + s->impl_state = save->data; + s->impl_state_size = save->len; + g_byte_array_free(save, false); + return 0; } static int xen_xenstore_post_load(void *opaque, int ver) { XenXenstoreState *s = opaque; + GByteArray *save; + int ret; /* * As qemu/dom0, rebind to the guest's port. The Windows drivers may @@ -125,11 +224,18 @@ static int xen_xenstore_post_load(void *opaque, int ver) } s->be_port = be_port; } - return 0; + + save = g_byte_array_new_take(s->impl_state, s->impl_state_size); + s->impl_state = NULL; + s->impl_state_size = 0; + + ret = xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s); + return ret; } static const VMStateDescription xen_xenstore_vmstate = { .name = "xen_xenstore", + .unmigratable = 1, /* The PV back ends don't migrate yet */ .version_id = 1, .minimum_version_id = 1, .needed = xen_xenstore_is_needed, @@ -145,6 +251,10 @@ static const VMStateDescription xen_xenstore_vmstate = { VMSTATE_BOOL(rsp_pending, XenXenstoreState), VMSTATE_UINT32(guest_port, XenXenstoreState), VMSTATE_BOOL(fatal_error, XenXenstoreState), + VMSTATE_UINT32(impl_state_size, XenXenstoreState), + VMSTATE_VARRAY_UINT32_ALLOC(impl_state, XenXenstoreState, + impl_state_size, 0, + vmstate_info_uint8, uint8_t), VMSTATE_END_OF_LIST() } }; @@ -213,20 +323,761 @@ static void reset_rsp(XenXenstoreState *s) s->rsp_offset = 0; } +static void xs_error(XenXenstoreState *s, unsigned int id, + xs_transaction_t tx_id, int errnum) +{ + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + const char *errstr = NULL; + + for (unsigned int i = 0; i < ARRAY_SIZE(xsd_errors); i++) { + struct xsd_errors *xsd_error = &xsd_errors[i]; + + if (xsd_error->errnum == errnum) { + errstr = xsd_error->errstring; + break; + } + } + assert(errstr); + + trace_xenstore_error(id, tx_id, errstr); + + rsp->type = XS_ERROR; + rsp->req_id = id; + rsp->tx_id = tx_id; + rsp->len = (uint32_t)strlen(errstr) + 1; + + memcpy(&rsp[1], errstr, rsp->len); +} + +static void xs_ok(XenXenstoreState *s, unsigned int type, unsigned int req_id, + xs_transaction_t tx_id) +{ + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + const char *okstr = "OK"; + + rsp->type = type; + rsp->req_id = req_id; + rsp->tx_id = tx_id; + rsp->len = (uint32_t)strlen(okstr) + 1; + + memcpy(&rsp[1], okstr, rsp->len); +} + +/* + * The correct request and response formats are documented in xen.git: + * docs/misc/xenstore.txt. A summary is given below for convenience. + * The '|' symbol represents a NUL character. + * + * ---------- Database read, write and permissions operations ---------- + * + * READ <path>| <value|> + * WRITE <path>|<value|> + * Store and read the octet string <value> at <path>. + * WRITE creates any missing parent paths, with empty values. + * + * MKDIR <path>| + * Ensures that the <path> exists, by necessary by creating + * it and any missing parents with empty values. If <path> + * or any parent already exists, its value is left unchanged. + * + * RM <path>| + * Ensures that the <path> does not exist, by deleting + * it and all of its children. It is not an error if <path> does + * not exist, but it _is_ an error if <path>'s immediate parent + * does not exist either. + * + * DIRECTORY <path>| <child-leaf-name>|* + * Gives a list of the immediate children of <path>, as only the + * leafnames. The resulting children are each named + * <path>/<child-leaf-name>. + * + * DIRECTORY_PART <path>|<offset> <gencnt>|<child-leaf-name>|* + * Same as DIRECTORY, but to be used for children lists longer than + * XENSTORE_PAYLOAD_MAX. Input are <path> and the byte offset into + * the list of children to return. Return values are the generation + * count <gencnt> of the node (to be used to ensure the node hasn't + * changed between two reads: <gencnt> being the same for multiple + * reads guarantees the node hasn't changed) and the list of children + * starting at the specified <offset> of the complete list. + * + * GET_PERMS <path>| <perm-as-string>|+ + * SET_PERMS <path>|<perm-as-string>|+? + * <perm-as-string> is one of the following + * w<domid> write only + * r<domid> read only + * b<domid> both read and write + * n<domid> no access + * See https://wiki.xen.org/wiki/XenBus section + * `Permissions' for details of the permissions system. + * It is possible to set permissions for the special watch paths + * "@introduceDomain" and "@releaseDomain" to enable receiving those + * watches in unprivileged domains. + * + * ---------- Watches ---------- + * + * WATCH <wpath>|<token>|? + * Adds a watch. + * + * When a <path> is modified (including path creation, removal, + * contents change or permissions change) this generates an event + * on the changed <path>. Changes made in transactions cause an + * event only if and when committed. Each occurring event is + * matched against all the watches currently set up, and each + * matching watch results in a WATCH_EVENT message (see below). + * + * The event's path matches the watch's <wpath> if it is an child + * of <wpath>. + * + * <wpath> can be a <path> to watch or @<wspecial>. In the + * latter case <wspecial> may have any syntax but it matches + * (according to the rules above) only the following special + * events which are invented by xenstored: + * @introduceDomain occurs on INTRODUCE + * @releaseDomain occurs on any domain crash or + * shutdown, and also on RELEASE + * and domain destruction + * <wspecial> events are sent to privileged callers or explicitly + * via SET_PERMS enabled domains only. + * + * When a watch is first set up it is triggered once straight + * away, with <path> equal to <wpath>. Watches may be triggered + * spuriously. The tx_id in a WATCH request is ignored. + * + * Watches are supposed to be restricted by the permissions + * system but in practice the implementation is imperfect. + * Applications should not rely on being sent a notification for + * paths that they cannot read; however, an application may rely + * on being sent a watch when a path which it _is_ able to read + * is deleted even if that leaves only a nonexistent unreadable + * parent. A notification may omitted if a node's permissions + * are changed so as to make it unreadable, in which case future + * notifications may be suppressed (and if the node is later made + * readable, some notifications may have been lost). + * + * WATCH_EVENT <epath>|<token>| + * Unsolicited `reply' generated for matching modification events + * as described above. req_id and tx_id are both 0. + * + * <epath> is the event's path, ie the actual path that was + * modified; however if the event was the recursive removal of an + * parent of <wpath>, <epath> is just + * <wpath> (rather than the actual path which was removed). So + * <epath> is a child of <wpath>, regardless. + * + * Iff <wpath> for the watch was specified as a relative pathname, + * the <epath> path will also be relative (with the same base, + * obviously). + * + * UNWATCH <wpath>|<token>|? + * + * RESET_WATCHES | + * Reset all watches and transactions of the caller. + * + * ---------- Transactions ---------- + * + * TRANSACTION_START | <transid>| + * <transid> is an opaque uint32_t allocated by xenstored + * represented as unsigned decimal. After this, transaction may + * be referenced by using <transid> (as 32-bit binary) in the + * tx_id request header field. When transaction is started whole + * db is copied; reads and writes happen on the copy. + * It is not legal to send non-0 tx_id in TRANSACTION_START. + * + * TRANSACTION_END T| + * TRANSACTION_END F| + * tx_id must refer to existing transaction. After this + * request the tx_id is no longer valid and may be reused by + * xenstore. If F, the transaction is discarded. If T, + * it is committed: if there were any other intervening writes + * then our END gets get EAGAIN. + * + * The plan is that in the future only intervening `conflicting' + * writes cause EAGAIN, meaning only writes or other commits + * which changed paths which were read or written in the + * transaction at hand. + * + */ + +static void xs_read(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, unsigned int len) +{ + const char *path = (const char *)req_data; + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + uint8_t *rsp_data = (uint8_t *)&rsp[1]; + g_autoptr(GByteArray) data = g_byte_array_new(); + int err; + + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_read(tx_id, path); + err = xs_impl_read(s->impl, xen_domid, tx_id, path, data); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + rsp->type = XS_READ; + rsp->req_id = req_id; + rsp->tx_id = tx_id; + rsp->len = 0; + + len = data->len; + if (len > XENSTORE_PAYLOAD_MAX) { + xs_error(s, req_id, tx_id, E2BIG); + return; + } + + memcpy(&rsp_data[rsp->len], data->data, len); + rsp->len += len; +} + +static void xs_write(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + g_autoptr(GByteArray) data = g_byte_array_new(); + const char *path; + int err; + + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + path = (const char *)req_data; + + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + g_byte_array_append(data, req_data, len); + + trace_xenstore_write(tx_id, path); + err = xs_impl_write(s->impl, xen_domid, tx_id, path, data); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_WRITE, req_id, tx_id); +} + +static void xs_mkdir(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + g_autoptr(GByteArray) data = g_byte_array_new(); + const char *path; + int err; + + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + path = (const char *)req_data; + + trace_xenstore_mkdir(tx_id, path); + err = xs_impl_read(s->impl, xen_domid, tx_id, path, data); + if (err == ENOENT) { + err = xs_impl_write(s->impl, xen_domid, tx_id, path, data); + } + + if (!err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_MKDIR, req_id, tx_id); +} + +static void xs_append_strings(XenXenstoreState *s, struct xsd_sockmsg *rsp, + GList *strings, unsigned int start, bool truncate) +{ + uint8_t *rsp_data = (uint8_t *)&rsp[1]; + GList *l; + + for (l = strings; l; l = l->next) { + size_t len = strlen(l->data) + 1; /* Including the NUL termination */ + char *str = l->data; + + if (rsp->len + len > XENSTORE_PAYLOAD_MAX) { + if (truncate) { + len = XENSTORE_PAYLOAD_MAX - rsp->len; + if (!len) { + return; + } + } else { + xs_error(s, rsp->req_id, rsp->tx_id, E2BIG); + return; + } + } + + if (start) { + if (start >= len) { + start -= len; + continue; + } + + str += start; + len -= start; + start = 0; + } + + memcpy(&rsp_data[rsp->len], str, len); + rsp->len += len; + } + /* XS_DIRECTORY_PART wants an extra NUL to indicate the end */ + if (truncate && rsp->len < XENSTORE_PAYLOAD_MAX) { + rsp_data[rsp->len++] = '\0'; + } +} + +static void xs_directory(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + GList *items = NULL; + const char *path; + int err; + + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + path = (const char *)req_data; + + trace_xenstore_directory(tx_id, path); + err = xs_impl_directory(s->impl, xen_domid, tx_id, path, NULL, &items); + if (err != 0) { + xs_error(s, req_id, tx_id, err); + return; + } + + rsp->type = XS_DIRECTORY; + rsp->req_id = req_id; + rsp->tx_id = tx_id; + rsp->len = 0; + + xs_append_strings(s, rsp, items, 0, false); + + g_list_free_full(items, g_free); +} + +static void xs_directory_part(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + const char *offset_str, *path = (const char *)req_data; + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + char *rsp_data = (char *)&rsp[1]; + uint64_t gencnt = 0; + unsigned int offset; + GList *items = NULL; + int err; + + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + offset_str = (const char *)req_data; + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + if (len) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + if (qemu_strtoui(offset_str, NULL, 10, &offset) < 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_directory_part(tx_id, path, offset); + err = xs_impl_directory(s->impl, xen_domid, tx_id, path, &gencnt, &items); + if (err != 0) { + xs_error(s, req_id, tx_id, err); + return; + } + + rsp->type = XS_DIRECTORY_PART; + rsp->req_id = req_id; + rsp->tx_id = tx_id; + rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%" PRIu64, gencnt) + 1; + + xs_append_strings(s, rsp, items, offset, true); + + g_list_free_full(items, g_free); +} + +static void xs_transaction_start(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + char *rsp_data = (char *)&rsp[1]; + int err; + + if (len != 1 || req_data[0] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + rsp->type = XS_TRANSACTION_START; + rsp->req_id = req_id; + rsp->tx_id = tx_id; + rsp->len = 0; + + err = xs_impl_transaction_start(s->impl, xen_domid, &tx_id); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + trace_xenstore_transaction_start(tx_id); + + rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%u", tx_id); + assert(rsp->len < XENSTORE_PAYLOAD_MAX); + rsp->len++; +} + +static void xs_transaction_end(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + bool commit; + int err; + + if (len != 2 || req_data[1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + switch (req_data[0]) { + case 'T': + commit = true; + break; + case 'F': + commit = false; + break; + default: + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_transaction_end(tx_id, commit); + err = xs_impl_transaction_end(s->impl, xen_domid, tx_id, commit); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_TRANSACTION_END, req_id, tx_id); +} + +static void xs_rm(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, unsigned int len) +{ + const char *path = (const char *)req_data; + int err; + + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_rm(tx_id, path); + err = xs_impl_rm(s->impl, xen_domid, tx_id, path); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_RM, req_id, tx_id); +} + +static void xs_get_perms(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + const char *path = (const char *)req_data; + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + GList *perms = NULL; + int err; + + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_get_perms(tx_id, path); + err = xs_impl_get_perms(s->impl, xen_domid, tx_id, path, &perms); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + rsp->type = XS_GET_PERMS; + rsp->req_id = req_id; + rsp->tx_id = tx_id; + rsp->len = 0; + + xs_append_strings(s, rsp, perms, 0, false); + + g_list_free_full(perms, g_free); +} + +static void xs_set_perms(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + const char *path = (const char *)req_data; + uint8_t *perm; + GList *perms = NULL; + int err; + + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + perm = req_data; + while (len--) { + if (*req_data++ == '\0') { + perms = g_list_append(perms, perm); + perm = req_data; + } + } + + /* + * Note that there may be trailing garbage at the end of the buffer. + * This is explicitly permitted by the '?' at the end of the definition: + * + * SET_PERMS <path>|<perm-as-string>|+? + */ + + trace_xenstore_set_perms(tx_id, path); + err = xs_impl_set_perms(s->impl, xen_domid, tx_id, path, perms); + g_list_free(perms); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_SET_PERMS, req_id, tx_id); +} + +static void xs_watch(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + const char *token, *path = (const char *)req_data; + int err; + + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + token = (const char *)req_data; + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + /* + * Note that there may be trailing garbage at the end of the buffer. + * This is explicitly permitted by the '?' at the end of the definition: + * + * WATCH <wpath>|<token>|? + */ + + trace_xenstore_watch(path, token); + err = xs_impl_watch(s->impl, xen_domid, path, token, fire_watch_cb, s); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_WATCH, req_id, tx_id); +} + +static void xs_unwatch(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + const char *token, *path = (const char *)req_data; + int err; + + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + token = (const char *)req_data; + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + trace_xenstore_unwatch(path, token); + err = xs_impl_unwatch(s->impl, xen_domid, path, token, fire_watch_cb, s); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_UNWATCH, req_id, tx_id); +} + +static void xs_reset_watches(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_reset_watches(); + xs_impl_reset_watches(s->impl, xen_domid); + + xs_ok(s, XS_RESET_WATCHES, req_id, tx_id); +} + +static void xs_priv(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *data, + unsigned int len) +{ + xs_error(s, req_id, tx_id, EACCES); +} + +static void xs_unimpl(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *data, + unsigned int len) +{ + xs_error(s, req_id, tx_id, ENOSYS); +} + +typedef void (*xs_impl)(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *data, + unsigned int len); + +struct xsd_req { + const char *name; + xs_impl fn; +}; +#define XSD_REQ(_type, _fn) \ + [_type] = { .name = #_type, .fn = _fn } + +struct xsd_req xsd_reqs[] = { + XSD_REQ(XS_READ, xs_read), + XSD_REQ(XS_WRITE, xs_write), + XSD_REQ(XS_MKDIR, xs_mkdir), + XSD_REQ(XS_DIRECTORY, xs_directory), + XSD_REQ(XS_DIRECTORY_PART, xs_directory_part), + XSD_REQ(XS_TRANSACTION_START, xs_transaction_start), + XSD_REQ(XS_TRANSACTION_END, xs_transaction_end), + XSD_REQ(XS_RM, xs_rm), + XSD_REQ(XS_GET_PERMS, xs_get_perms), + XSD_REQ(XS_SET_PERMS, xs_set_perms), + XSD_REQ(XS_WATCH, xs_watch), + XSD_REQ(XS_UNWATCH, xs_unwatch), + XSD_REQ(XS_CONTROL, xs_priv), + XSD_REQ(XS_INTRODUCE, xs_priv), + XSD_REQ(XS_RELEASE, xs_priv), + XSD_REQ(XS_IS_DOMAIN_INTRODUCED, xs_priv), + XSD_REQ(XS_RESUME, xs_priv), + XSD_REQ(XS_SET_TARGET, xs_priv), + XSD_REQ(XS_RESET_WATCHES, xs_reset_watches), +}; + static void process_req(XenXenstoreState *s) { struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data; - struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; - const char enosys[] = "ENOSYS"; + xs_impl handler = NULL; assert(req_pending(s)); assert(!s->rsp_pending); - rsp->type = XS_ERROR; - rsp->req_id = req->req_id; - rsp->tx_id = req->tx_id; - rsp->len = sizeof(enosys); - memcpy((void *)&rsp[1], enosys, sizeof(enosys)); + if (req->type < ARRAY_SIZE(xsd_reqs)) { + handler = xsd_reqs[req->type].fn; + } + if (!handler) { + handler = &xs_unimpl; + } + + handler(s, req->req_id, req->tx_id, (uint8_t *)&req[1], req->len); s->rsp_pending = true; reset_req(s); @@ -415,6 +1266,113 @@ static unsigned int put_rsp(XenXenstoreState *s) return copylen; } +static void deliver_watch(XenXenstoreState *s, const char *path, + const char *token) +{ + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + uint8_t *rsp_data = (uint8_t *)&rsp[1]; + unsigned int len; + + assert(!s->rsp_pending); + + trace_xenstore_watch_event(path, token); + + rsp->type = XS_WATCH_EVENT; + rsp->req_id = 0; + rsp->tx_id = 0; + rsp->len = 0; + + len = strlen(path); + + /* XENSTORE_ABS/REL_PATH_MAX should ensure there can be no overflow */ + assert(rsp->len + len < XENSTORE_PAYLOAD_MAX); + + memcpy(&rsp_data[rsp->len], path, len); + rsp->len += len; + rsp_data[rsp->len] = '\0'; + rsp->len++; + + len = strlen(token); + /* + * It is possible for the guest to have chosen a token that will + * not fit (along with the patch) into a watch event. We have no + * choice but to drop the event if this is the case. + */ + if (rsp->len + len >= XENSTORE_PAYLOAD_MAX) { + return; + } + + memcpy(&rsp_data[rsp->len], token, len); + rsp->len += len; + rsp_data[rsp->len] = '\0'; + rsp->len++; + + s->rsp_pending = true; +} + +struct watch_event { + char *path; + char *token; +}; + +static void free_watch_event(struct watch_event *ev) +{ + if (ev) { + g_free(ev->path); + g_free(ev->token); + g_free(ev); + } +} + +static void queue_watch(XenXenstoreState *s, const char *path, + const char *token) +{ + struct watch_event *ev = g_new0(struct watch_event, 1); + + ev->path = g_strdup(path); + ev->token = g_strdup(token); + + s->watch_events = g_list_append(s->watch_events, ev); +} + +static void fire_watch_cb(void *opaque, const char *path, const char *token) +{ + XenXenstoreState *s = opaque; + + assert(qemu_mutex_iothread_locked()); + + /* + * If there's a response pending, we obviously can't scribble over + * it. But if there's a request pending, it has dibs on the buffer + * too. + * + * In the common case of a watch firing due to backend activity + * when the ring was otherwise idle, we should be able to copy the + * strings directly into the rsp_data and thence the actual ring, + * without needing to perform any allocations and queue them. + */ + if (s->rsp_pending || req_pending(s)) { + queue_watch(s, path, token); + } else { + deliver_watch(s, path, token); + /* + * If the message was queued because there was already ring activity, + * no need to wake the guest. But if not, we need to send the evtchn. + */ + xen_be_evtchn_notify(s->eh, s->be_port); + } +} + +static void process_watch_events(XenXenstoreState *s) +{ + struct watch_event *ev = s->watch_events->data; + + deliver_watch(s, ev->path, ev->token); + + s->watch_events = g_list_remove(s->watch_events, ev); + free_watch_event(ev); +} + static void xen_xenstore_event(void *opaque) { XenXenstoreState *s = opaque; @@ -433,6 +1391,10 @@ static void xen_xenstore_event(void *opaque) copied_to = copied_from = 0; processed = false; + if (!s->rsp_pending && s->watch_events) { + process_watch_events(s); + } + if (s->rsp_pending) { copied_to = put_rsp(s); } @@ -441,7 +1403,7 @@ static void xen_xenstore_event(void *opaque) copied_from = get_req(s); } - if (req_pending(s) && !s->rsp_pending) { + if (req_pending(s) && !s->rsp_pending && !s->watch_events) { process_req(s); processed = true; } @@ -496,5 +1458,270 @@ int xen_xenstore_reset(void) } s->be_port = err; + /* + * We don't actually access the guest's page through the grant, because + * this isn't real Xen, and we can just use the page we gave it in the + * first place. Map the grant anyway, mostly for cosmetic purposes so + * it *looks* like it's in use in the guest-visible grant table. + */ + s->gt = qemu_xen_gnttab_open(); + uint32_t xs_gntref = GNTTAB_RESERVED_XENSTORE; + s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid, &xs_gntref, + PROT_READ | PROT_WRITE); + return 0; } + +struct qemu_xs_handle { + XenstoreImplState *impl; + GList *watches; + QEMUBH *watch_bh; +}; + +struct qemu_xs_watch { + struct qemu_xs_handle *h; + char *path; + xs_watch_fn fn; + void *opaque; + GList *events; +}; + +static char *xs_be_get_domain_path(struct qemu_xs_handle *h, unsigned int domid) +{ + return g_strdup_printf("/local/domain/%u", domid); +} + +static char **xs_be_directory(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, unsigned int *num) +{ + GList *items = NULL, *l; + unsigned int i = 0; + char **items_ret; + int err; + + err = xs_impl_directory(h->impl, DOMID_QEMU, t, path, NULL, &items); + if (err) { + errno = err; + return NULL; + } + + items_ret = g_new0(char *, g_list_length(items) + 1); + *num = 0; + for (l = items; l; l = l->next) { + items_ret[i++] = l->data; + (*num)++; + } + g_list_free(items); + return items_ret; +} + +static void *xs_be_read(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, unsigned int *len) +{ + GByteArray *data = g_byte_array_new(); + bool free_segment = false; + int err; + + err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data); + if (err) { + free_segment = true; + errno = err; + } else { + if (len) { + *len = data->len; + } + /* The xen-bus-helper code expects to get NUL terminated string! */ + g_byte_array_append(data, (void *)"", 1); + } + + return g_byte_array_free(data, free_segment); +} + +static bool xs_be_write(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, const void *data, unsigned int len) +{ + GByteArray *gdata = g_byte_array_new(); + int err; + + g_byte_array_append(gdata, data, len); + err = xs_impl_write(h->impl, DOMID_QEMU, t, path, gdata); + g_byte_array_unref(gdata); + if (err) { + errno = err; + return false; + } + return true; +} + +static bool xs_be_create(struct qemu_xs_handle *h, xs_transaction_t t, + unsigned int owner, unsigned int domid, + unsigned int perms, const char *path) +{ + g_autoptr(GByteArray) data = g_byte_array_new(); + GList *perms_list = NULL; + int err; + + /* mkdir does this */ + err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data); + if (err == ENOENT) { + err = xs_impl_write(h->impl, DOMID_QEMU, t, path, data); + } + if (err) { + errno = err; + return false; + } + + perms_list = g_list_append(perms_list, + xs_perm_as_string(XS_PERM_NONE, owner)); + perms_list = g_list_append(perms_list, + xs_perm_as_string(perms, domid)); + + err = xs_impl_set_perms(h->impl, DOMID_QEMU, t, path, perms_list); + g_list_free_full(perms_list, g_free); + if (err) { + errno = err; + return false; + } + return true; +} + +static bool xs_be_destroy(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path) +{ + int err = xs_impl_rm(h->impl, DOMID_QEMU, t, path); + if (err) { + errno = err; + return false; + } + return true; +} + +static void be_watch_bh(void *_h) +{ + struct qemu_xs_handle *h = _h; + GList *l; + + for (l = h->watches; l; l = l->next) { + struct qemu_xs_watch *w = l->data; + + while (w->events) { + struct watch_event *ev = w->events->data; + + w->fn(w->opaque, ev->path); + + w->events = g_list_remove(w->events, ev); + free_watch_event(ev); + } + } +} + +static void xs_be_watch_cb(void *opaque, const char *path, const char *token) +{ + struct watch_event *ev = g_new0(struct watch_event, 1); + struct qemu_xs_watch *w = opaque; + + /* We don't care about the token */ + ev->path = g_strdup(path); + w->events = g_list_append(w->events, ev); + + qemu_bh_schedule(w->h->watch_bh); +} + +static struct qemu_xs_watch *xs_be_watch(struct qemu_xs_handle *h, + const char *path, xs_watch_fn fn, + void *opaque) +{ + struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1); + int err; + + w->h = h; + w->fn = fn; + w->opaque = opaque; + + err = xs_impl_watch(h->impl, DOMID_QEMU, path, NULL, xs_be_watch_cb, w); + if (err) { + errno = err; + g_free(w); + return NULL; + } + + w->path = g_strdup(path); + h->watches = g_list_append(h->watches, w); + return w; +} + +static void xs_be_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w) +{ + xs_impl_unwatch(h->impl, DOMID_QEMU, w->path, NULL, xs_be_watch_cb, w); + + h->watches = g_list_remove(h->watches, w); + g_list_free_full(w->events, (GDestroyNotify)free_watch_event); + g_free(w->path); + g_free(w); +} + +static xs_transaction_t xs_be_transaction_start(struct qemu_xs_handle *h) +{ + unsigned int new_tx = XBT_NULL; + int err = xs_impl_transaction_start(h->impl, DOMID_QEMU, &new_tx); + if (err) { + errno = err; + return XBT_NULL; + } + return new_tx; +} + +static bool xs_be_transaction_end(struct qemu_xs_handle *h, xs_transaction_t t, + bool abort) +{ + int err = xs_impl_transaction_end(h->impl, DOMID_QEMU, t, !abort); + if (err) { + errno = err; + return false; + } + return true; +} + +static struct qemu_xs_handle *xs_be_open(void) +{ + XenXenstoreState *s = xen_xenstore_singleton; + struct qemu_xs_handle *h; + + if (!s && !s->impl) { + errno = -ENOSYS; + return NULL; + } + + h = g_new0(struct qemu_xs_handle, 1); + h->impl = s->impl; + + h->watch_bh = aio_bh_new(qemu_get_aio_context(), be_watch_bh, h); + + return h; +} + +static void xs_be_close(struct qemu_xs_handle *h) +{ + while (h->watches) { + struct qemu_xs_watch *w = h->watches->data; + xs_be_unwatch(h, w); + } + + qemu_bh_delete(h->watch_bh); + g_free(h); +} + +static struct xenstore_backend_ops emu_xenstore_backend_ops = { + .open = xs_be_open, + .close = xs_be_close, + .get_domain_path = xs_be_get_domain_path, + .directory = xs_be_directory, + .read = xs_be_read, + .write = xs_be_write, + .create = xs_be_create, + .destroy = xs_be_destroy, + .watch = xs_be_watch, + .unwatch = xs_be_unwatch, + .transaction_start = xs_be_transaction_start, + .transaction_end = xs_be_transaction_end, +}; diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c new file mode 100644 index 0000000000..305fe75519 --- /dev/null +++ b/hw/i386/kvm/xenstore_impl.c @@ -0,0 +1,1927 @@ +/* + * QEMU Xen emulation: The actual implementation of XenStore + * + * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Authors: David Woodhouse <dwmw2@infradead.org>, Paul Durrant <paul@xen.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qom/object.h" + +#include "hw/xen/xen.h" + +#include "xen_xenstore.h" +#include "xenstore_impl.h" + +#include "hw/xen/interface/io/xs_wire.h" + +#define XS_MAX_WATCHES 128 +#define XS_MAX_DOMAIN_NODES 1000 +#define XS_MAX_NODE_SIZE 2048 +#define XS_MAX_TRANSACTIONS 10 +#define XS_MAX_PERMS_PER_NODE 5 + +#define XS_VALID_CHARS "abcdefghijklmnopqrstuvwxyz" \ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ + "0123456789-/_" + +typedef struct XsNode { + uint32_t ref; + GByteArray *content; + GList *perms; + GHashTable *children; + uint64_t gencnt; + bool deleted_in_tx; + bool modified_in_tx; + unsigned int serialized_tx; +#ifdef XS_NODE_UNIT_TEST + gchar *name; /* debug only */ +#endif +} XsNode; + +typedef struct XsWatch { + struct XsWatch *next; + xs_impl_watch_fn *cb; + void *cb_opaque; + char *token; + unsigned int dom_id; + int rel_prefix; +} XsWatch; + +typedef struct XsTransaction { + XsNode *root; + unsigned int nr_nodes; + unsigned int base_tx; + unsigned int tx_id; + unsigned int dom_id; +} XsTransaction; + +struct XenstoreImplState { + XsNode *root; + unsigned int nr_nodes; + GHashTable *watches; + unsigned int nr_domu_watches; + GHashTable *transactions; + unsigned int nr_domu_transactions; + unsigned int root_tx; + unsigned int last_tx; + bool serialized; +}; + + +static void nobble_tx(gpointer key, gpointer value, gpointer user_data) +{ + unsigned int *new_tx_id = user_data; + XsTransaction *tx = value; + + if (tx->base_tx == *new_tx_id) { + /* Transactions based on XBT_NULL will always fail */ + tx->base_tx = XBT_NULL; + } +} + +static inline unsigned int next_tx(struct XenstoreImplState *s) +{ + unsigned int tx_id; + + /* Find the next TX id which isn't either XBT_NULL or in use. */ + do { + tx_id = ++s->last_tx; + } while (tx_id == XBT_NULL || tx_id == s->root_tx || + g_hash_table_lookup(s->transactions, GINT_TO_POINTER(tx_id))); + + /* + * It is vanishingly unlikely, but ensure that no outstanding transaction + * is based on the (previous incarnation of the) newly-allocated TX id. + */ + g_hash_table_foreach(s->transactions, nobble_tx, &tx_id); + + return tx_id; +} + +static inline XsNode *xs_node_new(void) +{ + XsNode *n = g_new0(XsNode, 1); + n->ref = 1; + +#ifdef XS_NODE_UNIT_TEST + nr_xs_nodes++; + xs_node_list = g_list_prepend(xs_node_list, n); +#endif + return n; +} + +static inline XsNode *xs_node_ref(XsNode *n) +{ + /* With just 10 transactions, it can never get anywhere near this. */ + g_assert(n->ref < INT_MAX); + + g_assert(n->ref); + n->ref++; + return n; +} + +static inline void xs_node_unref(XsNode *n) +{ + if (!n) { + return; + } + g_assert(n->ref); + if (--n->ref) { + return; + } + + if (n->content) { + g_byte_array_unref(n->content); + } + if (n->perms) { + g_list_free_full(n->perms, g_free); + } + if (n->children) { + g_hash_table_unref(n->children); + } +#ifdef XS_NODE_UNIT_TEST + g_free(n->name); + nr_xs_nodes--; + xs_node_list = g_list_remove(xs_node_list, n); +#endif + g_free(n); +} + +char *xs_perm_as_string(unsigned int perm, unsigned int domid) +{ + char letter; + + switch (perm) { + case XS_PERM_READ | XS_PERM_WRITE: + letter = 'b'; + break; + case XS_PERM_READ: + letter = 'r'; + break; + case XS_PERM_WRITE: + letter = 'w'; + break; + case XS_PERM_NONE: + default: + letter = 'n'; + break; + } + + return g_strdup_printf("%c%u", letter, domid); +} + +static gpointer do_perm_copy(gconstpointer src, gpointer user_data) +{ + return g_strdup(src); +} + +static XsNode *xs_node_create(const char *name, GList *perms) +{ + XsNode *n = xs_node_new(); + +#ifdef XS_NODE_UNIT_TEST + if (name) { + n->name = g_strdup(name); + } +#endif + + n->perms = g_list_copy_deep(perms, do_perm_copy, NULL); + + return n; +} + +/* For copying from one hash table to another using g_hash_table_foreach() */ +static void do_child_insert(gpointer key, gpointer value, gpointer user_data) +{ + g_hash_table_insert(user_data, g_strdup(key), xs_node_ref(value)); +} + +static XsNode *xs_node_copy(XsNode *old) +{ + XsNode *n = xs_node_new(); + + n->gencnt = old->gencnt; + +#ifdef XS_NODE_UNIT_TEST + if (n->name) { + n->name = g_strdup(old->name); + } +#endif + + assert(old); + if (old->children) { + n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, + (GDestroyNotify)xs_node_unref); + g_hash_table_foreach(old->children, do_child_insert, n->children); + } + if (old->perms) { + n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL); + } + if (old->content) { + n->content = g_byte_array_ref(old->content); + } + return n; +} + +/* Returns true if it made a change to the hash table */ +static bool xs_node_add_child(XsNode *n, const char *path_elem, XsNode *child) +{ + assert(!strchr(path_elem, '/')); + + if (!child) { + assert(n->children); + return g_hash_table_remove(n->children, path_elem); + } + +#ifdef XS_NODE_UNIT_TEST + g_free(child->name); + child->name = g_strdup(path_elem); +#endif + if (!n->children) { + n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, + (GDestroyNotify)xs_node_unref); + } + + /* + * The documentation for g_hash_table_insert() says that it "returns a + * boolean value to indicate whether the newly added value was already + * in the hash table or not." + * + * It could perhaps be clearer that returning TRUE means it wasn't, + */ + return g_hash_table_insert(n->children, g_strdup(path_elem), child); +} + +struct walk_op { + struct XenstoreImplState *s; + char path[XENSTORE_ABS_PATH_MAX + 2]; /* Two NUL terminators */ + int (*op_fn)(XsNode **n, struct walk_op *op); + void *op_opaque; + void *op_opaque2; + + GList *watches; + unsigned int dom_id; + unsigned int tx_id; + + /* The number of nodes which will exist in the tree if this op succeeds. */ + unsigned int new_nr_nodes; + + /* + * This is maintained on the way *down* the walk to indicate + * whether nodes can be modified in place or whether COW is + * required. It starts off being true, as we're always going to + * replace the root node. If we walk into a shared subtree it + * becomes false. If we start *creating* new nodes for a write, + * it becomes true again. + * + * Do not use it on the way back up. + */ + bool inplace; + bool mutating; + bool create_dirs; + bool in_transaction; + + /* Tracking during recursion so we know which is first. */ + bool deleted_in_tx; +}; + +static void fire_watches(struct walk_op *op, bool parents) +{ + GList *l = NULL; + XsWatch *w; + + if (!op->mutating || op->in_transaction) { + return; + } + + if (parents) { + l = op->watches; + } + + w = g_hash_table_lookup(op->s->watches, op->path); + while (w || l) { + if (!w) { + /* Fire the parent nodes from 'op' if asked to */ + w = l->data; + l = l->next; + continue; + } + + assert(strlen(op->path) > w->rel_prefix); + w->cb(w->cb_opaque, op->path + w->rel_prefix, w->token); + + w = w->next; + } +} + +static int xs_node_add_content(XsNode **n, struct walk_op *op) +{ + GByteArray *data = op->op_opaque; + + if (op->dom_id) { + /* + * The real XenStored includes permissions and names of child nodes + * in the calculated datasize but life's too short. For a single + * tenant internal XenStore, we don't have to be quite as pedantic. + */ + if (data->len > XS_MAX_NODE_SIZE) { + return E2BIG; + } + } + /* We *are* the node to be written. Either this or a copy. */ + if (!op->inplace) { + XsNode *old = *n; + *n = xs_node_copy(old); + xs_node_unref(old); + } + + if ((*n)->content) { + g_byte_array_unref((*n)->content); + } + (*n)->content = g_byte_array_ref(data); + if (op->tx_id != XBT_NULL) { + (*n)->modified_in_tx = true; + } + return 0; +} + +static int xs_node_get_content(XsNode **n, struct walk_op *op) +{ + GByteArray *data = op->op_opaque; + GByteArray *node_data; + + assert(op->inplace); + assert(*n); + + node_data = (*n)->content; + if (node_data) { + g_byte_array_append(data, node_data->data, node_data->len); + } + + return 0; +} + +static int node_rm_recurse(gpointer key, gpointer value, gpointer user_data) +{ + struct walk_op *op = user_data; + int path_len = strlen(op->path); + int key_len = strlen(key); + XsNode *n = value; + bool this_inplace = op->inplace; + + if (n->ref != 1) { + op->inplace = 0; + } + + assert(key_len + path_len + 2 <= sizeof(op->path)); + op->path[path_len] = '/'; + memcpy(op->path + path_len + 1, key, key_len + 1); + + if (n->children) { + g_hash_table_foreach_remove(n->children, node_rm_recurse, op); + } + op->new_nr_nodes--; + + /* + * Fire watches on *this* node but not the parents because they are + * going to be deleted too, so the watch will fire for them anyway. + */ + fire_watches(op, false); + op->path[path_len] = '\0'; + + /* + * Actually deleting the child here is just an optimisation; if we + * don't then the final unref on the topmost victim will just have + * to cascade down again repeating all the g_hash_table_foreach() + * calls. + */ + return this_inplace; +} + +static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op); +static void copy_deleted_recurse(gpointer key, gpointer value, + gpointer user_data) +{ + struct walk_op *op = user_data; + GHashTable *siblings = op->op_opaque2; + XsNode *n = xs_node_copy_deleted(value, op); + + /* + * Reinsert the deleted_in_tx copy of the node into the parent's + * 'children' hash table. Having stashed it from op->op_opaque2 + * before the recursive call to xs_node_copy_deleted() scribbled + * over it. + */ + g_hash_table_insert(siblings, g_strdup(key), n); +} + +static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op) +{ + XsNode *n = xs_node_new(); + + n->gencnt = old->gencnt; + +#ifdef XS_NODE_UNIT_TEST + if (old->name) { + n->name = g_strdup(old->name); + } +#endif + + if (old->children) { + n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, + (GDestroyNotify)xs_node_unref); + op->op_opaque2 = n->children; + g_hash_table_foreach(old->children, copy_deleted_recurse, op); + } + if (old->perms) { + n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL); + } + n->deleted_in_tx = true; + /* If it gets resurrected we only fire a watch if it lost its content */ + if (old->content) { + n->modified_in_tx = true; + } + op->new_nr_nodes--; + return n; +} + +static int xs_node_rm(XsNode **n, struct walk_op *op) +{ + bool this_inplace = op->inplace; + + if (op->tx_id != XBT_NULL) { + /* It's not trivial to do inplace handling for this one */ + XsNode *old = *n; + *n = xs_node_copy_deleted(old, op); + xs_node_unref(old); + return 0; + } + + /* Fire watches for, and count, nodes in the subtree which get deleted */ + if ((*n)->children) { + g_hash_table_foreach_remove((*n)->children, node_rm_recurse, op); + } + op->new_nr_nodes--; + + if (this_inplace) { + xs_node_unref(*n); + } + *n = NULL; + return 0; +} + +static int xs_node_get_perms(XsNode **n, struct walk_op *op) +{ + GList **perms = op->op_opaque; + + assert(op->inplace); + assert(*n); + + *perms = g_list_copy_deep((*n)->perms, do_perm_copy, NULL); + return 0; +} + +static void parse_perm(const char *perm, char *letter, unsigned int *dom_id) +{ + unsigned int n = sscanf(perm, "%c%u", letter, dom_id); + + assert(n == 2); +} + +static bool can_access(unsigned int dom_id, GList *perms, const char *letters) +{ + unsigned int i, n; + char perm_letter; + unsigned int perm_dom_id; + bool access; + + if (dom_id == 0) { + return true; + } + + n = g_list_length(perms); + assert(n >= 1); + + /* + * The dom_id of the first perm is the owner, and the owner always has + * read-write access. + */ + parse_perm(g_list_nth_data(perms, 0), &perm_letter, &perm_dom_id); + if (dom_id == perm_dom_id) { + return true; + } + + /* + * The letter of the first perm specified the default access for all other + * domains. + */ + access = !!strchr(letters, perm_letter); + for (i = 1; i < n; i++) { + parse_perm(g_list_nth_data(perms, i), &perm_letter, &perm_dom_id); + if (dom_id != perm_dom_id) { + continue; + } + access = !!strchr(letters, perm_letter); + } + + return access; +} + +static int xs_node_set_perms(XsNode **n, struct walk_op *op) +{ + GList *perms = op->op_opaque; + + if (op->dom_id) { + unsigned int perm_dom_id; + char perm_letter; + + /* A guest may not change permissions on nodes it does not own */ + if (!can_access(op->dom_id, (*n)->perms, "")) { + return EPERM; + } + + /* A guest may not change the owner of a node it owns. */ + parse_perm(perms->data, &perm_letter, &perm_dom_id); + if (perm_dom_id != op->dom_id) { + return EPERM; + } + + if (g_list_length(perms) > XS_MAX_PERMS_PER_NODE) { + return ENOSPC; + } + } + + /* We *are* the node to be written. Either this or a copy. */ + if (!op->inplace) { + XsNode *old = *n; + *n = xs_node_copy(old); + xs_node_unref(old); + } + + if ((*n)->perms) { + g_list_free_full((*n)->perms, g_free); + } + (*n)->perms = g_list_copy_deep(perms, do_perm_copy, NULL); + if (op->tx_id != XBT_NULL) { + (*n)->modified_in_tx = true; + } + return 0; +} + +/* + * Passed a full reference in *n which it may free if it needs to COW. + * + * When changing the tree, the op->inplace flag indicates whether this + * node may be modified in place (i.e. it and all its parents had a + * refcount of one). If walking down the tree we find a node whose + * refcount is higher, we must clear op->inplace and COW from there + * down. Unless we are creating new nodes as scaffolding for a write + * (which works like 'mkdir -p' does). In which case those newly + * created nodes can (and must) be modified in place again. + */ +static int xs_node_walk(XsNode **n, struct walk_op *op) +{ + char *child_name = NULL; + size_t namelen; + XsNode *old = *n, *child = NULL; + bool stole_child = false; + bool this_inplace; + XsWatch *watch; + int err; + + namelen = strlen(op->path); + watch = g_hash_table_lookup(op->s->watches, op->path); + + /* Is there a child, or do we hit the double-NUL termination? */ + if (op->path[namelen + 1]) { + char *slash; + child_name = op->path + namelen + 1; + slash = strchr(child_name, '/'); + if (slash) { + *slash = '\0'; + } + op->path[namelen] = '/'; + } + + /* If we walk into a subtree which is shared, we must COW */ + if (op->mutating && old->ref != 1) { + op->inplace = false; + } + + if (!child_name) { + const char *letters = op->mutating ? "wb" : "rb"; + + if (!can_access(op->dom_id, old->perms, letters)) { + err = EACCES; + goto out; + } + + /* This is the actual node on which the operation shall be performed */ + err = op->op_fn(n, op); + if (!err) { + fire_watches(op, true); + } + goto out; + } + + /* op->inplace will be further modified during the recursion */ + this_inplace = op->inplace; + + if (old && old->children) { + child = g_hash_table_lookup(old->children, child_name); + /* This is a *weak* reference to 'child', owned by the hash table */ + } + + if (child) { + if (child->deleted_in_tx) { + assert(child->ref == 1); + /* Cannot actually set child->deleted_in_tx = false until later */ + } + xs_node_ref(child); + /* + * Now we own it too. But if we can modify inplace, that's going to + * foil the check and force it to COW. We want to be the *only* owner + * so that it can be modified in place, so remove it from the hash + * table in that case. We'll add it (or its replacement) back later. + */ + if (op->mutating && this_inplace) { + g_hash_table_remove(old->children, child_name); + stole_child = true; + } + } else if (op->create_dirs) { + assert(op->mutating); + + if (!can_access(op->dom_id, old->perms, "wb")) { + err = EACCES; + goto out; + } + + if (op->dom_id && op->new_nr_nodes >= XS_MAX_DOMAIN_NODES) { + err = ENOSPC; + goto out; + } + + child = xs_node_create(child_name, old->perms); + op->new_nr_nodes++; + + /* + * If we're creating a new child, we can clearly modify it (and its + * children) in place from here on down. + */ + op->inplace = true; + } else { + err = ENOENT; + goto out; + } + + /* + * If there's a watch on this node, add it to the list to be fired + * (with the correct full pathname for the modified node) at the end. + */ + if (watch) { + op->watches = g_list_append(op->watches, watch); + } + + /* + * Except for the temporary child-stealing as noted, our node has not + * changed yet. We don't yet know the overall operation will complete. + */ + err = xs_node_walk(&child, op); + + if (watch) { + op->watches = g_list_remove(op->watches, watch); + } + + if (err || !op->mutating) { + if (stole_child) { + /* Put it back as it was. */ + g_hash_table_replace(old->children, g_strdup(child_name), child); + } else { + xs_node_unref(child); + } + goto out; + } + + /* + * Now we know the operation has completed successfully and we're on + * the way back up. Make the change, substituting 'child' in the + * node at our level. + */ + if (!this_inplace) { + *n = xs_node_copy(old); + xs_node_unref(old); + } + + /* + * If we resurrected a deleted_in_tx node, we can mark it as no longer + * deleted now that we know the overall operation has succeeded. + */ + if (op->create_dirs && child && child->deleted_in_tx) { + op->new_nr_nodes++; + child->deleted_in_tx = false; + } + + /* + * The child may be NULL here, for a remove operation. Either way, + * xs_node_add_child() will do the right thing and return a value + * indicating whether it changed the parent's hash table or not. + * + * We bump the parent gencnt if it adds a child that we *didn't* + * steal from it in the first place, or if child==NULL and was + * thus removed (whether we stole it earlier and didn't put it + * back, or xs_node_add_child() actually removed it now). + */ + if ((xs_node_add_child(*n, child_name, child) && !stole_child) || !child) { + (*n)->gencnt++; + } + + out: + op->path[namelen] = '\0'; + if (!namelen) { + assert(!op->watches); + /* + * On completing the recursion back up the path walk and reaching the + * top, assign the new node count if the operation was successful. If + * the main tree was changed, bump its tx ID so that outstanding + * transactions correctly fail. But don't bump it every time; only + * if it makes a difference. + */ + if (!err && op->mutating) { + if (!op->in_transaction) { + if (op->s->root_tx != op->s->last_tx) { + op->s->root_tx = next_tx(op->s); + } + op->s->nr_nodes = op->new_nr_nodes; + } else { + XsTransaction *tx = g_hash_table_lookup(op->s->transactions, + GINT_TO_POINTER(op->tx_id)); + assert(tx); + tx->nr_nodes = op->new_nr_nodes; + } + } + } + return err; +} + +static void append_directory_item(gpointer key, gpointer value, + gpointer user_data) +{ + GList **items = user_data; + + *items = g_list_insert_sorted(*items, g_strdup(key), (GCompareFunc)strcmp); +} + +/* Populates items with char * names which caller must free. */ +static int xs_node_directory(XsNode **n, struct walk_op *op) +{ + GList **items = op->op_opaque; + + assert(op->inplace); + assert(*n); + + if ((*n)->children) { + g_hash_table_foreach((*n)->children, append_directory_item, items); + } + + if (op->op_opaque2) { + *(uint64_t *)op->op_opaque2 = (*n)->gencnt; + } + + return 0; +} + +static int validate_path(char *outpath, const char *userpath, + unsigned int dom_id) +{ + size_t i, pathlen = strlen(userpath); + + if (!pathlen || userpath[pathlen] == '/' || strstr(userpath, "//")) { + return EINVAL; + } + for (i = 0; i < pathlen; i++) { + if (!strchr(XS_VALID_CHARS, userpath[i])) { + return EINVAL; + } + } + if (userpath[0] == '/') { + if (pathlen > XENSTORE_ABS_PATH_MAX) { + return E2BIG; + } + memcpy(outpath, userpath, pathlen + 1); + } else { + if (pathlen > XENSTORE_REL_PATH_MAX) { + return E2BIG; + } + snprintf(outpath, XENSTORE_ABS_PATH_MAX, "/local/domain/%u/%s", dom_id, + userpath); + } + return 0; +} + + +static int init_walk_op(XenstoreImplState *s, struct walk_op *op, + xs_transaction_t tx_id, unsigned int dom_id, + const char *path, XsNode ***rootp) +{ + int ret = validate_path(op->path, path, dom_id); + if (ret) { + return ret; + } + + /* + * We use *two* NUL terminators at the end of the path, as during the walk + * we will temporarily turn each '/' into a NUL to allow us to use that + * path element for the lookup. + */ + op->path[strlen(op->path) + 1] = '\0'; + op->watches = NULL; + op->path[0] = '\0'; + op->inplace = true; + op->mutating = false; + op->create_dirs = false; + op->in_transaction = false; + op->dom_id = dom_id; + op->tx_id = tx_id; + op->s = s; + + if (tx_id == XBT_NULL) { + *rootp = &s->root; + op->new_nr_nodes = s->nr_nodes; + } else { + XsTransaction *tx = g_hash_table_lookup(s->transactions, + GINT_TO_POINTER(tx_id)); + if (!tx) { + return ENOENT; + } + *rootp = &tx->root; + op->new_nr_nodes = tx->nr_nodes; + op->in_transaction = true; + } + + return 0; +} + +int xs_impl_read(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GByteArray *data) +{ + /* + * The data GByteArray shall exist, and will be freed by caller. + * Just g_byte_array_append() to it. + */ + struct walk_op op; + XsNode **n; + int ret; + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_get_content; + op.op_opaque = data; + return xs_node_walk(n, &op); +} + +int xs_impl_write(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GByteArray *data) +{ + /* + * The data GByteArray shall exist, will be freed by caller. You are + * free to use g_byte_array_steal() and keep the data. Or just ref it. + */ + struct walk_op op; + XsNode **n; + int ret; + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_add_content; + op.op_opaque = data; + op.mutating = true; + op.create_dirs = true; + return xs_node_walk(n, &op); +} + +int xs_impl_directory(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, + uint64_t *gencnt, GList **items) +{ + /* + * The items are (char *) to be freed by caller. Although it's consumed + * immediately so if you want to change it to (const char *) and keep + * them, go ahead and change the caller. + */ + struct walk_op op; + XsNode **n; + int ret; + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_directory; + op.op_opaque = items; + op.op_opaque2 = gencnt; + return xs_node_walk(n, &op); +} + +int xs_impl_transaction_start(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t *tx_id) +{ + XsTransaction *tx; + + if (*tx_id != XBT_NULL) { + return EINVAL; + } + + if (dom_id && s->nr_domu_transactions >= XS_MAX_TRANSACTIONS) { + return ENOSPC; + } + + tx = g_new0(XsTransaction, 1); + + tx->nr_nodes = s->nr_nodes; + tx->tx_id = next_tx(s); + tx->base_tx = s->root_tx; + tx->root = xs_node_ref(s->root); + tx->dom_id = dom_id; + + g_hash_table_insert(s->transactions, GINT_TO_POINTER(tx->tx_id), tx); + if (dom_id) { + s->nr_domu_transactions++; + } + *tx_id = tx->tx_id; + return 0; +} + +static gboolean tx_commit_walk(gpointer key, gpointer value, + gpointer user_data) +{ + struct walk_op *op = user_data; + int path_len = strlen(op->path); + int key_len = strlen(key); + bool fire_parents = true; + XsWatch *watch; + XsNode *n = value; + + if (n->ref != 1) { + return false; + } + + if (n->deleted_in_tx) { + /* + * We fire watches on our parents if we are the *first* node + * to be deleted (the topmost one). This matches the behaviour + * when deleting in the live tree. + */ + fire_parents = !op->deleted_in_tx; + + /* Only used on the way down so no need to clear it later */ + op->deleted_in_tx = true; + } + + assert(key_len + path_len + 2 <= sizeof(op->path)); + op->path[path_len] = '/'; + memcpy(op->path + path_len + 1, key, key_len + 1); + + watch = g_hash_table_lookup(op->s->watches, op->path); + if (watch) { + op->watches = g_list_append(op->watches, watch); + } + + if (n->children) { + g_hash_table_foreach_remove(n->children, tx_commit_walk, op); + } + + if (watch) { + op->watches = g_list_remove(op->watches, watch); + } + + /* + * Don't fire watches if this node was only copied because a + * descendent was changed. The modified_in_tx flag indicates the + * ones which were really changed. + */ + if (n->modified_in_tx || n->deleted_in_tx) { + fire_watches(op, fire_parents); + n->modified_in_tx = false; + } + op->path[path_len] = '\0'; + + /* Deleted nodes really do get expunged when we commit */ + return n->deleted_in_tx; +} + +static int transaction_commit(XenstoreImplState *s, XsTransaction *tx) +{ + struct walk_op op; + XsNode **n; + + if (s->root_tx != tx->base_tx) { + return EAGAIN; + } + xs_node_unref(s->root); + s->root = tx->root; + tx->root = NULL; + s->root_tx = tx->tx_id; + s->nr_nodes = tx->nr_nodes; + + init_walk_op(s, &op, XBT_NULL, tx->dom_id, "/", &n); + op.deleted_in_tx = false; + op.mutating = true; + + /* + * Walk the new root and fire watches on any node which has a + * refcount of one (which is therefore unique to this transaction). + */ + if (s->root->children) { + g_hash_table_foreach_remove(s->root->children, tx_commit_walk, &op); + } + + return 0; +} + +int xs_impl_transaction_end(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, bool commit) +{ + int ret = 0; + XsTransaction *tx = g_hash_table_lookup(s->transactions, + GINT_TO_POINTER(tx_id)); + + if (!tx || tx->dom_id != dom_id) { + return ENOENT; + } + + if (commit) { + ret = transaction_commit(s, tx); + } + + g_hash_table_remove(s->transactions, GINT_TO_POINTER(tx_id)); + if (dom_id) { + assert(s->nr_domu_transactions); + s->nr_domu_transactions--; + } + return ret; +} + +int xs_impl_rm(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path) +{ + struct walk_op op; + XsNode **n; + int ret; + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_rm; + op.mutating = true; + return xs_node_walk(n, &op); +} + +int xs_impl_get_perms(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GList **perms) +{ + struct walk_op op; + XsNode **n; + int ret; + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_get_perms; + op.op_opaque = perms; + return xs_node_walk(n, &op); +} + +static void is_valid_perm(gpointer data, gpointer user_data) +{ + char *perm = data; + bool *valid = user_data; + char letter; + unsigned int dom_id; + + if (!*valid) { + return; + } + + if (sscanf(perm, "%c%u", &letter, &dom_id) != 2) { + *valid = false; + return; + } + + switch (letter) { + case 'n': + case 'r': + case 'w': + case 'b': + break; + + default: + *valid = false; + break; + } +} + +int xs_impl_set_perms(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GList *perms) +{ + struct walk_op op; + XsNode **n; + bool valid = true; + int ret; + + if (!g_list_length(perms)) { + return EINVAL; + } + + g_list_foreach(perms, is_valid_perm, &valid); + if (!valid) { + return EINVAL; + } + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_set_perms; + op.op_opaque = perms; + op.mutating = true; + return xs_node_walk(n, &op); +} + +static int do_xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, + const char *path, const char *token, + xs_impl_watch_fn fn, void *opaque) + +{ + char abspath[XENSTORE_ABS_PATH_MAX + 1]; + XsWatch *w, *l; + int ret; + + ret = validate_path(abspath, path, dom_id); + if (ret) { + return ret; + } + + /* Check for duplicates */ + l = w = g_hash_table_lookup(s->watches, abspath); + while (w) { + if (!g_strcmp0(token, w->token) && opaque == w->cb_opaque && + fn == w->cb && dom_id == w->dom_id) { + return EEXIST; + } + w = w->next; + } + + if (dom_id && s->nr_domu_watches >= XS_MAX_WATCHES) { + return E2BIG; + } + + w = g_new0(XsWatch, 1); + w->token = g_strdup(token); + w->cb = fn; + w->cb_opaque = opaque; + w->dom_id = dom_id; + w->rel_prefix = strlen(abspath) - strlen(path); + + /* l was looked up above when checking for duplicates */ + if (l) { + w->next = l->next; + l->next = w; + } else { + g_hash_table_insert(s->watches, g_strdup(abspath), w); + } + if (dom_id) { + s->nr_domu_watches++; + } + + return 0; +} + +int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path, + const char *token, xs_impl_watch_fn fn, void *opaque) +{ + int ret = do_xs_impl_watch(s, dom_id, path, token, fn, opaque); + + if (!ret) { + /* A new watch should fire immediately */ + fn(opaque, path, token); + } + + return ret; +} + +static XsWatch *free_watch(XenstoreImplState *s, XsWatch *w) +{ + XsWatch *next = w->next; + + if (w->dom_id) { + assert(s->nr_domu_watches); + s->nr_domu_watches--; + } + + g_free(w->token); + g_free(w); + + return next; +} + +int xs_impl_unwatch(XenstoreImplState *s, unsigned int dom_id, + const char *path, const char *token, + xs_impl_watch_fn fn, void *opaque) +{ + char abspath[XENSTORE_ABS_PATH_MAX + 1]; + XsWatch *w, **l; + int ret; + + ret = validate_path(abspath, path, dom_id); + if (ret) { + return ret; + } + + w = g_hash_table_lookup(s->watches, abspath); + if (!w) { + return ENOENT; + } + + /* + * The hash table contains the first element of a list of + * watches. Removing the first element in the list is a + * special case because we have to update the hash table to + * point to the next (or remove it if there's nothing left). + */ + if (!g_strcmp0(token, w->token) && fn == w->cb && opaque == w->cb_opaque && + dom_id == w->dom_id) { + if (w->next) { + /* Insert the previous 'next' into the hash table */ + g_hash_table_insert(s->watches, g_strdup(abspath), w->next); + } else { + /* Nothing left; remove from hash table */ + g_hash_table_remove(s->watches, abspath); + } + free_watch(s, w); + return 0; + } + + /* + * We're all done messing with the hash table because the element + * it points to has survived the cull. Now it's just a simple + * linked list removal operation. + */ + for (l = &w->next; *l; l = &w->next) { + w = *l; + + if (!g_strcmp0(token, w->token) && fn == w->cb && + opaque != w->cb_opaque && dom_id == w->dom_id) { + *l = free_watch(s, w); + return 0; + } + } + + return ENOENT; +} + +int xs_impl_reset_watches(XenstoreImplState *s, unsigned int dom_id) +{ + char **watch_paths; + guint nr_watch_paths; + guint i; + + watch_paths = (char **)g_hash_table_get_keys_as_array(s->watches, + &nr_watch_paths); + + for (i = 0; i < nr_watch_paths; i++) { + XsWatch *w1 = g_hash_table_lookup(s->watches, watch_paths[i]); + XsWatch *w2, *w, **l; + + /* + * w1 is the original list. The hash table has this pointer. + * w2 is the head of our newly-filtered list. + * w and l are temporary for processing. w is somewhat redundant + * with *l but makes my eyes bleed less. + */ + + w = w2 = w1; + l = &w; + while (w) { + if (w->dom_id == dom_id) { + /* If we're freeing the head of the list, bump w2 */ + if (w2 == w) { + w2 = w->next; + } + *l = free_watch(s, w); + } else { + l = &w->next; + } + w = *l; + } + /* + * If the head of the list survived the cull, we don't need to + * touch the hash table and we're done with this path. Else... + */ + if (w1 != w2) { + g_hash_table_steal(s->watches, watch_paths[i]); + + /* + * It was already freed. (Don't worry, this whole thing is + * single-threaded and nobody saw it in the meantime). And + * having *stolen* it, we now own the watch_paths[i] string + * so if we don't give it back to the hash table, we need + * to free it. + */ + if (w2) { + g_hash_table_insert(s->watches, watch_paths[i], w2); + } else { + g_free(watch_paths[i]); + } + } + } + g_free(watch_paths); + return 0; +} + +static void xs_tx_free(void *_tx) +{ + XsTransaction *tx = _tx; + if (tx->root) { + xs_node_unref(tx->root); + } + g_free(tx); +} + +XenstoreImplState *xs_impl_create(unsigned int dom_id) +{ + XenstoreImplState *s = g_new0(XenstoreImplState, 1); + GList *perms; + + s->watches = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); + s->transactions = g_hash_table_new_full(g_direct_hash, g_direct_equal, + NULL, xs_tx_free); + + perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, 0)); + s->root = xs_node_create("/", perms); + g_list_free_full(perms, g_free); + s->nr_nodes = 1; + + s->root_tx = s->last_tx = 1; + return s; +} + + +static void clear_serialized_tx(gpointer key, gpointer value, gpointer opaque) +{ + XsNode *n = value; + + n->serialized_tx = XBT_NULL; + if (n->children) { + g_hash_table_foreach(n->children, clear_serialized_tx, NULL); + } +} + +static void clear_tx_serialized_tx(gpointer key, gpointer value, + gpointer opaque) +{ + XsTransaction *t = value; + + clear_serialized_tx(NULL, t->root, NULL); +} + +static void write_be32(GByteArray *save, uint32_t val) +{ + uint32_t be = htonl(val); + g_byte_array_append(save, (void *)&be, sizeof(be)); +} + + +struct save_state { + GByteArray *bytes; + unsigned int tx_id; +}; + +#define MODIFIED_IN_TX (1U << 0) +#define DELETED_IN_TX (1U << 1) +#define NODE_REF (1U << 2) + +static void save_node(gpointer key, gpointer value, gpointer opaque) +{ + struct save_state *ss = opaque; + XsNode *n = value; + char *name = key; + uint8_t flag = 0; + + /* Child nodes (i.e. anything but the root) have a name */ + if (name) { + g_byte_array_append(ss->bytes, key, strlen(key) + 1); + } + + /* + * If we already wrote this node, refer to the previous copy. + * There's no rename/move in XenStore, so all we need to find + * it is the tx_id of the transation in which it exists. Which + * may be the root tx. + */ + if (n->serialized_tx != XBT_NULL) { + flag = NODE_REF; + g_byte_array_append(ss->bytes, &flag, 1); + write_be32(ss->bytes, n->serialized_tx); + } else { + GList *l; + n->serialized_tx = ss->tx_id; + + if (n->modified_in_tx) { + flag |= MODIFIED_IN_TX; + } + if (n->deleted_in_tx) { + flag |= DELETED_IN_TX; + } + g_byte_array_append(ss->bytes, &flag, 1); + + if (n->content) { + write_be32(ss->bytes, n->content->len); + g_byte_array_append(ss->bytes, n->content->data, n->content->len); + } else { + write_be32(ss->bytes, 0); + } + + for (l = n->perms; l; l = l->next) { + g_byte_array_append(ss->bytes, l->data, strlen(l->data) + 1); + } + /* NUL termination after perms */ + g_byte_array_append(ss->bytes, (void *)"", 1); + + if (n->children) { + g_hash_table_foreach(n->children, save_node, ss); + } + /* NUL termination after children (child name is NUL) */ + g_byte_array_append(ss->bytes, (void *)"", 1); + } +} + +static void save_tree(struct save_state *ss, uint32_t tx_id, XsNode *root) +{ + write_be32(ss->bytes, tx_id); + ss->tx_id = tx_id; + save_node(NULL, root, ss); +} + +static void save_tx(gpointer key, gpointer value, gpointer opaque) +{ + uint32_t tx_id = GPOINTER_TO_INT(key); + struct save_state *ss = opaque; + XsTransaction *n = value; + + write_be32(ss->bytes, n->base_tx); + write_be32(ss->bytes, n->dom_id); + + save_tree(ss, tx_id, n->root); +} + +static void save_watch(gpointer key, gpointer value, gpointer opaque) +{ + struct save_state *ss = opaque; + XsWatch *w = value; + + /* We only save the *guest* watches. */ + if (w->dom_id) { + gpointer relpath = key + w->rel_prefix; + g_byte_array_append(ss->bytes, relpath, strlen(relpath) + 1); + g_byte_array_append(ss->bytes, (void *)w->token, strlen(w->token) + 1); + } +} + +GByteArray *xs_impl_serialize(XenstoreImplState *s) +{ + struct save_state ss; + + ss.bytes = g_byte_array_new(); + + /* + * node = flags [ real_node / node_ref ] + * flags = uint8_t (MODIFIED_IN_TX | DELETED_IN_TX | NODE_REF) + * node_ref = tx_id (in which the original version of this node exists) + * real_node = content perms child* NUL + * content = len data + * len = uint32_t + * data = uint8_t{len} + * perms = perm* NUL + * perm = asciiz + * child = name node + * name = asciiz + * + * tree = tx_id node + * tx_id = uint32_t + * + * transaction = base_tx_id dom_id tree + * base_tx_id = uint32_t + * dom_id = uint32_t + * + * tx_list = tree transaction* XBT_NULL + * + * watch = path token + * path = asciiz + * token = asciiz + * + * watch_list = watch* NUL + * + * xs_serialize_stream = last_tx tx_list watch_list + * last_tx = uint32_t + */ + + /* Clear serialized_tx in every node. */ + if (s->serialized) { + clear_serialized_tx(NULL, s->root, NULL); + g_hash_table_foreach(s->transactions, clear_tx_serialized_tx, NULL); + } + + s->serialized = true; + + write_be32(ss.bytes, s->last_tx); + save_tree(&ss, s->root_tx, s->root); + g_hash_table_foreach(s->transactions, save_tx, &ss); + + write_be32(ss.bytes, XBT_NULL); + + g_hash_table_foreach(s->watches, save_watch, &ss); + g_byte_array_append(ss.bytes, (void *)"", 1); + + return ss.bytes; +} + +struct unsave_state { + char path[XENSTORE_ABS_PATH_MAX + 1]; + XenstoreImplState *s; + GByteArray *bytes; + uint8_t *d; + size_t l; + bool root_walk; +}; + +static int consume_be32(struct unsave_state *us, unsigned int *val) +{ + uint32_t d; + + if (us->l < sizeof(d)) { + return -EINVAL; + } + memcpy(&d, us->d, sizeof(d)); + *val = ntohl(d); + us->d += sizeof(d); + us->l -= sizeof(d); + return 0; +} + +static int consume_string(struct unsave_state *us, char **str, size_t *len) +{ + size_t l; + + if (!us->l) { + return -EINVAL; + } + + l = strnlen((void *)us->d, us->l); + if (l == us->l) { + return -EINVAL; + } + + if (str) { + *str = (void *)us->d; + } + if (len) { + *len = l; + } + + us->d += l + 1; + us->l -= l + 1; + return 0; +} + +static XsNode *lookup_node(XsNode *n, char *path) +{ + char *slash = strchr(path, '/'); + XsNode *child; + + if (path[0] == '\0') { + return n; + } + + if (slash) { + *slash = '\0'; + } + + if (!n->children) { + return NULL; + } + child = g_hash_table_lookup(n->children, path); + if (!slash) { + return child; + } + + *slash = '/'; + if (!child) { + return NULL; + } + return lookup_node(child, slash + 1); +} + +static XsNode *lookup_tx_node(struct unsave_state *us, unsigned int tx_id) +{ + XsTransaction *t; + if (tx_id == us->s->root_tx) { + return lookup_node(us->s->root, us->path + 1); + } + + t = g_hash_table_lookup(us->s->transactions, GINT_TO_POINTER(tx_id)); + if (!t) { + return NULL; + } + g_assert(t->root); + return lookup_node(t->root, us->path + 1); +} + +static void count_child_nodes(gpointer key, gpointer value, gpointer user_data) +{ + unsigned int *nr_nodes = user_data; + XsNode *n = value; + + (*nr_nodes)++; + + if (n->children) { + g_hash_table_foreach(n->children, count_child_nodes, nr_nodes); + } +} + +static int consume_node(struct unsave_state *us, XsNode **nodep, + unsigned int *nr_nodes) +{ + XsNode *n = NULL; + uint8_t flags; + int ret; + + if (us->l < 1) { + return -EINVAL; + } + flags = us->d[0]; + us->d++; + us->l--; + + if (flags == NODE_REF) { + unsigned int tx; + + ret = consume_be32(us, &tx); + if (ret) { + return ret; + } + + n = lookup_tx_node(us, tx); + if (!n) { + return -EINVAL; + } + n->ref++; + if (n->children) { + g_hash_table_foreach(n->children, count_child_nodes, nr_nodes); + } + } else { + uint32_t datalen; + + if (flags & ~(DELETED_IN_TX | MODIFIED_IN_TX)) { + return -EINVAL; + } + n = xs_node_new(); + + if (flags & DELETED_IN_TX) { + n->deleted_in_tx = true; + } + if (flags & MODIFIED_IN_TX) { + n->modified_in_tx = true; + } + ret = consume_be32(us, &datalen); + if (ret) { + xs_node_unref(n); + return -EINVAL; + } + if (datalen) { + if (datalen > us->l) { + xs_node_unref(n); + return -EINVAL; + } + + GByteArray *node_data = g_byte_array_new(); + g_byte_array_append(node_data, us->d, datalen); + us->d += datalen; + us->l -= datalen; + n->content = node_data; + + if (us->root_walk) { + n->modified_in_tx = true; + } + } + while (1) { + char *perm = NULL; + size_t permlen = 0; + + ret = consume_string(us, &perm, &permlen); + if (ret) { + xs_node_unref(n); + return ret; + } + + if (!permlen) { + break; + } + + n->perms = g_list_append(n->perms, g_strdup(perm)); + } + + /* Now children */ + while (1) { + size_t childlen; + char *childname; + char *pathend; + XsNode *child = NULL; + + ret = consume_string(us, &childname, &childlen); + if (ret) { + xs_node_unref(n); + return ret; + } + + if (!childlen) { + break; + } + + pathend = us->path + strlen(us->path); + strncat(us->path, "/", sizeof(us->path) - 1); + strncat(us->path, childname, sizeof(us->path) - 1); + + ret = consume_node(us, &child, nr_nodes); + *pathend = '\0'; + if (ret) { + xs_node_unref(n); + return ret; + } + g_assert(child); + xs_node_add_child(n, childname, child); + } + + /* + * If the node has no data and no children we still want to fire + * a watch on it. + */ + if (us->root_walk && !n->children) { + n->modified_in_tx = true; + } + } + + if (!n->deleted_in_tx) { + (*nr_nodes)++; + } + + *nodep = n; + return 0; +} + +static int consume_tree(struct unsave_state *us, XsTransaction *t) +{ + int ret; + + ret = consume_be32(us, &t->tx_id); + if (ret) { + return ret; + } + + if (t->tx_id > us->s->last_tx) { + return -EINVAL; + } + + us->path[0] = '\0'; + + return consume_node(us, &t->root, &t->nr_nodes); +} + +int xs_impl_deserialize(XenstoreImplState *s, GByteArray *bytes, + unsigned int dom_id, xs_impl_watch_fn watch_fn, + void *watch_opaque) +{ + struct unsave_state us; + XsTransaction base_t = { 0 }; + int ret; + + us.s = s; + us.bytes = bytes; + us.d = bytes->data; + us.l = bytes->len; + + xs_impl_reset_watches(s, dom_id); + g_hash_table_remove_all(s->transactions); + + xs_node_unref(s->root); + s->root = NULL; + s->root_tx = s->last_tx = XBT_NULL; + + ret = consume_be32(&us, &s->last_tx); + if (ret) { + return ret; + } + + /* + * Consume the base tree into a transaction so that watches can be + * fired as we commit it. By setting us.root_walk we cause the nodes + * to be marked as 'modified_in_tx' as they are created, so that the + * watches are triggered on them. + */ + base_t.dom_id = dom_id; + base_t.base_tx = XBT_NULL; + us.root_walk = true; + ret = consume_tree(&us, &base_t); + if (ret) { + return ret; + } + us.root_walk = false; + + /* + * Commit the transaction now while the refcount on all nodes is 1. + * Note that we haven't yet reinstated the *guest* watches but that's + * OK because we don't want the guest to see any changes. Even any + * backend nodes which get recreated should be *precisely* as they + * were before the migration. Back ends may have been instantiated + * already, and will see the frontend magically blink into existence + * now (well, from the aio_bh which fires the watches). It's their + * responsibility to rebuild everything precisely as it was before. + */ + ret = transaction_commit(s, &base_t); + if (ret) { + return ret; + } + + while (1) { + unsigned int base_tx; + XsTransaction *t; + + ret = consume_be32(&us, &base_tx); + if (ret) { + return ret; + } + if (base_tx == XBT_NULL) { + break; + } + + t = g_new0(XsTransaction, 1); + t->base_tx = base_tx; + + ret = consume_be32(&us, &t->dom_id); + if (!ret) { + ret = consume_tree(&us, t); + } + if (ret) { + g_free(t); + return ret; + } + g_assert(t->root); + if (t->dom_id) { + s->nr_domu_transactions++; + } + g_hash_table_insert(s->transactions, GINT_TO_POINTER(t->tx_id), t); + } + + while (1) { + char *path, *token; + size_t pathlen, toklen; + + ret = consume_string(&us, &path, &pathlen); + if (ret) { + return ret; + } + if (!pathlen) { + break; + } + + ret = consume_string(&us, &token, &toklen); + if (ret) { + return ret; + } + + if (!watch_fn) { + continue; + } + + ret = do_xs_impl_watch(s, dom_id, path, token, watch_fn, watch_opaque); + if (ret) { + return ret; + } + } + + if (us.l) { + return -EINVAL; + } + + return 0; +} diff --git a/hw/i386/kvm/xenstore_impl.h b/hw/i386/kvm/xenstore_impl.h new file mode 100644 index 0000000000..0df2a91aae --- /dev/null +++ b/hw/i386/kvm/xenstore_impl.h @@ -0,0 +1,63 @@ +/* + * QEMU Xen emulation: The actual implementation of XenStore + * + * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Authors: David Woodhouse <dwmw2@infradead.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_XENSTORE_IMPL_H +#define QEMU_XENSTORE_IMPL_H + +#include "hw/xen/xen_backend_ops.h" + +typedef struct XenstoreImplState XenstoreImplState; + +XenstoreImplState *xs_impl_create(unsigned int dom_id); + +char *xs_perm_as_string(unsigned int perm, unsigned int domid); + +/* + * These functions return *positive* error numbers. This is a little + * unconventional but it helps to keep us honest because there is + * also a very limited set of error numbers that they are permitted + * to return (those in xsd_errors). + */ + +int xs_impl_read(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GByteArray *data); +int xs_impl_write(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GByteArray *data); +int xs_impl_directory(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, + uint64_t *gencnt, GList **items); +int xs_impl_transaction_start(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t *tx_id); +int xs_impl_transaction_end(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, bool commit); +int xs_impl_rm(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path); +int xs_impl_get_perms(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GList **perms); +int xs_impl_set_perms(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GList *perms); + +/* This differs from xs_watch_fn because it has the token */ +typedef void(xs_impl_watch_fn)(void *opaque, const char *path, + const char *token); +int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path, + const char *token, xs_impl_watch_fn fn, void *opaque); +int xs_impl_unwatch(XenstoreImplState *s, unsigned int dom_id, + const char *path, const char *token, xs_impl_watch_fn fn, + void *opaque); +int xs_impl_reset_watches(XenstoreImplState *s, unsigned int dom_id); + +GByteArray *xs_impl_serialize(XenstoreImplState *s); +int xs_impl_deserialize(XenstoreImplState *s, GByteArray *bytes, + unsigned int dom_id, xs_impl_watch_fn watch_fn, + void *watch_opaque); + +#endif /* QEMU_XENSTORE_IMPL_H */ diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 7bebea57e3..1489abf010 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -102,6 +102,11 @@ #include "trace.h" #include CONFIG_DEVICES +#ifdef CONFIG_XEN_EMU +#include "hw/xen/xen-legacy-backend.h" +#include "hw/xen/xen-bus.h" +#endif + /* * Helper for setting model-id for CPU models that changed model-id * depending on QEMU versions up to QEMU 2.4. @@ -1318,6 +1323,8 @@ void pc_basic_device_init(struct PCMachineState *pcms, if (pcms->bus) { pci_create_simple(pcms->bus, -1, "xen-platform"); } + xen_bus_init(); + xen_be_init(); } #endif diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 4bf15f9c1f..30eedd62a3 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -47,8 +47,6 @@ #include "hw/kvm/clock.h" #include "hw/sysbus.h" #include "hw/i2c/smbus_eeprom.h" -#include "hw/xen/xen-x86.h" -#include "hw/xen/xen.h" #include "exec/memory.h" #include "hw/acpi/acpi.h" #include "hw/acpi/piix4.h" @@ -60,6 +58,8 @@ #include <xen/hvm/hvm_info_table.h> #include "hw/xen/xen_pt.h" #endif +#include "hw/xen/xen-x86.h" +#include "hw/xen/xen.h" #include "migration/global_state.h" #include "migration/misc.h" #include "sysemu/numa.h" diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index e5a1dd19f4..56641a550e 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -18,7 +18,7 @@ #include "hw/irq.h" #include "hw/hw.h" #include "hw/i386/apic-msidef.h" -#include "hw/xen/xen_common.h" +#include "hw/xen/xen_native.h" #include "hw/xen/xen-legacy-backend.h" #include "hw/xen/xen-bus.h" #include "hw/xen/xen-x86.h" @@ -52,10 +52,11 @@ static bool xen_in_migration; /* Compatibility with older version */ -/* This allows QEMU to build on a system that has Xen 4.5 or earlier - * installed. This here (not in hw/xen/xen_common.h) because xen/hvm/ioreq.h - * needs to be included before this block and hw/xen/xen_common.h needs to - * be included before xen/hvm/ioreq.h +/* + * This allows QEMU to build on a system that has Xen 4.5 or earlier installed. + * This is here (not in hw/xen/xen_native.h) because xen/hvm/ioreq.h needs to + * be included before this block and hw/xen/xen_native.h needs to be included + * before xen/hvm/ioreq.h */ #ifndef IOREQ_TYPE_VMWARE_PORT #define IOREQ_TYPE_VMWARE_PORT 3 @@ -761,7 +762,7 @@ static ioreq_t *cpu_get_ioreq(XenIOState *state) int i; evtchn_port_t port; - port = xenevtchn_pending(state->xce_handle); + port = qemu_xen_evtchn_pending(state->xce_handle); if (port == state->bufioreq_local_port) { timer_mod(state->buffered_io_timer, BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME)); @@ -780,7 +781,7 @@ static ioreq_t *cpu_get_ioreq(XenIOState *state) } /* unmask the wanted port again */ - xenevtchn_unmask(state->xce_handle, port); + qemu_xen_evtchn_unmask(state->xce_handle, port); /* get the io packet from shared memory */ state->send_vcpu = i; @@ -1147,7 +1148,7 @@ static void handle_buffered_io(void *opaque) BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME)); } else { timer_del(state->buffered_io_timer); - xenevtchn_unmask(state->xce_handle, state->bufioreq_local_port); + qemu_xen_evtchn_unmask(state->xce_handle, state->bufioreq_local_port); } } @@ -1196,8 +1197,8 @@ static void cpu_handle_ioreq(void *opaque) } req->state = STATE_IORESP_READY; - xenevtchn_notify(state->xce_handle, - state->ioreq_local_port[state->send_vcpu]); + qemu_xen_evtchn_notify(state->xce_handle, + state->ioreq_local_port[state->send_vcpu]); } } @@ -1206,7 +1207,7 @@ static void xen_main_loop_prepare(XenIOState *state) int evtchn_fd = -1; if (state->xce_handle != NULL) { - evtchn_fd = xenevtchn_fd(state->xce_handle); + evtchn_fd = qemu_xen_evtchn_fd(state->xce_handle); } state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io, @@ -1249,7 +1250,7 @@ static void xen_exit_notifier(Notifier *n, void *data) xenforeignmemory_unmap_resource(xen_fmem, state->fres); } - xenevtchn_close(state->xce_handle); + qemu_xen_evtchn_close(state->xce_handle); xs_daemon_close(state->xenstore); } @@ -1397,9 +1398,11 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory) xen_pfn_t ioreq_pfn; XenIOState *state; + setup_xen_backend_ops(); + state = g_new0(XenIOState, 1); - state->xce_handle = xenevtchn_open(NULL, 0); + state->xce_handle = qemu_xen_evtchn_open(); if (state->xce_handle == NULL) { perror("xen: event channel open"); goto err; @@ -1463,8 +1466,9 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory) /* FIXME: how about if we overflow the page here? */ for (i = 0; i < max_cpus; i++) { - rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid, - xen_vcpu_eport(state->shared_page, i)); + rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid, + xen_vcpu_eport(state->shared_page, + i)); if (rc == -1) { error_report("shared evtchn %d bind error %d", i, errno); goto err; @@ -1472,8 +1476,8 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory) state->ioreq_local_port[i] = rc; } - rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid, - state->bufioreq_remote_port); + rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid, + state->bufioreq_remote_port); if (rc == -1) { error_report("buffered evtchn bind error %d", errno); goto err; diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c index 1d0879d234..f7d974677d 100644 --- a/hw/i386/xen/xen-mapcache.c +++ b/hw/i386/xen/xen-mapcache.c @@ -14,7 +14,7 @@ #include <sys/resource.h> -#include "hw/xen/xen-legacy-backend.h" +#include "hw/xen/xen_native.h" #include "qemu/bitmap.h" #include "sysemu/runstate.h" diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c index 539f7da374..57f1d742c1 100644 --- a/hw/i386/xen/xen_platform.c +++ b/hw/i386/xen/xen_platform.c @@ -28,7 +28,6 @@ #include "hw/ide/pci.h" #include "hw/pci/pci.h" #include "migration/vmstate.h" -#include "hw/xen/xen.h" #include "net/net.h" #include "trace.h" #include "sysemu/xen.h" @@ -38,10 +37,12 @@ #include "qom/object.h" #ifdef CONFIG_XEN -#include "hw/xen/xen_common.h" -#include "hw/xen/xen-legacy-backend.h" +#include "hw/xen/xen_native.h" #endif +/* The rule is that xen_native.h must come first */ +#include "hw/xen/xen.h" + //#define DEBUG_PLATFORM #ifdef DEBUG_PLATFORM diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c index 7d92c2d022..9bbf6599fc 100644 --- a/hw/net/xen_nic.c +++ b/hw/net/xen_nic.c @@ -145,7 +145,7 @@ static void net_tx_packets(struct XenNetDev *netdev) continue; } - if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) { + if ((txreq.offset + txreq.size) > XEN_PAGE_SIZE) { xen_pv_printf(&netdev->xendev, 0, "error: page crossing\n"); net_tx_error(netdev, &txreq, rc); continue; @@ -171,7 +171,7 @@ static void net_tx_packets(struct XenNetDev *netdev) if (txreq.flags & NETTXF_csum_blank) { /* have read-only mapping -> can't fill checksum in-place */ if (!tmpbuf) { - tmpbuf = g_malloc(XC_PAGE_SIZE); + tmpbuf = g_malloc(XEN_PAGE_SIZE); } memcpy(tmpbuf, page + txreq.offset, txreq.size); net_checksum_calculate(tmpbuf, txreq.size, CSUM_ALL); @@ -181,7 +181,7 @@ static void net_tx_packets(struct XenNetDev *netdev) qemu_send_packet(qemu_get_queue(netdev->nic), page + txreq.offset, txreq.size); } - xen_be_unmap_grant_ref(&netdev->xendev, page); + xen_be_unmap_grant_ref(&netdev->xendev, page, txreq.gref); net_tx_response(netdev, &txreq, NETIF_RSP_OKAY); } if (!netdev->tx_work) { @@ -243,9 +243,9 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) { return 0; } - if (size > XC_PAGE_SIZE - NET_IP_ALIGN) { + if (size > XEN_PAGE_SIZE - NET_IP_ALIGN) { xen_pv_printf(&netdev->xendev, 0, "packet too big (%lu > %ld)", - (unsigned long)size, XC_PAGE_SIZE - NET_IP_ALIGN); + (unsigned long)size, XEN_PAGE_SIZE - NET_IP_ALIGN); return -1; } @@ -261,7 +261,7 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size return -1; } memcpy(page + NET_IP_ALIGN, buf, size); - xen_be_unmap_grant_ref(&netdev->xendev, page); + xen_be_unmap_grant_ref(&netdev->xendev, page, rxreq.gref); net_rx_response(netdev, &rxreq, NETIF_RSP_OKAY, NET_IP_ALIGN, size, 0); return size; @@ -343,12 +343,13 @@ static int net_connect(struct XenLegacyDevice *xendev) netdev->rx_ring_ref, PROT_READ | PROT_WRITE); if (!netdev->rxs) { - xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs); + xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs, + netdev->tx_ring_ref); netdev->txs = NULL; return -1; } - BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XC_PAGE_SIZE); - BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XC_PAGE_SIZE); + BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XEN_PAGE_SIZE); + BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XEN_PAGE_SIZE); xen_be_bind_evtchn(&netdev->xendev); @@ -368,11 +369,13 @@ static void net_disconnect(struct XenLegacyDevice *xendev) xen_pv_unbind_evtchn(&netdev->xendev); if (netdev->txs) { - xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs); + xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs, + netdev->tx_ring_ref); netdev->txs = NULL; } if (netdev->rxs) { - xen_be_unmap_grant_ref(&netdev->xendev, netdev->rxs); + xen_be_unmap_grant_ref(&netdev->xendev, netdev->rxs, + netdev->rx_ring_ref); netdev->rxs = NULL; } } diff --git a/hw/usb/meson.build b/hw/usb/meson.build index bdf34cbd3e..599dc24f0d 100644 --- a/hw/usb/meson.build +++ b/hw/usb/meson.build @@ -84,6 +84,6 @@ if libusb.found() hw_usb_modules += {'host': usbhost_ss} endif -softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XEN', libusb], if_true: files('xen-usb.c')) +softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XEN_BUS', libusb], if_true: files('xen-usb.c')) modules += { 'hw-usb': hw_usb_modules } diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c index 0f7369e7ed..66cb3f7c24 100644 --- a/hw/usb/xen-usb.c +++ b/hw/usb/xen-usb.c @@ -101,6 +101,8 @@ struct usbback_hotplug { struct usbback_info { struct XenLegacyDevice xendev; /* must be first */ USBBus bus; + uint32_t urb_ring_ref; + uint32_t conn_ring_ref; void *urb_sring; void *conn_sring; struct usbif_urb_back_ring urb_ring; @@ -159,7 +161,7 @@ static int usbback_gnttab_map(struct usbback_req *usbback_req) for (i = 0; i < nr_segs; i++) { if ((unsigned)usbback_req->req.seg[i].offset + - (unsigned)usbback_req->req.seg[i].length > XC_PAGE_SIZE) { + (unsigned)usbback_req->req.seg[i].length > XEN_PAGE_SIZE) { xen_pv_printf(xendev, 0, "segment crosses page boundary\n"); return -EINVAL; } @@ -183,7 +185,7 @@ static int usbback_gnttab_map(struct usbback_req *usbback_req) for (i = 0; i < usbback_req->nr_buffer_segs; i++) { seg = usbback_req->req.seg + i; - addr = usbback_req->buffer + i * XC_PAGE_SIZE + seg->offset; + addr = usbback_req->buffer + i * XEN_PAGE_SIZE + seg->offset; qemu_iovec_add(&usbback_req->packet.iov, addr, seg->length); } } @@ -277,10 +279,11 @@ static int usbback_init_packet(struct usbback_req *usbback_req) static void usbback_do_response(struct usbback_req *usbback_req, int32_t status, int32_t actual_length, int32_t error_count) { + uint32_t ref[USBIF_MAX_SEGMENTS_PER_REQUEST]; struct usbback_info *usbif; struct usbif_urb_response *res; struct XenLegacyDevice *xendev; - unsigned int notify; + unsigned int notify, i; usbif = usbback_req->usbif; xendev = &usbif->xendev; @@ -293,13 +296,19 @@ static void usbback_do_response(struct usbback_req *usbback_req, int32_t status, } if (usbback_req->buffer) { - xen_be_unmap_grant_refs(xendev, usbback_req->buffer, + for (i = 0; i < usbback_req->nr_buffer_segs; i++) { + ref[i] = usbback_req->req.seg[i].gref; + } + xen_be_unmap_grant_refs(xendev, usbback_req->buffer, ref, usbback_req->nr_buffer_segs); usbback_req->buffer = NULL; } if (usbback_req->isoc_buffer) { - xen_be_unmap_grant_refs(xendev, usbback_req->isoc_buffer, + for (i = 0; i < usbback_req->nr_extra_segs; i++) { + ref[i] = usbback_req->req.seg[i + usbback_req->req.nr_buffer_segs].gref; + } + xen_be_unmap_grant_refs(xendev, usbback_req->isoc_buffer, ref, usbback_req->nr_extra_segs); usbback_req->isoc_buffer = NULL; } @@ -832,11 +841,11 @@ static void usbback_disconnect(struct XenLegacyDevice *xendev) xen_pv_unbind_evtchn(xendev); if (usbif->urb_sring) { - xen_be_unmap_grant_ref(xendev, usbif->urb_sring); + xen_be_unmap_grant_ref(xendev, usbif->urb_sring, usbif->urb_ring_ref); usbif->urb_sring = NULL; } if (usbif->conn_sring) { - xen_be_unmap_grant_ref(xendev, usbif->conn_sring); + xen_be_unmap_grant_ref(xendev, usbif->conn_sring, usbif->conn_ring_ref); usbif->conn_sring = NULL; } @@ -889,10 +898,12 @@ static int usbback_connect(struct XenLegacyDevice *xendev) return -1; } + usbif->urb_ring_ref = urb_ring_ref; + usbif->conn_ring_ref = conn_ring_ref; urb_sring = usbif->urb_sring; conn_sring = usbif->conn_sring; - BACK_RING_INIT(&usbif->urb_ring, urb_sring, XC_PAGE_SIZE); - BACK_RING_INIT(&usbif->conn_ring, conn_sring, XC_PAGE_SIZE); + BACK_RING_INIT(&usbif->urb_ring, urb_sring, XEN_PAGE_SIZE); + BACK_RING_INIT(&usbif->conn_ring, conn_sring, XEN_PAGE_SIZE); xen_be_bind_evtchn(xendev); diff --git a/hw/xen/meson.build b/hw/xen/meson.build index ae0ace3046..19c6aabc7c 100644 --- a/hw/xen/meson.build +++ b/hw/xen/meson.build @@ -1,4 +1,4 @@ -softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files( +softmmu_ss.add(when: ['CONFIG_XEN_BUS'], if_true: files( 'xen-backend.c', 'xen-bus-helper.c', 'xen-bus.c', @@ -7,6 +7,10 @@ softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files( 'xen_pvdev.c', )) +softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files( + 'xen-operations.c', +)) + xen_specific_ss = ss.source_set() if have_xen_pci_passthrough xen_specific_ss.add(files( diff --git a/hw/xen/trace-events b/hw/xen/trace-events index 3da3fd8348..55c9e1df68 100644 --- a/hw/xen/trace-events +++ b/hw/xen/trace-events @@ -1,6 +1,6 @@ # See docs/devel/tracing.rst for syntax documentation. -# ../../include/hw/xen/xen_common.h +# ../../include/hw/xen/xen_native.h xen_default_ioreq_server(void) "" xen_ioreq_server_create(uint32_t id) "id: %u" xen_ioreq_server_destroy(uint32_t id) "id: %u" diff --git a/hw/xen/xen-bus-helper.c b/hw/xen/xen-bus-helper.c index 5a1e12b374..b2b2cc9c5d 100644 --- a/hw/xen/xen-bus-helper.c +++ b/hw/xen/xen-bus-helper.c @@ -10,6 +10,7 @@ #include "hw/xen/xen-bus.h" #include "hw/xen/xen-bus-helper.h" #include "qapi/error.h" +#include "trace.h" #include <glib/gprintf.h> @@ -46,34 +47,28 @@ const char *xs_strstate(enum xenbus_state state) return "INVALID"; } -void xs_node_create(struct xs_handle *xsh, xs_transaction_t tid, - const char *node, struct xs_permissions perms[], - unsigned int nr_perms, Error **errp) +void xs_node_create(struct qemu_xs_handle *h, xs_transaction_t tid, + const char *node, unsigned int owner, unsigned int domid, + unsigned int perms, Error **errp) { trace_xs_node_create(node); - if (!xs_write(xsh, tid, node, "", 0)) { + if (!qemu_xen_xs_create(h, tid, owner, domid, perms, node)) { error_setg_errno(errp, errno, "failed to create node '%s'", node); - return; - } - - if (!xs_set_permissions(xsh, tid, node, perms, nr_perms)) { - error_setg_errno(errp, errno, "failed to set node '%s' permissions", - node); } } -void xs_node_destroy(struct xs_handle *xsh, xs_transaction_t tid, +void xs_node_destroy(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, Error **errp) { trace_xs_node_destroy(node); - if (!xs_rm(xsh, tid, node)) { + if (!qemu_xen_xs_destroy(h, tid, node)) { error_setg_errno(errp, errno, "failed to destroy node '%s'", node); } } -void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid, +void xs_node_vprintf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, const char *key, Error **errp, const char *fmt, va_list ap) { @@ -86,7 +81,7 @@ void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid, trace_xs_node_vprintf(path, value); - if (!xs_write(xsh, tid, path, value, len)) { + if (!qemu_xen_xs_write(h, tid, path, value, len)) { error_setg_errno(errp, errno, "failed to write '%s' to '%s'", value, path); } @@ -95,18 +90,18 @@ void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid, g_free(path); } -void xs_node_printf(struct xs_handle *xsh, xs_transaction_t tid, +void xs_node_printf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, const char *key, Error **errp, const char *fmt, ...) { va_list ap; va_start(ap, fmt); - xs_node_vprintf(xsh, tid, node, key, errp, fmt, ap); + xs_node_vprintf(h, tid, node, key, errp, fmt, ap); va_end(ap); } -int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid, +int xs_node_vscanf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, const char *key, Error **errp, const char *fmt, va_list ap) { @@ -115,7 +110,7 @@ int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid, path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) : g_strdup(key); - value = xs_read(xsh, tid, path, NULL); + value = qemu_xen_xs_read(h, tid, path, NULL); trace_xs_node_vscanf(path, value); @@ -133,7 +128,7 @@ int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid, return rc; } -int xs_node_scanf(struct xs_handle *xsh, xs_transaction_t tid, +int xs_node_scanf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, const char *key, Error **errp, const char *fmt, ...) { @@ -141,42 +136,35 @@ int xs_node_scanf(struct xs_handle *xsh, xs_transaction_t tid, int rc; va_start(ap, fmt); - rc = xs_node_vscanf(xsh, tid, node, key, errp, fmt, ap); + rc = xs_node_vscanf(h, tid, node, key, errp, fmt, ap); va_end(ap); return rc; } -void xs_node_watch(struct xs_handle *xsh, const char *node, const char *key, - char *token, Error **errp) +struct qemu_xs_watch *xs_node_watch(struct qemu_xs_handle *h, const char *node, + const char *key, xs_watch_fn fn, + void *opaque, Error **errp) { char *path; + struct qemu_xs_watch *w; path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) : g_strdup(key); trace_xs_node_watch(path); - if (!xs_watch(xsh, path, token)) { + w = qemu_xen_xs_watch(h, path, fn, opaque); + if (!w) { error_setg_errno(errp, errno, "failed to watch node '%s'", path); } g_free(path); + + return w; } -void xs_node_unwatch(struct xs_handle *xsh, const char *node, - const char *key, const char *token, Error **errp) +void xs_node_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w) { - char *path; - - path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) : - g_strdup(key); - - trace_xs_node_unwatch(path); - - if (!xs_unwatch(xsh, path, token)) { - error_setg_errno(errp, errno, "failed to unwatch node '%s'", path); - } - - g_free(path); + qemu_xen_xs_unwatch(h, w); } diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c index df3f6b9ae0..c59850b1de 100644 --- a/hw/xen/xen-bus.c +++ b/hw/xen/xen-bus.c @@ -62,7 +62,7 @@ static void xen_device_unplug(XenDevice *xendev, Error **errp) /* Mimic the way the Xen toolstack does an unplug */ again: - tid = xs_transaction_start(xenbus->xsh); + tid = qemu_xen_xs_transaction_start(xenbus->xsh); if (tid == XBT_NULL) { error_setg_errno(errp, errno, "failed xs_transaction_start"); return; @@ -80,7 +80,7 @@ again: goto abort; } - if (!xs_transaction_end(xenbus->xsh, tid, false)) { + if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, false)) { if (errno == EAGAIN) { goto again; } @@ -95,7 +95,7 @@ abort: * We only abort if there is already a failure so ignore any error * from ending the transaction. */ - xs_transaction_end(xenbus->xsh, tid, true); + qemu_xen_xs_transaction_end(xenbus->xsh, tid, true); } static void xen_bus_print_dev(Monitor *mon, DeviceState *dev, int indent) @@ -111,143 +111,6 @@ static char *xen_bus_get_dev_path(DeviceState *dev) return xen_device_get_backend_path(XEN_DEVICE(dev)); } -struct XenWatch { - char *node, *key; - char *token; - XenWatchHandler handler; - void *opaque; - Notifier notifier; -}; - -static void watch_notify(Notifier *n, void *data) -{ - XenWatch *watch = container_of(n, XenWatch, notifier); - const char *token = data; - - if (!strcmp(watch->token, token)) { - watch->handler(watch->opaque); - } -} - -static XenWatch *new_watch(const char *node, const char *key, - XenWatchHandler handler, void *opaque) -{ - XenWatch *watch = g_new0(XenWatch, 1); - QemuUUID uuid; - - qemu_uuid_generate(&uuid); - - watch->token = qemu_uuid_unparse_strdup(&uuid); - watch->node = g_strdup(node); - watch->key = g_strdup(key); - watch->handler = handler; - watch->opaque = opaque; - watch->notifier.notify = watch_notify; - - return watch; -} - -static void free_watch(XenWatch *watch) -{ - g_free(watch->token); - g_free(watch->key); - g_free(watch->node); - - g_free(watch); -} - -struct XenWatchList { - struct xs_handle *xsh; - NotifierList notifiers; -}; - -static void watch_list_event(void *opaque) -{ - XenWatchList *watch_list = opaque; - char **v; - const char *token; - - v = xs_check_watch(watch_list->xsh); - if (!v) { - return; - } - - token = v[XS_WATCH_TOKEN]; - - notifier_list_notify(&watch_list->notifiers, (void *)token); - - free(v); -} - -static XenWatchList *watch_list_create(struct xs_handle *xsh) -{ - XenWatchList *watch_list = g_new0(XenWatchList, 1); - - g_assert(xsh); - - watch_list->xsh = xsh; - notifier_list_init(&watch_list->notifiers); - qemu_set_fd_handler(xs_fileno(watch_list->xsh), watch_list_event, NULL, - watch_list); - - return watch_list; -} - -static void watch_list_destroy(XenWatchList *watch_list) -{ - g_assert(notifier_list_empty(&watch_list->notifiers)); - qemu_set_fd_handler(xs_fileno(watch_list->xsh), NULL, NULL, NULL); - g_free(watch_list); -} - -static XenWatch *watch_list_add(XenWatchList *watch_list, const char *node, - const char *key, XenWatchHandler handler, - void *opaque, Error **errp) -{ - ERRP_GUARD(); - XenWatch *watch = new_watch(node, key, handler, opaque); - - notifier_list_add(&watch_list->notifiers, &watch->notifier); - - xs_node_watch(watch_list->xsh, node, key, watch->token, errp); - if (*errp) { - notifier_remove(&watch->notifier); - free_watch(watch); - - return NULL; - } - - return watch; -} - -static void watch_list_remove(XenWatchList *watch_list, XenWatch *watch, - Error **errp) -{ - xs_node_unwatch(watch_list->xsh, watch->node, watch->key, watch->token, - errp); - - notifier_remove(&watch->notifier); - free_watch(watch); -} - -static XenWatch *xen_bus_add_watch(XenBus *xenbus, const char *node, - const char *key, XenWatchHandler handler, - Error **errp) -{ - trace_xen_bus_add_watch(node, key); - - return watch_list_add(xenbus->watch_list, node, key, handler, xenbus, - errp); -} - -static void xen_bus_remove_watch(XenBus *xenbus, XenWatch *watch, - Error **errp) -{ - trace_xen_bus_remove_watch(watch->node, watch->key); - - watch_list_remove(xenbus->watch_list, watch, errp); -} - static void xen_bus_backend_create(XenBus *xenbus, const char *type, const char *name, char *path, Error **errp) @@ -261,15 +124,15 @@ static void xen_bus_backend_create(XenBus *xenbus, const char *type, trace_xen_bus_backend_create(type, path); again: - tid = xs_transaction_start(xenbus->xsh); + tid = qemu_xen_xs_transaction_start(xenbus->xsh); if (tid == XBT_NULL) { error_setg(errp, "failed xs_transaction_start"); return; } - key = xs_directory(xenbus->xsh, tid, path, &n); + key = qemu_xen_xs_directory(xenbus->xsh, tid, path, &n); if (!key) { - if (!xs_transaction_end(xenbus->xsh, tid, true)) { + if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, true)) { error_setg_errno(errp, errno, "failed xs_transaction_end"); } return; @@ -300,7 +163,7 @@ again: free(key); - if (!xs_transaction_end(xenbus->xsh, tid, false)) { + if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, false)) { qobject_unref(opts); if (errno == EAGAIN) { @@ -327,7 +190,7 @@ static void xen_bus_type_enumerate(XenBus *xenbus, const char *type) trace_xen_bus_type_enumerate(type); - backend = xs_directory(xenbus->xsh, XBT_NULL, domain_path, &n); + backend = qemu_xen_xs_directory(xenbus->xsh, XBT_NULL, domain_path, &n); if (!backend) { goto out; } @@ -372,7 +235,7 @@ static void xen_bus_enumerate(XenBus *xenbus) trace_xen_bus_enumerate(); - type = xs_directory(xenbus->xsh, XBT_NULL, "backend", &n); + type = qemu_xen_xs_directory(xenbus->xsh, XBT_NULL, "backend", &n); if (!type) { return; } @@ -415,7 +278,7 @@ static void xen_bus_cleanup(XenBus *xenbus) } } -static void xen_bus_backend_changed(void *opaque) +static void xen_bus_backend_changed(void *opaque, const char *path) { XenBus *xenbus = opaque; @@ -434,7 +297,7 @@ static void xen_bus_unrealize(BusState *bus) for (i = 0; i < xenbus->backend_types; i++) { if (xenbus->backend_watch[i]) { - xen_bus_remove_watch(xenbus, xenbus->backend_watch[i], NULL); + xs_node_unwatch(xenbus->xsh, xenbus->backend_watch[i]); } } @@ -442,13 +305,8 @@ static void xen_bus_unrealize(BusState *bus) xenbus->backend_watch = NULL; } - if (xenbus->watch_list) { - watch_list_destroy(xenbus->watch_list); - xenbus->watch_list = NULL; - } - if (xenbus->xsh) { - xs_close(xenbus->xsh); + qemu_xen_xs_close(xenbus->xsh); } } @@ -463,7 +321,7 @@ static void xen_bus_realize(BusState *bus, Error **errp) trace_xen_bus_realize(); - xenbus->xsh = xs_open(0); + xenbus->xsh = qemu_xen_xs_open(); if (!xenbus->xsh) { error_setg_errno(errp, errno, "failed xs_open"); goto fail; @@ -476,19 +334,18 @@ static void xen_bus_realize(BusState *bus, Error **errp) xenbus->backend_id = 0; /* Assume lack of node means dom0 */ } - xenbus->watch_list = watch_list_create(xenbus->xsh); - module_call_init(MODULE_INIT_XEN_BACKEND); type = xen_backend_get_types(&xenbus->backend_types); - xenbus->backend_watch = g_new(XenWatch *, xenbus->backend_types); + xenbus->backend_watch = g_new(struct qemu_xs_watch *, + xenbus->backend_types); for (i = 0; i < xenbus->backend_types; i++) { char *node = g_strdup_printf("backend/%s", type[i]); xenbus->backend_watch[i] = - xen_bus_add_watch(xenbus, node, key, xen_bus_backend_changed, - &local_err); + xs_node_watch(xenbus->xsh, node, key, xen_bus_backend_changed, + xenbus, &local_err); if (local_err) { /* This need not be treated as a hard error so don't propagate */ error_reportf_err(local_err, @@ -631,7 +488,7 @@ static bool xen_device_frontend_is_active(XenDevice *xendev) } } -static void xen_device_backend_changed(void *opaque) +static void xen_device_backend_changed(void *opaque, const char *path) { XenDevice *xendev = opaque; const char *type = object_get_typename(OBJECT(xendev)); @@ -685,66 +542,35 @@ static void xen_device_backend_changed(void *opaque) } } -static XenWatch *xen_device_add_watch(XenDevice *xendev, const char *node, - const char *key, - XenWatchHandler handler, - Error **errp) -{ - const char *type = object_get_typename(OBJECT(xendev)); - - trace_xen_device_add_watch(type, xendev->name, node, key); - - return watch_list_add(xendev->watch_list, node, key, handler, xendev, - errp); -} - -static void xen_device_remove_watch(XenDevice *xendev, XenWatch *watch, - Error **errp) -{ - const char *type = object_get_typename(OBJECT(xendev)); - - trace_xen_device_remove_watch(type, xendev->name, watch->node, - watch->key); - - watch_list_remove(xendev->watch_list, watch, errp); -} - - static void xen_device_backend_create(XenDevice *xendev, Error **errp) { ERRP_GUARD(); XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); - struct xs_permissions perms[2]; xendev->backend_path = xen_device_get_backend_path(xendev); - perms[0].id = xenbus->backend_id; - perms[0].perms = XS_PERM_NONE; - perms[1].id = xendev->frontend_id; - perms[1].perms = XS_PERM_READ; - g_assert(xenbus->xsh); - xs_node_create(xenbus->xsh, XBT_NULL, xendev->backend_path, perms, - ARRAY_SIZE(perms), errp); + xs_node_create(xenbus->xsh, XBT_NULL, xendev->backend_path, + xenbus->backend_id, xendev->frontend_id, XS_PERM_READ, errp); if (*errp) { error_prepend(errp, "failed to create backend: "); return; } xendev->backend_state_watch = - xen_device_add_watch(xendev, xendev->backend_path, - "state", xen_device_backend_changed, - errp); + xs_node_watch(xendev->xsh, xendev->backend_path, + "state", xen_device_backend_changed, xendev, + errp); if (*errp) { error_prepend(errp, "failed to watch backend state: "); return; } xendev->backend_online_watch = - xen_device_add_watch(xendev, xendev->backend_path, - "online", xen_device_backend_changed, - errp); + xs_node_watch(xendev->xsh, xendev->backend_path, + "online", xen_device_backend_changed, xendev, + errp); if (*errp) { error_prepend(errp, "failed to watch backend online: "); return; @@ -757,12 +583,12 @@ static void xen_device_backend_destroy(XenDevice *xendev) Error *local_err = NULL; if (xendev->backend_online_watch) { - xen_device_remove_watch(xendev, xendev->backend_online_watch, NULL); + xs_node_unwatch(xendev->xsh, xendev->backend_online_watch); xendev->backend_online_watch = NULL; } if (xendev->backend_state_watch) { - xen_device_remove_watch(xendev, xendev->backend_state_watch, NULL); + xs_node_unwatch(xendev->xsh, xendev->backend_state_watch); xendev->backend_state_watch = NULL; } @@ -837,7 +663,7 @@ static void xen_device_frontend_set_state(XenDevice *xendev, } } -static void xen_device_frontend_changed(void *opaque) +static void xen_device_frontend_changed(void *opaque, const char *path) { XenDevice *xendev = opaque; XenDeviceClass *xendev_class = XEN_DEVICE_GET_CLASS(xendev); @@ -885,7 +711,6 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp) { ERRP_GUARD(); XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); - struct xs_permissions perms[2]; xendev->frontend_path = xen_device_get_frontend_path(xendev); @@ -894,15 +719,11 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp) * toolstack. */ if (!xen_device_frontend_exists(xendev)) { - perms[0].id = xendev->frontend_id; - perms[0].perms = XS_PERM_NONE; - perms[1].id = xenbus->backend_id; - perms[1].perms = XS_PERM_READ | XS_PERM_WRITE; - g_assert(xenbus->xsh); - xs_node_create(xenbus->xsh, XBT_NULL, xendev->frontend_path, perms, - ARRAY_SIZE(perms), errp); + xs_node_create(xenbus->xsh, XBT_NULL, xendev->frontend_path, + xendev->frontend_id, xenbus->backend_id, + XS_PERM_READ | XS_PERM_WRITE, errp); if (*errp) { error_prepend(errp, "failed to create frontend: "); return; @@ -910,8 +731,8 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp) } xendev->frontend_state_watch = - xen_device_add_watch(xendev, xendev->frontend_path, "state", - xen_device_frontend_changed, errp); + xs_node_watch(xendev->xsh, xendev->frontend_path, "state", + xen_device_frontend_changed, xendev, errp); if (*errp) { error_prepend(errp, "failed to watch frontend state: "); } @@ -923,8 +744,7 @@ static void xen_device_frontend_destroy(XenDevice *xendev) Error *local_err = NULL; if (xendev->frontend_state_watch) { - xen_device_remove_watch(xendev, xendev->frontend_state_watch, - NULL); + xs_node_unwatch(xendev->xsh, xendev->frontend_state_watch); xendev->frontend_state_watch = NULL; } @@ -947,7 +767,7 @@ static void xen_device_frontend_destroy(XenDevice *xendev) void xen_device_set_max_grant_refs(XenDevice *xendev, unsigned int nr_refs, Error **errp) { - if (xengnttab_set_max_grants(xendev->xgth, nr_refs)) { + if (qemu_xen_gnttab_set_max_grants(xendev->xgth, nr_refs)) { error_setg_errno(errp, errno, "xengnttab_set_max_grants failed"); } } @@ -956,9 +776,8 @@ void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs, unsigned int nr_refs, int prot, Error **errp) { - void *map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_refs, - xendev->frontend_id, refs, - prot); + void *map = qemu_xen_gnttab_map_refs(xendev->xgth, nr_refs, + xendev->frontend_id, refs, prot); if (!map) { error_setg_errno(errp, errno, @@ -968,112 +787,20 @@ void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs, return map; } -void xen_device_unmap_grant_refs(XenDevice *xendev, void *map, +void xen_device_unmap_grant_refs(XenDevice *xendev, void *map, uint32_t *refs, unsigned int nr_refs, Error **errp) { - if (xengnttab_unmap(xendev->xgth, map, nr_refs)) { + if (qemu_xen_gnttab_unmap(xendev->xgth, map, refs, nr_refs)) { error_setg_errno(errp, errno, "xengnttab_unmap failed"); } } -static void compat_copy_grant_refs(XenDevice *xendev, bool to_domain, - XenDeviceGrantCopySegment segs[], - unsigned int nr_segs, Error **errp) -{ - uint32_t *refs = g_new(uint32_t, nr_segs); - int prot = to_domain ? PROT_WRITE : PROT_READ; - void *map; - unsigned int i; - - for (i = 0; i < nr_segs; i++) { - XenDeviceGrantCopySegment *seg = &segs[i]; - - refs[i] = to_domain ? seg->dest.foreign.ref : - seg->source.foreign.ref; - } - - map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_segs, - xendev->frontend_id, refs, - prot); - if (!map) { - error_setg_errno(errp, errno, - "xengnttab_map_domain_grant_refs failed"); - goto done; - } - - for (i = 0; i < nr_segs; i++) { - XenDeviceGrantCopySegment *seg = &segs[i]; - void *page = map + (i * XC_PAGE_SIZE); - - if (to_domain) { - memcpy(page + seg->dest.foreign.offset, seg->source.virt, - seg->len); - } else { - memcpy(seg->dest.virt, page + seg->source.foreign.offset, - seg->len); - } - } - - if (xengnttab_unmap(xendev->xgth, map, nr_segs)) { - error_setg_errno(errp, errno, "xengnttab_unmap failed"); - } - -done: - g_free(refs); -} - void xen_device_copy_grant_refs(XenDevice *xendev, bool to_domain, XenDeviceGrantCopySegment segs[], unsigned int nr_segs, Error **errp) { - xengnttab_grant_copy_segment_t *xengnttab_segs; - unsigned int i; - - if (!xendev->feature_grant_copy) { - compat_copy_grant_refs(xendev, to_domain, segs, nr_segs, errp); - return; - } - - xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs); - - for (i = 0; i < nr_segs; i++) { - XenDeviceGrantCopySegment *seg = &segs[i]; - xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; - - if (to_domain) { - xengnttab_seg->flags = GNTCOPY_dest_gref; - xengnttab_seg->dest.foreign.domid = xendev->frontend_id; - xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref; - xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset; - xengnttab_seg->source.virt = seg->source.virt; - } else { - xengnttab_seg->flags = GNTCOPY_source_gref; - xengnttab_seg->source.foreign.domid = xendev->frontend_id; - xengnttab_seg->source.foreign.ref = seg->source.foreign.ref; - xengnttab_seg->source.foreign.offset = - seg->source.foreign.offset; - xengnttab_seg->dest.virt = seg->dest.virt; - } - - xengnttab_seg->len = seg->len; - } - - if (xengnttab_grant_copy(xendev->xgth, nr_segs, xengnttab_segs)) { - error_setg_errno(errp, errno, "xengnttab_grant_copy failed"); - goto done; - } - - for (i = 0; i < nr_segs; i++) { - xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; - - if (xengnttab_seg->status != GNTST_okay) { - error_setg(errp, "xengnttab_grant_copy seg[%u] failed", i); - break; - } - } - -done: - g_free(xengnttab_segs); + qemu_xen_gnttab_grant_copy(xendev->xgth, to_domain, xendev->frontend_id, + (XenGrantCopySegment *)segs, nr_segs, errp); } struct XenEventChannel { @@ -1095,12 +822,12 @@ static bool xen_device_poll(void *opaque) static void xen_device_event(void *opaque) { XenEventChannel *channel = opaque; - unsigned long port = xenevtchn_pending(channel->xeh); + unsigned long port = qemu_xen_evtchn_pending(channel->xeh); if (port == channel->local_port) { xen_device_poll(channel); - xenevtchn_unmask(channel->xeh, port); + qemu_xen_evtchn_unmask(channel->xeh, port); } } @@ -1115,11 +842,11 @@ void xen_device_set_event_channel_context(XenDevice *xendev, } if (channel->ctx) - aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true, + aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true, NULL, NULL, NULL, NULL, NULL); channel->ctx = ctx; - aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true, + aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true, xen_device_event, NULL, xen_device_poll, NULL, channel); } @@ -1131,13 +858,13 @@ XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev, XenEventChannel *channel = g_new0(XenEventChannel, 1); xenevtchn_port_or_error_t local_port; - channel->xeh = xenevtchn_open(NULL, 0); + channel->xeh = qemu_xen_evtchn_open(); if (!channel->xeh) { error_setg_errno(errp, errno, "failed xenevtchn_open"); goto fail; } - local_port = xenevtchn_bind_interdomain(channel->xeh, + local_port = qemu_xen_evtchn_bind_interdomain(channel->xeh, xendev->frontend_id, port); if (local_port < 0) { @@ -1160,7 +887,7 @@ XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev, fail: if (channel->xeh) { - xenevtchn_close(channel->xeh); + qemu_xen_evtchn_close(channel->xeh); } g_free(channel); @@ -1177,7 +904,7 @@ void xen_device_notify_event_channel(XenDevice *xendev, return; } - if (xenevtchn_notify(channel->xeh, channel->local_port) < 0) { + if (qemu_xen_evtchn_notify(channel->xeh, channel->local_port) < 0) { error_setg_errno(errp, errno, "xenevtchn_notify failed"); } } @@ -1193,14 +920,14 @@ void xen_device_unbind_event_channel(XenDevice *xendev, QLIST_REMOVE(channel, list); - aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true, + aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true, NULL, NULL, NULL, NULL, NULL); - if (xenevtchn_unbind(channel->xeh, channel->local_port) < 0) { + if (qemu_xen_evtchn_unbind(channel->xeh, channel->local_port) < 0) { error_setg_errno(errp, errno, "xenevtchn_unbind failed"); } - xenevtchn_close(channel->xeh); + qemu_xen_evtchn_close(channel->xeh); g_free(channel); } @@ -1235,17 +962,12 @@ static void xen_device_unrealize(DeviceState *dev) xen_device_backend_destroy(xendev); if (xendev->xgth) { - xengnttab_close(xendev->xgth); + qemu_xen_gnttab_close(xendev->xgth); xendev->xgth = NULL; } - if (xendev->watch_list) { - watch_list_destroy(xendev->watch_list); - xendev->watch_list = NULL; - } - if (xendev->xsh) { - xs_close(xendev->xsh); + qemu_xen_xs_close(xendev->xsh); xendev->xsh = NULL; } @@ -1290,23 +1012,18 @@ static void xen_device_realize(DeviceState *dev, Error **errp) trace_xen_device_realize(type, xendev->name); - xendev->xsh = xs_open(0); + xendev->xsh = qemu_xen_xs_open(); if (!xendev->xsh) { error_setg_errno(errp, errno, "failed xs_open"); goto unrealize; } - xendev->watch_list = watch_list_create(xendev->xsh); - - xendev->xgth = xengnttab_open(NULL, 0); + xendev->xgth = qemu_xen_gnttab_open(); if (!xendev->xgth) { error_setg_errno(errp, errno, "failed xengnttab_open"); goto unrealize; } - xendev->feature_grant_copy = - (xengnttab_grant_copy(xendev->xgth, 0, NULL) == 0); - xen_device_backend_create(xendev, errp); if (*errp) { goto unrealize; @@ -1317,13 +1034,6 @@ static void xen_device_realize(DeviceState *dev, Error **errp) goto unrealize; } - if (xendev_class->realize) { - xendev_class->realize(xendev, errp); - if (*errp) { - goto unrealize; - } - } - xen_device_backend_printf(xendev, "frontend", "%s", xendev->frontend_path); xen_device_backend_printf(xendev, "frontend-id", "%u", @@ -1342,6 +1052,13 @@ static void xen_device_realize(DeviceState *dev, Error **errp) xen_device_frontend_set_state(xendev, XenbusStateInitialising, true); } + if (xendev_class->realize) { + xendev_class->realize(xendev, errp); + if (*errp) { + goto unrealize; + } + } + xendev->exit.notify = xen_device_exit; qemu_add_exit_notifier(&xendev->exit); return; diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c index afba71f6eb..4ded3cec23 100644 --- a/hw/xen/xen-legacy-backend.c +++ b/hw/xen/xen-legacy-backend.c @@ -39,11 +39,10 @@ BusState *xen_sysbus; /* ------------------------------------------------------------- */ /* public */ -struct xs_handle *xenstore; +struct qemu_xs_handle *xenstore; const char *xen_protocol; /* private */ -static bool xen_feature_grant_copy; static int debug; int xenstore_write_be_str(struct XenLegacyDevice *xendev, const char *node, @@ -113,7 +112,7 @@ void xen_be_set_max_grant_refs(struct XenLegacyDevice *xendev, { assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV); - if (xengnttab_set_max_grants(xendev->gnttabdev, nr_refs)) { + if (qemu_xen_gnttab_set_max_grants(xendev->gnttabdev, nr_refs)) { xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n", strerror(errno)); } @@ -126,8 +125,8 @@ void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs, assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV); - ptr = xengnttab_map_domain_grant_refs(xendev->gnttabdev, nr_refs, - xen_domid, refs, prot); + ptr = qemu_xen_gnttab_map_refs(xendev->gnttabdev, nr_refs, xen_domid, refs, + prot); if (!ptr) { xen_pv_printf(xendev, 0, "xengnttab_map_domain_grant_refs failed: %s\n", @@ -138,123 +137,31 @@ void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs, } void xen_be_unmap_grant_refs(struct XenLegacyDevice *xendev, void *ptr, - unsigned int nr_refs) + uint32_t *refs, unsigned int nr_refs) { assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV); - if (xengnttab_unmap(xendev->gnttabdev, ptr, nr_refs)) { + if (qemu_xen_gnttab_unmap(xendev->gnttabdev, ptr, refs, nr_refs)) { xen_pv_printf(xendev, 0, "xengnttab_unmap failed: %s\n", strerror(errno)); } } -static int compat_copy_grant_refs(struct XenLegacyDevice *xendev, - bool to_domain, - XenGrantCopySegment segs[], - unsigned int nr_segs) -{ - uint32_t *refs = g_new(uint32_t, nr_segs); - int prot = to_domain ? PROT_WRITE : PROT_READ; - void *pages; - unsigned int i; - - for (i = 0; i < nr_segs; i++) { - XenGrantCopySegment *seg = &segs[i]; - - refs[i] = to_domain ? - seg->dest.foreign.ref : seg->source.foreign.ref; - } - - pages = xengnttab_map_domain_grant_refs(xendev->gnttabdev, nr_segs, - xen_domid, refs, prot); - if (!pages) { - xen_pv_printf(xendev, 0, - "xengnttab_map_domain_grant_refs failed: %s\n", - strerror(errno)); - g_free(refs); - return -1; - } - - for (i = 0; i < nr_segs; i++) { - XenGrantCopySegment *seg = &segs[i]; - void *page = pages + (i * XC_PAGE_SIZE); - - if (to_domain) { - memcpy(page + seg->dest.foreign.offset, seg->source.virt, - seg->len); - } else { - memcpy(seg->dest.virt, page + seg->source.foreign.offset, - seg->len); - } - } - - if (xengnttab_unmap(xendev->gnttabdev, pages, nr_segs)) { - xen_pv_printf(xendev, 0, "xengnttab_unmap failed: %s\n", - strerror(errno)); - } - - g_free(refs); - return 0; -} - int xen_be_copy_grant_refs(struct XenLegacyDevice *xendev, bool to_domain, XenGrantCopySegment segs[], unsigned int nr_segs) { - xengnttab_grant_copy_segment_t *xengnttab_segs; - unsigned int i; int rc; assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV); - if (!xen_feature_grant_copy) { - return compat_copy_grant_refs(xendev, to_domain, segs, nr_segs); - } - - xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs); - - for (i = 0; i < nr_segs; i++) { - XenGrantCopySegment *seg = &segs[i]; - xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; - - if (to_domain) { - xengnttab_seg->flags = GNTCOPY_dest_gref; - xengnttab_seg->dest.foreign.domid = xen_domid; - xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref; - xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset; - xengnttab_seg->source.virt = seg->source.virt; - } else { - xengnttab_seg->flags = GNTCOPY_source_gref; - xengnttab_seg->source.foreign.domid = xen_domid; - xengnttab_seg->source.foreign.ref = seg->source.foreign.ref; - xengnttab_seg->source.foreign.offset = - seg->source.foreign.offset; - xengnttab_seg->dest.virt = seg->dest.virt; - } - - xengnttab_seg->len = seg->len; - } - - rc = xengnttab_grant_copy(xendev->gnttabdev, nr_segs, xengnttab_segs); - + rc = qemu_xen_gnttab_grant_copy(xendev->gnttabdev, to_domain, xen_domid, + segs, nr_segs, NULL); if (rc) { - xen_pv_printf(xendev, 0, "xengnttab_copy failed: %s\n", - strerror(errno)); - } - - for (i = 0; i < nr_segs; i++) { - xengnttab_grant_copy_segment_t *xengnttab_seg = - &xengnttab_segs[i]; - - if (xengnttab_seg->status != GNTST_okay) { - xen_pv_printf(xendev, 0, "segment[%u] status: %d\n", i, - xengnttab_seg->status); - rc = -1; - } + xen_pv_printf(xendev, 0, "xengnttab_grant_copy failed: %s\n", + strerror(-rc)); } - - g_free(xengnttab_segs); return rc; } @@ -294,13 +201,13 @@ static struct XenLegacyDevice *xen_be_get_xendev(const char *type, int dom, xendev->debug = debug; xendev->local_port = -1; - xendev->evtchndev = xenevtchn_open(NULL, 0); + xendev->evtchndev = qemu_xen_evtchn_open(); if (xendev->evtchndev == NULL) { xen_pv_printf(NULL, 0, "can't open evtchn device\n"); qdev_unplug(DEVICE(xendev), NULL); return NULL; } - qemu_set_cloexec(xenevtchn_fd(xendev->evtchndev)); + qemu_set_cloexec(qemu_xen_evtchn_fd(xendev->evtchndev)); xen_pv_insert_xendev(xendev); @@ -367,6 +274,25 @@ static void xen_be_frontend_changed(struct XenLegacyDevice *xendev, } } +static void xenstore_update_fe(void *opaque, const char *watch) +{ + struct XenLegacyDevice *xendev = opaque; + const char *node; + unsigned int len; + + len = strlen(xendev->fe); + if (strncmp(xendev->fe, watch, len) != 0) { + return; + } + if (watch[len] != '/') { + return; + } + node = watch + len + 1; + + xen_be_frontend_changed(xendev, node); + xen_be_check_state(xendev); +} + /* ------------------------------------------------------------- */ /* Check for possible state transitions and perform them. */ @@ -380,7 +306,6 @@ static void xen_be_frontend_changed(struct XenLegacyDevice *xendev, */ static int xen_be_try_setup(struct XenLegacyDevice *xendev) { - char token[XEN_BUFSIZE]; int be_state; if (xenstore_read_be_int(xendev, "state", &be_state) == -1) { @@ -401,8 +326,9 @@ static int xen_be_try_setup(struct XenLegacyDevice *xendev) } /* setup frontend watch */ - snprintf(token, sizeof(token), "fe:%p", xendev); - if (!xs_watch(xenstore, xendev->fe, token)) { + xendev->watch = qemu_xen_xs_watch(xenstore, xendev->fe, xenstore_update_fe, + xendev); + if (!xendev->watch) { xen_pv_printf(xendev, 0, "watching frontend path (%s) failed\n", xendev->fe); return -1; @@ -466,7 +392,7 @@ static int xen_be_try_initialise(struct XenLegacyDevice *xendev) } if (xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV) { - xendev->gnttabdev = xengnttab_open(NULL, 0); + xendev->gnttabdev = qemu_xen_gnttab_open(); if (xendev->gnttabdev == NULL) { xen_pv_printf(NULL, 0, "can't open gnttab device\n"); return -1; @@ -524,7 +450,7 @@ static void xen_be_disconnect(struct XenLegacyDevice *xendev, xendev->ops->disconnect(xendev); } if (xendev->gnttabdev) { - xengnttab_close(xendev->gnttabdev); + qemu_xen_gnttab_close(xendev->gnttabdev); xendev->gnttabdev = NULL; } if (xendev->be_state != state) { @@ -591,46 +517,20 @@ void xen_be_check_state(struct XenLegacyDevice *xendev) /* ------------------------------------------------------------- */ -static int xenstore_scan(const char *type, int dom, struct XenDevOps *ops) -{ - struct XenLegacyDevice *xendev; - char path[XEN_BUFSIZE], token[XEN_BUFSIZE]; - char **dev = NULL; - unsigned int cdev, j; - - /* setup watch */ - snprintf(token, sizeof(token), "be:%p:%d:%p", type, dom, ops); - snprintf(path, sizeof(path), "backend/%s/%d", type, dom); - if (!xs_watch(xenstore, path, token)) { - xen_pv_printf(NULL, 0, "xen be: watching backend path (%s) failed\n", - path); - return -1; - } - - /* look for backends */ - dev = xs_directory(xenstore, 0, path, &cdev); - if (!dev) { - return 0; - } - for (j = 0; j < cdev; j++) { - xendev = xen_be_get_xendev(type, dom, atoi(dev[j]), ops); - if (xendev == NULL) { - continue; - } - xen_be_check_state(xendev); - } - free(dev); - return 0; -} +struct xenstore_be { + const char *type; + int dom; + struct XenDevOps *ops; +}; -void xenstore_update_be(char *watch, char *type, int dom, - struct XenDevOps *ops) +static void xenstore_update_be(void *opaque, const char *watch) { + struct xenstore_be *be = opaque; struct XenLegacyDevice *xendev; char path[XEN_BUFSIZE], *bepath; unsigned int len, dev; - len = snprintf(path, sizeof(path), "backend/%s/%d", type, dom); + len = snprintf(path, sizeof(path), "backend/%s/%d", be->type, be->dom); if (strncmp(path, watch, len) != 0) { return; } @@ -644,9 +544,9 @@ void xenstore_update_be(char *watch, char *type, int dom, return; } - xendev = xen_be_get_xendev(type, dom, dev, ops); + xendev = xen_be_get_xendev(be->type, be->dom, dev, be->ops); if (xendev != NULL) { - bepath = xs_read(xenstore, 0, xendev->be, &len); + bepath = qemu_xen_xs_read(xenstore, 0, xendev->be, &len); if (bepath == NULL) { xen_pv_del_xendev(xendev); } else { @@ -657,23 +557,41 @@ void xenstore_update_be(char *watch, char *type, int dom, } } -void xenstore_update_fe(char *watch, struct XenLegacyDevice *xendev) +static int xenstore_scan(const char *type, int dom, struct XenDevOps *ops) { - char *node; - unsigned int len; + struct XenLegacyDevice *xendev; + char path[XEN_BUFSIZE]; + struct xenstore_be *be = g_new0(struct xenstore_be, 1); + char **dev = NULL; + unsigned int cdev, j; - len = strlen(xendev->fe); - if (strncmp(xendev->fe, watch, len) != 0) { - return; - } - if (watch[len] != '/') { - return; + /* setup watch */ + be->type = type; + be->dom = dom; + be->ops = ops; + snprintf(path, sizeof(path), "backend/%s/%d", type, dom); + if (!qemu_xen_xs_watch(xenstore, path, xenstore_update_be, be)) { + xen_pv_printf(NULL, 0, "xen be: watching backend path (%s) failed\n", + path); + return -1; } - node = watch + len + 1; - xen_be_frontend_changed(xendev, node); - xen_be_check_state(xendev); + /* look for backends */ + dev = qemu_xen_xs_directory(xenstore, 0, path, &cdev); + if (!dev) { + return 0; + } + for (j = 0; j < cdev; j++) { + xendev = xen_be_get_xendev(type, dom, atoi(dev[j]), ops); + if (xendev == NULL) { + continue; + } + xen_be_check_state(xendev); + } + free(dev); + return 0; } + /* -------------------------------------------------------------------- */ static void xen_set_dynamic_sysbus(void) @@ -687,29 +605,17 @@ static void xen_set_dynamic_sysbus(void) void xen_be_init(void) { - xengnttab_handle *gnttabdev; - - xenstore = xs_daemon_open(); + xenstore = qemu_xen_xs_open(); if (!xenstore) { xen_pv_printf(NULL, 0, "can't connect to xenstored\n"); exit(1); } - qemu_set_fd_handler(xs_fileno(xenstore), xenstore_update, NULL, NULL); - - if (xen_xc == NULL || xen_fmem == NULL) { + if (xen_evtchn_ops == NULL || xen_gnttab_ops == NULL) { xen_pv_printf(NULL, 0, "Xen operations not set up\n"); exit(1); } - gnttabdev = xengnttab_open(NULL, 0); - if (gnttabdev != NULL) { - if (xengnttab_grant_copy(gnttabdev, 0, NULL) == 0) { - xen_feature_grant_copy = true; - } - xengnttab_close(gnttabdev); - } - xen_sysdev = qdev_new(TYPE_XENSYSDEV); sysbus_realize_and_unref(SYS_BUS_DEVICE(xen_sysdev), &error_fatal); xen_sysbus = qbus_new(TYPE_XENSYSBUS, xen_sysdev, "xen-sysbus"); @@ -751,14 +657,14 @@ int xen_be_bind_evtchn(struct XenLegacyDevice *xendev) if (xendev->local_port != -1) { return 0; } - xendev->local_port = xenevtchn_bind_interdomain + xendev->local_port = qemu_xen_evtchn_bind_interdomain (xendev->evtchndev, xendev->dom, xendev->remote_port); if (xendev->local_port == -1) { xen_pv_printf(xendev, 0, "xenevtchn_bind_interdomain failed\n"); return -1; } xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port); - qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev), + qemu_set_fd_handler(qemu_xen_evtchn_fd(xendev->evtchndev), xen_pv_evtchn_event, NULL, xendev); return 0; } diff --git a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c new file mode 100644 index 0000000000..4b78fbf4bd --- /dev/null +++ b/hw/xen/xen-operations.c @@ -0,0 +1,478 @@ +/* + * QEMU Xen backend support: Operations for true Xen + * + * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Authors: David Woodhouse <dwmw2@infradead.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/uuid.h" +#include "qapi/error.h" + +#include "hw/xen/xen_native.h" +#include "hw/xen/xen_backend_ops.h" + +/* + * If we have new enough libxenctrl then we do not want/need these compat + * interfaces, despite what the user supplied cflags might say. They + * must be undefined before including xenctrl.h + */ +#undef XC_WANT_COMPAT_EVTCHN_API +#undef XC_WANT_COMPAT_GNTTAB_API +#undef XC_WANT_COMPAT_MAP_FOREIGN_API + +#include <xenctrl.h> + +/* + * We don't support Xen prior to 4.2.0. + */ + +/* Xen 4.2 through 4.6 */ +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701 + +typedef xc_evtchn xenevtchn_handle; +typedef evtchn_port_or_error_t xenevtchn_port_or_error_t; + +#define xenevtchn_open(l, f) xc_evtchn_open(l, f); +#define xenevtchn_close(h) xc_evtchn_close(h) +#define xenevtchn_fd(h) xc_evtchn_fd(h) +#define xenevtchn_pending(h) xc_evtchn_pending(h) +#define xenevtchn_notify(h, p) xc_evtchn_notify(h, p) +#define xenevtchn_bind_interdomain(h, d, p) xc_evtchn_bind_interdomain(h, d, p) +#define xenevtchn_unmask(h, p) xc_evtchn_unmask(h, p) +#define xenevtchn_unbind(h, p) xc_evtchn_unbind(h, p) + +typedef xc_gnttab xengnttab_handle; + +#define xengnttab_open(l, f) xc_gnttab_open(l, f) +#define xengnttab_close(h) xc_gnttab_close(h) +#define xengnttab_set_max_grants(h, n) xc_gnttab_set_max_grants(h, n) +#define xengnttab_map_grant_ref(h, d, r, p) xc_gnttab_map_grant_ref(h, d, r, p) +#define xengnttab_unmap(h, a, n) xc_gnttab_munmap(h, a, n) +#define xengnttab_map_grant_refs(h, c, d, r, p) \ + xc_gnttab_map_grant_refs(h, c, d, r, p) +#define xengnttab_map_domain_grant_refs(h, c, d, r, p) \ + xc_gnttab_map_domain_grant_refs(h, c, d, r, p) + +typedef xc_interface xenforeignmemory_handle; + +#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */ + +#include <xenevtchn.h> +#include <xengnttab.h> +#include <xenforeignmemory.h> + +#endif + +/* Xen before 4.8 */ + +static int libxengnttab_fallback_grant_copy(xengnttab_handle *xgt, + bool to_domain, uint32_t domid, + XenGrantCopySegment segs[], + unsigned int nr_segs, Error **errp) +{ + uint32_t *refs = g_new(uint32_t, nr_segs); + int prot = to_domain ? PROT_WRITE : PROT_READ; + void *map; + unsigned int i; + int rc = 0; + + for (i = 0; i < nr_segs; i++) { + XenGrantCopySegment *seg = &segs[i]; + + refs[i] = to_domain ? seg->dest.foreign.ref : + seg->source.foreign.ref; + } + map = xengnttab_map_domain_grant_refs(xgt, nr_segs, domid, refs, prot); + if (!map) { + if (errp) { + error_setg_errno(errp, errno, + "xengnttab_map_domain_grant_refs failed"); + } + rc = -errno; + goto done; + } + + for (i = 0; i < nr_segs; i++) { + XenGrantCopySegment *seg = &segs[i]; + void *page = map + (i * XEN_PAGE_SIZE); + + if (to_domain) { + memcpy(page + seg->dest.foreign.offset, seg->source.virt, + seg->len); + } else { + memcpy(seg->dest.virt, page + seg->source.foreign.offset, + seg->len); + } + } + + if (xengnttab_unmap(xgt, map, nr_segs)) { + if (errp) { + error_setg_errno(errp, errno, "xengnttab_unmap failed"); + } + rc = -errno; + } + +done: + g_free(refs); + return rc; +} + +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40800 + +static int libxengnttab_backend_grant_copy(xengnttab_handle *xgt, + bool to_domain, uint32_t domid, + XenGrantCopySegment *segs, + uint32_t nr_segs, Error **errp) +{ + xengnttab_grant_copy_segment_t *xengnttab_segs; + unsigned int i; + int rc; + + xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs); + + for (i = 0; i < nr_segs; i++) { + XenGrantCopySegment *seg = &segs[i]; + xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; + + if (to_domain) { + xengnttab_seg->flags = GNTCOPY_dest_gref; + xengnttab_seg->dest.foreign.domid = domid; + xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref; + xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset; + xengnttab_seg->source.virt = seg->source.virt; + } else { + xengnttab_seg->flags = GNTCOPY_source_gref; + xengnttab_seg->source.foreign.domid = domid; + xengnttab_seg->source.foreign.ref = seg->source.foreign.ref; + xengnttab_seg->source.foreign.offset = + seg->source.foreign.offset; + xengnttab_seg->dest.virt = seg->dest.virt; + } + + xengnttab_seg->len = seg->len; + } + + if (xengnttab_grant_copy(xgt, nr_segs, xengnttab_segs)) { + if (errp) { + error_setg_errno(errp, errno, "xengnttab_grant_copy failed"); + } + rc = -errno; + goto done; + } + + rc = 0; + for (i = 0; i < nr_segs; i++) { + xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; + + if (xengnttab_seg->status != GNTST_okay) { + if (errp) { + error_setg(errp, "xengnttab_grant_copy seg[%u] failed", i); + } + rc = -EIO; + break; + } + } + +done: + g_free(xengnttab_segs); + return rc; +} +#endif + +static xenevtchn_handle *libxenevtchn_backend_open(void) +{ + return xenevtchn_open(NULL, 0); +} + +struct evtchn_backend_ops libxenevtchn_backend_ops = { + .open = libxenevtchn_backend_open, + .close = xenevtchn_close, + .bind_interdomain = xenevtchn_bind_interdomain, + .unbind = xenevtchn_unbind, + .get_fd = xenevtchn_fd, + .notify = xenevtchn_notify, + .unmask = xenevtchn_unmask, + .pending = xenevtchn_pending, +}; + +static xengnttab_handle *libxengnttab_backend_open(void) +{ + return xengnttab_open(NULL, 0); +} + +static int libxengnttab_backend_unmap(xengnttab_handle *xgt, + void *start_address, uint32_t *refs, + uint32_t count) +{ + return xengnttab_unmap(xgt, start_address, count); +} + + +static struct gnttab_backend_ops libxengnttab_backend_ops = { + .features = XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE, + .open = libxengnttab_backend_open, + .close = xengnttab_close, + .grant_copy = libxengnttab_fallback_grant_copy, + .set_max_grants = xengnttab_set_max_grants, + .map_refs = xengnttab_map_domain_grant_refs, + .unmap = libxengnttab_backend_unmap, +}; + +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701 + +static void *libxenforeignmem_backend_map(uint32_t dom, void *addr, int prot, + size_t pages, xfn_pfn_t *pfns, + int *errs) +{ + if (errs) { + return xc_map_foreign_bulk(xen_xc, dom, prot, pfns, errs, pages); + } else { + return xc_map_foreign_pages(xen_xc, dom, prot, pfns, pages); + } +} + +static int libxenforeignmem_backend_unmap(void *addr, size_t pages) +{ + return munmap(addr, pages * XC_PAGE_SIZE); +} + +#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */ + +static void *libxenforeignmem_backend_map(uint32_t dom, void *addr, int prot, + size_t pages, xen_pfn_t *pfns, + int *errs) +{ + return xenforeignmemory_map2(xen_fmem, dom, addr, prot, 0, pages, pfns, + errs); +} + +static int libxenforeignmem_backend_unmap(void *addr, size_t pages) +{ + return xenforeignmemory_unmap(xen_fmem, addr, pages); +} + +#endif + +struct foreignmem_backend_ops libxenforeignmem_backend_ops = { + .map = libxenforeignmem_backend_map, + .unmap = libxenforeignmem_backend_unmap, +}; + +struct qemu_xs_handle { + struct xs_handle *xsh; + NotifierList notifiers; +}; + +static void watch_event(void *opaque) +{ + struct qemu_xs_handle *h = opaque; + + for (;;) { + char **v = xs_check_watch(h->xsh); + + if (!v) { + break; + } + + notifier_list_notify(&h->notifiers, v); + free(v); + } +} + +static struct qemu_xs_handle *libxenstore_open(void) +{ + struct xs_handle *xsh = xs_open(0); + struct qemu_xs_handle *h = g_new0(struct qemu_xs_handle, 1); + + if (!xsh) { + return NULL; + } + + h = g_new0(struct qemu_xs_handle, 1); + h->xsh = xsh; + + notifier_list_init(&h->notifiers); + qemu_set_fd_handler(xs_fileno(h->xsh), watch_event, NULL, h); + + return h; +} + +static void libxenstore_close(struct qemu_xs_handle *h) +{ + g_assert(notifier_list_empty(&h->notifiers)); + qemu_set_fd_handler(xs_fileno(h->xsh), NULL, NULL, NULL); + xs_close(h->xsh); + g_free(h); +} + +static char *libxenstore_get_domain_path(struct qemu_xs_handle *h, + unsigned int domid) +{ + return xs_get_domain_path(h->xsh, domid); +} + +static char **libxenstore_directory(struct qemu_xs_handle *h, + xs_transaction_t t, const char *path, + unsigned int *num) +{ + return xs_directory(h->xsh, t, path, num); +} + +static void *libxenstore_read(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, unsigned int *len) +{ + return xs_read(h->xsh, t, path, len); +} + +static bool libxenstore_write(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, const void *data, + unsigned int len) +{ + return xs_write(h->xsh, t, path, data, len); +} + +static bool libxenstore_create(struct qemu_xs_handle *h, xs_transaction_t t, + unsigned int owner, unsigned int domid, + unsigned int perms, const char *path) +{ + struct xs_permissions perms_list[] = { + { + .id = owner, + .perms = XS_PERM_NONE, + }, + { + .id = domid, + .perms = perms, + }, + }; + + if (!xs_mkdir(h->xsh, t, path)) { + return false; + } + + return xs_set_permissions(h->xsh, t, path, perms_list, + ARRAY_SIZE(perms_list)); +} + +static bool libxenstore_destroy(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path) +{ + return xs_rm(h->xsh, t, path); +} + +struct qemu_xs_watch { + char *path; + char *token; + xs_watch_fn fn; + void *opaque; + Notifier notifier; +}; + +static void watch_notify(Notifier *n, void *data) +{ + struct qemu_xs_watch *w = container_of(n, struct qemu_xs_watch, notifier); + const char **v = data; + + if (!strcmp(w->token, v[XS_WATCH_TOKEN])) { + w->fn(w->opaque, v[XS_WATCH_PATH]); + } +} + +static struct qemu_xs_watch *new_watch(const char *path, xs_watch_fn fn, + void *opaque) +{ + struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1); + QemuUUID uuid; + + qemu_uuid_generate(&uuid); + + w->token = qemu_uuid_unparse_strdup(&uuid); + w->path = g_strdup(path); + w->fn = fn; + w->opaque = opaque; + w->notifier.notify = watch_notify; + + return w; +} + +static void free_watch(struct qemu_xs_watch *w) +{ + g_free(w->token); + g_free(w->path); + + g_free(w); +} + +static struct qemu_xs_watch *libxenstore_watch(struct qemu_xs_handle *h, + const char *path, xs_watch_fn fn, + void *opaque) +{ + struct qemu_xs_watch *w = new_watch(path, fn, opaque); + + notifier_list_add(&h->notifiers, &w->notifier); + + if (!xs_watch(h->xsh, path, w->token)) { + notifier_remove(&w->notifier); + free_watch(w); + return NULL; + } + + return w; +} + +static void libxenstore_unwatch(struct qemu_xs_handle *h, + struct qemu_xs_watch *w) +{ + xs_unwatch(h->xsh, w->path, w->token); + notifier_remove(&w->notifier); + free_watch(w); +} + +static xs_transaction_t libxenstore_transaction_start(struct qemu_xs_handle *h) +{ + return xs_transaction_start(h->xsh); +} + +static bool libxenstore_transaction_end(struct qemu_xs_handle *h, + xs_transaction_t t, bool abort) +{ + return xs_transaction_end(h->xsh, t, abort); +} + +struct xenstore_backend_ops libxenstore_backend_ops = { + .open = libxenstore_open, + .close = libxenstore_close, + .get_domain_path = libxenstore_get_domain_path, + .directory = libxenstore_directory, + .read = libxenstore_read, + .write = libxenstore_write, + .create = libxenstore_create, + .destroy = libxenstore_destroy, + .watch = libxenstore_watch, + .unwatch = libxenstore_unwatch, + .transaction_start = libxenstore_transaction_start, + .transaction_end = libxenstore_transaction_end, +}; + +void setup_xen_backend_ops(void) +{ +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40800 + xengnttab_handle *xgt = xengnttab_open(NULL, 0); + + if (xgt) { + if (xengnttab_grant_copy(xgt, 0, NULL) == 0) { + libxengnttab_backend_ops.grant_copy = libxengnttab_backend_grant_copy; + } + xengnttab_close(xgt); + } +#endif + xen_evtchn_ops = &libxenevtchn_backend_ops; + xen_gnttab_ops = &libxengnttab_backend_ops; + xen_foreignmem_ops = &libxenforeignmem_backend_ops; + xen_xenstore_ops = &libxenstore_backend_ops; +} diff --git a/hw/xen/xen_devconfig.c b/hw/xen/xen_devconfig.c index 46ee4a7f02..9b7304e544 100644 --- a/hw/xen/xen_devconfig.c +++ b/hw/xen/xen_devconfig.c @@ -11,11 +11,11 @@ static int xen_config_dev_dirs(const char *ftype, const char *btype, int vdev, { char *dom; - dom = xs_get_domain_path(xenstore, xen_domid); + dom = qemu_xen_xs_get_domain_path(xenstore, xen_domid); snprintf(fe, len, "%s/device/%s/%d", dom, ftype, vdev); free(dom); - dom = xs_get_domain_path(xenstore, 0); + dom = qemu_xen_xs_get_domain_path(xenstore, 0); snprintf(be, len, "%s/backend/%s/%d/%d", dom, btype, xen_domid, vdev); free(dom); diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index 85c93cffcf..2d33d178ad 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -60,9 +60,9 @@ #include "hw/pci/pci_bus.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" +#include "xen_pt.h" #include "hw/xen/xen.h" #include "hw/xen/xen-legacy-backend.h" -#include "xen_pt.h" #include "qemu/range.h" static bool has_igd_gfx_passthru; diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h index e184699740..b20744f7c7 100644 --- a/hw/xen/xen_pt.h +++ b/hw/xen/xen_pt.h @@ -1,7 +1,7 @@ #ifndef XEN_PT_H #define XEN_PT_H -#include "hw/xen/xen_common.h" +#include "hw/xen/xen_native.h" #include "xen-host-pci-device.h" #include "qom/object.h" diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index 8b9b554352..2b8680b112 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -15,8 +15,8 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/timer.h" -#include "hw/xen/xen-legacy-backend.h" #include "xen_pt.h" +#include "hw/xen/xen-legacy-backend.h" #define XEN_PT_MERGE_VALUE(value, data, val_mask) \ (((value) & (val_mask)) | ((data) & ~(val_mask))) diff --git a/hw/xen/xen_pt_graphics.c b/hw/xen/xen_pt_graphics.c index f303f67c9c..0aed3bb6fd 100644 --- a/hw/xen/xen_pt_graphics.c +++ b/hw/xen/xen_pt_graphics.c @@ -5,7 +5,6 @@ #include "qapi/error.h" #include "xen_pt.h" #include "xen-host-pci-device.h" -#include "hw/xen/xen-legacy-backend.h" static unsigned long igd_guest_opregion; static unsigned long igd_host_opregion; diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c index b71563f98a..09cca4eecb 100644 --- a/hw/xen/xen_pt_msi.c +++ b/hw/xen/xen_pt_msi.c @@ -11,9 +11,9 @@ #include "qemu/osdep.h" -#include "hw/xen/xen-legacy-backend.h" -#include "xen_pt.h" #include "hw/i386/apic-msidef.h" +#include "xen_pt.h" +#include "hw/xen/xen-legacy-backend.h" #define XEN_PT_AUTO_ASSIGN -1 diff --git a/hw/xen/xen_pvdev.c b/hw/xen/xen_pvdev.c index 1a5177b354..be1504b82c 100644 --- a/hw/xen/xen_pvdev.c +++ b/hw/xen/xen_pvdev.c @@ -54,31 +54,17 @@ void xen_config_cleanup(void) struct xs_dirs *d; QTAILQ_FOREACH(d, &xs_cleanup, list) { - xs_rm(xenstore, 0, d->xs_dir); + qemu_xen_xs_destroy(xenstore, 0, d->xs_dir); } } int xenstore_mkdir(char *path, int p) { - struct xs_permissions perms[2] = { - { - .id = 0, /* set owner: dom0 */ - }, { - .id = xen_domid, - .perms = p, - } - }; - - if (!xs_mkdir(xenstore, 0, path)) { + if (!qemu_xen_xs_create(xenstore, 0, 0, xen_domid, p, path)) { xen_pv_printf(NULL, 0, "xs_mkdir %s: failed\n", path); return -1; } xenstore_cleanup_dir(g_strdup(path)); - - if (!xs_set_permissions(xenstore, 0, path, perms, 2)) { - xen_pv_printf(NULL, 0, "xs_set_permissions %s: failed\n", path); - return -1; - } return 0; } @@ -87,7 +73,7 @@ int xenstore_write_str(const char *base, const char *node, const char *val) char abspath[XEN_BUFSIZE]; snprintf(abspath, sizeof(abspath), "%s/%s", base, node); - if (!xs_write(xenstore, 0, abspath, val, strlen(val))) { + if (!qemu_xen_xs_write(xenstore, 0, abspath, val, strlen(val))) { return -1; } return 0; @@ -100,7 +86,7 @@ char *xenstore_read_str(const char *base, const char *node) char *str, *ret = NULL; snprintf(abspath, sizeof(abspath), "%s/%s", base, node); - str = xs_read(xenstore, 0, abspath, &len); + str = qemu_xen_xs_read(xenstore, 0, abspath, &len); if (str != NULL) { /* move to qemu-allocated memory to make sure * callers can savely g_free() stuff. */ @@ -152,29 +138,6 @@ int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval) return rc; } -void xenstore_update(void *unused) -{ - char **vec = NULL; - intptr_t type, ops, ptr; - unsigned int dom, count; - - vec = xs_read_watch(xenstore, &count); - if (vec == NULL) { - goto cleanup; - } - - if (sscanf(vec[XS_WATCH_TOKEN], "be:%" PRIxPTR ":%d:%" PRIxPTR, - &type, &dom, &ops) == 3) { - xenstore_update_be(vec[XS_WATCH_PATH], (void *)type, dom, (void*)ops); - } - if (sscanf(vec[XS_WATCH_TOKEN], "fe:%" PRIxPTR, &ptr) == 1) { - xenstore_update_fe(vec[XS_WATCH_PATH], (void *)ptr); - } - -cleanup: - free(vec); -} - const char *xenbus_strstate(enum xenbus_state state) { static const char *const name[] = { @@ -238,14 +201,14 @@ void xen_pv_evtchn_event(void *opaque) struct XenLegacyDevice *xendev = opaque; evtchn_port_t port; - port = xenevtchn_pending(xendev->evtchndev); + port = qemu_xen_evtchn_pending(xendev->evtchndev); if (port != xendev->local_port) { xen_pv_printf(xendev, 0, "xenevtchn_pending returned %d (expected %d)\n", port, xendev->local_port); return; } - xenevtchn_unmask(xendev->evtchndev, port); + qemu_xen_evtchn_unmask(xendev->evtchndev, port); if (xendev->ops->event) { xendev->ops->event(xendev); @@ -257,15 +220,15 @@ void xen_pv_unbind_evtchn(struct XenLegacyDevice *xendev) if (xendev->local_port == -1) { return; } - qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev), NULL, NULL, NULL); - xenevtchn_unbind(xendev->evtchndev, xendev->local_port); + qemu_set_fd_handler(qemu_xen_evtchn_fd(xendev->evtchndev), NULL, NULL, NULL); + qemu_xen_evtchn_unbind(xendev->evtchndev, xendev->local_port); xen_pv_printf(xendev, 2, "unbind evtchn port %d\n", xendev->local_port); xendev->local_port = -1; } int xen_pv_send_notify(struct XenLegacyDevice *xendev) { - return xenevtchn_notify(xendev->evtchndev, xendev->local_port); + return qemu_xen_evtchn_notify(xendev->evtchndev, xendev->local_port); } /* ------------------------------------------------------------- */ @@ -299,17 +262,15 @@ void xen_pv_del_xendev(struct XenLegacyDevice *xendev) } if (xendev->fe) { - char token[XEN_BUFSIZE]; - snprintf(token, sizeof(token), "fe:%p", xendev); - xs_unwatch(xenstore, xendev->fe, token); + qemu_xen_xs_unwatch(xenstore, xendev->watch); g_free(xendev->fe); } if (xendev->evtchndev != NULL) { - xenevtchn_close(xendev->evtchndev); + qemu_xen_evtchn_close(xendev->evtchndev); } if (xendev->gnttabdev != NULL) { - xengnttab_close(xendev->gnttabdev); + qemu_xen_gnttab_close(xendev->gnttabdev); } QTAILQ_REMOVE(&xendevs, xendev, next); diff --git a/include/hw/xen/xen-bus-helper.h b/include/hw/xen/xen-bus-helper.h index 8782f30550..d8dcc2f010 100644 --- a/include/hw/xen/xen-bus-helper.h +++ b/include/hw/xen/xen-bus-helper.h @@ -8,40 +8,40 @@ #ifndef HW_XEN_BUS_HELPER_H #define HW_XEN_BUS_HELPER_H -#include "hw/xen/xen_common.h" +#include "hw/xen/xen_backend_ops.h" const char *xs_strstate(enum xenbus_state state); -void xs_node_create(struct xs_handle *xsh, xs_transaction_t tid, - const char *node, struct xs_permissions perms[], - unsigned int nr_perms, Error **errp); -void xs_node_destroy(struct xs_handle *xsh, xs_transaction_t tid, +void xs_node_create(struct qemu_xs_handle *h, xs_transaction_t tid, + const char *node, unsigned int owner, unsigned int domid, + unsigned int perms, Error **errp); +void xs_node_destroy(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, Error **errp); /* Write to node/key unless node is empty, in which case write to key */ -void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid, +void xs_node_vprintf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, const char *key, Error **errp, const char *fmt, va_list ap) G_GNUC_PRINTF(6, 0); -void xs_node_printf(struct xs_handle *xsh, xs_transaction_t tid, +void xs_node_printf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, const char *key, Error **errp, const char *fmt, ...) G_GNUC_PRINTF(6, 7); /* Read from node/key unless node is empty, in which case read from key */ -int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid, +int xs_node_vscanf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, const char *key, Error **errp, const char *fmt, va_list ap) G_GNUC_SCANF(6, 0); -int xs_node_scanf(struct xs_handle *xsh, xs_transaction_t tid, +int xs_node_scanf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, const char *key, Error **errp, const char *fmt, ...) G_GNUC_SCANF(6, 7); /* Watch node/key unless node is empty, in which case watch key */ -void xs_node_watch(struct xs_handle *xsh, const char *node, const char *key, - char *token, Error **errp); -void xs_node_unwatch(struct xs_handle *xsh, const char *node, const char *key, - const char *token, Error **errp); +struct qemu_xs_watch *xs_node_watch(struct qemu_xs_handle *h, const char *node, + const char *key, xs_watch_fn fn, + void *opaque, Error **errp); +void xs_node_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w); #endif /* HW_XEN_BUS_HELPER_H */ diff --git a/include/hw/xen/xen-bus.h b/include/hw/xen/xen-bus.h index 4d966a2dbb..f435898164 100644 --- a/include/hw/xen/xen-bus.h +++ b/include/hw/xen/xen-bus.h @@ -8,31 +8,25 @@ #ifndef HW_XEN_BUS_H #define HW_XEN_BUS_H -#include "hw/xen/xen_common.h" +#include "hw/xen/xen_backend_ops.h" #include "hw/sysbus.h" #include "qemu/notify.h" #include "qom/object.h" -typedef void (*XenWatchHandler)(void *opaque); - -typedef struct XenWatchList XenWatchList; -typedef struct XenWatch XenWatch; typedef struct XenEventChannel XenEventChannel; struct XenDevice { DeviceState qdev; domid_t frontend_id; char *name; - struct xs_handle *xsh; - XenWatchList *watch_list; + struct qemu_xs_handle *xsh; char *backend_path, *frontend_path; enum xenbus_state backend_state, frontend_state; Notifier exit; - XenWatch *backend_state_watch, *frontend_state_watch; + struct qemu_xs_watch *backend_state_watch, *frontend_state_watch; bool backend_online; - XenWatch *backend_online_watch; + struct qemu_xs_watch *backend_online_watch; xengnttab_handle *xgth; - bool feature_grant_copy; bool inactive; QLIST_HEAD(, XenEventChannel) event_channels; QLIST_ENTRY(XenDevice) list; @@ -64,10 +58,9 @@ OBJECT_DECLARE_TYPE(XenDevice, XenDeviceClass, XEN_DEVICE) struct XenBus { BusState qbus; domid_t backend_id; - struct xs_handle *xsh; - XenWatchList *watch_list; + struct qemu_xs_handle *xsh; unsigned int backend_types; - XenWatch **backend_watch; + struct qemu_xs_watch **backend_watch; QLIST_HEAD(, XenDevice) inactive_devices; }; @@ -102,7 +95,7 @@ void xen_device_set_max_grant_refs(XenDevice *xendev, unsigned int nr_refs, void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs, unsigned int nr_refs, int prot, Error **errp); -void xen_device_unmap_grant_refs(XenDevice *xendev, void *map, +void xen_device_unmap_grant_refs(XenDevice *xendev, void *map, uint32_t *refs, unsigned int nr_refs, Error **errp); typedef struct XenDeviceGrantCopySegment { diff --git a/include/hw/xen/xen-legacy-backend.h b/include/hw/xen/xen-legacy-backend.h index e31cd3a068..6c307c5f2c 100644 --- a/include/hw/xen/xen-legacy-backend.h +++ b/include/hw/xen/xen-legacy-backend.h @@ -1,7 +1,7 @@ #ifndef HW_XEN_LEGACY_BACKEND_H #define HW_XEN_LEGACY_BACKEND_H -#include "hw/xen/xen_common.h" +#include "hw/xen/xen_backend_ops.h" #include "hw/xen/xen_pvdev.h" #include "net/net.h" #include "qom/object.h" @@ -15,7 +15,7 @@ DECLARE_INSTANCE_CHECKER(XenLegacyDevice, XENBACKEND, TYPE_XENBACKEND) /* variables */ -extern struct xs_handle *xenstore; +extern struct qemu_xs_handle *xenstore; extern const char *xen_protocol; extern DeviceState *xen_sysdev; extern BusState *xen_sysbus; @@ -30,9 +30,6 @@ int xenstore_write_be_int64(struct XenLegacyDevice *xendev, const char *node, char *xenstore_read_be_str(struct XenLegacyDevice *xendev, const char *node); int xenstore_read_be_int(struct XenLegacyDevice *xendev, const char *node, int *ival); -void xenstore_update_fe(char *watch, struct XenLegacyDevice *xendev); -void xenstore_update_be(char *watch, char *type, int dom, - struct XenDevOps *ops); char *xenstore_read_fe_str(struct XenLegacyDevice *xendev, const char *node); int xenstore_read_fe_int(struct XenLegacyDevice *xendev, const char *node, int *ival); @@ -51,18 +48,7 @@ void xen_be_set_max_grant_refs(struct XenLegacyDevice *xendev, void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs, unsigned int nr_refs, int prot); void xen_be_unmap_grant_refs(struct XenLegacyDevice *xendev, void *ptr, - unsigned int nr_refs); - -typedef struct XenGrantCopySegment { - union { - void *virt; - struct { - uint32_t ref; - off_t offset; - } foreign; - } source, dest; - size_t len; -} XenGrantCopySegment; + uint32_t *refs, unsigned int nr_refs); int xen_be_copy_grant_refs(struct XenLegacyDevice *xendev, bool to_domain, XenGrantCopySegment segs[], @@ -75,9 +61,9 @@ static inline void *xen_be_map_grant_ref(struct XenLegacyDevice *xendev, } static inline void xen_be_unmap_grant_ref(struct XenLegacyDevice *xendev, - void *ptr) + void *ptr, uint32_t ref) { - return xen_be_unmap_grant_refs(xendev, ptr, 1); + return xen_be_unmap_grant_refs(xendev, ptr, &ref, 1); } /* actual backend drivers */ diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h index 03983939f9..2bd8ec742d 100644 --- a/include/hw/xen/xen.h +++ b/include/hw/xen/xen.h @@ -8,15 +8,21 @@ #define QEMU_HW_XEN_H /* - * As a temporary measure while the headers are being untangled, define - * __XEN_TOOLS__ here before any Xen headers are included. Otherwise, if - * the Xen toolstack library headers are later included, they will find - * some of the "internal" definitions missing and the build will fail. In - * later commits, we'll end up with a rule that the native libraries have - * to be included first, which will ensure that the libraries get the - * version of Xen libraries that they expect. + * C files using Xen toolstack libraries will have included those headers + * already via xen_native.h, and having __XEM_TOOLS__ defined will have + * automatically set __XEN_INTERFACE_VERSION__ to the latest supported + * by the *system* Xen headers which were transitively included. + * + * C files which are part of the internal emulation, and which did not + * include xen_native.h, may need this defined so that the Xen headers + * imported to include/hw/xen/interface/ will expose the appropriate API + * version. + * + * This is why there's a rule that xen_native.h must be included first. */ -#define __XEN_TOOLS__ 1 +#ifndef __XEN_INTERFACE_VERSION__ +#define __XEN_INTERFACE_VERSION__ 0x00040e00 +#endif #include "exec/cpu-common.h" @@ -39,8 +45,6 @@ int xen_is_pirq_msi(uint32_t msi_data); qemu_irq *xen_interrupt_controller_init(void); -void xenstore_store_pv_console_info(int i, Chardev *chr); - void xen_register_framebuffer(struct MemoryRegion *mr); #endif /* QEMU_HW_XEN_H */ diff --git a/include/hw/xen/xen_backend_ops.h b/include/hw/xen/xen_backend_ops.h new file mode 100644 index 0000000000..90cca85f52 --- /dev/null +++ b/include/hw/xen/xen_backend_ops.h @@ -0,0 +1,408 @@ +/* + * QEMU Xen backend support + * + * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Authors: David Woodhouse <dwmw2@infradead.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_XEN_BACKEND_OPS_H +#define QEMU_XEN_BACKEND_OPS_H + +#include "hw/xen/xen.h" +#include "hw/xen/interface/xen.h" +#include "hw/xen/interface/io/xenbus.h" + +/* + * For the time being, these operations map fairly closely to the API of + * the actual Xen libraries, e.g. libxenevtchn. As we complete the migration + * from XenLegacyDevice back ends to the new XenDevice model, they may + * evolve to slightly higher-level APIs. + * + * The internal emulations do not emulate the Xen APIs entirely faithfully; + * only enough to be used by the Xen backend devices. For example, only one + * event channel can be bound to each handle, since that's sufficient for + * the device support (only the true Xen HVM backend uses more). And the + * behaviour of unmask() and pending() is different too because the device + * backends don't care. + */ + +typedef struct xenevtchn_handle xenevtchn_handle; +typedef int xenevtchn_port_or_error_t; +typedef uint32_t evtchn_port_t; +typedef uint16_t domid_t; +typedef uint32_t grant_ref_t; + +#define XEN_PAGE_SHIFT 12 +#define XEN_PAGE_SIZE (1UL << XEN_PAGE_SHIFT) +#define XEN_PAGE_MASK (~(XEN_PAGE_SIZE - 1)) + +#ifndef xen_rmb +#define xen_rmb() smp_rmb() +#endif +#ifndef xen_wmb +#define xen_wmb() smp_wmb() +#endif +#ifndef xen_mb +#define xen_mb() smp_mb() +#endif + +struct evtchn_backend_ops { + xenevtchn_handle *(*open)(void); + int (*bind_interdomain)(xenevtchn_handle *xc, uint32_t domid, + evtchn_port_t guest_port); + int (*unbind)(xenevtchn_handle *xc, evtchn_port_t port); + int (*close)(struct xenevtchn_handle *xc); + int (*get_fd)(struct xenevtchn_handle *xc); + int (*notify)(struct xenevtchn_handle *xc, evtchn_port_t port); + int (*unmask)(struct xenevtchn_handle *xc, evtchn_port_t port); + int (*pending)(struct xenevtchn_handle *xc); +}; + +extern struct evtchn_backend_ops *xen_evtchn_ops; + +static inline xenevtchn_handle *qemu_xen_evtchn_open(void) +{ + if (!xen_evtchn_ops) { + return NULL; + } + return xen_evtchn_ops->open(); +} + +static inline int qemu_xen_evtchn_bind_interdomain(xenevtchn_handle *xc, + uint32_t domid, + evtchn_port_t guest_port) +{ + if (!xen_evtchn_ops) { + return -ENOSYS; + } + return xen_evtchn_ops->bind_interdomain(xc, domid, guest_port); +} + +static inline int qemu_xen_evtchn_unbind(xenevtchn_handle *xc, + evtchn_port_t port) +{ + if (!xen_evtchn_ops) { + return -ENOSYS; + } + return xen_evtchn_ops->unbind(xc, port); +} + +static inline int qemu_xen_evtchn_close(xenevtchn_handle *xc) +{ + if (!xen_evtchn_ops) { + return -ENOSYS; + } + return xen_evtchn_ops->close(xc); +} + +static inline int qemu_xen_evtchn_fd(xenevtchn_handle *xc) +{ + if (!xen_evtchn_ops) { + return -ENOSYS; + } + return xen_evtchn_ops->get_fd(xc); +} + +static inline int qemu_xen_evtchn_notify(xenevtchn_handle *xc, + evtchn_port_t port) +{ + if (!xen_evtchn_ops) { + return -ENOSYS; + } + return xen_evtchn_ops->notify(xc, port); +} + +static inline int qemu_xen_evtchn_unmask(xenevtchn_handle *xc, + evtchn_port_t port) +{ + if (!xen_evtchn_ops) { + return -ENOSYS; + } + return xen_evtchn_ops->unmask(xc, port); +} + +static inline int qemu_xen_evtchn_pending(xenevtchn_handle *xc) +{ + if (!xen_evtchn_ops) { + return -ENOSYS; + } + return xen_evtchn_ops->pending(xc); +} + +typedef struct xengntdev_handle xengnttab_handle; + +typedef struct XenGrantCopySegment { + union { + void *virt; + struct { + uint32_t ref; + off_t offset; + } foreign; + } source, dest; + size_t len; +} XenGrantCopySegment; + +#define XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE (1U << 0) + +struct gnttab_backend_ops { + uint32_t features; + xengnttab_handle *(*open)(void); + int (*close)(xengnttab_handle *xgt); + int (*grant_copy)(xengnttab_handle *xgt, bool to_domain, uint32_t domid, + XenGrantCopySegment *segs, uint32_t nr_segs, + Error **errp); + int (*set_max_grants)(xengnttab_handle *xgt, uint32_t nr_grants); + void *(*map_refs)(xengnttab_handle *xgt, uint32_t count, uint32_t domid, + uint32_t *refs, int prot); + int (*unmap)(xengnttab_handle *xgt, void *start_address, uint32_t *refs, + uint32_t count); +}; + +extern struct gnttab_backend_ops *xen_gnttab_ops; + +static inline bool qemu_xen_gnttab_can_map_multi(void) +{ + return xen_gnttab_ops && + !!(xen_gnttab_ops->features & XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE); +} + +static inline xengnttab_handle *qemu_xen_gnttab_open(void) +{ + if (!xen_gnttab_ops) { + return NULL; + } + return xen_gnttab_ops->open(); +} + +static inline int qemu_xen_gnttab_close(xengnttab_handle *xgt) +{ + if (!xen_gnttab_ops) { + return -ENOSYS; + } + return xen_gnttab_ops->close(xgt); +} + +static inline int qemu_xen_gnttab_grant_copy(xengnttab_handle *xgt, + bool to_domain, uint32_t domid, + XenGrantCopySegment *segs, + uint32_t nr_segs, Error **errp) +{ + if (!xen_gnttab_ops) { + return -ENOSYS; + } + + return xen_gnttab_ops->grant_copy(xgt, to_domain, domid, segs, nr_segs, + errp); +} + +static inline int qemu_xen_gnttab_set_max_grants(xengnttab_handle *xgt, + uint32_t nr_grants) +{ + if (!xen_gnttab_ops) { + return -ENOSYS; + } + return xen_gnttab_ops->set_max_grants(xgt, nr_grants); +} + +static inline void *qemu_xen_gnttab_map_refs(xengnttab_handle *xgt, + uint32_t count, uint32_t domid, + uint32_t *refs, int prot) +{ + if (!xen_gnttab_ops) { + return NULL; + } + return xen_gnttab_ops->map_refs(xgt, count, domid, refs, prot); +} + +static inline int qemu_xen_gnttab_unmap(xengnttab_handle *xgt, + void *start_address, uint32_t *refs, + uint32_t count) +{ + if (!xen_gnttab_ops) { + return -ENOSYS; + } + return xen_gnttab_ops->unmap(xgt, start_address, refs, count); +} + +struct foreignmem_backend_ops { + void *(*map)(uint32_t dom, void *addr, int prot, size_t pages, + xen_pfn_t *pfns, int *errs); + int (*unmap)(void *addr, size_t pages); +}; + +extern struct foreignmem_backend_ops *xen_foreignmem_ops; + +static inline void *qemu_xen_foreignmem_map(uint32_t dom, void *addr, int prot, + size_t pages, xen_pfn_t *pfns, + int *errs) +{ + if (!xen_foreignmem_ops) { + return NULL; + } + return xen_foreignmem_ops->map(dom, addr, prot, pages, pfns, errs); +} + +static inline int qemu_xen_foreignmem_unmap(void *addr, size_t pages) +{ + if (!xen_foreignmem_ops) { + return -ENOSYS; + } + return xen_foreignmem_ops->unmap(addr, pages); +} + +typedef void (*xs_watch_fn)(void *opaque, const char *path); + +struct qemu_xs_handle; +struct qemu_xs_watch; +typedef uint32_t xs_transaction_t; + +#define XBT_NULL 0 + +#define XS_PERM_NONE 0x00 +#define XS_PERM_READ 0x01 +#define XS_PERM_WRITE 0x02 + +struct xenstore_backend_ops { + struct qemu_xs_handle *(*open)(void); + void (*close)(struct qemu_xs_handle *h); + char *(*get_domain_path)(struct qemu_xs_handle *h, unsigned int domid); + char **(*directory)(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, unsigned int *num); + void *(*read)(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, unsigned int *len); + bool (*write)(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, const void *data, unsigned int len); + bool (*create)(struct qemu_xs_handle *h, xs_transaction_t t, + unsigned int owner, unsigned int domid, + unsigned int perms, const char *path); + bool (*destroy)(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path); + struct qemu_xs_watch *(*watch)(struct qemu_xs_handle *h, const char *path, + xs_watch_fn fn, void *opaque); + void (*unwatch)(struct qemu_xs_handle *h, struct qemu_xs_watch *w); + xs_transaction_t (*transaction_start)(struct qemu_xs_handle *h); + bool (*transaction_end)(struct qemu_xs_handle *h, xs_transaction_t t, + bool abort); +}; + +extern struct xenstore_backend_ops *xen_xenstore_ops; + +static inline struct qemu_xs_handle *qemu_xen_xs_open(void) +{ + if (!xen_xenstore_ops) { + return NULL; + } + return xen_xenstore_ops->open(); +} + +static inline void qemu_xen_xs_close(struct qemu_xs_handle *h) +{ + if (!xen_xenstore_ops) { + return; + } + xen_xenstore_ops->close(h); +} + +static inline char *qemu_xen_xs_get_domain_path(struct qemu_xs_handle *h, + unsigned int domid) +{ + if (!xen_xenstore_ops) { + return NULL; + } + return xen_xenstore_ops->get_domain_path(h, domid); +} + +static inline char **qemu_xen_xs_directory(struct qemu_xs_handle *h, + xs_transaction_t t, const char *path, + unsigned int *num) +{ + if (!xen_xenstore_ops) { + return NULL; + } + return xen_xenstore_ops->directory(h, t, path, num); +} + +static inline void *qemu_xen_xs_read(struct qemu_xs_handle *h, + xs_transaction_t t, const char *path, + unsigned int *len) +{ + if (!xen_xenstore_ops) { + return NULL; + } + return xen_xenstore_ops->read(h, t, path, len); +} + +static inline bool qemu_xen_xs_write(struct qemu_xs_handle *h, + xs_transaction_t t, const char *path, + const void *data, unsigned int len) +{ + if (!xen_xenstore_ops) { + return false; + } + return xen_xenstore_ops->write(h, t, path, data, len); +} + +static inline bool qemu_xen_xs_create(struct qemu_xs_handle *h, + xs_transaction_t t, unsigned int owner, + unsigned int domid, unsigned int perms, + const char *path) +{ + if (!xen_xenstore_ops) { + return false; + } + return xen_xenstore_ops->create(h, t, owner, domid, perms, path); +} + +static inline bool qemu_xen_xs_destroy(struct qemu_xs_handle *h, + xs_transaction_t t, const char *path) +{ + if (!xen_xenstore_ops) { + return false; + } + return xen_xenstore_ops->destroy(h, t, path); +} + +static inline struct qemu_xs_watch *qemu_xen_xs_watch(struct qemu_xs_handle *h, + const char *path, + xs_watch_fn fn, + void *opaque) +{ + if (!xen_xenstore_ops) { + return NULL; + } + return xen_xenstore_ops->watch(h, path, fn, opaque); +} + +static inline void qemu_xen_xs_unwatch(struct qemu_xs_handle *h, + struct qemu_xs_watch *w) +{ + if (!xen_xenstore_ops) { + return; + } + xen_xenstore_ops->unwatch(h, w); +} + +static inline xs_transaction_t qemu_xen_xs_transaction_start(struct qemu_xs_handle *h) +{ + if (!xen_xenstore_ops) { + return XBT_NULL; + } + return xen_xenstore_ops->transaction_start(h); +} + +static inline bool qemu_xen_xs_transaction_end(struct qemu_xs_handle *h, + xs_transaction_t t, bool abort) +{ + if (!xen_xenstore_ops) { + return false; + } + return xen_xenstore_ops->transaction_end(h, t, abort); +} + +void setup_xen_backend_ops(void); + +#endif /* QEMU_XEN_BACKEND_OPS_H */ diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_native.h index 9a13a756ae..6bcc83baf9 100644 --- a/include/hw/xen/xen_common.h +++ b/include/hw/xen/xen_native.h @@ -1,5 +1,9 @@ -#ifndef QEMU_HW_XEN_COMMON_H -#define QEMU_HW_XEN_COMMON_H +#ifndef QEMU_HW_XEN_NATIVE_H +#define QEMU_HW_XEN_NATIVE_H + +#ifdef __XEN_INTERFACE_VERSION__ +#error In Xen native files, include xen_native.h before other Xen headers +#endif /* * If we have new enough libxenctrl then we do not want/need these compat @@ -12,7 +16,6 @@ #include <xenctrl.h> #include <xenstore.h> -#include "hw/xen/interface/io/xenbus.h" #include "hw/xen/xen.h" #include "hw/pci/pci_device.h" @@ -28,49 +31,12 @@ extern xc_interface *xen_xc; #if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701 typedef xc_interface xenforeignmemory_handle; -typedef xc_evtchn xenevtchn_handle; -typedef xc_gnttab xengnttab_handle; -typedef evtchn_port_or_error_t xenevtchn_port_or_error_t; - -#define xenevtchn_open(l, f) xc_evtchn_open(l, f); -#define xenevtchn_close(h) xc_evtchn_close(h) -#define xenevtchn_fd(h) xc_evtchn_fd(h) -#define xenevtchn_pending(h) xc_evtchn_pending(h) -#define xenevtchn_notify(h, p) xc_evtchn_notify(h, p) -#define xenevtchn_bind_interdomain(h, d, p) xc_evtchn_bind_interdomain(h, d, p) -#define xenevtchn_unmask(h, p) xc_evtchn_unmask(h, p) -#define xenevtchn_unbind(h, p) xc_evtchn_unbind(h, p) - -#define xengnttab_open(l, f) xc_gnttab_open(l, f) -#define xengnttab_close(h) xc_gnttab_close(h) -#define xengnttab_set_max_grants(h, n) xc_gnttab_set_max_grants(h, n) -#define xengnttab_map_grant_ref(h, d, r, p) xc_gnttab_map_grant_ref(h, d, r, p) -#define xengnttab_unmap(h, a, n) xc_gnttab_munmap(h, a, n) -#define xengnttab_map_grant_refs(h, c, d, r, p) \ - xc_gnttab_map_grant_refs(h, c, d, r, p) -#define xengnttab_map_domain_grant_refs(h, c, d, r, p) \ - xc_gnttab_map_domain_grant_refs(h, c, d, r, p) #define xenforeignmemory_open(l, f) xen_xc #define xenforeignmemory_close(h) -static inline void *xenforeignmemory_map(xc_interface *h, uint32_t dom, - int prot, size_t pages, - const xen_pfn_t arr[/*pages*/], - int err[/*pages*/]) -{ - if (err) - return xc_map_foreign_bulk(h, dom, prot, arr, err, pages); - else - return xc_map_foreign_pages(h, dom, prot, arr, pages); -} - -#define xenforeignmemory_unmap(h, p, s) munmap(p, s * XC_PAGE_SIZE) - #else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */ -#include <xenevtchn.h> -#include <xengnttab.h> #include <xenforeignmemory.h> #endif @@ -660,31 +626,4 @@ static inline int xen_set_ioreq_server_state(domid_t dom, #endif -/* Xen before 4.8 */ - -#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40800 - -struct xengnttab_grant_copy_segment { - union xengnttab_copy_ptr { - void *virt; - struct { - uint32_t ref; - uint16_t offset; - uint16_t domid; - } foreign; - } source, dest; - uint16_t len; - uint16_t flags; - int16_t status; -}; - -typedef struct xengnttab_grant_copy_segment xengnttab_grant_copy_segment_t; - -static inline int xengnttab_grant_copy(xengnttab_handle *xgt, uint32_t count, - xengnttab_grant_copy_segment_t *segs) -{ - return -ENOSYS; -} -#endif - -#endif /* QEMU_HW_XEN_COMMON_H */ +#endif /* QEMU_HW_XEN_NATIVE_H */ diff --git a/include/hw/xen/xen_pvdev.h b/include/hw/xen/xen_pvdev.h index 7cd4bc2b82..ddad4b9f36 100644 --- a/include/hw/xen/xen_pvdev.h +++ b/include/hw/xen/xen_pvdev.h @@ -1,7 +1,9 @@ #ifndef QEMU_HW_XEN_PVDEV_H #define QEMU_HW_XEN_PVDEV_H -#include "hw/xen/xen_common.h" +#include "hw/qdev-core.h" +#include "hw/xen/xen_backend_ops.h" + /* ------------------------------------------------------------- */ #define XEN_BUFSIZE 1024 @@ -38,6 +40,7 @@ struct XenLegacyDevice { char name[64]; int debug; + struct qemu_xs_watch *watch; enum xenbus_state be_state; enum xenbus_state fe_state; int online; @@ -63,7 +66,6 @@ int xenstore_write_int64(const char *base, const char *node, int64_t ival); char *xenstore_read_str(const char *base, const char *node); int xenstore_read_int(const char *base, const char *node, int *ival); int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval); -void xenstore_update(void *unused); const char *xenbus_strstate(enum xenbus_state state); diff --git a/softmmu/globals.c b/softmmu/globals.c index 0a4405614e..39678aa8c5 100644 --- a/softmmu/globals.c +++ b/softmmu/globals.c @@ -65,3 +65,7 @@ bool qemu_uuid_set; uint32_t xen_domid; enum xen_mode xen_mode = XEN_DISABLED; bool xen_domid_restrict; +struct evtchn_backend_ops *xen_evtchn_ops; +struct gnttab_backend_ops *xen_gnttab_ops; +struct foreignmem_backend_ops *xen_foreignmem_ops; +struct xenstore_backend_ops *xen_xenstore_ops; diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c index bad3131d08..0bb6c601c9 100644 --- a/target/i386/kvm/xen-emu.c +++ b/target/i386/kvm/xen-emu.c @@ -1406,6 +1406,11 @@ int kvm_xen_soft_reset(void) return err; } + err = xen_gnttab_reset(); + if (err) { + return err; + } + err = xen_xenstore_reset(); if (err) { return err; diff --git a/tests/unit/meson.build b/tests/unit/meson.build index 51f453e6c4..d9c0a7eae6 100644 --- a/tests/unit/meson.build +++ b/tests/unit/meson.build @@ -47,6 +47,7 @@ tests = { 'ptimer-test': ['ptimer-test-stubs.c', meson.project_source_root() / 'hw/core/ptimer.c'], 'test-qapi-util': [], 'test-interval-tree': [], + 'test-xs-node': [qom], } if have_system or have_tools diff --git a/tests/unit/test-xs-node.c b/tests/unit/test-xs-node.c new file mode 100644 index 0000000000..b80d10ff98 --- /dev/null +++ b/tests/unit/test-xs-node.c @@ -0,0 +1,871 @@ +/* + * QEMU XenStore XsNode testing + * + * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. + + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/module.h" + +static int nr_xs_nodes; +static GList *xs_node_list; + +#define XS_NODE_UNIT_TEST + +/* + * We don't need the core Xen definitions. And we *do* want to be able + * to run the unit tests even on architectures that Xen doesn't support + * (because life's too short to bother doing otherwise, and test coverage + * doesn't hurt). + */ +#define __XEN_PUBLIC_XEN_H__ +typedef unsigned long xen_pfn_t; + +#include "hw/i386/kvm/xenstore_impl.c" + +#define DOMID_QEMU 0 +#define DOMID_GUEST 1 + +static void dump_ref(const char *name, XsNode *n, int indent) +{ + int i; + + if (!indent && name) { + printf("%s:\n", name); + } + + for (i = 0; i < indent; i++) { + printf(" "); + } + + printf("->%p(%d, '%s'): '%.*s'%s%s\n", n, n->ref, n->name, + (int)(n->content ? n->content->len : strlen("<empty>")), + n->content ? (char *)n->content->data : "<empty>", + n->modified_in_tx ? " MODIFIED" : "", + n->deleted_in_tx ? " DELETED" : ""); + + if (n->children) { + g_hash_table_foreach(n->children, (void *)dump_ref, + GINT_TO_POINTER(indent + 2)); + } +} + +/* This doesn't happen in qemu but we want to make valgrind happy */ +static void xs_impl_delete(XenstoreImplState *s, bool last) +{ + int err; + + xs_impl_reset_watches(s, DOMID_GUEST); + g_assert(!s->nr_domu_watches); + + err = xs_impl_rm(s, DOMID_QEMU, XBT_NULL, "/local"); + g_assert(!err); + g_assert(s->nr_nodes == 1); + + g_hash_table_unref(s->watches); + g_hash_table_unref(s->transactions); + xs_node_unref(s->root); + g_free(s); + + if (!last) { + return; + } + + if (xs_node_list) { + GList *l; + for (l = xs_node_list; l; l = l->next) { + XsNode *n = l->data; + printf("Remaining node at %p name %s ref %u\n", n, n->name, + n->ref); + } + } + g_assert(!nr_xs_nodes); +} + +struct compare_walk { + char path[XENSTORE_ABS_PATH_MAX + 1]; + XsNode *parent_2; + bool compare_ok; +}; + + +static bool compare_perms(GList *p1, GList *p2) +{ + while (p1) { + if (!p2 || g_strcmp0(p1->data, p2->data)) { + return false; + } + p1 = p1->next; + p2 = p2->next; + } + return (p2 == NULL); +} + +static bool compare_content(GByteArray *c1, GByteArray *c2) +{ + size_t len1 = 0, len2 = 0; + + if (c1) { + len1 = c1->len; + } + if (c2) { + len2 = c2->len; + } + if (len1 != len2) { + return false; + } + + if (!len1) { + return true; + } + + return !memcmp(c1->data, c2->data, len1); +} + +static void compare_child(gpointer, gpointer, gpointer); + +static void compare_nodes(struct compare_walk *cw, XsNode *n1, XsNode *n2) +{ + int nr_children1 = 0, nr_children2 = 0; + + if (n1->children) { + nr_children1 = g_hash_table_size(n1->children); + } + if (n2->children) { + nr_children2 = g_hash_table_size(n2->children); + } + + if (n1->ref != n2->ref || + n1->deleted_in_tx != n2->deleted_in_tx || + n1->modified_in_tx != n2->modified_in_tx || + !compare_perms(n1->perms, n2->perms) || + !compare_content(n1->content, n2->content) || + nr_children1 != nr_children2) { + cw->compare_ok = false; + printf("Compare failure on '%s'\n", cw->path); + } + + if (nr_children1) { + XsNode *oldparent = cw->parent_2; + cw->parent_2 = n2; + g_hash_table_foreach(n1->children, compare_child, cw); + + cw->parent_2 = oldparent; + } +} + +static void compare_child(gpointer key, gpointer val, gpointer opaque) +{ + struct compare_walk *cw = opaque; + char *childname = key; + XsNode *child1 = val; + XsNode *child2 = g_hash_table_lookup(cw->parent_2->children, childname); + int pathlen = strlen(cw->path); + + if (!child2) { + cw->compare_ok = false; + printf("Child '%s' does not exist under '%s'\n", childname, cw->path); + return; + } + + strncat(cw->path, "/", sizeof(cw->path) - 1); + strncat(cw->path, childname, sizeof(cw->path) - 1); + + compare_nodes(cw, child1, child2); + cw->path[pathlen] = '\0'; +} + +static bool compare_trees(XsNode *n1, XsNode *n2) +{ + struct compare_walk cw; + + cw.path[0] = '\0'; + cw.parent_2 = n2; + cw.compare_ok = true; + + if (!n1 || !n2) { + return false; + } + + compare_nodes(&cw, n1, n2); + return cw.compare_ok; +} + +static void compare_tx(gpointer key, gpointer val, gpointer opaque) +{ + XenstoreImplState *s2 = opaque; + XsTransaction *t1 = val, *t2; + unsigned int tx_id = GPOINTER_TO_INT(key); + + t2 = g_hash_table_lookup(s2->transactions, key); + g_assert(t2); + + g_assert(t1->tx_id == tx_id); + g_assert(t2->tx_id == tx_id); + g_assert(t1->base_tx == t2->base_tx); + g_assert(t1->dom_id == t2->dom_id); + if (!compare_trees(t1->root, t2->root)) { + printf("Comparison failure in TX %u after serdes:\n", tx_id); + dump_ref("Original", t1->root, 0); + dump_ref("Deserialised", t2->root, 0); + g_assert(0); + } + g_assert(t1->nr_nodes == t2->nr_nodes); +} + +static int write_str(XenstoreImplState *s, unsigned int dom_id, + unsigned int tx_id, const char *path, + const char *content) +{ + GByteArray *d = g_byte_array_new(); + int err; + + g_byte_array_append(d, (void *)content, strlen(content)); + err = xs_impl_write(s, dom_id, tx_id, path, d); + g_byte_array_unref(d); + return err; +} + +static void watch_cb(void *_str, const char *path, const char *token) +{ + GString *str = _str; + + g_string_append(str, path); + g_string_append(str, token); +} + +static void check_serdes(XenstoreImplState *s) +{ + XenstoreImplState *s2 = xs_impl_create(DOMID_GUEST); + GByteArray *bytes = xs_impl_serialize(s); + int nr_transactions1, nr_transactions2; + int ret; + + ret = xs_impl_deserialize(s2, bytes, DOMID_GUEST, watch_cb, NULL); + g_assert(!ret); + + g_byte_array_unref(bytes); + + g_assert(s->last_tx == s2->last_tx); + g_assert(s->root_tx == s2->root_tx); + + if (!compare_trees(s->root, s2->root)) { + printf("Comparison failure in main tree after serdes:\n"); + dump_ref("Original", s->root, 0); + dump_ref("Deserialised", s2->root, 0); + g_assert(0); + } + + nr_transactions1 = g_hash_table_size(s->transactions); + nr_transactions2 = g_hash_table_size(s2->transactions); + g_assert(nr_transactions1 == nr_transactions2); + + g_hash_table_foreach(s->transactions, compare_tx, s2); + + g_assert(s->nr_domu_watches == s2->nr_domu_watches); + g_assert(s->nr_domu_transactions == s2->nr_domu_transactions); + g_assert(s->nr_nodes == s2->nr_nodes); + xs_impl_delete(s2, false); +} + +static XenstoreImplState *setup(void) +{ + XenstoreImplState *s = xs_impl_create(DOMID_GUEST); + char *abspath; + GList *perms; + int err; + + abspath = g_strdup_printf("/local/domain/%u", DOMID_GUEST); + + err = write_str(s, DOMID_QEMU, XBT_NULL, abspath, ""); + g_assert(!err); + g_assert(s->nr_nodes == 4); + + perms = g_list_append(NULL, g_strdup_printf("n%u", DOMID_QEMU)); + perms = g_list_append(perms, g_strdup_printf("r%u", DOMID_GUEST)); + + err = xs_impl_set_perms(s, DOMID_QEMU, XBT_NULL, abspath, perms); + g_assert(!err); + + g_list_free_full(perms, g_free); + g_free(abspath); + + abspath = g_strdup_printf("/local/domain/%u/some", DOMID_GUEST); + + err = write_str(s, DOMID_QEMU, XBT_NULL, abspath, ""); + g_assert(!err); + g_assert(s->nr_nodes == 5); + + perms = g_list_append(NULL, g_strdup_printf("n%u", DOMID_GUEST)); + + err = xs_impl_set_perms(s, DOMID_QEMU, XBT_NULL, abspath, perms); + g_assert(!err); + + g_list_free_full(perms, g_free); + g_free(abspath); + + return s; +} + +static void test_xs_node_simple(void) +{ + GByteArray *data = g_byte_array_new(); + XenstoreImplState *s = setup(); + GString *guest_watches = g_string_new(NULL); + GString *qemu_watches = g_string_new(NULL); + GList *items = NULL; + XsNode *old_root; + uint64_t gencnt; + int err; + + g_assert(s); + + err = xs_impl_watch(s, DOMID_GUEST, "some", "guestwatch", + watch_cb, guest_watches); + g_assert(!err); + g_assert(guest_watches->len == strlen("someguestwatch")); + g_assert(!strcmp(guest_watches->str, "someguestwatch")); + g_string_truncate(guest_watches, 0); + + err = xs_impl_watch(s, 0, "/local/domain/1/some", "qemuwatch", + watch_cb, qemu_watches); + g_assert(!err); + g_assert(qemu_watches->len == strlen("/local/domain/1/someqemuwatch")); + g_assert(!strcmp(qemu_watches->str, "/local/domain/1/someqemuwatch")); + g_string_truncate(qemu_watches, 0); + + /* Read gives ENOENT when it should */ + err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "foo", data); + g_assert(err == ENOENT); + + /* Write works */ + err = write_str(s, DOMID_GUEST, XBT_NULL, "some/relative/path", + "something"); + g_assert(s->nr_nodes == 7); + g_assert(!err); + g_assert(!strcmp(guest_watches->str, + "some/relative/pathguestwatch")); + g_assert(!strcmp(qemu_watches->str, + "/local/domain/1/some/relative/pathqemuwatch")); + + g_string_truncate(qemu_watches, 0); + g_string_truncate(guest_watches, 0); + xs_impl_reset_watches(s, 0); + + /* Read gives back what we wrote */ + err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/relative/path", data); + g_assert(!err); + g_assert(data->len == strlen("something")); + g_assert(!memcmp(data->data, "something", data->len)); + + /* Even if we use an abolute path */ + g_byte_array_set_size(data, 0); + err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, + "/local/domain/1/some/relative/path", data); + g_assert(!err); + g_assert(data->len == strlen("something")); + + g_assert(!qemu_watches->len); + g_assert(!guest_watches->len); + /* Keep a copy, to force COW mode */ + old_root = xs_node_ref(s->root); + + /* Write somewhere we aren't allowed, in COW mode */ + err = write_str(s, DOMID_GUEST, XBT_NULL, "/local/domain/badplace", + "moredata"); + g_assert(err == EACCES); + g_assert(s->nr_nodes == 7); + + /* Write works again */ + err = write_str(s, DOMID_GUEST, XBT_NULL, + "/local/domain/1/some/relative/path2", + "something else"); + g_assert(!err); + g_assert(s->nr_nodes == 8); + g_assert(!qemu_watches->len); + g_assert(!strcmp(guest_watches->str, "some/relative/path2guestwatch")); + g_string_truncate(guest_watches, 0); + + /* Overwrite an existing node */ + err = write_str(s, DOMID_GUEST, XBT_NULL, "some/relative/path", + "another thing"); + g_assert(!err); + g_assert(s->nr_nodes == 8); + g_assert(!qemu_watches->len); + g_assert(!strcmp(guest_watches->str, "some/relative/pathguestwatch")); + g_string_truncate(guest_watches, 0); + + /* We can list the two files we wrote */ + err = xs_impl_directory(s, DOMID_GUEST, XBT_NULL, "some/relative", &gencnt, + &items); + g_assert(!err); + g_assert(items); + g_assert(gencnt == 2); + g_assert(!strcmp(items->data, "path")); + g_assert(items->next); + g_assert(!strcmp(items->next->data, "path2")); + g_assert(!items->next->next); + g_list_free_full(items, g_free); + + err = xs_impl_unwatch(s, DOMID_GUEST, "some", "guestwatch", + watch_cb, guest_watches); + g_assert(!err); + + err = xs_impl_unwatch(s, DOMID_GUEST, "some", "guestwatch", + watch_cb, guest_watches); + g_assert(err == ENOENT); + + err = xs_impl_watch(s, DOMID_GUEST, "some/relative/path2", "watchp2", + watch_cb, guest_watches); + g_assert(!err); + g_assert(guest_watches->len == strlen("some/relative/path2watchp2")); + g_assert(!strcmp(guest_watches->str, "some/relative/path2watchp2")); + g_string_truncate(guest_watches, 0); + + err = xs_impl_watch(s, DOMID_GUEST, "/local/domain/1/some/relative", + "watchrel", watch_cb, guest_watches); + g_assert(!err); + g_assert(guest_watches->len == + strlen("/local/domain/1/some/relativewatchrel")); + g_assert(!strcmp(guest_watches->str, + "/local/domain/1/some/relativewatchrel")); + g_string_truncate(guest_watches, 0); + + /* Write somewhere else which already existed */ + err = write_str(s, DOMID_GUEST, XBT_NULL, "some/relative", "moredata"); + g_assert(!err); + g_assert(s->nr_nodes == 8); + + /* Write somewhere we aren't allowed */ + err = write_str(s, DOMID_GUEST, XBT_NULL, "/local/domain/badplace", + "moredata"); + g_assert(err == EACCES); + + g_assert(!strcmp(guest_watches->str, + "/local/domain/1/some/relativewatchrel")); + g_string_truncate(guest_watches, 0); + + g_byte_array_set_size(data, 0); + err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/relative", data); + g_assert(!err); + g_assert(data->len == strlen("moredata")); + g_assert(!memcmp(data->data, "moredata", data->len)); + + /* Overwrite existing data */ + err = write_str(s, DOMID_GUEST, XBT_NULL, "some/relative", "otherdata"); + g_assert(!err); + g_string_truncate(guest_watches, 0); + + g_byte_array_set_size(data, 0); + err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/relative", data); + g_assert(!err); + g_assert(data->len == strlen("otherdata")); + g_assert(!memcmp(data->data, "otherdata", data->len)); + + /* Remove the subtree */ + err = xs_impl_rm(s, DOMID_GUEST, XBT_NULL, "some/relative"); + g_assert(!err); + g_assert(s->nr_nodes == 5); + + /* Each watch fires with the least specific relevant path */ + g_assert(strstr(guest_watches->str, + "some/relative/path2watchp2")); + g_assert(strstr(guest_watches->str, + "/local/domain/1/some/relativewatchrel")); + g_string_truncate(guest_watches, 0); + + g_byte_array_set_size(data, 0); + err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/relative", data); + g_assert(err == ENOENT); + g_byte_array_unref(data); + + xs_impl_reset_watches(s, DOMID_GUEST); + g_string_free(qemu_watches, true); + g_string_free(guest_watches, true); + xs_node_unref(old_root); + xs_impl_delete(s, true); +} + + +static void do_test_xs_node_tx(bool fail, bool commit) +{ + XenstoreImplState *s = setup(); + GString *watches = g_string_new(NULL); + GByteArray *data = g_byte_array_new(); + unsigned int tx_id = XBT_NULL; + int err; + + g_assert(s); + + /* Set a watch */ + err = xs_impl_watch(s, DOMID_GUEST, "some", "watch", + watch_cb, watches); + g_assert(!err); + g_assert(watches->len == strlen("somewatch")); + g_assert(!strcmp(watches->str, "somewatch")); + g_string_truncate(watches, 0); + + /* Write something */ + err = write_str(s, DOMID_GUEST, XBT_NULL, "some/relative/path", + "something"); + g_assert(s->nr_nodes == 7); + g_assert(!err); + g_assert(!strcmp(watches->str, + "some/relative/pathwatch")); + g_string_truncate(watches, 0); + + /* Create a transaction */ + err = xs_impl_transaction_start(s, DOMID_GUEST, &tx_id); + g_assert(!err); + + if (fail) { + /* Write something else in the root */ + err = write_str(s, DOMID_GUEST, XBT_NULL, "some/relative/path", + "another thing"); + g_assert(!err); + g_assert(s->nr_nodes == 7); + g_assert(!strcmp(watches->str, + "some/relative/pathwatch")); + g_string_truncate(watches, 0); + } + + g_assert(!watches->len); + + /* Perform a write in the transaction */ + err = write_str(s, DOMID_GUEST, tx_id, "some/relative/path", + "something else"); + g_assert(!err); + g_assert(s->nr_nodes == 7); + g_assert(!watches->len); + + err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/relative/path", data); + g_assert(!err); + if (fail) { + g_assert(data->len == strlen("another thing")); + g_assert(!memcmp(data->data, "another thing", data->len)); + } else { + g_assert(data->len == strlen("something")); + g_assert(!memcmp(data->data, "something", data->len)); + } + g_byte_array_set_size(data, 0); + + err = xs_impl_read(s, DOMID_GUEST, tx_id, "some/relative/path", data); + g_assert(!err); + g_assert(data->len == strlen("something else")); + g_assert(!memcmp(data->data, "something else", data->len)); + g_byte_array_set_size(data, 0); + + check_serdes(s); + + /* Attempt to commit the transaction */ + err = xs_impl_transaction_end(s, DOMID_GUEST, tx_id, commit); + if (commit && fail) { + g_assert(err == EAGAIN); + } else { + g_assert(!err); + } + if (commit && !fail) { + g_assert(!strcmp(watches->str, + "some/relative/pathwatch")); + g_string_truncate(watches, 0); + } else { + g_assert(!watches->len); + } + g_assert(s->nr_nodes == 7); + + check_serdes(s); + + err = xs_impl_unwatch(s, DOMID_GUEST, "some", "watch", + watch_cb, watches); + g_assert(!err); + + err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/relative/path", data); + g_assert(!err); + if (fail) { + g_assert(data->len == strlen("another thing")); + g_assert(!memcmp(data->data, "another thing", data->len)); + } else if (commit) { + g_assert(data->len == strlen("something else")); + g_assert(!memcmp(data->data, "something else", data->len)); + } else { + g_assert(data->len == strlen("something")); + g_assert(!memcmp(data->data, "something", data->len)); + } + g_byte_array_unref(data); + g_string_free(watches, true); + xs_impl_delete(s, true); +} + +static void test_xs_node_tx_fail(void) +{ + do_test_xs_node_tx(true, true); +} + +static void test_xs_node_tx_abort(void) +{ + do_test_xs_node_tx(false, false); + do_test_xs_node_tx(true, false); +} +static void test_xs_node_tx_succeed(void) +{ + do_test_xs_node_tx(false, true); +} + +static void test_xs_node_tx_rm(void) +{ + XenstoreImplState *s = setup(); + GString *watches = g_string_new(NULL); + GByteArray *data = g_byte_array_new(); + unsigned int tx_id = XBT_NULL; + int err; + + g_assert(s); + + /* Set a watch */ + err = xs_impl_watch(s, DOMID_GUEST, "some", "watch", + watch_cb, watches); + g_assert(!err); + g_assert(watches->len == strlen("somewatch")); + g_assert(!strcmp(watches->str, "somewatch")); + g_string_truncate(watches, 0); + + /* Write something */ + err = write_str(s, DOMID_GUEST, XBT_NULL, "some/deep/dark/relative/path", + "something"); + g_assert(!err); + g_assert(s->nr_nodes == 9); + g_assert(!strcmp(watches->str, + "some/deep/dark/relative/pathwatch")); + g_string_truncate(watches, 0); + + /* Create a transaction */ + err = xs_impl_transaction_start(s, DOMID_GUEST, &tx_id); + g_assert(!err); + + /* Delete the tree in the transaction */ + err = xs_impl_rm(s, DOMID_GUEST, tx_id, "some/deep/dark"); + g_assert(!err); + g_assert(s->nr_nodes == 9); + g_assert(!watches->len); + + err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/deep/dark/relative/path", + data); + g_assert(!err); + g_assert(data->len == strlen("something")); + g_assert(!memcmp(data->data, "something", data->len)); + g_byte_array_set_size(data, 0); + + check_serdes(s); + + /* Commit the transaction */ + err = xs_impl_transaction_end(s, DOMID_GUEST, tx_id, true); + g_assert(!err); + g_assert(s->nr_nodes == 6); + + g_assert(!strcmp(watches->str, "some/deep/darkwatch")); + g_string_truncate(watches, 0); + + /* Now the node is gone */ + err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/deep/dark/relative/path", + data); + g_assert(err == ENOENT); + g_byte_array_unref(data); + + err = xs_impl_unwatch(s, DOMID_GUEST, "some", "watch", + watch_cb, watches); + g_assert(!err); + + g_string_free(watches, true); + xs_impl_delete(s, true); +} + +static void test_xs_node_tx_resurrect(void) +{ + XenstoreImplState *s = setup(); + GString *watches = g_string_new(NULL); + GByteArray *data = g_byte_array_new(); + unsigned int tx_id = XBT_NULL; + int err; + + g_assert(s); + + /* Write something */ + err = write_str(s, DOMID_GUEST, XBT_NULL, "some/deep/dark/relative/path", + "something"); + g_assert(!err); + g_assert(s->nr_nodes == 9); + + /* Another node to remain shared */ + err = write_str(s, DOMID_GUEST, XBT_NULL, "some/place/safe", "keepme"); + g_assert(!err); + g_assert(s->nr_nodes == 11); + + /* This node will be wiped and resurrected */ + err = write_str(s, DOMID_GUEST, XBT_NULL, "some/deep/dark", + "foo"); + g_assert(!err); + g_assert(s->nr_nodes == 11); + + /* Set a watch */ + err = xs_impl_watch(s, DOMID_GUEST, "some", "watch", + watch_cb, watches); + g_assert(!err); + g_assert(watches->len == strlen("somewatch")); + g_assert(!strcmp(watches->str, "somewatch")); + g_string_truncate(watches, 0); + + /* Create a transaction */ + err = xs_impl_transaction_start(s, DOMID_GUEST, &tx_id); + g_assert(!err); + + /* Delete the tree in the transaction */ + err = xs_impl_rm(s, DOMID_GUEST, tx_id, "some/deep"); + g_assert(!err); + g_assert(s->nr_nodes == 11); + g_assert(!watches->len); + + /* Resurrect part of it */ + err = write_str(s, DOMID_GUEST, tx_id, "some/deep/dark/different/path", + "something"); + g_assert(!err); + g_assert(s->nr_nodes == 11); + + check_serdes(s); + + /* Commit the transaction */ + err = xs_impl_transaction_end(s, DOMID_GUEST, tx_id, true); + g_assert(!err); + g_assert(s->nr_nodes == 11); + + check_serdes(s); + + /* lost data */ + g_assert(strstr(watches->str, "some/deep/dark/different/pathwatch")); + /* topmost deleted */ + g_assert(strstr(watches->str, "some/deep/dark/relativewatch")); + /* lost data */ + g_assert(strstr(watches->str, "some/deep/darkwatch")); + + g_string_truncate(watches, 0); + + /* Now the node is gone */ + err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/deep/dark/relative/path", + data); + g_assert(err == ENOENT); + g_byte_array_unref(data); + + check_serdes(s); + + err = xs_impl_unwatch(s, DOMID_GUEST, "some", "watch", + watch_cb, watches); + g_assert(!err); + + g_string_free(watches, true); + xs_impl_delete(s, true); +} + +static void test_xs_node_tx_resurrect2(void) +{ + XenstoreImplState *s = setup(); + GString *watches = g_string_new(NULL); + GByteArray *data = g_byte_array_new(); + unsigned int tx_id = XBT_NULL; + int err; + + g_assert(s); + + /* Write something */ + err = write_str(s, DOMID_GUEST, XBT_NULL, "some/deep/dark/relative/path", + "something"); + g_assert(!err); + g_assert(s->nr_nodes == 9); + + /* Another node to remain shared */ + err = write_str(s, DOMID_GUEST, XBT_NULL, "some/place/safe", "keepme"); + g_assert(!err); + g_assert(s->nr_nodes == 11); + + /* This node will be wiped and resurrected */ + err = write_str(s, DOMID_GUEST, XBT_NULL, "some/deep/dark", + "foo"); + g_assert(!err); + g_assert(s->nr_nodes == 11); + + /* Set a watch */ + err = xs_impl_watch(s, DOMID_GUEST, "some", "watch", + watch_cb, watches); + g_assert(!err); + g_assert(watches->len == strlen("somewatch")); + g_assert(!strcmp(watches->str, "somewatch")); + g_string_truncate(watches, 0); + + /* Create a transaction */ + err = xs_impl_transaction_start(s, DOMID_GUEST, &tx_id); + g_assert(!err); + + /* Delete the tree in the transaction */ + err = xs_impl_rm(s, DOMID_GUEST, tx_id, "some/deep"); + g_assert(!err); + g_assert(s->nr_nodes == 11); + g_assert(!watches->len); + + /* Resurrect part of it */ + err = write_str(s, DOMID_GUEST, tx_id, "some/deep/dark/relative/path", + "something"); + g_assert(!err); + g_assert(s->nr_nodes == 11); + + check_serdes(s); + + /* Commit the transaction */ + err = xs_impl_transaction_end(s, DOMID_GUEST, tx_id, true); + g_assert(!err); + g_assert(s->nr_nodes == 11); + + check_serdes(s); + + /* lost data */ + g_assert(strstr(watches->str, "some/deep/dark/relative/pathwatch")); + /* lost data */ + g_assert(strstr(watches->str, "some/deep/darkwatch")); + + g_string_truncate(watches, 0); + + /* Now the node is gone */ + err = xs_impl_read(s, DOMID_GUEST, XBT_NULL, "some/deep/dark/relative/path", + data); + g_assert(!err); + g_assert(data->len == strlen("something")); + g_assert(!memcmp(data->data, "something", data->len)); + + g_byte_array_unref(data); + + check_serdes(s); + + err = xs_impl_unwatch(s, DOMID_GUEST, "some", "watch", + watch_cb, watches); + g_assert(!err); + + g_string_free(watches, true); + xs_impl_delete(s, true); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + module_call_init(MODULE_INIT_QOM); + + g_test_add_func("/xs_node/simple", test_xs_node_simple); + g_test_add_func("/xs_node/tx_abort", test_xs_node_tx_abort); + g_test_add_func("/xs_node/tx_fail", test_xs_node_tx_fail); + g_test_add_func("/xs_node/tx_succeed", test_xs_node_tx_succeed); + g_test_add_func("/xs_node/tx_rm", test_xs_node_tx_rm); + g_test_add_func("/xs_node/tx_resurrect", test_xs_node_tx_resurrect); + g_test_add_func("/xs_node/tx_resurrect2", test_xs_node_tx_resurrect2); + + return g_test_run(); +} |