diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2018-08-17 18:24:38 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2018-08-17 18:24:38 +0100 |
commit | a544c9110d3971cc764c2dcd86a55b28534e4a63 (patch) | |
tree | ab1e426f7787fc7ea16e1556c737d27a01afac6f | |
parent | 0abaa41d936becd914a16ee1fe2a981d96d19428 (diff) | |
parent | 238e91728503d400e1c4e644e3a9b80f9e621682 (diff) |
Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20180817.0' into staging
VFIO update 2018-08-17
- Enhance balloon inhibitor for multiple users and use around vfio
device assignment (Alex Williamson)
# gpg: Signature made Fri 17 Aug 2018 17:43:37 BST
# gpg: using RSA key 239B9B6E3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>"
# gpg: aka "Alex Williamson <alex@shazbot.org>"
# gpg: aka "Alex Williamson <alwillia@redhat.com>"
# gpg: aka "Alex Williamson <alex.l.williamson@gmail.com>"
# Primary key fingerprint: 42F6 C04E 540B D1A9 9E7B 8A90 239B 9B6E 3BB0 8B22
* remotes/awilliam/tags/vfio-update-20180817.0:
vfio/ccw/pci: Allow devices to opt-in for ballooning
vfio: Inhibit ballooning based on group attachment to a container
kvm: Use inhibit to prevent ballooning without synchronous mmu
balloon: Allow multiple inhibit users
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | accel/kvm/kvm-all.c | 4 | ||||
-rw-r--r-- | balloon.c | 13 | ||||
-rw-r--r-- | hw/vfio/ccw.c | 9 | ||||
-rw-r--r-- | hw/vfio/common.c | 51 | ||||
-rw-r--r-- | hw/vfio/pci.c | 26 | ||||
-rw-r--r-- | hw/vfio/trace-events | 1 | ||||
-rw-r--r-- | hw/virtio/virtio-balloon.c | 4 | ||||
-rw-r--r-- | include/hw/vfio/vfio-common.h | 2 |
8 files changed, 103 insertions, 7 deletions
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index eb7db92a5e..38f468d8e2 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -39,6 +39,7 @@ #include "trace.h" #include "hw/irq.h" #include "sysemu/sev.h" +#include "sysemu/balloon.h" #include "hw/boards.h" @@ -1698,6 +1699,9 @@ static int kvm_init(MachineState *ms) s->many_ioeventfds = kvm_check_many_ioeventfds(); s->sync_mmu = !!kvm_vm_check_extension(kvm_state, KVM_CAP_SYNC_MMU); + if (!s->sync_mmu) { + qemu_balloon_inhibit(true); + } return 0; @@ -26,6 +26,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" +#include "qemu/atomic.h" #include "exec/cpu-common.h" #include "sysemu/kvm.h" #include "sysemu/balloon.h" @@ -37,16 +38,22 @@ static QEMUBalloonEvent *balloon_event_fn; static QEMUBalloonStatus *balloon_stat_fn; static void *balloon_opaque; -static bool balloon_inhibited; +static int balloon_inhibit_count; bool qemu_balloon_is_inhibited(void) { - return balloon_inhibited; + return atomic_read(&balloon_inhibit_count) > 0; } void qemu_balloon_inhibit(bool state) { - balloon_inhibited = state; + if (state) { + atomic_inc(&balloon_inhibit_count); + } else { + atomic_dec(&balloon_inhibit_count); + } + + assert(atomic_read(&balloon_inhibit_count) >= 0); } static bool have_balloon(Error **errp) diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index 351b305e1a..e96bbdc78b 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -349,6 +349,15 @@ static void vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev, } } + /* + * All vfio-ccw devices are believed to operate in a way compatible with + * memory ballooning, ie. pages pinned in the host are in the current + * working set of the guest driver and therefore never overlap with pages + * available to the guest balloon driver. This needs to be set before + * vfio_get_device() for vfio common to handle the balloon inhibitor. + */ + vcdev->vdev.balloon_allowed = true; + if (vfio_get_device(group, vcdev->cdev.mdevid, &vcdev->vdev, errp)) { goto out_err; } diff --git a/hw/vfio/common.c b/hw/vfio/common.c index fb396cf00a..cd1f4af18a 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -32,6 +32,7 @@ #include "hw/hw.h" #include "qemu/error-report.h" #include "qemu/range.h" +#include "sysemu/balloon.h" #include "sysemu/kvm.h" #include "trace.h" #include "qapi/error.h" @@ -1044,6 +1045,33 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, space = vfio_get_address_space(as); + /* + * VFIO is currently incompatible with memory ballooning insofar as the + * madvise to purge (zap) the page from QEMU's address space does not + * interact with the memory API and therefore leaves stale virtual to + * physical mappings in the IOMMU if the page was previously pinned. We + * therefore add a balloon inhibit for each group added to a container, + * whether the container is used individually or shared. This provides + * us with options to allow devices within a group to opt-in and allow + * ballooning, so long as it is done consistently for a group (for instance + * if the device is an mdev device where it is known that the host vendor + * driver will never pin pages outside of the working set of the guest + * driver, which would thus not be ballooning candidates). + * + * The first opportunity to induce pinning occurs here where we attempt to + * attach the group to existing containers within the AddressSpace. If any + * pages are already zapped from the virtual address space, such as from a + * previous ballooning opt-in, new pinning will cause valid mappings to be + * re-established. Likewise, when the overall MemoryListener for a new + * container is registered, a replay of mappings within the AddressSpace + * will occur, re-establishing any previously zapped pages as well. + * + * NB. Balloon inhibiting does not currently block operation of the + * balloon driver or revoke previously pinned pages, it only prevents + * calling madvise to modify the virtual mapping of ballooned pages. + */ + qemu_balloon_inhibit(true); + QLIST_FOREACH(container, &space->containers, next) { if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { group->container = container; @@ -1232,6 +1260,7 @@ close_fd_exit: close(fd); put_space_exit: + qemu_balloon_inhibit(false); vfio_put_address_space(space); return ret; @@ -1352,6 +1381,9 @@ void vfio_put_group(VFIOGroup *group) return; } + if (!group->balloon_allowed) { + qemu_balloon_inhibit(false); + } vfio_kvm_device_del_group(group); vfio_disconnect_container(group); QLIST_REMOVE(group, next); @@ -1387,6 +1419,25 @@ int vfio_get_device(VFIOGroup *group, const char *name, return ret; } + /* + * Clear the balloon inhibitor for this group if the driver knows the + * device operates compatibly with ballooning. Setting must be consistent + * per group, but since compatibility is really only possible with mdev + * currently, we expect singleton groups. + */ + if (vbasedev->balloon_allowed != group->balloon_allowed) { + if (!QLIST_EMPTY(&group->device_list)) { + error_setg(errp, + "Inconsistent device balloon setting within group"); + return -1; + } + + if (!group->balloon_allowed) { + group->balloon_allowed = true; + qemu_balloon_inhibit(false); + } + } + vbasedev->fd = fd; vbasedev->group = group; QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 6cbb8fa054..056f3a887a 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2804,12 +2804,13 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); VFIODevice *vbasedev_iter; VFIOGroup *group; - char *tmp, group_path[PATH_MAX], *group_name; + char *tmp, *subsys, group_path[PATH_MAX], *group_name; Error *err = NULL; ssize_t len; struct stat st; int groupid; int i, ret; + bool is_mdev; if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || @@ -2869,6 +2870,27 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) } } + /* + * Mediated devices *might* operate compatibly with memory ballooning, but + * we cannot know for certain, it depends on whether the mdev vendor driver + * stays in sync with the active working set of the guest driver. Prevent + * the x-balloon-allowed option unless this is minimally an mdev device. + */ + tmp = g_strdup_printf("%s/subsystem", vdev->vbasedev.sysfsdev); + subsys = realpath(tmp, NULL); + g_free(tmp); + is_mdev = (strcmp(subsys, "/sys/bus/mdev") == 0); + free(subsys); + + trace_vfio_mdev(vdev->vbasedev.name, is_mdev); + + if (vdev->vbasedev.balloon_allowed && !is_mdev) { + error_setg(errp, "x-balloon-allowed only potentially compatible " + "with mdev devices"); + vfio_put_group(group); + goto error; + } + ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev, errp); if (ret) { vfio_put_group(group); @@ -3170,6 +3192,8 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), + DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice, + vbasedev.balloon_allowed, false), DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false), DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false), DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index d2a74952e3..a85e8662ea 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -39,6 +39,7 @@ vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: % vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m" vfio_realize(const char *name, int group_id) " (%s) group %d" +vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d" vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x" vfio_pci_reset(const char *name) " (%s)" vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET" diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 1f7a87f094..b5425080c5 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -21,7 +21,6 @@ #include "hw/mem/pc-dimm.h" #include "sysemu/balloon.h" #include "hw/virtio/virtio-balloon.h" -#include "sysemu/kvm.h" #include "exec/address-spaces.h" #include "qapi/error.h" #include "qapi/qapi-events-misc.h" @@ -36,8 +35,7 @@ static void balloon_page(void *addr, int deflate) { - if (!qemu_balloon_is_inhibited() && (!kvm_enabled() || - kvm_has_sync_mmu())) { + if (!qemu_balloon_is_inhibited()) { qemu_madvise(addr, BALLOON_PAGE_SIZE, deflate ? QEMU_MADV_WILLNEED : QEMU_MADV_DONTNEED); } diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index a9036929b2..15ea6c26fd 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -112,6 +112,7 @@ typedef struct VFIODevice { bool reset_works; bool needs_reset; bool no_mmap; + bool balloon_allowed; VFIODeviceOps *ops; unsigned int num_irqs; unsigned int num_regions; @@ -131,6 +132,7 @@ typedef struct VFIOGroup { QLIST_HEAD(, VFIODevice) device_list; QLIST_ENTRY(VFIOGroup) next; QLIST_ENTRY(VFIOGroup) container_next; + bool balloon_allowed; } VFIOGroup; typedef struct VFIODMABuf { |