aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r--hw/vfio/ccw.c9
-rw-r--r--hw/vfio/common.c51
-rw-r--r--hw/vfio/pci.c26
-rw-r--r--hw/vfio/trace-events1
-rw-r--r--hw/virtio/virtio-balloon.c4
5 files changed, 87 insertions, 4 deletions
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 351b305e1a..e96bbdc78b 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -349,6 +349,15 @@ static void vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev,
}
}
+ /*
+ * All vfio-ccw devices are believed to operate in a way compatible with
+ * memory ballooning, ie. pages pinned in the host are in the current
+ * working set of the guest driver and therefore never overlap with pages
+ * available to the guest balloon driver. This needs to be set before
+ * vfio_get_device() for vfio common to handle the balloon inhibitor.
+ */
+ vcdev->vdev.balloon_allowed = true;
+
if (vfio_get_device(group, vcdev->cdev.mdevid, &vcdev->vdev, errp)) {
goto out_err;
}
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index fb396cf00a..cd1f4af18a 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -32,6 +32,7 @@
#include "hw/hw.h"
#include "qemu/error-report.h"
#include "qemu/range.h"
+#include "sysemu/balloon.h"
#include "sysemu/kvm.h"
#include "trace.h"
#include "qapi/error.h"
@@ -1044,6 +1045,33 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
space = vfio_get_address_space(as);
+ /*
+ * VFIO is currently incompatible with memory ballooning insofar as the
+ * madvise to purge (zap) the page from QEMU's address space does not
+ * interact with the memory API and therefore leaves stale virtual to
+ * physical mappings in the IOMMU if the page was previously pinned. We
+ * therefore add a balloon inhibit for each group added to a container,
+ * whether the container is used individually or shared. This provides
+ * us with options to allow devices within a group to opt-in and allow
+ * ballooning, so long as it is done consistently for a group (for instance
+ * if the device is an mdev device where it is known that the host vendor
+ * driver will never pin pages outside of the working set of the guest
+ * driver, which would thus not be ballooning candidates).
+ *
+ * The first opportunity to induce pinning occurs here where we attempt to
+ * attach the group to existing containers within the AddressSpace. If any
+ * pages are already zapped from the virtual address space, such as from a
+ * previous ballooning opt-in, new pinning will cause valid mappings to be
+ * re-established. Likewise, when the overall MemoryListener for a new
+ * container is registered, a replay of mappings within the AddressSpace
+ * will occur, re-establishing any previously zapped pages as well.
+ *
+ * NB. Balloon inhibiting does not currently block operation of the
+ * balloon driver or revoke previously pinned pages, it only prevents
+ * calling madvise to modify the virtual mapping of ballooned pages.
+ */
+ qemu_balloon_inhibit(true);
+
QLIST_FOREACH(container, &space->containers, next) {
if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
group->container = container;
@@ -1232,6 +1260,7 @@ close_fd_exit:
close(fd);
put_space_exit:
+ qemu_balloon_inhibit(false);
vfio_put_address_space(space);
return ret;
@@ -1352,6 +1381,9 @@ void vfio_put_group(VFIOGroup *group)
return;
}
+ if (!group->balloon_allowed) {
+ qemu_balloon_inhibit(false);
+ }
vfio_kvm_device_del_group(group);
vfio_disconnect_container(group);
QLIST_REMOVE(group, next);
@@ -1387,6 +1419,25 @@ int vfio_get_device(VFIOGroup *group, const char *name,
return ret;
}
+ /*
+ * Clear the balloon inhibitor for this group if the driver knows the
+ * device operates compatibly with ballooning. Setting must be consistent
+ * per group, but since compatibility is really only possible with mdev
+ * currently, we expect singleton groups.
+ */
+ if (vbasedev->balloon_allowed != group->balloon_allowed) {
+ if (!QLIST_EMPTY(&group->device_list)) {
+ error_setg(errp,
+ "Inconsistent device balloon setting within group");
+ return -1;
+ }
+
+ if (!group->balloon_allowed) {
+ group->balloon_allowed = true;
+ qemu_balloon_inhibit(false);
+ }
+ }
+
vbasedev->fd = fd;
vbasedev->group = group;
QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 6cbb8fa054..056f3a887a 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2804,12 +2804,13 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
VFIODevice *vbasedev_iter;
VFIOGroup *group;
- char *tmp, group_path[PATH_MAX], *group_name;
+ char *tmp, *subsys, group_path[PATH_MAX], *group_name;
Error *err = NULL;
ssize_t len;
struct stat st;
int groupid;
int i, ret;
+ bool is_mdev;
if (!vdev->vbasedev.sysfsdev) {
if (!(~vdev->host.domain || ~vdev->host.bus ||
@@ -2869,6 +2870,27 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
}
}
+ /*
+ * Mediated devices *might* operate compatibly with memory ballooning, but
+ * we cannot know for certain, it depends on whether the mdev vendor driver
+ * stays in sync with the active working set of the guest driver. Prevent
+ * the x-balloon-allowed option unless this is minimally an mdev device.
+ */
+ tmp = g_strdup_printf("%s/subsystem", vdev->vbasedev.sysfsdev);
+ subsys = realpath(tmp, NULL);
+ g_free(tmp);
+ is_mdev = (strcmp(subsys, "/sys/bus/mdev") == 0);
+ free(subsys);
+
+ trace_vfio_mdev(vdev->vbasedev.name, is_mdev);
+
+ if (vdev->vbasedev.balloon_allowed && !is_mdev) {
+ error_setg(errp, "x-balloon-allowed only potentially compatible "
+ "with mdev devices");
+ vfio_put_group(group);
+ goto error;
+ }
+
ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev, errp);
if (ret) {
vfio_put_group(group);
@@ -3170,6 +3192,8 @@ static Property vfio_pci_dev_properties[] = {
DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,
VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
+ DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,
+ vbasedev.balloon_allowed, false),
DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index d2a74952e3..a85e8662ea 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -39,6 +39,7 @@ vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %
vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m"
vfio_realize(const char *name, int group_id) " (%s) group %d"
+vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
vfio_pci_reset(const char *name) " (%s)"
vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET"
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 1f7a87f094..b5425080c5 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -21,7 +21,6 @@
#include "hw/mem/pc-dimm.h"
#include "sysemu/balloon.h"
#include "hw/virtio/virtio-balloon.h"
-#include "sysemu/kvm.h"
#include "exec/address-spaces.h"
#include "qapi/error.h"
#include "qapi/qapi-events-misc.h"
@@ -36,8 +35,7 @@
static void balloon_page(void *addr, int deflate)
{
- if (!qemu_balloon_is_inhibited() && (!kvm_enabled() ||
- kvm_has_sync_mmu())) {
+ if (!qemu_balloon_is_inhibited()) {
qemu_madvise(addr, BALLOON_PAGE_SIZE,
deflate ? QEMU_MADV_WILLNEED : QEMU_MADV_DONTNEED);
}