diff options
author | David Hildenbrand <david@redhat.com> | 2023-09-26 20:57:21 +0200 |
---|---|---|
committer | David Hildenbrand <david@redhat.com> | 2023-10-12 14:15:21 +0200 |
commit | 552b25229cf7c65d31ac9a60b1d9202672dd9c61 (patch) | |
tree | b2162fe389d7b1ff920c034aae544779ec0ea624 /hw/virtio/vhost.c | |
parent | a51e5124a655b3dad80b36b18547cb1eca2c5eb2 (diff) |
vhost: Rework memslot filtering and fix "used_memslot" tracking
Having multiple vhost devices, some filtering out fd-less memslots and
some not, can mess up the "used_memslot" accounting. Consequently our
"free memslot" checks become unreliable and we might run out of free
memslots at runtime later.
An example sequence which can trigger a potential issue that involves
different vhost backends (vhost-kernel and vhost-user) and hotplugged
memory devices can be found at [1].
Let's make the filtering mechanism less generic and distinguish between
backends that support private memslots (without a fd) and ones that only
support shared memslots (with a fd). Track the used_memslots for both
cases separately and use the corresponding value when required.
Note: Most probably we should filter out MAP_PRIVATE fd-based RAM regions
(for example, via memory-backend-memfd,...,shared=off or as default with
memory-backend-file) as well. When not using MAP_SHARED, it might not work
as expected. Add a TODO for now.
[1] https://lkml.kernel.org/r/fad9136f-08d3-3fd9-71a1-502069c000cf@redhat.com
Message-ID: <20230926185738.277351-2-david@redhat.com>
Fixes: 988a27754bbb ("vhost: allow backends to filter memory sections")
Cc: Tiwei Bie <tiwei.bie@intel.com>
Acked-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Diffstat (limited to 'hw/virtio/vhost.c')
-rw-r--r-- | hw/virtio/vhost.c | 56 |
1 files changed, 47 insertions, 9 deletions
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 9cfac40fde..0edd31a7e8 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -45,20 +45,33 @@ static struct vhost_log *vhost_log; static struct vhost_log *vhost_log_shm; +/* Memslots used by backends that support private memslots (without an fd). */ static unsigned int used_memslots; + +/* Memslots used by backends that only support shared memslots (with an fd). */ +static unsigned int used_shared_memslots; + static QLIST_HEAD(, vhost_dev) vhost_devices = QLIST_HEAD_INITIALIZER(vhost_devices); bool vhost_has_free_slot(void) { - unsigned int slots_limit = ~0U; + unsigned int free = UINT_MAX; struct vhost_dev *hdev; QLIST_FOREACH(hdev, &vhost_devices, entry) { unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev); - slots_limit = MIN(slots_limit, r); + unsigned int cur_free; + + if (hdev->vhost_ops->vhost_backend_no_private_memslots && + hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) { + cur_free = r - used_shared_memslots; + } else { + cur_free = r - used_memslots; + } + free = MIN(free, cur_free); } - return slots_limit > used_memslots; + return free > 0; } static void vhost_dev_sync_region(struct vhost_dev *dev, @@ -474,8 +487,7 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev, * vhost_section: identify sections needed for vhost access * * We only care about RAM sections here (where virtqueue and guest - * internals accessed by virtio might live). If we find one we still - * allow the backend to potentially filter it out of our list. + * internals accessed by virtio might live). */ static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section) { @@ -502,8 +514,16 @@ static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section) return false; } - if (dev->vhost_ops->vhost_backend_mem_section_filter && - !dev->vhost_ops->vhost_backend_mem_section_filter(dev, section)) { + /* + * Some backends (like vhost-user) can only handle memory regions + * that have an fd (can be mapped into a different process). Filter + * the ones without an fd out, if requested. + * + * TODO: we might have to limit to MAP_SHARED as well. + */ + if (memory_region_get_fd(section->mr) < 0 && + dev->vhost_ops->vhost_backend_no_private_memslots && + dev->vhost_ops->vhost_backend_no_private_memslots(dev)) { trace_vhost_reject_section(mr->name, 2); return false; } @@ -568,7 +588,14 @@ static void vhost_commit(MemoryListener *listener) dev->n_mem_sections * sizeof dev->mem->regions[0]; dev->mem = g_realloc(dev->mem, regions_size); dev->mem->nregions = dev->n_mem_sections; - used_memslots = dev->mem->nregions; + + if (dev->vhost_ops->vhost_backend_no_private_memslots && + dev->vhost_ops->vhost_backend_no_private_memslots(dev)) { + used_shared_memslots = dev->mem->nregions; + } else { + used_memslots = dev->mem->nregions; + } + for (i = 0; i < dev->n_mem_sections; i++) { struct vhost_memory_region *cur_vmr = dev->mem->regions + i; struct MemoryRegionSection *mrs = dev->mem_sections + i; @@ -1400,6 +1427,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, VhostBackendType backend_type, uint32_t busyloop_timeout, Error **errp) { + unsigned int used; uint64_t features; int i, r, n_initialized_vqs = 0; @@ -1495,7 +1523,17 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, memory_listener_register(&hdev->memory_listener, &address_space_memory); QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); - if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) { + /* + * The listener we registered properly updated the corresponding counter. + * So we can trust that these values are accurate. + */ + if (hdev->vhost_ops->vhost_backend_no_private_memslots && + hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) { + used = used_shared_memslots; + } else { + used = used_memslots; + } + if (used > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) { error_setg(errp, "vhost backend memory slots limit is less" " than current number of present memory slots"); r = -EINVAL; |