author     Peter Maydell <peter.maydell@linaro.org>    2024-10-25 13:35:13 +0100
committer  Peter Maydell <peter.maydell@linaro.org>    2024-10-25 13:35:13 +0100
commit     94be8fd6926cb20c1aa8dd361c6bc10b39b9a376 (patch)
tree       17bddae13bbbdce40753e6c309d2f906938c0ce4
parent     e17e57e862faf6e1f372385c18dcf6d3fd31158e (diff)
parent     00b519c0bca0e933ed22e2e6f8bca6b23f41f950 (diff)
Merge tag 'pull-vfio-20241024' of https://github.com/legoater/qemu into staging
vfio queue:

* Fixed size reported in vfio_state_pending_exact()
* Added support for PMD or PUD aligned mappings

# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEoPZlSPBIlev+awtgUaNDx8/77KEFAmcZ22wACgkQUaNDx8/7
# 7KHU5g/8Cr1487IJQb5cbpLu2Nviu3wjzhbCFFdbl99uLifdc0GK1P6fqDNQ7BVx
# 2vpZgJRXLTxlUSTpreFw4z6TH7/C4HoNiluQV4l0vxqG/Y9q68SJBpT9WENwXUyY
# +2laDmGQbUjDznxIFlmCgZZAssCIJNp0esNE9hvwkQCarZx9m+QQSSkeVHVWNFqX
# +zTd4v076Q9hi53+4e7FlqFKaFoa54IcZe3gz+GjY/IXMqCDNFw9e9xJxML+zSg3
# HZ4/YMQj+EsKX2gm460EYBmt13kd0wdtFzA1MNc7XcSlBlLk/WmezpEzHZRubiLs
# mbUZ68/cweJmrO0WatycWg9JwQ2q9FlKH1Acgun4Fcf8Zov5ovHuYAsWYbdGDbN1
# E7pY/XlUf6b7Vk+yAGTnKKRi6OguTEmVyRRFy/4V8TwvZNycbeOMebKilGQUGfKj
# iLWuzF6NilT4ZGo7sWnlLZWcmrxN57wJh77GlmcqiqguskB8WGdh/SZSVCkkzr3y
# PN3FGSTseNaxalcjECEFnfE8+bUShLei+I6fppTfqLBaLHJ72lRel0Cg07FS8oM4
# 3ev7etH7jFT5xET00DBamDXacgNtLqFqO6XIK3bFTkLmP0FFQi9u+bvy04IyTVCC
# gd9Zg2vhxp0mjuwtelB+i7yD3pmA2LWFkEzoShpkH/h38CnpoyQ=
# =+69I
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 24 Oct 2024 06:30:20 BST
# gpg:                using RSA key A0F66548F04895EBFE6B0B6051A343C7CFFBECA1
# gpg: Good signature from "Cédric Le Goater <clg@redhat.com>" [full]
# gpg:                 aka "Cédric Le Goater <clg@kaod.org>" [full]
# Primary key fingerprint: A0F6 6548 F048 95EB FE6B 0B60 51A3 43C7 CFFB ECA1

* tag 'pull-vfio-20241024' of https://github.com/legoater/qemu:
  vfio/helpers: Align mmaps
  vfio/helpers: Refactor vfio_region_mmap() error handling
  vfio/migration: Change trace formats from hex to decimal
  vfio/migration: Report only stop-copy size in vfio_state_pending_exact()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--  hw/vfio/helpers.c     | 66
-rw-r--r--  hw/vfio/migration.c   |  3
-rw-r--r--  hw/vfio/trace-events  | 10
3 files changed, 52 insertions(+), 27 deletions(-)
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index ea15c79db0..913796f437 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -27,6 +27,7 @@
#include "trace.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
+#include "qemu/units.h"
#include "monitor/monitor.h"
/*
@@ -395,7 +396,7 @@ static void vfio_subregion_unmap(VFIORegion *region, int index)
int vfio_region_mmap(VFIORegion *region)
{
- int i, prot = 0;
+ int i, ret, prot = 0;
char *name;
if (!region->mem) {
@@ -406,27 +407,40 @@ int vfio_region_mmap(VFIORegion *region)
prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;
for (i = 0; i < region->nr_mmaps; i++) {
- region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
- MAP_SHARED, region->vbasedev->fd,
- region->fd_offset +
- region->mmaps[i].offset);
- if (region->mmaps[i].mmap == MAP_FAILED) {
- int ret = -errno;
+ size_t align = MIN(1ULL << ctz64(region->mmaps[i].size), 1 * GiB);
+ void *map_base, *map_align;
- trace_vfio_region_mmap_fault(memory_region_name(region->mem), i,
- region->fd_offset +
- region->mmaps[i].offset,
- region->fd_offset +
- region->mmaps[i].offset +
- region->mmaps[i].size - 1, ret);
-
- region->mmaps[i].mmap = NULL;
+ /*
+ * Align the mmap for more efficient mapping in the kernel. Ideally
+ * we'd know the PMD and PUD mapping sizes to use as discrete alignment
+ * intervals, but we don't. As of Linux v6.12, the largest PUD size
+ * supporting huge pfnmap is 1GiB (ARCH_SUPPORTS_PUD_PFNMAP is only set
+ * on x86_64). Align by power-of-two size, capped at 1GiB.
+ *
+ * NB. qemu_memalign() and friends actually allocate memory, whereas
+ * the region size here can exceed host memory, therefore we manually
+ * create an oversized anonymous mapping and clean it up for alignment.
+ */
+ map_base = mmap(0, region->mmaps[i].size + align, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (map_base == MAP_FAILED) {
+ ret = -errno;
+ goto no_mmap;
+ }
- for (i--; i >= 0; i--) {
- vfio_subregion_unmap(region, i);
- }
+ map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align);
+ munmap(map_base, map_align - map_base);
+ munmap(map_align + region->mmaps[i].size,
+ align - (map_align - map_base));
- return ret;
+ region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot,
+ MAP_SHARED | MAP_FIXED,
+ region->vbasedev->fd,
+ region->fd_offset +
+ region->mmaps[i].offset);
+ if (region->mmaps[i].mmap == MAP_FAILED) {
+ ret = -errno;
+ goto no_mmap;
}
name = g_strdup_printf("%s mmaps[%d]",
@@ -446,6 +460,20 @@ int vfio_region_mmap(VFIORegion *region)
}
return 0;
+
+no_mmap:
+ trace_vfio_region_mmap_fault(memory_region_name(region->mem), i,
+ region->fd_offset + region->mmaps[i].offset,
+ region->fd_offset + region->mmaps[i].offset +
+ region->mmaps[i].size - 1, ret);
+
+ region->mmaps[i].mmap = NULL;
+
+ for (i--; i >= 0; i--) {
+ vfio_subregion_unmap(region, i);
+ }
+
+ return ret;
}
void vfio_region_unmap(VFIORegion *region)
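
For illustration only, here is a standalone sketch (outside QEMU, not part of
the patch) of the align-by-overmap technique the hunk above introduces:
reserve an oversized anonymous PROT_NONE mapping, trim it down to a
power-of-two-aligned window, then place the real device mapping there with
MAP_FIXED. The helper name map_fd_aligned() and its error handling are
illustrative assumptions; the power-of-two alignment capped at 1 GiB mirrors
the patch.

    #include <stdint.h>
    #include <stddef.h>
    #include <sys/mman.h>

    #define SKETCH_GiB (1024ULL * 1024 * 1024)

    static void *map_fd_aligned(int fd, off_t offset, size_t size, int prot)
    {
        /* Largest power of two dividing size (assumed non-zero), capped at
         * 1 GiB, the largest PUD size that currently supports huge pfnmap. */
        size_t align = (size_t)1 << __builtin_ctzll(size);
        if (align > SKETCH_GiB) {
            align = SKETCH_GiB;
        }

        /* Reserve size + align bytes so an aligned start is guaranteed to fit. */
        void *base = mmap(NULL, size + align, PROT_NONE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (base == MAP_FAILED) {
            return MAP_FAILED;
        }

        char *start = (char *)(((uintptr_t)base + align - 1) &
                               ~((uintptr_t)align - 1));
        size_t head = start - (char *)base;

        /* Give back the unaligned head and the unused tail of the reservation. */
        if (head) {
            munmap(base, head);
        }
        if (align - head) {
            munmap(start + size, align - head);
        }

        /* Replace the remaining reservation with the real file mapping. */
        return mmap(start, size, prot, MAP_SHARED | MAP_FIXED, fd, offset);
    }

MAP_FIXED is safe here because it only ever replaces the anonymous
reservation created a few lines earlier; with the virtual address aligned,
the kernel is free to install PMD- or PUD-sized pfnmap entries when the
underlying BAR allows it.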
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 17199b73ae..992dc3b102 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -576,9 +576,6 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy,
if (vfio_device_state_is_precopy(vbasedev)) {
vfio_query_precopy_size(migration);
-
- *must_precopy +=
- migration->precopy_init_size + migration->precopy_dirty_size;
}
trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy,
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index c475c273fd..29789e8d27 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -151,7 +151,7 @@ vfio_display_edid_write_error(void) ""
vfio_load_cleanup(const char *name) " (%s)"
vfio_load_device_config_state(const char *name) " (%s)"
vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
-vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size 0x%"PRIx64" ret %d"
+vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size %"PRIu64" ret %d"
vfio_migration_realize(const char *name) " (%s)"
vfio_migration_set_device_state(const char *name, const char *state) " (%s) state %s"
vfio_migration_set_state(const char *name, const char *new_state, const char *recover_state) " (%s) new state %s, recover state %s"
@@ -160,10 +160,10 @@ vfio_save_block(const char *name, int data_size) " (%s) data_size %d"
vfio_save_cleanup(const char *name) " (%s)"
vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
vfio_save_device_config_state(const char *name) " (%s)"
-vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
-vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size 0x%"PRIx64
-vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
-vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
+vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64
+vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size %"PRIu64
+vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy %"PRIu64" postcopy %"PRIu64" precopy initial size %"PRIu64" precopy dirty size %"PRIu64
+vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy %"PRIu64" postcopy %"PRIu64" stopcopy size %"PRIu64" precopy initial size %"PRIu64" precopy dirty size %"PRIu64
vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s"
vfio_vmstate_change_prepare(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s"
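
As a side note on the trace-events change above, a minimal standalone
illustration (not part of the patch) of the hex-to-decimal switch: the same
64-bit size printed with PRIx64 (old format) and PRIu64 (new format). The
sample value is made up.

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t stopcopy_size = 4295098368ULL; /* hypothetical device state size */

        printf("old: stopcopy size 0x%" PRIx64 "\n", stopcopy_size); /* 0x100020000 */
        printf("new: stopcopy size %" PRIu64 "\n", stopcopy_size);   /* 4295098368 */
        return 0;
    }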