Diffstat (limited to 'include')
-rw-r--r--  include/block/aio.h             31
-rw-r--r--  include/exec/memory.h          324
-rw-r--r--  include/hw/block/block.h         3
-rw-r--r--  include/hw/vfio/vfio-common.h   12
-rw-r--r--  include/hw/virtio/virtio-mem.h   3
-rw-r--r--  include/migration/vmstate.h      1
-rw-r--r--  include/qemu/main-loop.h         4
7 files changed, 350 insertions(+), 28 deletions(-)
diff --git a/include/block/aio.h b/include/block/aio.h
index 10fcae1515..807edce9b5 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -292,19 +292,44 @@ void aio_context_acquire(AioContext *ctx);
void aio_context_release(AioContext *ctx);
/**
+ * aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will
+ * run only once and as soon as possible.
+ *
+ * @name: A human-readable identifier for debugging purposes.
+ */
+void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
+ const char *name);
+
+/**
* aio_bh_schedule_oneshot: Allocate a new bottom half structure that will run
* only once and as soon as possible.
+ *
+ * A convenience wrapper for aio_bh_schedule_oneshot_full() that uses cb as the
+ * name string.
*/
-void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
+#define aio_bh_schedule_oneshot(ctx, cb, opaque) \
+ aio_bh_schedule_oneshot_full((ctx), (cb), (opaque), (stringify(cb)))
/**
- * aio_bh_new: Allocate a new bottom half structure.
+ * aio_bh_new_full: Allocate a new bottom half structure.
*
* Bottom halves are lightweight callbacks whose invocation is guaranteed
* to be wait-free, thread-safe and signal-safe. The #QEMUBH structure
* is opaque and must be allocated prior to its use.
+ *
+ * @name: A human-readable identifier for debugging purposes.
+ */
+QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
+ const char *name);
+
+/**
+ * aio_bh_new: Allocate a new bottom half structure.
+ *
+ * A convenience wrapper for aio_bh_new_full() that uses cb as the name
+ * string.
*/
-QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
+#define aio_bh_new(ctx, cb, opaque) \
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)))
/**
* aio_notify: Force processing of pending events.
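Usage sketch (not part of the diff; the callback and state names are hypothetical): callers keep using the short forms, and the macros derive the debug name from the callback symbol via stringify().

    static void flush_done_bh(void *opaque)
    {
        /* runs exactly once, as soon as possible, in the chosen AioContext */
    }

    /* 'ctx' is an AioContext *, 's' is opaque caller state */
    aio_bh_schedule_oneshot(ctx, flush_done_bh, s);
    /* expands to: aio_bh_schedule_oneshot_full(ctx, flush_done_bh, s, "flush_done_bh") */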
diff --git a/include/exec/memory.h b/include/exec/memory.h
index b116f7c64e..c3d417d317 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -42,6 +42,12 @@ typedef struct IOMMUMemoryRegionClass IOMMUMemoryRegionClass;
DECLARE_OBJ_CHECKERS(IOMMUMemoryRegion, IOMMUMemoryRegionClass,
IOMMU_MEMORY_REGION, TYPE_IOMMU_MEMORY_REGION)
+#define TYPE_RAM_DISCARD_MANAGER "qemu:ram-discard-manager"
+typedef struct RamDiscardManagerClass RamDiscardManagerClass;
+typedef struct RamDiscardManager RamDiscardManager;
+DECLARE_OBJ_CHECKERS(RamDiscardManager, RamDiscardManagerClass,
+ RAM_DISCARD_MANAGER, TYPE_RAM_DISCARD_MANAGER);
+
#ifdef CONFIG_FUZZ
void fuzz_dma_read_cb(size_t addr,
size_t len,
@@ -65,6 +71,28 @@ struct ReservedRegion {
unsigned type;
};
+/**
+ * struct MemoryRegionSection: describes a fragment of a #MemoryRegion
+ *
+ * @mr: the region, or %NULL if empty
+ * @fv: the flat view of the address space the region is mapped in
+ * @offset_within_region: the beginning of the section, relative to @mr's start
+ * @size: the size of the section; will not exceed @mr's boundaries
+ * @offset_within_address_space: the address of the first byte of the section
+ * relative to the region's address space
+ * @readonly: writes to this section are ignored
+ * @nonvolatile: this section is non-volatile
+ */
+struct MemoryRegionSection {
+ Int128 size;
+ MemoryRegion *mr;
+ FlatView *fv;
+ hwaddr offset_within_region;
+ hwaddr offset_within_address_space;
+ bool readonly;
+ bool nonvolatile;
+};
+
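Illustrative helper (not part of the diff) showing how the fields relate; it assumes int128_get64() from "qemu/int128.h" and a section size that fits into 64 bit.

    /* Address of the last byte of the section within its address space. */
    static hwaddr section_last_byte(const MemoryRegionSection *s)
    {
        return s->offset_within_address_space + int128_get64(s->size) - 1;
    }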
typedef struct IOMMUTLBEntry IOMMUTLBEntry;
/* See address_space_translate: bit 0 is read, bit 1 is write. */
@@ -448,6 +476,206 @@ struct IOMMUMemoryRegionClass {
Error **errp);
};
+typedef struct RamDiscardListener RamDiscardListener;
+typedef int (*NotifyRamPopulate)(RamDiscardListener *rdl,
+ MemoryRegionSection *section);
+typedef void (*NotifyRamDiscard)(RamDiscardListener *rdl,
+ MemoryRegionSection *section);
+
+struct RamDiscardListener {
+ /*
+ * @notify_populate:
+ *
+ * Notification that previously discarded memory is about to get populated.
+ * Listeners are able to object. If any listener objects, already
+ * successfully notified listeners are notified about a discard again.
+ *
+ * @rdl: the #RamDiscardListener getting notified
+ * @section: the #MemoryRegionSection to get populated. The section
+ * is aligned within the memory region to the minimum granularity
+ * unless it would exceed the registered section.
+ *
+ * Returns 0 on success. If the notification is rejected by the listener,
+ * an error is returned.
+ */
+ NotifyRamPopulate notify_populate;
+
+ /*
+ * @notify_discard:
+ *
+ * Notification that previously populated memory was discarded successfully
+ * and listeners should drop all references to such memory and prevent
+ * new population (e.g., unmap).
+ *
+ * @rdl: the #RamDiscardListener getting notified
+ * @section: the #MemoryRegionSection to get discarded. The section
+ * is aligned within the memory region to the minimum granularity
+ * unless it would exceed the registered section.
+ */
+ NotifyRamDiscard notify_discard;
+
+ /*
+ * @double_discard_supported:
+ *
+ * The listener supports getting @notify_discard notifications that span
+ * already discarded parts.
+ */
+ bool double_discard_supported;
+
+ MemoryRegionSection *section;
+ QLIST_ENTRY(RamDiscardListener) next;
+};
+
+static inline void ram_discard_listener_init(RamDiscardListener *rdl,
+ NotifyRamPopulate populate_fn,
+ NotifyRamDiscard discard_fn,
+ bool double_discard_supported)
+{
+ rdl->notify_populate = populate_fn;
+ rdl->notify_discard = discard_fn;
+ rdl->double_discard_supported = double_discard_supported;
+}
+
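Minimal sketch of a consumer wiring up a listener (illustrative only; the callback names and the surrounding device code are hypothetical).

    static int my_notify_populate(RamDiscardListener *rdl,
                                  MemoryRegionSection *section)
    {
        /* map/pin the soon-to-be-populated range; return -errno to object */
        return 0;
    }

    static void my_notify_discard(RamDiscardListener *rdl,
                                  MemoryRegionSection *section)
    {
        /* drop all mappings/references to the discarded range */
    }

    RamDiscardListener rdl;

    ram_discard_listener_init(&rdl, my_notify_populate, my_notify_discard, false);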
+typedef int (*ReplayRamPopulate)(MemoryRegionSection *section, void *opaque);
+
+/*
+ * RamDiscardManagerClass:
+ *
+ * A #RamDiscardManager coordinates which parts of specific RAM #MemoryRegion
+ * regions are currently populated to be used/accessed by the VM, notifying
+ * listeners after parts were discarded (freeing up memory) and before parts
+ * will be populated (consuming memory) to be used/accessed by the VM again.
+ *
+ * A #RamDiscardManager can only be set for a RAM #MemoryRegion while the
+ * #MemoryRegion isn't mapped yet; it cannot change while the #MemoryRegion is
+ * mapped.
+ *
+ * The #RamDiscardManager is intended to be used by technologies that are
+ * incompatible with discarding of RAM (e.g., VFIO, which may pin all
+ * memory inside a #MemoryRegion), and require proper coordination to only
+ * map the currently populated parts, to prevent parts that are expected to
+ * remain discarded from silently getting populated and consuming memory.
+ * Technologies that support discarding of RAM don't have to bother and can
+ * simply map the whole #MemoryRegion.
+ *
+ * An example #RamDiscardManager is virtio-mem, which logically (un)plugs
+ * memory within an assigned RAM #MemoryRegion, coordinated with the VM.
+ * Logically unplugging memory consists of discarding RAM. The VM agrees not to
+ * access unplugged (discarded) memory, especially via DMA. virtio-mem will
+ * properly coordinate with listeners before memory is plugged (populated),
+ * and after memory is unplugged (discarded).
+ *
+ * Listeners are called in multiples of the minimum granularity (unless it
+ * would exceed the registered range) and changes are aligned to the minimum
+ * granularity within the #MemoryRegion. Listeners have to prepare for memory
+ * becoming discarded at a different granularity than it was populated at, and
+ * the other way around.
+ */
+struct RamDiscardManagerClass {
+ /* private */
+ InterfaceClass parent_class;
+
+ /* public */
+
+ /**
+ * @get_min_granularity:
+ *
+ * Get the minimum granularity in which listeners will get notified
+ * about changes within the #MemoryRegion via the #RamDiscardManager.
+ *
+ * @rdm: the #RamDiscardManager
+ * @mr: the #MemoryRegion
+ *
+ * Returns the minimum granularity.
+ */
+ uint64_t (*get_min_granularity)(const RamDiscardManager *rdm,
+ const MemoryRegion *mr);
+
+ /**
+ * @is_populated:
+ *
+ * Check whether the given #MemoryRegionSection is completely populated
+ * (i.e., no parts are currently discarded) via the #RamDiscardManager.
+ * There are no alignment requirements.
+ *
+ * @rdm: the #RamDiscardManager
+ * @section: the #MemoryRegionSection
+ *
+ * Returns whether the given range is completely populated.
+ */
+ bool (*is_populated)(const RamDiscardManager *rdm,
+ const MemoryRegionSection *section);
+
+ /**
+ * @replay_populated:
+ *
+ * Call the #ReplayRamPopulate callback for all populated parts within the
+ * #MemoryRegionSection via the #RamDiscardManager.
+ *
+ * In case any call fails, no further calls are made.
+ *
+ * @rdm: the #RamDiscardManager
+ * @section: the #MemoryRegionSection
+ * @replay_fn: the #ReplayRamPopulate callback
+ * @opaque: pointer to forward to the callback
+ *
+ * Returns 0 on success, or a negative error if any notification failed.
+ */
+ int (*replay_populated)(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamPopulate replay_fn, void *opaque);
+
+ /**
+ * @register_listener:
+ *
+ * Register a #RamDiscardListener for the given #MemoryRegionSection and
+ * immediately notify the #RamDiscardListener about all populated parts
+ * within the #MemoryRegionSection via the #RamDiscardManager.
+ *
+ * In case any notification fails, no further notifications are triggered
+ * and an error is logged.
+ *
+ * @rdm: the #RamDiscardManager
+ * @rdl: the #RamDiscardListener
+ * @section: the #MemoryRegionSection
+ */
+ void (*register_listener)(RamDiscardManager *rdm,
+ RamDiscardListener *rdl,
+ MemoryRegionSection *section);
+
+ /**
+ * @unregister_listener:
+ *
+ * Unregister a previously registered #RamDiscardListener via the
+ * #RamDiscardManager after notifying the #RamDiscardListener about all
+ * populated parts becoming unpopulated within the registered
+ * #MemoryRegionSection.
+ *
+ * @rdm: the #RamDiscardManager
+ * @rdl: the #RamDiscardListener
+ */
+ void (*unregister_listener)(RamDiscardManager *rdm,
+ RamDiscardListener *rdl);
+};
+
+uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm,
+ const MemoryRegion *mr);
+
+bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
+ const MemoryRegionSection *section);
+
+int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamPopulate replay_fn,
+ void *opaque);
+
+void ram_discard_manager_register_listener(RamDiscardManager *rdm,
+ RamDiscardListener *rdl,
+ MemoryRegionSection *section);
+
+void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
+ RamDiscardListener *rdl);
+
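Putting the pieces together, a hedged sketch of how a consumer might register and unregister a listener (memory_region_get_ram_discard_manager() is declared further below); 'section' and 'rdl' are assumed to come from the caller, and error handling is omitted.

    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);

    if (rdm) {
        /* notifications arrive in multiples of the minimum granularity,
         * e.g., useful for sizing a tracking bitmap */
        uint64_t granularity = ram_discard_manager_get_min_granularity(rdm,
                                                                       section->mr);
        ram_discard_manager_register_listener(rdm, &rdl, section);
        /* ... and on teardown ... */
        ram_discard_manager_unregister_listener(rdm, &rdl);
    }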
typedef struct CoalescedMemoryRange CoalescedMemoryRange;
typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
@@ -494,6 +722,7 @@ struct MemoryRegion {
const char *name;
unsigned ioeventfd_nb;
MemoryRegionIoeventfd *ioeventfds;
+ RamDiscardManager *rdm; /* Only for RAM */
};
struct IOMMUMemoryRegion {
@@ -825,28 +1054,6 @@ typedef bool (*flatview_cb)(Int128 start,
*/
void flatview_for_each_range(FlatView *fv, flatview_cb cb, void *opaque);
-/**
- * struct MemoryRegionSection: describes a fragment of a #MemoryRegion
- *
- * @mr: the region, or %NULL if empty
- * @fv: the flat view of the address space the region is mapped in
- * @offset_within_region: the beginning of the section, relative to @mr's start
- * @size: the size of the section; will not exceed @mr's boundaries
- * @offset_within_address_space: the address of the first byte of the section
- * relative to the region's address space
- * @readonly: writes to this section are ignored
- * @nonvolatile: this section is non-volatile
- */
-struct MemoryRegionSection {
- Int128 size;
- MemoryRegion *mr;
- FlatView *fv;
- hwaddr offset_within_region;
- hwaddr offset_within_address_space;
- bool readonly;
- bool nonvolatile;
-};
-
static inline bool MemoryRegionSection_eq(MemoryRegionSection *a,
MemoryRegionSection *b)
{
@@ -860,6 +1067,26 @@ static inline bool MemoryRegionSection_eq(MemoryRegionSection *a,
}
/**
+ * memory_region_section_new_copy: Copy a memory region section
+ *
+ * Allocate memory for a new copy, copy the memory region section, and
+ * properly take a reference on all relevant members.
+ *
+ * @s: the #MemoryRegionSection to copy
+ */
+MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s);
+
+/**
+ * memory_region_section_free_copy: Free a copied memory region section
+ *
+ * Free a copy of a memory section created via memory_region_section_new_copy(),
+ * properly dropping references on all relevant members.
+ *
+ * @s: the #MemoryRegionSection to free
+ */
+void memory_region_section_free_copy(MemoryRegionSection *s);
+
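For example (illustrative, not part of the diff), a listener that defers work to a bottom half could keep a stable copy of a section it was notified about:

    MemoryRegionSection *tmp = memory_region_section_new_copy(section);

    /* ... use 'tmp' later, e.g. from a bottom half ... */

    memory_region_section_free_copy(tmp);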
+/**
* memory_region_init: Initialize a memory region
*
* The region typically acts as a container for other memory regions. Use
@@ -2024,6 +2251,41 @@ bool memory_region_present(MemoryRegion *container, hwaddr addr);
bool memory_region_is_mapped(MemoryRegion *mr);
/**
+ * memory_region_get_ram_discard_manager: get the #RamDiscardManager for a
+ * #MemoryRegion
+ *
+ * The #RamDiscardManager cannot change while a memory region is mapped.
+ *
+ * @mr: the #MemoryRegion
+ */
+RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr);
+
+/**
+ * memory_region_has_ram_discard_manager: check whether a #MemoryRegion has a
+ * #RamDiscardManager assigned
+ *
+ * @mr: the #MemoryRegion
+ */
+static inline bool memory_region_has_ram_discard_manager(MemoryRegion *mr)
+{
+ return !!memory_region_get_ram_discard_manager(mr);
+}
+
+/**
+ * memory_region_set_ram_discard_manager: set the #RamDiscardManager for a
+ * #MemoryRegion
+ *
+ * This function must not be called for a mapped #MemoryRegion, a #MemoryRegion
+ * that does not cover RAM, or a #MemoryRegion that already has a
+ * #RamDiscardManager assigned.
+ *
+ * @mr: the #MemoryRegion
+ * @rdm: #RamDiscardManager to set
+ */
+void memory_region_set_ram_discard_manager(MemoryRegion *mr,
+ RamDiscardManager *rdm);
+
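On the provider side, a hedged sketch of installing a manager on the RAM region it owns before that region gets mapped; 'provider' and 'memdev_mr' are placeholder names, and the provider is assumed to implement the TYPE_RAM_DISCARD_MANAGER interface.

    assert(!memory_region_is_mapped(memdev_mr));
    memory_region_set_ram_discard_manager(memdev_mr, RAM_DISCARD_MANAGER(provider));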
+/**
* memory_region_find: translate an address/size relative to a
* MemoryRegion into a #MemoryRegionSection.
*
@@ -2632,6 +2894,12 @@ static inline MemOp devend_memop(enum device_endian end)
int ram_block_discard_disable(bool state);
/*
+ * See ram_block_discard_disable(): only disable uncoordinated discards,
+ * keeping coordinated discards (via the RamDiscardManager) enabled.
+ */
+int ram_block_uncoordinated_discard_disable(bool state);
+
+/*
* Inhibit technologies that disable discarding of pages in RAM blocks.
*
* Returns 0 if successful. Returns -EBUSY if discards are already set to
@@ -2640,12 +2908,20 @@ int ram_block_discard_disable(bool state);
int ram_block_discard_require(bool state);
/*
- * Test if discarding of memory in ram blocks is disabled.
+ * See ram_block_discard_require(): only inhibit technologies that disable
+ * any discarding of pages in RAM blocks, allowing co-existence with
+ * technologies that merely inhibit uncoordinated discards (i.e., that support
+ * coordinated discards via the RamDiscardManager).
+ */
+int ram_block_coordinated_discard_require(bool state);
+
+/*
+ * Test if any discarding of memory in ram blocks is disabled.
*/
bool ram_block_discard_is_disabled(void);
/*
- * Test if discarding of memory in ram blocks is required to work reliably.
+ * Test if any discarding of memory in ram blocks is required to work reliably.
*/
bool ram_block_discard_is_required(void);
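Calling-convention sketch for the coordinated variant (the surrounding realize/unrealize code and error handling are assumptions, not part of the diff).

    /* realize: declare that we rely on coordinated discards */
    int ret = ram_block_coordinated_discard_require(true);
    if (ret) {
        error_setg_errno(errp, -ret, "Discarding RAM is disabled");
        return;
    }

    /* ... register RamDiscardListeners, etc. ... */

    /* unrealize: drop the requirement again */
    ram_block_coordinated_discard_require(false);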
diff --git a/include/hw/block/block.h b/include/hw/block/block.h
index c172cbe65f..5902c0440a 100644
--- a/include/hw/block/block.h
+++ b/include/hw/block/block.h
@@ -19,6 +19,7 @@
typedef struct BlockConf {
BlockBackend *blk;
+ OnOffAuto backend_defaults;
uint32_t physical_block_size;
uint32_t logical_block_size;
uint32_t min_io_size;
@@ -48,6 +49,8 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
}
#define DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf) \
+ DEFINE_PROP_ON_OFF_AUTO("backend_defaults", _state, \
+ _conf.backend_defaults, ON_OFF_AUTO_AUTO), \
DEFINE_PROP_BLOCKSIZE("logical_block_size", _state, \
_conf.logical_block_size), \
DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 6141162d7a..8af11b0a76 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -88,9 +88,11 @@ typedef struct VFIOContainer {
uint64_t dirty_pgsizes;
uint64_t max_dirty_bitmap_size;
unsigned long pgsizes;
+ unsigned int dma_max_mappings;
QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
QLIST_HEAD(, VFIOGroup) group_list;
+ QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
QLIST_ENTRY(VFIOContainer) next;
} VFIOContainer;
@@ -102,6 +104,16 @@ typedef struct VFIOGuestIOMMU {
QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
} VFIOGuestIOMMU;
+typedef struct VFIORamDiscardListener {
+ VFIOContainer *container;
+ MemoryRegion *mr;
+ hwaddr offset_within_address_space;
+ hwaddr size;
+ uint64_t granularity;
+ RamDiscardListener listener;
+ QLIST_ENTRY(VFIORamDiscardListener) next;
+} VFIORamDiscardListener;
+
typedef struct VFIOHostDMAWindow {
hwaddr min_iova;
hwaddr max_iova;
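A hedged sketch of how the new vrdl_list might be searched for the listener registered for a given section; local names are placeholders and QLIST_FOREACH() comes from "qemu/queue.h".

    VFIORamDiscardListener *vrdl;

    QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
        if (vrdl->mr == section->mr &&
            vrdl->offset_within_address_space ==
            section->offset_within_address_space) {
            break; /* listener covering this section */
        }
    }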
diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h
index 4eeb82d5dd..9a6e348fa2 100644
--- a/include/hw/virtio/virtio-mem.h
+++ b/include/hw/virtio/virtio-mem.h
@@ -67,6 +67,9 @@ struct VirtIOMEM {
/* don't migrate unplugged memory */
NotifierWithReturn precopy_notifier;
+
+ /* listeners to notify on plug/unplug activity. */
+ QLIST_HEAD(, RamDiscardListener) rdl_list;
};
struct VirtIOMEMClass {
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 8df7b69f38..017c03675c 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -153,6 +153,7 @@ typedef enum {
MIG_PRI_DEFAULT = 0,
MIG_PRI_IOMMU, /* Must happen before PCI devices */
MIG_PRI_PCI_BUS, /* Must happen before IOMMU */
+ MIG_PRI_VIRTIO_MEM, /* Must happen before IOMMU */
MIG_PRI_GICV3_ITS, /* Must happen before PCI devices */
MIG_PRI_GICV3, /* Must happen before the ITS */
MIG_PRI_MAX,
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index 98aef5647c..8dbc6fcb89 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -294,7 +294,9 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms);
void qemu_fd_register(int fd);
-QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque);
+#define qemu_bh_new(cb, opaque) \
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)))
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name);
void qemu_bh_schedule_idle(QEMUBH *bh);
enum {
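As with the aio.h change above, existing qemu_bh_new() callers stay untouched; a sketch with hypothetical names:

    s->bh = qemu_bh_new(my_device_bh, s);
    /* equivalent to: qemu_bh_new_full(my_device_bh, s, "my_device_bh") */
    qemu_bh_schedule(s->bh);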