From d24f31db3bcb46b09d8717850112d6a1bbee78f2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 26 Jun 2020 09:22:29 +0200 Subject: exec: Introduce ram_block_discard_(disable|require)() We want to replace qemu_balloon_inhibit() by something more generic. Especially, we want to make sure that technologies that really rely on RAM block discards to work reliably run mutually exclusive with technologies that effectively break it. E.g., vfio will usually pin all guest memory, turning the virtio-balloon basically useless and making the VM consume more memory than reported via the balloon. While the balloon is special already (=> no guarantees, same behavior possible after reboots and with huge pages), this will be different, especially, with virtio-mem. Let's implement a way such that we can make both types of technology run mutually exclusive. We'll convert existing balloon inhibitors in successive patches and add some new ones. Add the check to qemu_balloon_is_inhibited() for now. We might want to make virtio-balloon an actual inhibitor in the future - however, that requires more thought to not break existing setups. Reviewed-by: Dr. David Alan Gilbert Cc: "Michael S. Tsirkin" Cc: Richard Henderson Cc: Paolo Bonzini Signed-off-by: David Hildenbrand Message-Id: <20200626072248.78761-3-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- include/exec/memory.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include') diff --git a/include/exec/memory.h b/include/exec/memory.h index 7207025bd4..38ec38b9a8 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -2472,6 +2472,47 @@ static inline MemOp devend_memop(enum device_endian end) } #endif +/* + * Inhibit technologies that require discarding of pages in RAM blocks, e.g., + * to manage the actual amount of memory consumed by the VM (then, the memory + * provided by RAM blocks might be bigger than the desired memory consumption). + * This *must* be set if: + * - Discarding parts of a RAM blocks does not result in the change being + * reflected in the VM and the pages getting freed. + * - All memory in RAM blocks is pinned or duplicated, invaldiating any previous + * discards blindly. + * - Discarding parts of a RAM blocks will result in integrity issues (e.g., + * encrypted VMs). + * Technologies that only temporarily pin the current working set of a + * driver are fine, because we don't expect such pages to be discarded + * (esp. based on guest action like balloon inflation). + * + * This is *not* to be used to protect from concurrent discards (esp., + * postcopy). + * + * Returns 0 if successful. Returns -EBUSY if a technology that relies on + * discards to work reliably is active. + */ +int ram_block_discard_disable(bool state); + +/* + * Inhibit technologies that disable discarding of pages in RAM blocks. + * + * Returns 0 if successful. Returns -EBUSY if discards are already set to + * broken. + */ +int ram_block_discard_require(bool state); + +/* + * Test if discarding of memory in ram blocks is disabled. + */ +bool ram_block_discard_is_disabled(void); + +/* + * Test if discarding of memory in ram blocks is required to work reliably. 
+ */ +bool ram_block_discard_is_required(void); + #endif #endif -- cgit v1.2.3 From aff92b828647839b956dfa647a18b3ce10058e6a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 26 Jun 2020 09:22:30 +0200 Subject: vfio: Convert to ram_block_discard_disable() VFIO is (except devices without a physical IOMMU or some mediated devices) incompatible with discarding of RAM. The kernel will pin basically all VM memory. Let's convert to ram_block_discard_disable(), which can now fail, in contrast to qemu_balloon_inhibit(). Leave "x-balloon-allowed" named as it is for now. Reviewed-by: Tony Krowiak Acked-by: Cornelia Huck Cc: Cornelia Huck Cc: Alex Williamson Cc: Christian Borntraeger Cc: Tony Krowiak Cc: Halil Pasic Cc: Pierre Morel Cc: Eric Farman Signed-off-by: David Hildenbrand Message-Id: <20200626072248.78761-4-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/vfio/vfio-common.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index fd564209ac..c78f3ff559 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -108,7 +108,7 @@ typedef struct VFIODevice { bool reset_works; bool needs_reset; bool no_mmap; - bool balloon_allowed; + bool ram_block_discard_allowed; VFIODeviceOps *ops; unsigned int num_irqs; unsigned int num_regions; @@ -128,7 +128,7 @@ typedef struct VFIOGroup { QLIST_HEAD(, VFIODevice) device_list; QLIST_ENTRY(VFIOGroup) next; QLIST_ENTRY(VFIOGroup) container_next; - bool balloon_allowed; + bool ram_block_discard_allowed; } VFIOGroup; typedef struct VFIODMABuf { -- cgit v1.2.3 From 06df2e692a95509ee5f6e7d1663502adb74cb2a5 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 26 Jun 2020 09:22:33 +0200 Subject: virtio-balloon: Rip out qemu_balloon_inhibit() The only remaining special case is postcopy. 
It cannot handle concurrent discards yet, which would result in requesting already sent pages from the source. Special-case it in virtio-balloon instead. Introduce migration_in_incoming_postcopy(), to find out if incoming postcopy is active. Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Michael S. Tsirkin Cc: "Michael S. Tsirkin" Cc: Juan Quintela Cc: "Dr. David Alan Gilbert" Signed-off-by: David Hildenbrand Message-Id: <20200626072248.78761-7-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/migration/misc.h | 2 ++ include/sysemu/balloon.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/migration/misc.h b/include/migration/misc.h index d2762257aa..34e7d75713 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -69,6 +69,8 @@ bool migration_has_failed(MigrationState *); /* ...and after the device transmission */ bool migration_in_postcopy_after_devices(MigrationState *); void migration_global_dump(Monitor *mon); +/* True if incomming migration entered POSTCOPY_INCOMING_DISCARD */ +bool migration_in_incoming_postcopy(void); /* migration/block-dirty-bitmap.c */ void dirty_bitmap_mig_init(void); diff --git a/include/sysemu/balloon.h b/include/sysemu/balloon.h index aea0c44985..20a2defe3a 100644 --- a/include/sysemu/balloon.h +++ b/include/sysemu/balloon.h @@ -23,7 +23,5 @@ typedef void (QEMUBalloonStatus)(void *opaque, BalloonInfo *info); int qemu_add_balloon_handler(QEMUBalloonEvent *event_func, QEMUBalloonStatus *stat_func, void *opaque); void qemu_remove_balloon_handler(void *opaque); -bool qemu_balloon_is_inhibited(void); -void qemu_balloon_inhibit(bool state); #endif -- cgit v1.2.3 From 18b1d3c952d00ef1881852ee46cf41783dacf530 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 26 Jun 2020 09:22:36 +0200 Subject: migration/colo: Use ram_block_discard_disable() COLO will copy all memory in a RAM block, disable discarding of RAM. 
Reviewed-by: Dr. David Alan Gilbert Tested-by: Lukas Straub Cc: "Michael S. Tsirkin" Cc: Hailiang Zhang Cc: Juan Quintela Cc: "Dr. David Alan Gilbert" Signed-off-by: David Hildenbrand Message-Id: <20200626072248.78761-10-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/migration/colo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/migration/colo.h b/include/migration/colo.h index 1636e6f907..768e1f04c3 100644 --- a/include/migration/colo.h +++ b/include/migration/colo.h @@ -25,7 +25,7 @@ void migrate_start_colo_process(MigrationState *s); bool migration_in_colo_state(void); /* loadvm */ -void migration_incoming_enable_colo(void); +int migration_incoming_enable_colo(void); void migration_incoming_disable_colo(void); bool migration_incoming_colo_enabled(void); void *colo_process_incoming_thread(void *opaque); -- cgit v1.2.3 From 910b25766b336a94d69f5f6e1b55c10b3aeb4326 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 26 Jun 2020 09:22:37 +0200 Subject: virtio-mem: Paravirtualized memory hot(un)plug This is the very basic/initial version of virtio-mem. An introduction to virtio-mem can be found in the Linux kernel driver [1]. While it can be used in the current state for hotplug of a smaller amount of memory, it will heavily benefit from resizeable memory regions in the future. Each virtio-mem device manages a memory region (provided via a memory backend). After requested by the hypervisor ("requested-size"), the guest can try to plug/unplug blocks of memory within that region, in order to reach the requested size. Initially, and after a reboot, all memory is unplugged (except in special cases - reboot during postcopy). The guest may only try to plug/unplug blocks of memory within the usable region size. The usable region size is a little bigger than the requested size, to give the device driver some flexibility. 
The usable region size will only grow, except on reboots or when all memory is requested to get unplugged. The guest can never plug more memory than requested. Unplugged memory will get zapped/discarded, similar to a balloon device. The block size is variable, however, it is always chosen in a way such that THP splits are avoided (e.g., 2MB). The state of each block (plugged/unplugged) is tracked in a bitmap. As virtio-mem devices (e.g., virtio-mem-pci) will be memory devices, we now expose "VirtioMEMDeviceInfo" via "query-memory-devices". -------------------------------------------------------------------------- There are two important follow-up items that are in the works: 1. Resizeable memory regions: Use resizeable allocations/RAM blocks to grow/shrink along with the usable region size. This avoids creating initially very big VMAs, RAM blocks, and KVM slots. 2. Protection of unplugged memory: Make sure the guest cannot actually make use of unplugged memory. Other follow-up items that are in the works: 1. Exclude unplugged memory during migration (via precopy notifier). 2. Handle remapping of memory. 3. Support for other architectures. -------------------------------------------------------------------------- Example usage (virtio-mem-pci is introduced in follow-up patches): Start QEMU with two virtio-mem devices (one per NUMA node): $ qemu-system-x86_64 -m 4G,maxmem=20G \ -smp sockets=2,cores=2 \ -numa node,nodeid=0,cpus=0-1 -numa node,nodeid=1,cpus=2-3 \ [...] 
-object memory-backend-ram,id=mem0,size=8G \ -device virtio-mem-pci,id=vm0,memdev=mem0,node=0,requested-size=0M \ -object memory-backend-ram,id=mem1,size=8G \ -device virtio-mem-pci,id=vm1,memdev=mem1,node=1,requested-size=1G Query the configuration: (qemu) info memory-devices Memory device [virtio-mem]: "vm0" memaddr: 0x140000000 node: 0 requested-size: 0 size: 0 max-size: 8589934592 block-size: 2097152 memdev: /objects/mem0 Memory device [virtio-mem]: "vm1" memaddr: 0x340000000 node: 1 requested-size: 1073741824 size: 1073741824 max-size: 8589934592 block-size: 2097152 memdev: /objects/mem1 Add some memory to node 0: (qemu) qom-set vm0 requested-size 500M Remove some memory from node 1: (qemu) qom-set vm1 requested-size 200M Query the configuration again: (qemu) info memory-devices Memory device [virtio-mem]: "vm0" memaddr: 0x140000000 node: 0 requested-size: 524288000 size: 524288000 max-size: 8589934592 block-size: 2097152 memdev: /objects/mem0 Memory device [virtio-mem]: "vm1" memaddr: 0x340000000 node: 1 requested-size: 209715200 size: 209715200 max-size: 8589934592 block-size: 2097152 memdev: /objects/mem1 [1] https://lkml.kernel.org/r/20200311171422.10484-1-david@redhat.com Cc: "Michael S. Tsirkin" Cc: Eric Blake Cc: Markus Armbruster Cc: "Dr. David Alan Gilbert" Cc: Igor Mammedov Signed-off-by: David Hildenbrand Message-Id: <20200626072248.78761-11-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/virtio/virtio-mem.h | 78 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 include/hw/virtio/virtio-mem.h (limited to 'include') diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h new file mode 100644 index 0000000000..6981096f7c --- /dev/null +++ b/include/hw/virtio/virtio-mem.h @@ -0,0 +1,78 @@ +/* + * Virtio MEM device + * + * Copyright (C) 2020 Red Hat, Inc. 
+ * + * Authors: + * David Hildenbrand + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_VIRTIO_MEM_H +#define HW_VIRTIO_MEM_H + +#include "standard-headers/linux/virtio_mem.h" +#include "hw/virtio/virtio.h" +#include "qapi/qapi-types-misc.h" +#include "sysemu/hostmem.h" + +#define TYPE_VIRTIO_MEM "virtio-mem" + +#define VIRTIO_MEM(obj) \ + OBJECT_CHECK(VirtIOMEM, (obj), TYPE_VIRTIO_MEM) +#define VIRTIO_MEM_CLASS(oc) \ + OBJECT_CLASS_CHECK(VirtIOMEMClass, (oc), TYPE_VIRTIO_MEM) +#define VIRTIO_MEM_GET_CLASS(obj) \ + OBJECT_GET_CLASS(VirtIOMEMClass, (obj), TYPE_VIRTIO_MEM) + +#define VIRTIO_MEM_MEMDEV_PROP "memdev" +#define VIRTIO_MEM_NODE_PROP "node" +#define VIRTIO_MEM_SIZE_PROP "size" +#define VIRTIO_MEM_REQUESTED_SIZE_PROP "requested-size" +#define VIRTIO_MEM_BLOCK_SIZE_PROP "block-size" +#define VIRTIO_MEM_ADDR_PROP "memaddr" + +typedef struct VirtIOMEM { + VirtIODevice parent_obj; + + /* guest -> host request queue */ + VirtQueue *vq; + + /* bitmap used to track unplugged memory */ + int32_t bitmap_size; + unsigned long *bitmap; + + /* assigned memory backend and memory region */ + HostMemoryBackend *memdev; + + /* NUMA node */ + uint32_t node; + + /* assigned address of the region in guest physical memory */ + uint64_t addr; + + /* usable region size (<= region_size) */ + uint64_t usable_region_size; + + /* actual size (how much the guest plugged) */ + uint64_t size; + + /* requested size */ + uint64_t requested_size; + + /* block size and alignment */ + uint64_t block_size; +} VirtIOMEM; + +typedef struct VirtIOMEMClass { + /* private */ + VirtIODevice parent; + + /* public */ + void (*fill_device_info)(const VirtIOMEM *vmen, VirtioMEMDeviceInfo *vi); + MemoryRegion *(*get_memory_region)(VirtIOMEM *vmem, Error **errp); +} VirtIOMEMClass; + +#endif -- cgit v1.2.3 From 0b9a2443a48b806c775b47c4a1e9effdab03ddf2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: 
Fri, 26 Jun 2020 09:22:38 +0200 Subject: virtio-pci: Proxy for virtio-mem Let's add a proxy for virtio-mem, make it a memory device, and pass-through the properties. Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Marcel Apfelbaum Cc: "Dr. David Alan Gilbert" Cc: Igor Mammedov Signed-off-by: David Hildenbrand Message-Id: <20200626072248.78761-12-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/pci/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index a4e9c33416..c1bf7d5356 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -87,6 +87,7 @@ extern bool pci_available; #define PCI_DEVICE_ID_VIRTIO_VSOCK 0x1012 #define PCI_DEVICE_ID_VIRTIO_PMEM 0x1013 #define PCI_DEVICE_ID_VIRTIO_IOMMU 0x1014 +#define PCI_DEVICE_ID_VIRTIO_MEM 0x1015 #define PCI_VENDOR_ID_REDHAT 0x1b36 #define PCI_DEVICE_ID_REDHAT_BRIDGE 0x0001 -- cgit v1.2.3 From c95b4437da6832f3bef9129de897cc7a8334ea8e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 26 Jun 2020 09:22:43 +0200 Subject: virtio-mem: Allow notifiers for size changes We want to send qapi events in case the size of a virtio-mem device changes. This allows upper layers to always know how much memory is actually currently consumed via a virtio-mem device. Unfortunately, we have to report the id of our proxy device. Let's provide an easy way for our proxy device to register, so it can send the qapi events. Piggy-backing on the notifier infrastructure (although we'll only ever have one notifier registered) seems to be an easy way. Reviewed-by: Dr. David Alan Gilbert Cc: "Michael S. Tsirkin" Cc: "Dr. David Alan Gilbert" Cc: Igor Mammedov Signed-off-by: David Hildenbrand Message-Id: <20200626072248.78761-17-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- include/hw/virtio/virtio-mem.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h index 6981096f7c..b74c77cd42 100644 --- a/include/hw/virtio/virtio-mem.h +++ b/include/hw/virtio/virtio-mem.h @@ -64,6 +64,9 @@ typedef struct VirtIOMEM { /* block size and alignment */ uint64_t block_size; + + /* notifiers to notify when "size" changes */ + NotifierList size_change_notifiers; } VirtIOMEM; typedef struct VirtIOMEMClass { @@ -73,6 +76,8 @@ typedef struct VirtIOMEMClass { /* public */ void (*fill_device_info)(const VirtIOMEM *vmen, VirtioMEMDeviceInfo *vi); MemoryRegion *(*get_memory_region)(VirtIOMEM *vmem, Error **errp); + void (*add_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier); + void (*remove_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier); } VirtIOMEMClass; #endif -- cgit v1.2.3 From 0bc7806c5a670fa0bd160caf07489a5106a67d55 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 26 Jun 2020 09:22:47 +0200 Subject: virtio-mem: Exclude unplugged memory during migration The content of unplugged memory is undefined and should not be migrated, ever. Exclude all unplugged memory during precopy using the precopy notifier infrastructure introduced for free page hinting in virtio-balloon. Unplugged memory is marked as "not dirty", meaning it won't be considered for migration. Cc: "Michael S. Tsirkin" Cc: "Dr. David Alan Gilbert" Signed-off-by: David Hildenbrand Message-Id: <20200626072248.78761-21-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- include/hw/virtio/virtio-mem.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h index b74c77cd42..0778224964 100644 --- a/include/hw/virtio/virtio-mem.h +++ b/include/hw/virtio/virtio-mem.h @@ -67,6 +67,9 @@ typedef struct VirtIOMEM { /* notifiers to notify when "size" changes */ NotifierList size_change_notifiers; + + /* don't migrate unplugged memory */ + NotifierWithReturn precopy_notifier; } VirtIOMEM; typedef struct VirtIOMEMClass { -- cgit v1.2.3 From 195784a0cfad57b06cba6d67f286039d5a01babf Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 26 Jun 2020 09:22:48 +0200 Subject: numa: Auto-enable NUMA when any memory devices are possible Let's auto-enable it also when maxmem is specified but no slots are defined. This will result in us properly creating ACPI SRAT tables, indicating the maximum possible PFN to the guest OS. Based on this, e.g., Linux will enable the swiotlb properly. This avoids having to manually force the swiotlb on (swiotlb=force) in Linux in case we're booting only using DMA memory (e.g., 2GB on x86-64), and virtio-mem adds memory later on that really needs the swiotlb to be used for DMA. Let's take care of backwards compatibility if somebody has a setup that specifies "maxram" without "slots". Reported-by: Alex Shi Cc: Peter Maydell Cc: Eduardo Habkost Cc: Marcel Apfelbaum Cc: Sergio Lopez Cc: Paolo Bonzini Cc: Richard Henderson Cc: "Michael S. Tsirkin" Cc: Igor Mammedov Cc: qemu-arm@nongnu.org Signed-off-by: David Hildenbrand Message-Id: <20200626072248.78761-22-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- include/hw/boards.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/hw/boards.h b/include/hw/boards.h index 18815d9be2..426ce5f625 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -207,6 +207,7 @@ struct MachineClass { const char **valid_cpu_types; strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; + bool auto_enable_numa_with_memdev; void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes, int nb_nodes, ram_addr_t size); bool ignore_boot_device_suffixes; -- cgit v1.2.3 From 0165daae5c353bd0d2b72fb39993ece8a845ad75 Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Wed, 1 Jul 2020 22:55:25 +0800 Subject: net: introduce qemu_get_peer This is a small function that can get the peer from given NetClientState and queue_index Signed-off-by: Cindy Lu Message-Id: <20200701145538.22333-2-lulu@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- include/net/net.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/net.h b/include/net/net.h index 39085d9444..e7ef42d62b 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -176,6 +176,7 @@ void hmp_info_network(Monitor *mon, const QDict *qdict); void net_socket_rs_init(SocketReadState *rs, SocketReadStateFinalize *finalize, bool vnet_hdr); +NetClientState *qemu_get_peer(NetClientState *nc, int queue_index); /* NIC info */ -- cgit v1.2.3 From b2a5f62a2284a8933f6f6cda48797b7263b96970 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 1 Jul 2020 22:55:27 +0800 Subject: virtio-bus: introduce queue_enabled method This patch introduces queue_enabled() method which allows the transport to implement its own way to report whether or not a queue is enabled. Signed-off-by: Jason Wang Signed-off-by: Cindy Lu Message-Id: <20200701145538.22333-4-lulu@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- include/hw/virtio/virtio-bus.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h index 38c9399cd4..0f6f215925 100644 --- a/include/hw/virtio/virtio-bus.h +++ b/include/hw/virtio/virtio-bus.h @@ -83,6 +83,10 @@ typedef struct VirtioBusClass { */ int (*ioeventfd_assign)(DeviceState *d, EventNotifier *notifier, int n, bool assign); + /* + * Whether queue number n is enabled. + */ + bool (*queue_enabled)(DeviceState *d, int n); /* * Does the transport have variable vring alignment? * (ie can it ever call virtio_queue_set_align()?) -- cgit v1.2.3 From 68513bcd88d5c6d81cfa6563537c20430facfca7 Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Wed, 1 Jul 2020 22:55:30 +0800 Subject: vhost: introduce new VhostOps vhost_dev_start This patch introduces new VhostOps vhost_dev_start callback which allows the vhost_net set the start/stop status to backend Signed-off-by: Cindy Lu Message-Id: <20200701145538.22333-7-lulu@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- include/hw/virtio/vhost-backend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index 6f6670783f..b80f344cd6 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -112,6 +112,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev, typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev, struct vhost_inflight *inflight); +typedef int (*vhost_dev_start_op)(struct vhost_dev *dev, bool started); typedef struct VhostOps { VhostBackendType backend_type; vhost_backend_init vhost_backend_init; @@ -152,6 +153,7 @@ typedef struct VhostOps { vhost_backend_mem_section_filter_op vhost_backend_mem_section_filter; vhost_get_inflight_fd_op vhost_get_inflight_fd; vhost_set_inflight_fd_op vhost_set_inflight_fd; + vhost_dev_start_op vhost_dev_start; } VhostOps; extern const VhostOps user_ops; -- cgit v1.2.3 From 35f20bb76922420edce8196e6215b0ca49443378 Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Wed, 1 Jul 2020 22:55:32 +0800 Subject: vhost: introduce new VhostOps vhost_vq_get_addr This patch introduces new VhostOps vhost_vq_get_addr_op callback to get the vring addr from the backend Signed-off-by: Cindy Lu Message-Id: <20200701145538.22333-9-lulu@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- include/hw/virtio/vhost-backend.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index b80f344cd6..fa84abac97 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -34,6 +34,7 @@ struct vhost_vring_state; struct vhost_vring_addr; struct vhost_scsi_target; struct vhost_iotlb_msg; +struct vhost_virtqueue; typedef int (*vhost_backend_init)(struct vhost_dev *dev, void *opaque); typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev); @@ -113,6 +114,10 @@ typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev, struct vhost_inflight *inflight); typedef int (*vhost_dev_start_op)(struct vhost_dev *dev, bool started); + +typedef int (*vhost_vq_get_addr_op)(struct vhost_dev *dev, + struct vhost_vring_addr *addr, + struct vhost_virtqueue *vq); typedef struct VhostOps { VhostBackendType backend_type; vhost_backend_init vhost_backend_init; @@ -154,6 +159,7 @@ typedef struct VhostOps { vhost_get_inflight_fd_op vhost_get_inflight_fd; vhost_set_inflight_fd_op vhost_set_inflight_fd; vhost_dev_start_op vhost_dev_start; + vhost_vq_get_addr_op vhost_vq_get_addr; } VhostOps; extern const VhostOps user_ops; -- cgit v1.2.3 From b4ab225c34944658ddff62e4ee8127c3838bcf1d Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Wed, 1 Jul 2020 22:55:33 +0800 Subject: vhost: implement vhost_vq_get_addr method use vhost_vq_get_addr callback to get the vq address from backend Signed-off-by: Cindy Lu Message-Id: <20200701145538.22333-10-lulu@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- include/hw/virtio/vhost-backend.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index fa84abac97..bfc24207e2 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -118,6 +118,9 @@ typedef int (*vhost_dev_start_op)(struct vhost_dev *dev, bool started); typedef int (*vhost_vq_get_addr_op)(struct vhost_dev *dev, struct vhost_vring_addr *addr, struct vhost_virtqueue *vq); + +typedef int (*vhost_get_device_id_op)(struct vhost_dev *dev, uint32_t *dev_id); + typedef struct VhostOps { VhostBackendType backend_type; vhost_backend_init vhost_backend_init; @@ -160,6 +163,7 @@ typedef struct VhostOps { vhost_set_inflight_fd_op vhost_set_inflight_fd; vhost_dev_start_op vhost_dev_start; vhost_vq_get_addr_op vhost_vq_get_addr; + vhost_get_device_id_op vhost_get_device_id; } VhostOps; extern const VhostOps user_ops; -- cgit v1.2.3 From f6c99c3438c09706694f7bf1f3057c36d1bd0c21 Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Wed, 1 Jul 2020 22:55:34 +0800 Subject: vhost: introduce new VhostOps vhost_force_iommu This patch introduces new VhostOps vhost_force_iommu callback to force enable features bit VIRTIO_F_IOMMU_PLATFORM. Signed-off-by: Cindy Lu Message-Id: <20200701145538.22333-11-lulu@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- include/hw/virtio/vhost-backend.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index bfc24207e2..e7cb8d028c 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -121,6 +121,8 @@ typedef int (*vhost_vq_get_addr_op)(struct vhost_dev *dev, typedef int (*vhost_get_device_id_op)(struct vhost_dev *dev, uint32_t *dev_id); +typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev); + typedef struct VhostOps { VhostBackendType backend_type; vhost_backend_init vhost_backend_init; @@ -164,6 +166,7 @@ typedef struct VhostOps { vhost_dev_start_op vhost_dev_start; vhost_vq_get_addr_op vhost_vq_get_addr; vhost_get_device_id_op vhost_get_device_id; + vhost_force_iommu_op vhost_force_iommu; } VhostOps; extern const VhostOps user_ops; -- cgit v1.2.3 From 38140cc4d9713dc9af78090503105bf9c82b6bff Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Wed, 1 Jul 2020 22:55:36 +0800 Subject: vhost_net: introduce set_config & get_config This patch introduces set_config & get_config method which allows vhost_net set/get the config to backend Signed-off-by: Cindy Lu Message-Id: <20200701145538.22333-13-lulu@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- include/net/vhost_net.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h index 77e47398c4..172b0051d8 100644 --- a/include/net/vhost_net.h +++ b/include/net/vhost_net.h @@ -28,6 +28,11 @@ void vhost_net_cleanup(VHostNetState *net); uint64_t vhost_net_get_features(VHostNetState *net, uint64_t features); void vhost_net_ack_features(VHostNetState *net, uint64_t features); +int vhost_net_get_config(struct vhost_net *net, uint8_t *config, + uint32_t config_len); + +int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, + uint32_t offset, uint32_t size, uint32_t flags); bool vhost_net_virtqueue_pending(VHostNetState *net, int n); void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, int idx, bool mask); -- cgit v1.2.3 From 108a64818e69be0a97cde3838d768f2d9910c08b Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Wed, 1 Jul 2020 22:55:37 +0800 Subject: vhost-vdpa: introduce vhost-vdpa backend Currently we have 2 types of vhost backends in QEMU: vhost kernel and vhost-user. The above patch provides a generic device for vDPA purpose, this vDPA device exposes to user space a non-vendor-specific configuration interface for setting up a vhost HW accelerator, this patch set introduces a third vhost backend called vhost-vdpa based on the vDPA interface. Vhost-vdpa usage: qemu-system-x86_64 -cpu host -enable-kvm \ ...... -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \ -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \ Signed-off-by: Lingshan zhu Signed-off-by: Tiwei Bie Signed-off-by: Cindy Lu Signed-off-by: Jason Wang Message-Id: <20200701145538.22333-14-lulu@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- include/hw/virtio/vhost-backend.h | 4 +++- include/hw/virtio/vhost-vdpa.h | 26 ++++++++++++++++++++++++++ include/hw/virtio/vhost.h | 7 +++++++ 3 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 include/hw/virtio/vhost-vdpa.h (limited to 'include') diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index e7cb8d028c..8825bd278f 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -17,7 +17,8 @@ typedef enum VhostBackendType { VHOST_BACKEND_TYPE_NONE = 0, VHOST_BACKEND_TYPE_KERNEL = 1, VHOST_BACKEND_TYPE_USER = 2, - VHOST_BACKEND_TYPE_MAX = 3, + VHOST_BACKEND_TYPE_VDPA = 3, + VHOST_BACKEND_TYPE_MAX = 4, } VhostBackendType; typedef enum VhostSetConfigType { @@ -170,6 +171,7 @@ typedef struct VhostOps { } VhostOps; extern const VhostOps user_ops; +extern const VhostOps vdpa_ops; int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type); diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h new file mode 100644 index 0000000000..6455663388 --- /dev/null +++ b/include/hw/virtio/vhost-vdpa.h @@ -0,0 +1,26 @@ +/* + * vhost-vdpa.h + * + * Copyright(c) 2017-2018 Intel Corporation. + * Copyright(c) 2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#ifndef HW_VIRTIO_VHOST_VDPA_H +#define HW_VIRTIO_VHOST_VDPA_H + +#include "hw/virtio/virtio.h" + +typedef struct vhost_vdpa { + int device_fd; + uint32_t msg_type; + MemoryListener listener; +} VhostVDPA; + +extern AddressSpace address_space_memory; +extern int vhost_vdpa_get_device_id(struct vhost_dev *dev, + uint32_t *device_id); +#endif diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 085450c6f8..767a95ec0b 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -92,6 +92,13 @@ struct vhost_dev { const VhostDevConfigOps *config_ops; }; +struct vhost_net { + struct vhost_dev dev; + struct vhost_virtqueue vqs[2]; + int backend; + NetClientState *nc; +}; + int vhost_dev_init(struct vhost_dev *hdev, void *opaque, VhostBackendType backend_type, uint32_t busyloop_timeout); -- cgit v1.2.3 From 1e0a84ea49b68b7cf60e229d91fd16333e0b7a90 Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Wed, 1 Jul 2020 22:55:38 +0800 Subject: vhost-vdpa: introduce vhost-vdpa net client This patch set introduces a new net client type: vhost-vdpa. vhost-vdpa net client will set up a vDPA device which is specified by a "vhostdev" parameter. Signed-off-by: Lingshan Zhu Signed-off-by: Tiwei Bie Signed-off-by: Cindy Lu Signed-off-by: Jason Wang Message-Id: <20200701145538.22333-15-lulu@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- include/net/vhost-vdpa.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/net/vhost-vdpa.h (limited to 'include') diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h new file mode 100644 index 0000000000..45e34b7cfc --- /dev/null +++ b/include/net/vhost-vdpa.h @@ -0,0 +1,22 @@ +/* + * vhost-vdpa.h + * + * Copyright(c) 2017-2018 Intel Corporation. + * Copyright(c) 2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. 
+ * See the COPYING file in the top-level directory. + * + */ + +#ifndef VHOST_VDPA_H +#define VHOST_VDPA_H + +#define TYPE_VHOST_VDPA "vhost-vdpa" + +struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc); +uint64_t vhost_vdpa_get_acked_features(NetClientState *nc); + +extern const int vdpa_feature_bits[]; + +#endif /* VHOST_VDPA_H */ -- cgit v1.2.3