diff options
Diffstat (limited to 'hw/virtio')
-rw-r--r-- | hw/virtio/Makefile.objs | 4 | ||||
-rw-r--r-- | hw/virtio/trace-events | 3 | ||||
-rw-r--r-- | hw/virtio/vhost-user-vsock-pci.c | 84 | ||||
-rw-r--r-- | hw/virtio/vhost-user-vsock.c | 181 | ||||
-rw-r--r-- | hw/virtio/vhost-user.c | 638 | ||||
-rw-r--r-- | hw/virtio/vhost-vsock-common.c | 258 | ||||
-rw-r--r-- | hw/virtio/vhost-vsock.c | 283 | ||||
-rw-r--r-- | hw/virtio/vhost.c | 61 | ||||
-rw-r--r-- | hw/virtio/virtio-balloon.c | 137 | ||||
-rw-r--r-- | hw/virtio/virtio-pci.c | 12 |
10 files changed, 1279 insertions, 382 deletions
diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs index 4e4d39a0a4..13e75f171f 100644 --- a/hw/virtio/Makefile.objs +++ b/hw/virtio/Makefile.objs @@ -17,10 +17,12 @@ obj-$(CONFIG_VIRTIO_PMEM) += virtio-pmem.o common-obj-$(call land,$(CONFIG_VIRTIO_PMEM),$(CONFIG_VIRTIO_PCI)) += virtio-pmem-pci.o obj-$(call land,$(CONFIG_VHOST_USER_FS),$(CONFIG_VIRTIO_PCI)) += vhost-user-fs-pci.o obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o -obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock.o +obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock-common.o vhost-vsock.o +obj-$(CONFIG_VHOST_USER_VSOCK) += vhost-vsock-common.o vhost-user-vsock.o ifeq ($(CONFIG_VIRTIO_PCI),y) obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock-pci.o +obj-$(CONFIG_VHOST_USER_VSOCK) += vhost-user-vsock-pci.o obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk-pci.o obj-$(CONFIG_VHOST_USER_INPUT) += vhost-user-input-pci.o obj-$(CONFIG_VHOST_USER_SCSI) += vhost-user-scsi-pci.o diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index e83500bee9..6427a0047d 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -5,7 +5,8 @@ vhost_commit(bool started, bool changed) "Started: %d Changed: %d" vhost_region_add_section(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64 vhost_region_add_section_merge(const char *name, uint64_t new_size, uint64_t gpa, uint64_t owr) "%s: size: 0x%"PRIx64 " gpa: 0x%"PRIx64 " owr: 0x%"PRIx64 vhost_region_add_section_aligned(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64 -vhost_section(const char *name, int r) "%s:%d" +vhost_section(const char *name) "%s" +vhost_reject_section(const char *name, int d) "%s:%d" vhost_iotlb_miss(void *dev, int step) "%p step %d" # vhost-user.c diff --git a/hw/virtio/vhost-user-vsock-pci.c b/hw/virtio/vhost-user-vsock-pci.c new file mode 100644 index 0000000000..0a6847e6fc --- /dev/null +++ b/hw/virtio/vhost-user-vsock-pci.c @@ -0,0 +1,84 @@ +/* + * Vhost-user vsock PCI Bindings + * + * Copyright 2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#include "qemu/osdep.h" + +#include "virtio-pci.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-user-vsock.h" + +typedef struct VHostUserVSockPCI VHostUserVSockPCI; + +/* + * vhost-user-vsock-pci: This extends VirtioPCIProxy. + */ +#define TYPE_VHOST_USER_VSOCK_PCI "vhost-user-vsock-pci-base" +#define VHOST_USER_VSOCK_PCI(obj) \ + OBJECT_CHECK(VHostUserVSockPCI, (obj), TYPE_VHOST_USER_VSOCK_PCI) + +struct VHostUserVSockPCI { + VirtIOPCIProxy parent_obj; + VHostUserVSock vdev; +}; + +/* vhost-user-vsock-pci */ + +static Property vhost_user_vsock_pci_properties[] = { + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_user_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserVSockPCI *dev = VHOST_USER_VSOCK_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); + object_property_set_bool(OBJECT(vdev), true, "realized", errp); +} + +static void vhost_user_vsock_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_user_vsock_pci_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + device_class_set_props(dc, vhost_user_vsock_pci_properties); + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_VSOCK; + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_COMMUNICATION_OTHER; +} + +static void vhost_user_vsock_pci_instance_init(Object *obj) +{ + VHostUserVSockPCI *dev = VHOST_USER_VSOCK_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_USER_VSOCK); +} + +static const VirtioPCIDeviceTypeInfo vhost_user_vsock_pci_info = { + .base_name = TYPE_VHOST_USER_VSOCK_PCI, + .generic_name = "vhost-user-vsock-pci", + .transitional_name = "vhost-user-vsock-pci-transitional", + .non_transitional_name = "vhost-user-vsock-pci-non-transitional", + .instance_size = sizeof(VHostUserVSockPCI), + .instance_init = vhost_user_vsock_pci_instance_init, + .class_init = vhost_user_vsock_pci_class_init, +}; + +static void virtio_pci_vhost_register(void) +{ + virtio_pci_types_register(&vhost_user_vsock_pci_info); +} + +type_init(virtio_pci_vhost_register) diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c new file mode 100644 index 0000000000..3534a39d62 --- /dev/null +++ b/hw/virtio/vhost-user-vsock.c @@ -0,0 +1,181 @@ +/* + * Vhost-user vsock virtio device + * + * Copyright 2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#include "qemu/osdep.h" + +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-user-vsock.h" + +static const int user_feature_bits[] = { + VIRTIO_F_VERSION_1, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_F_NOTIFY_ON_EMPTY, + VHOST_INVALID_FEATURE_BIT +}; + +static void vuv_get_config(VirtIODevice *vdev, uint8_t *config) +{ + VHostUserVSock *vsock = VHOST_USER_VSOCK(vdev); + + memcpy(config, &vsock->vsockcfg, sizeof(struct virtio_vsock_config)); +} + +static int vuv_handle_config_change(struct vhost_dev *dev) +{ + VHostUserVSock *vsock = VHOST_USER_VSOCK(dev->vdev); + int ret = vhost_dev_get_config(dev, (uint8_t *)&vsock->vsockcfg, + sizeof(struct virtio_vsock_config)); + if (ret < 0) { + error_report("get config space failed"); + return -1; + } + + virtio_notify_config(dev->vdev); + + return 0; +} + +const VhostDevConfigOps vsock_ops = { + .vhost_dev_config_notifier = vuv_handle_config_change, +}; + +static void vuv_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + + if (!vdev->vm_running) { + should_start = false; + } + + if (vvc->vhost_dev.started == should_start) { + return; + } + + if (should_start) { + int ret = vhost_vsock_common_start(vdev); + if (ret < 0) { + return; + } + } else { + vhost_vsock_common_stop(vdev); + } +} + +static uint64_t vuv_get_features(VirtIODevice *vdev, + uint64_t features, + Error **errp) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + return vhost_get_features(&vvc->vhost_dev, user_feature_bits, features); +} + +static const VMStateDescription vuv_vmstate = { + .name = "vhost-user-vsock", + .unmigratable = 1, +}; + +static void vuv_device_realize(DeviceState *dev, Error **errp) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(dev); + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserVSock *vsock = VHOST_USER_VSOCK(dev); + int ret; + + if (!vsock->conf.chardev.chr) { + error_setg(errp, "missing chardev"); + return; + } + + if (!vhost_user_init(&vsock->vhost_user, &vsock->conf.chardev, errp)) { + return; + } + + vhost_vsock_common_realize(vdev, "vhost-user-vsock"); + + vhost_dev_set_config_notifier(&vvc->vhost_dev, &vsock_ops); + + ret = vhost_dev_init(&vvc->vhost_dev, &vsock->vhost_user, + VHOST_BACKEND_TYPE_USER, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "vhost_dev_init failed"); + goto err_virtio; + } + + ret = vhost_dev_get_config(&vvc->vhost_dev, (uint8_t *)&vsock->vsockcfg, + sizeof(struct virtio_vsock_config)); + if (ret < 0) { + error_setg_errno(errp, -ret, "get config space failed"); + goto err_vhost_dev; + } + + return; + +err_vhost_dev: + vhost_dev_cleanup(&vvc->vhost_dev); +err_virtio: + vhost_vsock_common_unrealize(vdev); + vhost_user_cleanup(&vsock->vhost_user); + return; +} + +static void vuv_device_unrealize(DeviceState *dev) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(dev); + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserVSock *vsock = VHOST_USER_VSOCK(dev); + + /* This will stop vhost backend if appropriate. */ + vuv_set_status(vdev, 0); + + vhost_dev_cleanup(&vvc->vhost_dev); + + vhost_vsock_common_unrealize(vdev); + + vhost_user_cleanup(&vsock->vhost_user); + +} + +static Property vuv_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserVSock, conf.chardev), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vuv_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + device_class_set_props(dc, vuv_properties); + dc->vmsd = &vuv_vmstate; + vdc->realize = vuv_device_realize; + vdc->unrealize = vuv_device_unrealize; + vdc->get_features = vuv_get_features; + vdc->get_config = vuv_get_config; + vdc->set_status = vuv_set_status; +} + +static const TypeInfo vuv_info = { + .name = TYPE_VHOST_USER_VSOCK, + .parent = TYPE_VHOST_VSOCK_COMMON, + .instance_size = sizeof(VHostUserVSock), + .class_init = vuv_class_init, +}; + +static void vuv_register_types(void) +{ + type_register_static(&vuv_info); +} + +type_init(vuv_register_types) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index ec21e8fbe8..4d6cd4e58a 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -35,11 +35,29 @@ #include <linux/userfaultfd.h> #endif -#define VHOST_MEMORY_MAX_NREGIONS 8 +#define VHOST_MEMORY_BASELINE_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 #define VHOST_USER_SLAVE_MAX_FDS 8 /* + * Set maximum number of RAM slots supported to + * the maximum number supported by the target + * hardware plaform. + */ +#if defined(TARGET_X86) || defined(TARGET_X86_64) || \ + defined(TARGET_ARM) || defined(TARGET_ARM_64) +#include "hw/acpi/acpi.h" +#define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS + +#elif defined(TARGET_PPC) || defined(TARGET_PPC_64) +#include "hw/ppc/spapr.h" +#define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS + +#else +#define VHOST_USER_MAX_RAM_SLOTS 512 +#endif + +/* * Maximum size of virtio device config space */ #define VHOST_USER_MAX_CONFIG_SIZE 256 @@ -59,6 +77,8 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, + /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */ + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, VHOST_USER_PROTOCOL_F_MAX }; @@ -100,6 +120,10 @@ typedef enum VhostUserRequest { VHOST_USER_SET_INFLIGHT_FD = 32, VHOST_USER_GPU_SET_SOCKET = 33, VHOST_USER_RESET_DEVICE = 34, + /* Message number 35 reserved for VHOST_USER_VRING_KICK. */ + VHOST_USER_GET_MAX_MEM_SLOTS = 36, + VHOST_USER_ADD_MEM_REG = 37, + VHOST_USER_REM_MEM_REG = 38, VHOST_USER_MAX } VhostUserRequest; @@ -121,9 +145,14 @@ typedef struct VhostUserMemoryRegion { typedef struct VhostUserMemory { uint32_t nregions; uint32_t padding; - VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; + VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; } VhostUserMemory; +typedef struct VhostUserMemRegMsg { + uint32_t padding; + VhostUserMemoryRegion region; +} VhostUserMemRegMsg; + typedef struct VhostUserLog { uint64_t mmap_size; uint64_t mmap_offset; @@ -182,6 +211,7 @@ typedef union { struct vhost_vring_state state; struct vhost_vring_addr addr; VhostUserMemory memory; + VhostUserMemRegMsg mem_reg; VhostUserLog log; struct vhost_iotlb_msg iotlb; VhostUserConfig config; @@ -210,7 +240,7 @@ struct vhost_user { int slave_fd; NotifierWithReturn postcopy_notifier; struct PostCopyFD postcopy_fd; - uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS]; + uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; /* Length of the region_rb and region_rb_offset arrays */ size_t region_rb_len; /* RAMBlock associated with a given region */ @@ -222,6 +252,16 @@ struct vhost_user { /* True once we've entered postcopy_listen */ bool postcopy_listen; + + /* Our current regions */ + int num_shadow_regions; + struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS]; +}; + +struct scrub_regions { + struct vhost_memory_region *region; + int reg_idx; + int fd_idx; }; static bool ioeventfd_enabled(void) @@ -370,7 +410,7 @@ int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd) static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, struct vhost_log *log) { - int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fds[VHOST_USER_MAX_RAM_SLOTS]; size_t fd_num = 0; bool shmfd = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_LOG_SHMFD); @@ -407,6 +447,27 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, return 0; } +static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, + int *fd) +{ + MemoryRegion *mr; + + assert((uintptr_t)addr == addr); + mr = memory_region_from_host((void *)(uintptr_t)addr, offset); + *fd = memory_region_get_fd(mr); + + return mr; +} + +static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst, + struct vhost_memory_region *src) +{ + assert(src != NULL && dst != NULL); + dst->userspace_addr = src->userspace_addr; + dst->memory_size = src->memory_size; + dst->guest_phys_addr = src->guest_phys_addr; +} + static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, struct vhost_dev *dev, VhostUserMsg *msg, @@ -417,19 +478,17 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, ram_addr_t offset; MemoryRegion *mr; struct vhost_memory_region *reg; + VhostUserMemoryRegion region_buffer; msg->hdr.request = VHOST_USER_SET_MEM_TABLE; for (i = 0; i < dev->mem->nregions; ++i) { reg = dev->mem->regions + i; - assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); - mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, - &offset); - fd = memory_region_get_fd(mr); + mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); if (fd > 0) { if (track_ramblocks) { - assert(*fd_num < VHOST_MEMORY_MAX_NREGIONS); + assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, reg->memory_size, reg->guest_phys_addr, @@ -437,16 +496,12 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, offset); u->region_rb_offset[i] = offset; u->region_rb[i] = mr->ram_block; - } else if (*fd_num == VHOST_MEMORY_MAX_NREGIONS) { + } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { error_report("Failed preparing vhost-user memory table msg"); return -1; } - msg->payload.memory.regions[*fd_num].userspace_addr = - reg->userspace_addr; - msg->payload.memory.regions[*fd_num].memory_size = - reg->memory_size; - msg->payload.memory.regions[*fd_num].guest_phys_addr = - reg->guest_phys_addr; + vhost_user_fill_msg_region(®ion_buffer, reg); + msg->payload.memory.regions[*fd_num] = region_buffer; msg->payload.memory.regions[*fd_num].mmap_offset = offset; fds[(*fd_num)++] = fd; } else if (track_ramblocks) { @@ -470,11 +525,335 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, return 1; } +static inline bool reg_equal(struct vhost_memory_region *shadow_reg, + struct vhost_memory_region *vdev_reg) +{ + return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr && + shadow_reg->userspace_addr == vdev_reg->userspace_addr && + shadow_reg->memory_size == vdev_reg->memory_size; +} + +static void scrub_shadow_regions(struct vhost_dev *dev, + struct scrub_regions *add_reg, + int *nr_add_reg, + struct scrub_regions *rem_reg, + int *nr_rem_reg, uint64_t *shadow_pcb, + bool track_ramblocks) +{ + struct vhost_user *u = dev->opaque; + bool found[VHOST_USER_MAX_RAM_SLOTS] = {}; + struct vhost_memory_region *reg, *shadow_reg; + int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0; + ram_addr_t offset; + MemoryRegion *mr; + bool matching; + + /* + * Find memory regions present in our shadow state which are not in + * the device's current memory state. + * + * Mark regions in both the shadow and device state as "found". + */ + for (i = 0; i < u->num_shadow_regions; i++) { + shadow_reg = &u->shadow_regions[i]; + matching = false; + + for (j = 0; j < dev->mem->nregions; j++) { + reg = &dev->mem->regions[j]; + + mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); + + if (reg_equal(shadow_reg, reg)) { + matching = true; + found[j] = true; + if (track_ramblocks) { + /* + * Reset postcopy client bases, region_rb, and + * region_rb_offset in case regions are removed. + */ + if (fd > 0) { + u->region_rb_offset[j] = offset; + u->region_rb[j] = mr->ram_block; + shadow_pcb[j] = u->postcopy_client_bases[i]; + } else { + u->region_rb_offset[j] = 0; + u->region_rb[j] = NULL; + } + } + break; + } + } + + /* + * If the region was not found in the current device memory state + * create an entry for it in the removed list. + */ + if (!matching) { + rem_reg[rm_idx].region = shadow_reg; + rem_reg[rm_idx++].reg_idx = i; + } + } + + /* + * For regions not marked "found", create entries in the added list. + * + * Note their indexes in the device memory state and the indexes of their + * file descriptors. + */ + for (i = 0; i < dev->mem->nregions; i++) { + reg = &dev->mem->regions[i]; + mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); + if (fd > 0) { + ++fd_num; + } + + /* + * If the region was in both the shadow and device state we don't + * need to send a VHOST_USER_ADD_MEM_REG message for it. + */ + if (found[i]) { + continue; + } + + add_reg[add_idx].region = reg; + add_reg[add_idx].reg_idx = i; + add_reg[add_idx++].fd_idx = fd_num; + } + *nr_rem_reg = rm_idx; + *nr_add_reg = add_idx; + + return; +} + +static int send_remove_regions(struct vhost_dev *dev, + struct scrub_regions *remove_reg, + int nr_rem_reg, VhostUserMsg *msg, + bool reply_supported) +{ + struct vhost_user *u = dev->opaque; + struct vhost_memory_region *shadow_reg; + int i, fd, shadow_reg_idx, ret; + ram_addr_t offset; + VhostUserMemoryRegion region_buffer; + + /* + * The regions in remove_reg appear in the same order they do in the + * shadow table. Therefore we can minimize memory copies by iterating + * through remove_reg backwards. + */ + for (i = nr_rem_reg - 1; i >= 0; i--) { + shadow_reg = remove_reg[i].region; + shadow_reg_idx = remove_reg[i].reg_idx; + + vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd); + + if (fd > 0) { + msg->hdr.request = VHOST_USER_REM_MEM_REG; + vhost_user_fill_msg_region(®ion_buffer, shadow_reg); + msg->payload.mem_reg.region = region_buffer; + + if (vhost_user_write(dev, msg, &fd, 1) < 0) { + return -1; + } + + if (reply_supported) { + ret = process_message_reply(dev, msg); + if (ret) { + return ret; + } + } + } + + /* + * At this point we know the backend has unmapped the region. It is now + * safe to remove it from the shadow table. + */ + memmove(&u->shadow_regions[shadow_reg_idx], + &u->shadow_regions[shadow_reg_idx + 1], + sizeof(struct vhost_memory_region) * + (u->num_shadow_regions - shadow_reg_idx)); + u->num_shadow_regions--; + } + + return 0; +} + +static int send_add_regions(struct vhost_dev *dev, + struct scrub_regions *add_reg, int nr_add_reg, + VhostUserMsg *msg, uint64_t *shadow_pcb, + bool reply_supported, bool track_ramblocks) +{ + struct vhost_user *u = dev->opaque; + int i, fd, ret, reg_idx, reg_fd_idx; + struct vhost_memory_region *reg; + MemoryRegion *mr; + ram_addr_t offset; + VhostUserMsg msg_reply; + VhostUserMemoryRegion region_buffer; + + for (i = 0; i < nr_add_reg; i++) { + reg = add_reg[i].region; + reg_idx = add_reg[i].reg_idx; + reg_fd_idx = add_reg[i].fd_idx; + + mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); + + if (fd > 0) { + if (track_ramblocks) { + trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name, + reg->memory_size, + reg->guest_phys_addr, + reg->userspace_addr, + offset); + u->region_rb_offset[reg_idx] = offset; + u->region_rb[reg_idx] = mr->ram_block; + } + msg->hdr.request = VHOST_USER_ADD_MEM_REG; + vhost_user_fill_msg_region(®ion_buffer, reg); + msg->payload.mem_reg.region = region_buffer; + msg->payload.mem_reg.region.mmap_offset = offset; + + if (vhost_user_write(dev, msg, &fd, 1) < 0) { + return -1; + } + + if (track_ramblocks) { + uint64_t reply_gpa; + + if (vhost_user_read(dev, &msg_reply) < 0) { + return -1; + } + + reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr; + + if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) { + error_report("%s: Received unexpected msg type." + "Expected %d received %d", __func__, + VHOST_USER_ADD_MEM_REG, + msg_reply.hdr.request); + return -1; + } + + /* + * We're using the same structure, just reusing one of the + * fields, so it should be the same size. + */ + if (msg_reply.hdr.size != msg->hdr.size) { + error_report("%s: Unexpected size for postcopy reply " + "%d vs %d", __func__, msg_reply.hdr.size, + msg->hdr.size); + return -1; + } + + /* Get the postcopy client base from the backend's reply. */ + if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) { + shadow_pcb[reg_idx] = + msg_reply.payload.mem_reg.region.userspace_addr; + trace_vhost_user_set_mem_table_postcopy( + msg_reply.payload.mem_reg.region.userspace_addr, + msg->payload.mem_reg.region.userspace_addr, + reg_fd_idx, reg_idx); + } else { + error_report("%s: invalid postcopy reply for region. " + "Got guest physical address %" PRIX64 ", expected " + "%" PRIX64, __func__, reply_gpa, + dev->mem->regions[reg_idx].guest_phys_addr); + return -1; + } + } else if (reply_supported) { + ret = process_message_reply(dev, msg); + if (ret) { + return ret; + } + } + } else if (track_ramblocks) { + u->region_rb_offset[reg_idx] = 0; + u->region_rb[reg_idx] = NULL; + } + + /* + * At this point, we know the backend has mapped in the new + * region, if the region has a valid file descriptor. + * + * The region should now be added to the shadow table. + */ + u->shadow_regions[u->num_shadow_regions].guest_phys_addr = + reg->guest_phys_addr; + u->shadow_regions[u->num_shadow_regions].userspace_addr = + reg->userspace_addr; + u->shadow_regions[u->num_shadow_regions].memory_size = + reg->memory_size; + u->num_shadow_regions++; + } + + return 0; +} + +static int vhost_user_add_remove_regions(struct vhost_dev *dev, + VhostUserMsg *msg, + bool reply_supported, + bool track_ramblocks) +{ + struct vhost_user *u = dev->opaque; + struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS]; + struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS]; + uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {}; + int nr_add_reg, nr_rem_reg; + + msg->hdr.size = sizeof(msg->payload.mem_reg.padding) + + sizeof(VhostUserMemoryRegion); + + /* Find the regions which need to be removed or added. */ + scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg, + shadow_pcb, track_ramblocks); + + if (nr_rem_reg && send_remove_regions(dev, rem_reg, nr_rem_reg, msg, + reply_supported) < 0) + { + goto err; + } + + if (nr_add_reg && send_add_regions(dev, add_reg, nr_add_reg, msg, + shadow_pcb, reply_supported, track_ramblocks) < 0) + { + goto err; + } + + if (track_ramblocks) { + memcpy(u->postcopy_client_bases, shadow_pcb, + sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); + /* + * Now we've registered this with the postcopy code, we ack to the + * client, because now we're in the position to be able to deal with + * any faults it generates. + */ + /* TODO: Use this for failure cases as well with a bad value. */ + msg->hdr.size = sizeof(msg->payload.u64); + msg->payload.u64 = 0; /* OK */ + + if (vhost_user_write(dev, msg, NULL, 0) < 0) { + return -1; + } + } + + return 0; + +err: + if (track_ramblocks) { + memcpy(u->postcopy_client_bases, shadow_pcb, + sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); + } + + return -1; +} + static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, - struct vhost_memory *mem) + struct vhost_memory *mem, + bool reply_supported, + bool config_mem_slots) { struct vhost_user *u = dev->opaque; - int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fds[VHOST_MEMORY_BASELINE_NREGIONS]; size_t fd_num = 0; VhostUserMsg msg_reply; int region_i, msg_i; @@ -494,71 +873,84 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, u->region_rb_len = dev->mem->nregions; } - if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, + if (config_mem_slots) { + if (vhost_user_add_remove_regions(dev, &msg, reply_supported, true) < 0) { - return -1; - } + return -1; + } + } else { + if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, + true) < 0) { + return -1; + } - if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { - return -1; - } + if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { + return -1; + } - if (vhost_user_read(dev, &msg_reply) < 0) { - return -1; - } + if (vhost_user_read(dev, &msg_reply) < 0) { + return -1; + } - if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { - error_report("%s: Received unexpected msg type." - "Expected %d received %d", __func__, - VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); - return -1; - } - /* We're using the same structure, just reusing one of the - * fields, so it should be the same size. - */ - if (msg_reply.hdr.size != msg.hdr.size) { - error_report("%s: Unexpected size for postcopy reply " - "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size); - return -1; - } - - memset(u->postcopy_client_bases, 0, - sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS); - - /* They're in the same order as the regions that were sent - * but some of the regions were skipped (above) if they - * didn't have fd's - */ - for (msg_i = 0, region_i = 0; - region_i < dev->mem->nregions; - region_i++) { - if (msg_i < fd_num && - msg_reply.payload.memory.regions[msg_i].guest_phys_addr == - dev->mem->regions[region_i].guest_phys_addr) { - u->postcopy_client_bases[region_i] = - msg_reply.payload.memory.regions[msg_i].userspace_addr; - trace_vhost_user_set_mem_table_postcopy( - msg_reply.payload.memory.regions[msg_i].userspace_addr, - msg.payload.memory.regions[msg_i].userspace_addr, - msg_i, region_i); - msg_i++; + if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { + error_report("%s: Received unexpected msg type." + "Expected %d received %d", __func__, + VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); + return -1; + } + + /* + * We're using the same structure, just reusing one of the + * fields, so it should be the same size. + */ + if (msg_reply.hdr.size != msg.hdr.size) { + error_report("%s: Unexpected size for postcopy reply " + "%d vs %d", __func__, msg_reply.hdr.size, + msg.hdr.size); + return -1; + } + + memset(u->postcopy_client_bases, 0, + sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); + + /* + * They're in the same order as the regions that were sent + * but some of the regions were skipped (above) if they + * didn't have fd's + */ + for (msg_i = 0, region_i = 0; + region_i < dev->mem->nregions; + region_i++) { + if (msg_i < fd_num && + msg_reply.payload.memory.regions[msg_i].guest_phys_addr == + dev->mem->regions[region_i].guest_phys_addr) { + u->postcopy_client_bases[region_i] = + msg_reply.payload.memory.regions[msg_i].userspace_addr; + trace_vhost_user_set_mem_table_postcopy( + msg_reply.payload.memory.regions[msg_i].userspace_addr, + msg.payload.memory.regions[msg_i].userspace_addr, + msg_i, region_i); + msg_i++; + } + } + if (msg_i != fd_num) { + error_report("%s: postcopy reply not fully consumed " + "%d vs %zd", + __func__, msg_i, fd_num); + return -1; + } + + /* + * Now we've registered this with the postcopy code, we ack to the + * client, because now we're in the position to be able to deal + * with any faults it generates. + */ + /* TODO: Use this for failure cases as well with a bad value. */ + msg.hdr.size = sizeof(msg.payload.u64); + msg.payload.u64 = 0; /* OK */ + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; } - } - if (msg_i != fd_num) { - error_report("%s: postcopy reply not fully consumed " - "%d vs %zd", - __func__, msg_i, fd_num); - return -1; - } - /* Now we've registered this with the postcopy code, we ack to the client, - * because now we're in the position to be able to deal with any faults - * it generates. - */ - /* TODO: Use this for failure cases as well with a bad value */ - msg.hdr.size = sizeof(msg.payload.u64); - msg.payload.u64 = 0; /* OK */ - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; } return 0; @@ -568,17 +960,22 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, struct vhost_memory *mem) { struct vhost_user *u = dev->opaque; - int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fds[VHOST_MEMORY_BASELINE_NREGIONS]; size_t fd_num = 0; bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; bool reply_supported = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK); + bool config_mem_slots = + virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); if (do_postcopy) { - /* Postcopy has enough differences that it's best done in it's own + /* + * Postcopy has enough differences that it's best done in it's own * version */ - return vhost_user_set_mem_table_postcopy(dev, mem); + return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported, + config_mem_slots); } VhostUserMsg msg = { @@ -589,17 +986,23 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; } - if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, + if (config_mem_slots) { + if (vhost_user_add_remove_regions(dev, &msg, reply_supported, false) < 0) { - return -1; - } - - if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { - return -1; - } + return -1; + } + } else { + if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, + false) < 0) { + return -1; + } + if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { + return -1; + } - if (reply_supported) { - return process_message_reply(dev, &msg); + if (reply_supported) { + return process_message_reply(dev, &msg); + } } return 0; @@ -764,7 +1167,7 @@ static int vhost_set_vring_file(struct vhost_dev *dev, VhostUserRequest request, struct vhost_vring_file *file) { - int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fds[VHOST_USER_MAX_RAM_SLOTS]; size_t fd_num = 0; VhostUserMsg msg = { .hdr.request = request, @@ -880,6 +1283,23 @@ static int vhost_user_set_owner(struct vhost_dev *dev) return 0; } +static int vhost_user_get_max_memslots(struct vhost_dev *dev, + uint64_t *max_memslots) +{ + uint64_t backend_max_memslots; + int err; + + err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS, + &backend_max_memslots); + if (err < 0) { + return err; + } + + *max_memslots = backend_max_memslots; + + return 0; +} + static int vhost_user_reset_device(struct vhost_dev *dev) { VhostUserMsg msg = { @@ -1377,7 +1797,7 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) { - uint64_t features, protocol_features; + uint64_t features, protocol_features, ram_slots; struct vhost_user *u; int err; @@ -1439,6 +1859,27 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) "slave-req protocol features."); return -1; } + + /* get max memory regions if backend supports configurable RAM slots */ + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { + u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; + } else { + err = vhost_user_get_max_memslots(dev, &ram_slots); + if (err < 0) { + return err; + } + + if (ram_slots < u->user->memory_slots) { + error_report("The backend specified a max ram slots limit " + "of %" PRIu64", when the prior validated limit was %d. " + "This limit should never decrease.", ram_slots, + u->user->memory_slots); + return -1; + } + + u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); + } } if (dev->migration_blocker == NULL && @@ -1504,7 +1945,9 @@ static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) static int vhost_user_memslots_limit(struct vhost_dev *dev) { - return VHOST_MEMORY_MAX_NREGIONS; + struct vhost_user *u = dev->opaque; + + return u->user->memory_slots; } static bool vhost_user_requires_shm_log(struct vhost_dev *dev) @@ -1545,13 +1988,9 @@ static bool vhost_user_can_merge(struct vhost_dev *dev, { ram_addr_t offset; int mfd, rfd; - MemoryRegion *mr; - - mr = memory_region_from_host((void *)(uintptr_t)start1, &offset); - mfd = memory_region_get_fd(mr); - mr = memory_region_from_host((void *)(uintptr_t)start2, &offset); - rfd = memory_region_get_fd(mr); + (void)vhost_user_get_mr_data(start1, &offset, &mfd); + (void)vhost_user_get_mr_data(start2, &offset, &rfd); return mfd == rfd; } @@ -1893,6 +2332,7 @@ bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) return false; } user->chr = chr; + user->memory_slots = 0; return true; } diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c new file mode 100644 index 0000000000..5b2ebf3496 --- /dev/null +++ b/hw/virtio/vhost-vsock-common.c @@ -0,0 +1,258 @@ +/* + * Parent class for vhost-vsock devices + * + * Copyright 2015-2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#include "qemu/osdep.h" +#include "standard-headers/linux/virtio_vsock.h" +#include "qapi/error.h" +#include "hw/virtio/virtio-access.h" +#include "qemu/error-report.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-vsock.h" +#include "qemu/iov.h" +#include "monitor/monitor.h" + +int vhost_vsock_common_start(VirtIODevice *vdev) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + int i; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return -ENOSYS; + } + + ret = vhost_dev_enable_notifiers(&vvc->vhost_dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", -ret); + return ret; + } + + ret = k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier: %d", -ret); + goto err_host_notifiers; + } + + vvc->vhost_dev.acked_features = vdev->guest_features; + ret = vhost_dev_start(&vvc->vhost_dev, vdev); + if (ret < 0) { + error_report("Error starting vhost: %d", -ret); + goto err_guest_notifiers; + } + + /* + * guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. + */ + for (i = 0; i < vvc->vhost_dev.nvqs; i++) { + vhost_virtqueue_mask(&vvc->vhost_dev, vdev, i, false); + } + + return 0; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&vvc->vhost_dev, vdev); + return ret; +} + +void vhost_vsock_common_stop(VirtIODevice *vdev) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + vhost_dev_stop(&vvc->vhost_dev, vdev); + + ret = k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(&vvc->vhost_dev, vdev); +} + + +static void vhost_vsock_common_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + /* Do nothing */ +} + +static void vhost_vsock_common_guest_notifier_mask(VirtIODevice *vdev, int idx, + bool mask) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + vhost_virtqueue_mask(&vvc->vhost_dev, vdev, idx, mask); +} + +static bool vhost_vsock_common_guest_notifier_pending(VirtIODevice *vdev, + int idx) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + return vhost_virtqueue_pending(&vvc->vhost_dev, idx); +} + +static void vhost_vsock_common_send_transport_reset(VHostVSockCommon *vvc) +{ + VirtQueueElement *elem; + VirtQueue *vq = vvc->event_vq; + struct virtio_vsock_event event = { + .id = cpu_to_le32(VIRTIO_VSOCK_EVENT_TRANSPORT_RESET), + }; + + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!elem) { + error_report("vhost-vsock missed transport reset event"); + return; + } + + if (elem->out_num) { + error_report("invalid vhost-vsock event virtqueue element with " + "out buffers"); + goto out; + } + + if (iov_from_buf(elem->in_sg, elem->in_num, 0, + &event, sizeof(event)) != sizeof(event)) { + error_report("vhost-vsock event virtqueue element is too short"); + goto out; + } + + virtqueue_push(vq, elem, sizeof(event)); + virtio_notify(VIRTIO_DEVICE(vvc), vq); + +out: + g_free(elem); +} + +static void vhost_vsock_common_post_load_timer_cleanup(VHostVSockCommon *vvc) +{ + if (!vvc->post_load_timer) { + return; + } + + timer_del(vvc->post_load_timer); + timer_free(vvc->post_load_timer); + vvc->post_load_timer = NULL; +} + +static void vhost_vsock_common_post_load_timer_cb(void *opaque) +{ + VHostVSockCommon *vvc = opaque; + + vhost_vsock_common_post_load_timer_cleanup(vvc); + vhost_vsock_common_send_transport_reset(vvc); +} + +int vhost_vsock_common_pre_save(void *opaque) +{ + VHostVSockCommon *vvc = opaque; + + /* + * At this point, backend must be stopped, otherwise + * it might keep writing to memory. + */ + assert(!vvc->vhost_dev.started); + + return 0; +} + +int vhost_vsock_common_post_load(void *opaque, int version_id) +{ + VHostVSockCommon *vvc = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(vvc); + + if (virtio_queue_get_addr(vdev, 2)) { + /* + * Defer transport reset event to a vm clock timer so that virtqueue + * changes happen after migration has completed. + */ + assert(!vvc->post_load_timer); + vvc->post_load_timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, + vhost_vsock_common_post_load_timer_cb, + vvc); + timer_mod(vvc->post_load_timer, 1); + } + return 0; +} + +void vhost_vsock_common_realize(VirtIODevice *vdev, const char *name) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + virtio_init(vdev, name, VIRTIO_ID_VSOCK, + sizeof(struct virtio_vsock_config)); + + /* Receive and transmit queues belong to vhost */ + vvc->recv_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, + vhost_vsock_common_handle_output); + vvc->trans_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, + vhost_vsock_common_handle_output); + + /* The event queue belongs to QEMU */ + vvc->event_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, + vhost_vsock_common_handle_output); + + vvc->vhost_dev.nvqs = ARRAY_SIZE(vvc->vhost_vqs); + vvc->vhost_dev.vqs = vvc->vhost_vqs; + + vvc->post_load_timer = NULL; +} + +void vhost_vsock_common_unrealize(VirtIODevice *vdev) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + vhost_vsock_common_post_load_timer_cleanup(vvc); + + virtio_delete_queue(vvc->recv_vq); + virtio_delete_queue(vvc->trans_vq); + virtio_delete_queue(vvc->event_vq); + virtio_cleanup(vdev); +} + +static void vhost_vsock_common_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + vdc->guest_notifier_mask = vhost_vsock_common_guest_notifier_mask; + vdc->guest_notifier_pending = vhost_vsock_common_guest_notifier_pending; +} + +static const TypeInfo vhost_vsock_common_info = { + .name = TYPE_VHOST_VSOCK_COMMON, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostVSockCommon), + .class_init = vhost_vsock_common_class_init, + .abstract = true, +}; + +static void vhost_vsock_common_register_types(void) +{ + type_register_static(&vhost_vsock_common_info); +} + +type_init(vhost_vsock_common_register_types) diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index 4a228f5168..c8f0699b4f 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -12,24 +12,14 @@ */ #include "qemu/osdep.h" -#include <sys/ioctl.h> #include "standard-headers/linux/virtio_vsock.h" #include "qapi/error.h" -#include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" #include "qemu/error-report.h" #include "hw/qdev-properties.h" #include "hw/virtio/vhost-vsock.h" -#include "qemu/iov.h" -#include "qemu/module.h" #include "monitor/monitor.h" -enum { - VHOST_VSOCK_SAVEVM_VERSION = 0, - - VHOST_VSOCK_QUEUE_SIZE = 128, -}; - static void vhost_vsock_get_config(VirtIODevice *vdev, uint8_t *config) { VHostVSock *vsock = VHOST_VSOCK(vdev); @@ -39,16 +29,18 @@ static void vhost_vsock_get_config(VirtIODevice *vdev, uint8_t *config) memcpy(config, &vsockcfg, sizeof(vsockcfg)); } -static int vhost_vsock_set_guest_cid(VHostVSock *vsock) +static int vhost_vsock_set_guest_cid(VirtIODevice *vdev) { - const VhostOps *vhost_ops = vsock->vhost_dev.vhost_ops; + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + VHostVSock *vsock = VHOST_VSOCK(vdev); + const VhostOps *vhost_ops = vvc->vhost_dev.vhost_ops; int ret; if (!vhost_ops->vhost_vsock_set_guest_cid) { return -ENOSYS; } - ret = vhost_ops->vhost_vsock_set_guest_cid(&vsock->vhost_dev, + ret = vhost_ops->vhost_vsock_set_guest_cid(&vvc->vhost_dev, vsock->conf.guest_cid); if (ret < 0) { return -errno; @@ -56,123 +48,58 @@ static int vhost_vsock_set_guest_cid(VHostVSock *vsock) return 0; } -static int vhost_vsock_set_running(VHostVSock *vsock, int start) +static int vhost_vsock_set_running(VirtIODevice *vdev, int start) { - const VhostOps *vhost_ops = vsock->vhost_dev.vhost_ops; + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + const VhostOps *vhost_ops = vvc->vhost_dev.vhost_ops; int ret; if (!vhost_ops->vhost_vsock_set_running) { return -ENOSYS; } - ret = vhost_ops->vhost_vsock_set_running(&vsock->vhost_dev, start); + ret = vhost_ops->vhost_vsock_set_running(&vvc->vhost_dev, start); if (ret < 0) { return -errno; } return 0; } -static void vhost_vsock_start(VirtIODevice *vdev) -{ - VHostVSock *vsock = VHOST_VSOCK(vdev); - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - int ret; - int i; - - if (!k->set_guest_notifiers) { - error_report("binding does not support guest notifiers"); - return; - } - - ret = vhost_dev_enable_notifiers(&vsock->vhost_dev, vdev); - if (ret < 0) { - error_report("Error enabling host notifiers: %d", -ret); - return; - } - - ret = k->set_guest_notifiers(qbus->parent, vsock->vhost_dev.nvqs, true); - if (ret < 0) { - error_report("Error binding guest notifier: %d", -ret); - goto err_host_notifiers; - } - - vsock->vhost_dev.acked_features = vdev->guest_features; - ret = vhost_dev_start(&vsock->vhost_dev, vdev); - if (ret < 0) { - error_report("Error starting vhost: %d", -ret); - goto err_guest_notifiers; - } - - ret = vhost_vsock_set_running(vsock, 1); - if (ret < 0) { - error_report("Error starting vhost vsock: %d", -ret); - goto err_dev_start; - } - - /* guest_notifier_mask/pending not used yet, so just unmask - * everything here. virtio-pci will do the right thing by - * enabling/disabling irqfd. - */ - for (i = 0; i < vsock->vhost_dev.nvqs; i++) { - vhost_virtqueue_mask(&vsock->vhost_dev, vdev, i, false); - } - - return; - -err_dev_start: - vhost_dev_stop(&vsock->vhost_dev, vdev); -err_guest_notifiers: - k->set_guest_notifiers(qbus->parent, vsock->vhost_dev.nvqs, false); -err_host_notifiers: - vhost_dev_disable_notifiers(&vsock->vhost_dev, vdev); -} - -static void vhost_vsock_stop(VirtIODevice *vdev) -{ - VHostVSock *vsock = VHOST_VSOCK(vdev); - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - int ret; - - if (!k->set_guest_notifiers) { - return; - } - - ret = vhost_vsock_set_running(vsock, 0); - if (ret < 0) { - error_report("vhost vsock set running failed: %d", ret); - return; - } - - vhost_dev_stop(&vsock->vhost_dev, vdev); - - ret = k->set_guest_notifiers(qbus->parent, vsock->vhost_dev.nvqs, false); - if (ret < 0) { - error_report("vhost guest notifier cleanup failed: %d", ret); - return; - } - - vhost_dev_disable_notifiers(&vsock->vhost_dev, vdev); -} static void vhost_vsock_set_status(VirtIODevice *vdev, uint8_t status) { - VHostVSock *vsock = VHOST_VSOCK(vdev); + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + int ret; if (!vdev->vm_running) { should_start = false; } - if (vsock->vhost_dev.started == should_start) { + if (vvc->vhost_dev.started == should_start) { return; } if (should_start) { - vhost_vsock_start(vdev); + ret = vhost_vsock_common_start(vdev); + if (ret < 0) { + return; + } + + ret = vhost_vsock_set_running(vdev, 1); + if (ret < 0) { + vhost_vsock_common_stop(vdev); + error_report("Error starting vhost vsock: %d", -ret); + return; + } } else { - vhost_vsock_stop(vdev); + ret = vhost_vsock_set_running(vdev, 0); + if (ret < 0) { + error_report("vhost vsock set running failed: %d", ret); + return; + } + + vhost_vsock_common_stop(vdev); } } @@ -184,108 +111,6 @@ static uint64_t vhost_vsock_get_features(VirtIODevice *vdev, return requested_features; } -static void vhost_vsock_handle_output(VirtIODevice *vdev, VirtQueue *vq) -{ - /* Do nothing */ -} - -static void vhost_vsock_guest_notifier_mask(VirtIODevice *vdev, int idx, - bool mask) -{ - VHostVSock *vsock = VHOST_VSOCK(vdev); - - vhost_virtqueue_mask(&vsock->vhost_dev, vdev, idx, mask); -} - -static bool vhost_vsock_guest_notifier_pending(VirtIODevice *vdev, int idx) -{ - VHostVSock *vsock = VHOST_VSOCK(vdev); - - return vhost_virtqueue_pending(&vsock->vhost_dev, idx); -} - -static void vhost_vsock_send_transport_reset(VHostVSock *vsock) -{ - VirtQueueElement *elem; - VirtQueue *vq = vsock->event_vq; - struct virtio_vsock_event event = { - .id = cpu_to_le32(VIRTIO_VSOCK_EVENT_TRANSPORT_RESET), - }; - - elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); - if (!elem) { - error_report("vhost-vsock missed transport reset event"); - return; - } - - if (elem->out_num) { - error_report("invalid vhost-vsock event virtqueue element with " - "out buffers"); - goto out; - } - - if (iov_from_buf(elem->in_sg, elem->in_num, 0, - &event, sizeof(event)) != sizeof(event)) { - error_report("vhost-vsock event virtqueue element is too short"); - goto out; - } - - virtqueue_push(vq, elem, sizeof(event)); - virtio_notify(VIRTIO_DEVICE(vsock), vq); - -out: - g_free(elem); -} - -static void vhost_vsock_post_load_timer_cleanup(VHostVSock *vsock) -{ - if (!vsock->post_load_timer) { - return; - } - - timer_del(vsock->post_load_timer); - timer_free(vsock->post_load_timer); - vsock->post_load_timer = NULL; -} - -static void vhost_vsock_post_load_timer_cb(void *opaque) -{ - VHostVSock *vsock = opaque; - - vhost_vsock_post_load_timer_cleanup(vsock); - vhost_vsock_send_transport_reset(vsock); -} - -static int vhost_vsock_pre_save(void *opaque) -{ - VHostVSock *vsock = opaque; - - /* At this point, backend must be stopped, otherwise - * it might keep writing to memory. */ - assert(!vsock->vhost_dev.started); - - return 0; -} - -static int vhost_vsock_post_load(void *opaque, int version_id) -{ - VHostVSock *vsock = opaque; - VirtIODevice *vdev = VIRTIO_DEVICE(vsock); - - if (virtio_queue_get_addr(vdev, 2)) { - /* Defer transport reset event to a vm clock timer so that virtqueue - * changes happen after migration has completed. - */ - assert(!vsock->post_load_timer); - vsock->post_load_timer = - timer_new_ns(QEMU_CLOCK_VIRTUAL, - vhost_vsock_post_load_timer_cb, - vsock); - timer_mod(vsock->post_load_timer, 1); - } - return 0; -} - static const VMStateDescription vmstate_virtio_vhost_vsock = { .name = "virtio-vhost_vsock", .minimum_version_id = VHOST_VSOCK_SAVEVM_VERSION, @@ -294,12 +119,13 @@ static const VMStateDescription vmstate_virtio_vhost_vsock = { VMSTATE_VIRTIO_DEVICE, VMSTATE_END_OF_LIST() }, - .pre_save = vhost_vsock_pre_save, - .post_load = vhost_vsock_post_load, + .pre_save = vhost_vsock_common_pre_save, + .post_load = vhost_vsock_common_post_load, }; static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) { + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(dev); VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostVSock *vsock = VHOST_VSOCK(dev); int vhostfd; @@ -331,46 +157,29 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) } } - virtio_init(vdev, "vhost-vsock", VIRTIO_ID_VSOCK, - sizeof(struct virtio_vsock_config)); - - /* Receive and transmit queues belong to vhost */ - vsock->recv_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, - vhost_vsock_handle_output); - vsock->trans_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, - vhost_vsock_handle_output); + vhost_vsock_common_realize(vdev, "vhost-vsock"); - /* The event queue belongs to QEMU */ - vsock->event_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, - vhost_vsock_handle_output); - - vsock->vhost_dev.nvqs = ARRAY_SIZE(vsock->vhost_vqs); - vsock->vhost_dev.vqs = vsock->vhost_vqs; - ret = vhost_dev_init(&vsock->vhost_dev, (void *)(uintptr_t)vhostfd, + ret = vhost_dev_init(&vvc->vhost_dev, (void *)(uintptr_t)vhostfd, VHOST_BACKEND_TYPE_KERNEL, 0); if (ret < 0) { error_setg_errno(errp, -ret, "vhost-vsock: vhost_dev_init failed"); goto err_virtio; } - ret = vhost_vsock_set_guest_cid(vsock); + ret = vhost_vsock_set_guest_cid(vdev); if (ret < 0) { error_setg_errno(errp, -ret, "vhost-vsock: unable to set guest cid"); goto err_vhost_dev; } - vsock->post_load_timer = NULL; return; err_vhost_dev: - vhost_dev_cleanup(&vsock->vhost_dev); + vhost_dev_cleanup(&vvc->vhost_dev); /* vhost_dev_cleanup() closes the vhostfd passed to vhost_dev_init() */ vhostfd = -1; err_virtio: - virtio_delete_queue(vsock->recv_vq); - virtio_delete_queue(vsock->trans_vq); - virtio_delete_queue(vsock->event_vq); - virtio_cleanup(vdev); + vhost_vsock_common_unrealize(vdev); if (vhostfd >= 0) { close(vhostfd); } @@ -379,19 +188,14 @@ err_virtio: static void vhost_vsock_device_unrealize(DeviceState *dev) { + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(dev); VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostVSock *vsock = VHOST_VSOCK(dev); - - vhost_vsock_post_load_timer_cleanup(vsock); /* This will stop vhost backend if appropriate. */ vhost_vsock_set_status(vdev, 0); - vhost_dev_cleanup(&vsock->vhost_dev); - virtio_delete_queue(vsock->recv_vq); - virtio_delete_queue(vsock->trans_vq); - virtio_delete_queue(vsock->event_vq); - virtio_cleanup(vdev); + vhost_dev_cleanup(&vvc->vhost_dev); + vhost_vsock_common_unrealize(vdev); } static Property vhost_vsock_properties[] = { @@ -407,19 +211,16 @@ static void vhost_vsock_class_init(ObjectClass *klass, void *data) device_class_set_props(dc, vhost_vsock_properties); dc->vmsd = &vmstate_virtio_vhost_vsock; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); vdc->realize = vhost_vsock_device_realize; vdc->unrealize = vhost_vsock_device_unrealize; vdc->get_features = vhost_vsock_get_features; vdc->get_config = vhost_vsock_get_config; vdc->set_status = vhost_vsock_set_status; - vdc->guest_notifier_mask = vhost_vsock_guest_notifier_mask; - vdc->guest_notifier_pending = vhost_vsock_guest_notifier_pending; } static const TypeInfo vhost_vsock_info = { .name = TYPE_VHOST_VSOCK, - .parent = TYPE_VIRTIO_DEVICE, + .parent = TYPE_VHOST_VSOCK_COMMON, .instance_size = sizeof(VHostVSock), .class_init = vhost_vsock_class_init, }; diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index aff98a0ede..5fd25fe520 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -27,6 +27,7 @@ #include "migration/blocker.h" #include "migration/qemu-file-types.h" #include "sysemu/dma.h" +#include "sysemu/tcg.h" #include "trace.h" /* enabled until disconnected backend stabilizes */ @@ -403,26 +404,50 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev, return r; } +/* + * vhost_section: identify sections needed for vhost access + * + * We only care about RAM sections here (where virtqueue and guest + * internals accessed by virtio might live). If we find one we still + * allow the backend to potentially filter it out of our list. + */ static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section) { - bool result; - bool log_dirty = memory_region_get_dirty_log_mask(section->mr) & - ~(1 << DIRTY_MEMORY_MIGRATION); - result = memory_region_is_ram(section->mr) && - !memory_region_is_rom(section->mr); - - /* Vhost doesn't handle any block which is doing dirty-tracking other - * than migration; this typically fires on VGA areas. - */ - result &= !log_dirty; + MemoryRegion *mr = section->mr; + + if (memory_region_is_ram(mr) && !memory_region_is_rom(mr)) { + uint8_t dirty_mask = memory_region_get_dirty_log_mask(mr); + uint8_t handled_dirty; + + /* + * Kernel based vhost doesn't handle any block which is doing + * dirty-tracking other than migration for which it has + * specific logging support. However for TCG the kernel never + * gets involved anyway so we can also ignore it's + * self-modiying code detection flags. However a vhost-user + * client could still confuse a TCG guest if it re-writes + * executable memory that has already been translated. + */ + handled_dirty = (1 << DIRTY_MEMORY_MIGRATION) | + (1 << DIRTY_MEMORY_CODE); - if (result && dev->vhost_ops->vhost_backend_mem_section_filter) { - result &= - dev->vhost_ops->vhost_backend_mem_section_filter(dev, section); - } + if (dirty_mask & ~handled_dirty) { + trace_vhost_reject_section(mr->name, 1); + return false; + } + + if (dev->vhost_ops->vhost_backend_mem_section_filter && + !dev->vhost_ops->vhost_backend_mem_section_filter(dev, section)) { + trace_vhost_reject_section(mr->name, 2); + return false; + } - trace_vhost_section(section->mr->name, result); - return result; + trace_vhost_section(mr->name); + return true; + } else { + trace_vhost_reject_section(mr->name, 3); + return false; + } } static void vhost_begin(MemoryListener *listener) @@ -809,12 +834,12 @@ err_features: return r; } -static int vhost_migration_log(MemoryListener *listener, int enable) +static int vhost_migration_log(MemoryListener *listener, bool enable) { struct vhost_dev *dev = container_of(listener, struct vhost_dev, memory_listener); int r; - if (!!enable == dev->log_enabled) { + if (enable == dev->log_enabled) { return 0; } if (!dev->started) { diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 065cd450f1..10507b2a43 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -321,6 +321,67 @@ static void balloon_stats_set_poll_interval(Object *obj, Visitor *v, balloon_stats_change_timer(s, 0); } +static void virtio_balloon_handle_report(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); + VirtQueueElement *elem; + + while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) { + unsigned int i; + + /* + * When we discard the page it has the effect of removing the page + * from the hypervisor itself and causing it to be zeroed when it + * is returned to us. So we must not discard the page if it is + * accessible by another device or process, or if the guest is + * expecting it to retain a non-zero value. + */ + if (qemu_balloon_is_inhibited() || dev->poison_val) { + goto skip_element; + } + + for (i = 0; i < elem->in_num; i++) { + void *addr = elem->in_sg[i].iov_base; + size_t size = elem->in_sg[i].iov_len; + ram_addr_t ram_offset; + RAMBlock *rb; + + /* + * There is no need to check the memory section to see if + * it is ram/readonly/romd like there is for handle_output + * below. If the region is not meant to be written to then + * address_space_map will have allocated a bounce buffer + * and it will be freed in address_space_unmap and trigger + * and unassigned_mem_write before failing to copy over the + * buffer. If more than one bad descriptor is provided it + * will return NULL after the first bounce buffer and fail + * to map any resources. + */ + rb = qemu_ram_block_from_host(addr, false, &ram_offset); + if (!rb) { + trace_virtio_balloon_bad_addr(elem->in_addr[i]); + continue; + } + + /* + * For now we will simply ignore unaligned memory regions, or + * regions that overrun the end of the RAMBlock. + */ + if (!QEMU_IS_ALIGNED(ram_offset | size, qemu_ram_pagesize(rb)) || + (ram_offset + size) > qemu_ram_get_used_length(rb)) { + continue; + } + + ram_block_discard_range(rb, ram_offset, size); + } + +skip_element: + virtqueue_push(vq, elem, 0); + virtio_notify(vdev, vq); + g_free(elem); + } +} + static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) { VirtIOBalloon *s = VIRTIO_BALLOON(vdev); @@ -634,6 +695,7 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) config.num_pages = cpu_to_le32(dev->num_pages); config.actual = cpu_to_le32(dev->actual); + config.poison_val = cpu_to_le32(dev->poison_val); if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) { config.free_page_report_cmd_id = @@ -683,6 +745,14 @@ static ram_addr_t get_current_ram_size(void) return size; } +static bool virtio_balloon_page_poison_support(void *opaque) +{ + VirtIOBalloon *s = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(s); + + return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON); +} + static void virtio_balloon_set_config(VirtIODevice *vdev, const uint8_t *config_data) { @@ -697,6 +767,10 @@ static void virtio_balloon_set_config(VirtIODevice *vdev, qapi_event_send_balloon_change(vm_ram_size - ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT)); } + dev->poison_val = 0; + if (virtio_balloon_page_poison_support(dev)) { + dev->poison_val = le32_to_cpu(config.poison_val); + } trace_virtio_balloon_set_config(dev->actual, oldactual); } @@ -755,6 +829,17 @@ static const VMStateDescription vmstate_virtio_balloon_free_page_report = { } }; +static const VMStateDescription vmstate_virtio_balloon_page_poison = { + .name = "vitio-balloon-device/page-poison", + .version_id = 1, + .minimum_version_id = 1, + .needed = virtio_balloon_page_poison_support, + .fields = (VMStateField[]) { + VMSTATE_UINT32(poison_val, VirtIOBalloon), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_virtio_balloon_device = { .name = "virtio-balloon-device", .version_id = 1, @@ -767,6 +852,7 @@ static const VMStateDescription vmstate_virtio_balloon_device = { }, .subsections = (const VMStateDescription * []) { &vmstate_virtio_balloon_free_page_report, + &vmstate_virtio_balloon_page_poison, NULL } }; @@ -789,6 +875,13 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) return; } + if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_FREE_PAGE_HINT) && + !s->iothread) { + error_setg(errp, "'free-page-hint' requires 'iothread' to be set"); + virtio_cleanup(vdev); + return; + } + s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output); s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output); s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats); @@ -797,25 +890,18 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE, virtio_balloon_handle_free_page_vq); - s->free_page_report_status = FREE_PAGE_REPORT_S_STOP; - s->free_page_report_cmd_id = - VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; - s->free_page_report_notify.notify = - virtio_balloon_free_page_report_notify; precopy_add_notifier(&s->free_page_report_notify); - if (s->iothread) { - object_ref(OBJECT(s->iothread)); - s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), - virtio_ballloon_get_free_page_hints, s); - qemu_mutex_init(&s->free_page_lock); - qemu_cond_init(&s->free_page_cond); - s->block_iothread = false; - } else { - /* Simply disable this feature if the iothread wasn't created. */ - s->host_features &= ~(1 << VIRTIO_BALLOON_F_FREE_PAGE_HINT); - virtio_error(vdev, "iothread is missing"); - } + + object_ref(OBJECT(s->iothread)); + s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), + virtio_ballloon_get_free_page_hints, s); } + + if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) { + s->reporting_vq = virtio_add_queue(vdev, 32, + virtio_balloon_handle_report); + } + reset_stats(s); } @@ -824,8 +910,9 @@ static void virtio_balloon_device_unrealize(DeviceState *dev) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOBalloon *s = VIRTIO_BALLOON(dev); - if (virtio_balloon_free_page_support(s)) { + if (s->free_page_bh) { qemu_bh_delete(s->free_page_bh); + object_unref(OBJECT(s->iothread)); virtio_balloon_free_page_stop(s); precopy_remove_notifier(&s->free_page_report_notify); } @@ -838,6 +925,9 @@ static void virtio_balloon_device_unrealize(DeviceState *dev) if (s->free_page_vq) { virtio_delete_queue(s->free_page_vq); } + if (s->reporting_vq) { + virtio_delete_queue(s->reporting_vq); + } virtio_cleanup(vdev); } @@ -854,6 +944,8 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev) g_free(s->stats_vq_elem); s->stats_vq_elem = NULL; } + + s->poison_val = 0; } static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) @@ -892,6 +984,11 @@ static void virtio_balloon_instance_init(Object *obj) { VirtIOBalloon *s = VIRTIO_BALLOON(obj); + qemu_mutex_init(&s->free_page_lock); + qemu_cond_init(&s->free_page_cond); + s->free_page_report_cmd_id = VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; + s->free_page_report_notify.notify = virtio_balloon_free_page_report_notify; + object_property_add(obj, "guest-stats", "guest statistics", balloon_stats_get_all, NULL, NULL, s); @@ -916,6 +1013,10 @@ static Property virtio_balloon_properties[] = { VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false), DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features, VIRTIO_BALLOON_F_FREE_PAGE_HINT, false), + DEFINE_PROP_BIT("page-poison", VirtIOBalloon, host_features, + VIRTIO_BALLOON_F_PAGE_POISON, true), + DEFINE_PROP_BIT("free-page-reporting", VirtIOBalloon, host_features, + VIRTIO_BALLOON_F_REPORTING, false), /* QEMU 4.0 accidentally changed the config size even when free-page-hint * is disabled, resulting in QEMU 3.1 migration incompatibility. This * property retains this quirk for QEMU 4.1 machine types. diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index d028c17c24..7bc8c1c056 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1273,16 +1273,20 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, virtio_queue_set_vector(vdev, vdev->queue_sel, val); break; case VIRTIO_PCI_COMMON_Q_ENABLE: - virtio_queue_set_num(vdev, vdev->queue_sel, - proxy->vqs[vdev->queue_sel].num); - virtio_queue_set_rings(vdev, vdev->queue_sel, + if (val == 1) { + virtio_queue_set_num(vdev, vdev->queue_sel, + proxy->vqs[vdev->queue_sel].num); + virtio_queue_set_rings(vdev, vdev->queue_sel, ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 | proxy->vqs[vdev->queue_sel].desc[0], ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 | proxy->vqs[vdev->queue_sel].avail[0], ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 | proxy->vqs[vdev->queue_sel].used[0]); - proxy->vqs[vdev->queue_sel].enabled = 1; + proxy->vqs[vdev->queue_sel].enabled = 1; + } else { + virtio_error(vdev, "wrong value for queue_enable %"PRIx64, val); + } break; case VIRTIO_PCI_COMMON_Q_DESCLO: proxy->vqs[vdev->queue_sel].desc[0] = val; |