diff options
74 files changed, 2177 insertions, 748 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 3abe3faa4e..13396310d9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1531,7 +1531,7 @@ F: hw/acpi/* F: hw/smbios/* F: hw/i386/acpi-build.[hc] F: hw/arm/virt-acpi-build.c -F: tests/qtest/bios-tables-test.c +F: tests/qtest/bios-tables-test* F: tests/qtest/acpi-utils.[hc] F: tests/data/acpi/ @@ -2328,6 +2328,7 @@ S: Maintained F: qtest.c F: accel/qtest.c F: tests/qtest/ +X: tests/qtest/bios-tables-test-allowed-diff.h Device Fuzzing M: Alexander Bulekov <alxndr@bu.edu> diff --git a/chardev/char-socket.c b/chardev/char-socket.c index db253d4024..18e762643b 100644 --- a/chardev/char-socket.c +++ b/chardev/char-socket.c @@ -175,15 +175,16 @@ static int tcp_chr_write(Chardev *chr, const uint8_t *buf, int len) if (ret < 0 && errno != EAGAIN) { if (tcp_chr_read_poll(chr) <= 0) { + /* Perform disconnect and return error. */ tcp_chr_disconnect_locked(chr); - return len; } /* else let the read handler finish it properly */ } return ret; } else { - /* XXX: indicate an error ? */ - return len; + /* Indicate an error. */ + errno = EIO; + return -1; } } @@ -7196,6 +7196,9 @@ if test "$vhost_crypto" = "yes" ; then fi if test "$vhost_vsock" = "yes" ; then echo "CONFIG_VHOST_VSOCK=y" >> $config_host_mak + if test "$vhost_user" = "yes" ; then + echo "CONFIG_VHOST_USER_VSOCK=y" >> $config_host_mak + fi fi if test "$vhost_kernel" = "yes" ; then echo "CONFIG_VHOST_KERNEL=y" >> $config_host_mak diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index 3bca996c62..d315db1396 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -137,6 +137,9 @@ vu_request_to_string(unsigned int req) REQ(VHOST_USER_SET_INFLIGHT_FD), REQ(VHOST_USER_GPU_SET_SOCKET), REQ(VHOST_USER_VRING_KICK), + REQ(VHOST_USER_GET_MAX_MEM_SLOTS), + REQ(VHOST_USER_ADD_MEM_REG), + REQ(VHOST_USER_REM_MEM_REG), REQ(VHOST_USER_MAX), }; #undef REQ @@ -266,7 +269,7 @@ have_userfault(void) static bool vu_message_read(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) { - char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { }; + char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {}; struct iovec iov = { .iov_base = (char *)vmsg, .iov_len = VHOST_USER_HDR_SIZE, @@ -337,7 +340,7 @@ vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) { int rc; uint8_t *p = (uint8_t *)vmsg; - char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { }; + char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {}; struct iovec iov = { .iov_base = (char *)vmsg, .iov_len = VHOST_USER_HDR_SIZE, @@ -350,7 +353,7 @@ vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) struct cmsghdr *cmsg; memset(control, 0, sizeof(control)); - assert(vmsg->fd_num <= VHOST_MEMORY_MAX_NREGIONS); + assert(vmsg->fd_num <= VHOST_MEMORY_BASELINE_NREGIONS); if (vmsg->fd_num > 0) { size_t fdsize = vmsg->fd_num * sizeof(int); msg.msg_controllen = CMSG_SPACE(fdsize); @@ -495,6 +498,16 @@ static bool vu_get_features_exec(VuDev *dev, VhostUserMsg *vmsg) { vmsg->payload.u64 = + /* + * The following VIRTIO feature bits are supported by our virtqueue + * implementation: + */ + 1ULL << VIRTIO_F_NOTIFY_ON_EMPTY | + 1ULL << VIRTIO_RING_F_INDIRECT_DESC | + 1ULL << VIRTIO_RING_F_EVENT_IDX | + 1ULL << VIRTIO_F_VERSION_1 | + + /* vhost-user feature bits */ 1ULL << VHOST_F_LOG_ALL | 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; @@ -584,6 +597,244 @@ map_ring(VuDev *dev, VuVirtq *vq) } static bool +generate_faults(VuDev *dev) { + int i; + for (i = 0; i < dev->nregions; i++) { + VuDevRegion *dev_region = &dev->regions[i]; + int ret; +#ifdef UFFDIO_REGISTER + /* + * We should already have an open ufd. Mark each memory + * range as ufd. + * Discard any mapping we have here; note I can't use MADV_REMOVE + * or fallocate to make the hole since I don't want to lose + * data that's already arrived in the shared process. + * TODO: How to do hugepage + */ + ret = madvise((void *)(uintptr_t)dev_region->mmap_addr, + dev_region->size + dev_region->mmap_offset, + MADV_DONTNEED); + if (ret) { + fprintf(stderr, + "%s: Failed to madvise(DONTNEED) region %d: %s\n", + __func__, i, strerror(errno)); + } + /* + * Turn off transparent hugepages so we dont get lose wakeups + * in neighbouring pages. + * TODO: Turn this backon later. + */ + ret = madvise((void *)(uintptr_t)dev_region->mmap_addr, + dev_region->size + dev_region->mmap_offset, + MADV_NOHUGEPAGE); + if (ret) { + /* + * Note: This can happen legally on kernels that are configured + * without madvise'able hugepages + */ + fprintf(stderr, + "%s: Failed to madvise(NOHUGEPAGE) region %d: %s\n", + __func__, i, strerror(errno)); + } + struct uffdio_register reg_struct; + reg_struct.range.start = (uintptr_t)dev_region->mmap_addr; + reg_struct.range.len = dev_region->size + dev_region->mmap_offset; + reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; + + if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER, ®_struct)) { + vu_panic(dev, "%s: Failed to userfault region %d " + "@%p + size:%zx offset: %zx: (ufd=%d)%s\n", + __func__, i, + dev_region->mmap_addr, + dev_region->size, dev_region->mmap_offset, + dev->postcopy_ufd, strerror(errno)); + return false; + } + if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) { + vu_panic(dev, "%s Region (%d) doesn't support COPY", + __func__, i); + return false; + } + DPRINT("%s: region %d: Registered userfault for %" + PRIx64 " + %" PRIx64 "\n", __func__, i, + (uint64_t)reg_struct.range.start, + (uint64_t)reg_struct.range.len); + /* Now it's registered we can let the client at it */ + if (mprotect((void *)(uintptr_t)dev_region->mmap_addr, + dev_region->size + dev_region->mmap_offset, + PROT_READ | PROT_WRITE)) { + vu_panic(dev, "failed to mprotect region %d for postcopy (%s)", + i, strerror(errno)); + return false; + } + /* TODO: Stash 'zero' support flags somewhere */ +#endif + } + + return true; +} + +static bool +vu_add_mem_reg(VuDev *dev, VhostUserMsg *vmsg) { + int i; + bool track_ramblocks = dev->postcopy_listening; + VhostUserMemoryRegion m = vmsg->payload.memreg.region, *msg_region = &m; + VuDevRegion *dev_region = &dev->regions[dev->nregions]; + void *mmap_addr; + + /* + * If we are in postcopy mode and we receive a u64 payload with a 0 value + * we know all the postcopy client bases have been recieved, and we + * should start generating faults. + */ + if (track_ramblocks && + vmsg->size == sizeof(vmsg->payload.u64) && + vmsg->payload.u64 == 0) { + (void)generate_faults(dev); + return false; + } + + DPRINT("Adding region: %d\n", dev->nregions); + DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n", + msg_region->guest_phys_addr); + DPRINT(" memory_size: 0x%016"PRIx64"\n", + msg_region->memory_size); + DPRINT(" userspace_addr 0x%016"PRIx64"\n", + msg_region->userspace_addr); + DPRINT(" mmap_offset 0x%016"PRIx64"\n", + msg_region->mmap_offset); + + dev_region->gpa = msg_region->guest_phys_addr; + dev_region->size = msg_region->memory_size; + dev_region->qva = msg_region->userspace_addr; + dev_region->mmap_offset = msg_region->mmap_offset; + + /* + * We don't use offset argument of mmap() since the + * mapped address has to be page aligned, and we use huge + * pages. + */ + if (track_ramblocks) { + /* + * In postcopy we're using PROT_NONE here to catch anyone + * accessing it before we userfault. + */ + mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset, + PROT_NONE, MAP_SHARED, + vmsg->fds[0], 0); + } else { + mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset, + PROT_READ | PROT_WRITE, MAP_SHARED, vmsg->fds[0], + 0); + } + + if (mmap_addr == MAP_FAILED) { + vu_panic(dev, "region mmap error: %s", strerror(errno)); + } else { + dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr; + DPRINT(" mmap_addr: 0x%016"PRIx64"\n", + dev_region->mmap_addr); + } + + close(vmsg->fds[0]); + + if (track_ramblocks) { + /* + * Return the address to QEMU so that it can translate the ufd + * fault addresses back. + */ + msg_region->userspace_addr = (uintptr_t)(mmap_addr + + dev_region->mmap_offset); + + /* Send the message back to qemu with the addresses filled in. */ + vmsg->fd_num = 0; + if (!vu_send_reply(dev, dev->sock, vmsg)) { + vu_panic(dev, "failed to respond to add-mem-region for postcopy"); + return false; + } + + DPRINT("Successfully added new region in postcopy\n"); + dev->nregions++; + return false; + + } else { + for (i = 0; i < dev->max_queues; i++) { + if (dev->vq[i].vring.desc) { + if (map_ring(dev, &dev->vq[i])) { + vu_panic(dev, "remapping queue %d for new memory region", + i); + } + } + } + + DPRINT("Successfully added new region\n"); + dev->nregions++; + vmsg_set_reply_u64(vmsg, 0); + return true; + } +} + +static inline bool reg_equal(VuDevRegion *vudev_reg, + VhostUserMemoryRegion *msg_reg) +{ + if (vudev_reg->gpa == msg_reg->guest_phys_addr && + vudev_reg->qva == msg_reg->userspace_addr && + vudev_reg->size == msg_reg->memory_size) { + return true; + } + + return false; +} + +static bool +vu_rem_mem_reg(VuDev *dev, VhostUserMsg *vmsg) { + int i, j; + bool found = false; + VuDevRegion shadow_regions[VHOST_USER_MAX_RAM_SLOTS] = {}; + VhostUserMemoryRegion m = vmsg->payload.memreg.region, *msg_region = &m; + + DPRINT("Removing region:\n"); + DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n", + msg_region->guest_phys_addr); + DPRINT(" memory_size: 0x%016"PRIx64"\n", + msg_region->memory_size); + DPRINT(" userspace_addr 0x%016"PRIx64"\n", + msg_region->userspace_addr); + DPRINT(" mmap_offset 0x%016"PRIx64"\n", + msg_region->mmap_offset); + + for (i = 0, j = 0; i < dev->nregions; i++) { + if (!reg_equal(&dev->regions[i], msg_region)) { + shadow_regions[j].gpa = dev->regions[i].gpa; + shadow_regions[j].size = dev->regions[i].size; + shadow_regions[j].qva = dev->regions[i].qva; + shadow_regions[j].mmap_offset = dev->regions[i].mmap_offset; + j++; + } else { + found = true; + VuDevRegion *r = &dev->regions[i]; + void *m = (void *) (uintptr_t) r->mmap_addr; + + if (m) { + munmap(m, r->size + r->mmap_offset); + } + } + } + + if (found) { + memcpy(dev->regions, shadow_regions, + sizeof(VuDevRegion) * VHOST_USER_MAX_RAM_SLOTS); + DPRINT("Successfully removed a region\n"); + dev->nregions--; + vmsg_set_reply_u64(vmsg, 0); + } else { + vu_panic(dev, "Specified region not found\n"); + } + + return true; +} + +static bool vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) { int i; @@ -655,74 +906,7 @@ vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) } /* OK, now we can go and register the memory and generate faults */ - for (i = 0; i < dev->nregions; i++) { - VuDevRegion *dev_region = &dev->regions[i]; - int ret; -#ifdef UFFDIO_REGISTER - /* We should already have an open ufd. Mark each memory - * range as ufd. - * Discard any mapping we have here; note I can't use MADV_REMOVE - * or fallocate to make the hole since I don't want to lose - * data that's already arrived in the shared process. - * TODO: How to do hugepage - */ - ret = madvise((void *)(uintptr_t)dev_region->mmap_addr, - dev_region->size + dev_region->mmap_offset, - MADV_DONTNEED); - if (ret) { - fprintf(stderr, - "%s: Failed to madvise(DONTNEED) region %d: %s\n", - __func__, i, strerror(errno)); - } - /* Turn off transparent hugepages so we dont get lose wakeups - * in neighbouring pages. - * TODO: Turn this backon later. - */ - ret = madvise((void *)(uintptr_t)dev_region->mmap_addr, - dev_region->size + dev_region->mmap_offset, - MADV_NOHUGEPAGE); - if (ret) { - /* Note: This can happen legally on kernels that are configured - * without madvise'able hugepages - */ - fprintf(stderr, - "%s: Failed to madvise(NOHUGEPAGE) region %d: %s\n", - __func__, i, strerror(errno)); - } - struct uffdio_register reg_struct; - reg_struct.range.start = (uintptr_t)dev_region->mmap_addr; - reg_struct.range.len = dev_region->size + dev_region->mmap_offset; - reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; - - if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER, ®_struct)) { - vu_panic(dev, "%s: Failed to userfault region %d " - "@%p + size:%zx offset: %zx: (ufd=%d)%s\n", - __func__, i, - dev_region->mmap_addr, - dev_region->size, dev_region->mmap_offset, - dev->postcopy_ufd, strerror(errno)); - return false; - } - if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) { - vu_panic(dev, "%s Region (%d) doesn't support COPY", - __func__, i); - return false; - } - DPRINT("%s: region %d: Registered userfault for %" - PRIx64 " + %" PRIx64 "\n", __func__, i, - (uint64_t)reg_struct.range.start, - (uint64_t)reg_struct.range.len); - /* Now it's registered we can let the client at it */ - if (mprotect((void *)(uintptr_t)dev_region->mmap_addr, - dev_region->size + dev_region->mmap_offset, - PROT_READ | PROT_WRITE)) { - vu_panic(dev, "failed to mprotect region %d for postcopy (%s)", - i, strerror(errno)); - return false; - } - /* TODO: Stash 'zero' support flags somewhere */ -#endif - } + (void)generate_faults(dev); return false; } @@ -1220,7 +1404,8 @@ vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg) 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ | 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD | - 1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK; + 1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK | + 1ULL << VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS; if (have_userfault()) { features |= 1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT; @@ -1554,6 +1739,22 @@ vu_handle_vring_kick(VuDev *dev, VhostUserMsg *vmsg) return false; } +static bool vu_handle_get_max_memslots(VuDev *dev, VhostUserMsg *vmsg) +{ + vmsg->flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION; + vmsg->size = sizeof(vmsg->payload.u64); + vmsg->payload.u64 = VHOST_USER_MAX_RAM_SLOTS; + vmsg->fd_num = 0; + + if (!vu_message_write(dev, dev->sock, vmsg)) { + vu_panic(dev, "Failed to send max ram slots: %s\n", strerror(errno)); + } + + DPRINT("u64: 0x%016"PRIx64"\n", (uint64_t) VHOST_USER_MAX_RAM_SLOTS); + + return false; +} + static bool vu_process_message(VuDev *dev, VhostUserMsg *vmsg) { @@ -1638,6 +1839,12 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg) return vu_set_inflight_fd(dev, vmsg); case VHOST_USER_VRING_KICK: return vu_handle_vring_kick(dev, vmsg); + case VHOST_USER_GET_MAX_MEM_SLOTS: + return vu_handle_get_max_memslots(dev, vmsg); + case VHOST_USER_ADD_MEM_REG: + return vu_add_mem_reg(dev, vmsg); + case VHOST_USER_REM_MEM_REG: + return vu_rem_mem_reg(dev, vmsg); default: vmsg_close_fds(vmsg); vu_panic(dev, "Unhandled request: %d", vmsg->request); diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h index f30394fab6..844c37c648 100644 --- a/contrib/libvhost-user/libvhost-user.h +++ b/contrib/libvhost-user/libvhost-user.h @@ -28,7 +28,13 @@ #define VIRTQUEUE_MAX_SIZE 1024 -#define VHOST_MEMORY_MAX_NREGIONS 8 +#define VHOST_MEMORY_BASELINE_NREGIONS 8 + +/* + * Set a reasonable maximum number of ram slots, which will be supported by + * any architecture. + */ +#define VHOST_USER_MAX_RAM_SLOTS 32 typedef enum VhostSetConfigType { VHOST_SET_CONFIG_TYPE_MASTER = 0, @@ -55,6 +61,7 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, VHOST_USER_PROTOCOL_F_MAX }; @@ -97,6 +104,9 @@ typedef enum VhostUserRequest { VHOST_USER_SET_INFLIGHT_FD = 32, VHOST_USER_GPU_SET_SOCKET = 33, VHOST_USER_VRING_KICK = 35, + VHOST_USER_GET_MAX_MEM_SLOTS = 36, + VHOST_USER_ADD_MEM_REG = 37, + VHOST_USER_REM_MEM_REG = 38, VHOST_USER_MAX } VhostUserRequest; @@ -120,9 +130,14 @@ typedef struct VhostUserMemoryRegion { typedef struct VhostUserMemory { uint32_t nregions; uint32_t padding; - VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; + VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; } VhostUserMemory; +typedef struct VhostUserMemRegMsg { + uint32_t padding; + VhostUserMemoryRegion region; +} VhostUserMemRegMsg; + typedef struct VhostUserLog { uint64_t mmap_size; uint64_t mmap_offset; @@ -175,13 +190,14 @@ typedef struct VhostUserMsg { struct vhost_vring_state state; struct vhost_vring_addr addr; VhostUserMemory memory; + VhostUserMemRegMsg memreg; VhostUserLog log; VhostUserConfig config; VhostUserVringArea area; VhostUserInflight inflight; } payload; - int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fds[VHOST_MEMORY_BASELINE_NREGIONS]; int fd_num; uint8_t *data; } VU_PACKED VhostUserMsg; @@ -359,7 +375,7 @@ typedef struct VuDevInflightInfo { struct VuDev { int sock; uint32_t nregions; - VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS]; + VuDevRegion regions[VHOST_USER_MAX_RAM_SLOTS]; VuVirtq *vq; VuDevInflightInfo inflight_info; int log_call_fd; diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c index 6fd91c7e99..25eccd02b5 100644 --- a/contrib/vhost-user-blk/vhost-user-blk.c +++ b/contrib/vhost-user-blk/vhost-user-blk.c @@ -382,9 +382,7 @@ vub_get_features(VuDev *dev) 1ull << VIRTIO_BLK_F_DISCARD | 1ull << VIRTIO_BLK_F_WRITE_ZEROES | #endif - 1ull << VIRTIO_BLK_F_CONFIG_WCE | - 1ull << VIRTIO_F_VERSION_1 | - 1ull << VHOST_USER_F_PROTOCOL_FEATURES; + 1ull << VIRTIO_BLK_F_CONFIG_WCE; if (vdev_blk->enable_ro) { features |= 1ull << VIRTIO_BLK_F_RO; diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst index 3b1b6602c7..688b7c6900 100644 --- a/docs/interop/vhost-user.rst +++ b/docs/interop/vhost-user.rst @@ -815,6 +815,7 @@ Protocol features #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12 #define VHOST_USER_PROTOCOL_F_RESET_DEVICE 13 #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14 + #define VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS 15 Master message types -------------------- @@ -1263,6 +1264,49 @@ Master message types The state.num field is currently reserved and must be set to 0. +``VHOST_USER_GET_MAX_MEM_SLOTS`` + :id: 36 + :equivalent ioctl: N/A + :slave payload: u64 + + When the ``VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS`` protocol + feature has been successfully negotiated, this message is submitted + by master to the slave. The slave should return the message with a + u64 payload containing the maximum number of memory slots for + QEMU to expose to the guest. The value returned by the backend + will be capped at the maximum number of ram slots which can be + supported by the target platform. + +``VHOST_USER_ADD_MEM_REG`` + :id: 37 + :equivalent ioctl: N/A + :slave payload: memory region + + When the ``VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS`` protocol + feature has been successfully negotiated, this message is submitted + by the master to the slave. The message payload contains a memory + region descriptor struct, describing a region of guest memory which + the slave device must map in. When the + ``VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS`` protocol feature has + been successfully negotiated, along with the + ``VHOST_USER_REM_MEM_REG`` message, this message is used to set and + update the memory tables of the slave device. + +``VHOST_USER_REM_MEM_REG`` + :id: 38 + :equivalent ioctl: N/A + :slave payload: memory region + + When the ``VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS`` protocol + feature has been successfully negotiated, this message is submitted + by the master to the slave. The message payload contains a memory + region descriptor struct, describing a region of guest memory which + the slave device must unmap. When the + ``VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS`` protocol feature has + been successfully negotiated, along with the + ``VHOST_USER_ADD_MEM_REG`` message, this message is used to set and + update the memory tables of the slave device. + Slave message types ------------------- diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index 3681ec6e3d..2cb7b991ef 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -26,6 +26,7 @@ #include "qemu/bitops.h" #include "sysemu/numa.h" #include "hw/boards.h" +#include "hw/acpi/tpm.h" static GArray *build_alloc_array(void) { @@ -1865,9 +1866,9 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, } /* SLEEP_CONTROL_REG */ - build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); + build_append_gas_from_struct(tbl, &f->sleep_ctl); /* SLEEP_STATUS_REG */ - build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); + build_append_gas_from_struct(tbl, &f->sleep_sts); /* TODO: extra fields need to be added to support revisions above rev5 */ assert(f->rev == 5); @@ -1877,6 +1878,50 @@ build_hdr: "FACP", tbl->len - fadt_start, f->rev, oem_id, oem_table_id); } +void build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog) +{ + Acpi20TPM2 *tpm2_ptr = acpi_data_push(table_data, sizeof(AcpiTableHeader)); + unsigned log_addr_size = sizeof(tpm2_ptr->log_area_start_address); + unsigned log_addr_offset = + (char *)&tpm2_ptr->log_area_start_address - table_data->data; + uint8_t start_method_params[12] = {}; + TPMIf *tpmif = tpm_find(); + + /* platform class */ + build_append_int_noprefix(table_data, TPM2_ACPI_CLASS_CLIENT, 2); + /* reserved */ + build_append_int_noprefix(table_data, 0, 2); + if (TPM_IS_TIS_ISA(tpmif) || TPM_IS_TIS_SYSBUS(tpmif)) { + /* address of control area */ + build_append_int_noprefix(table_data, 0, 8); + /* start method */ + build_append_int_noprefix(table_data, TPM2_START_METHOD_MMIO, 4); + } else if (TPM_IS_CRB(tpmif)) { + build_append_int_noprefix(table_data, TPM_CRB_ADDR_CTRL, 8); + build_append_int_noprefix(table_data, TPM2_START_METHOD_CRB, 4); + } else { + g_warn_if_reached(); + } + + /* platform specific parameters */ + g_array_append_vals(table_data, &start_method_params, 12); + + /* log area minimum length */ + build_append_int_noprefix(table_data, TPM_LOG_AREA_MINIMUM_SIZE, 4); + + acpi_data_push(tcpalog, TPM_LOG_AREA_MINIMUM_SIZE); + bios_linker_loader_alloc(linker, ACPI_BUILD_TPMLOG_FILE, tcpalog, 1, + false); + + /* log area start address to be filled by Guest linker */ + build_append_int_noprefix(table_data, 0, 8); + bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, + log_addr_offset, log_addr_size, + ACPI_BUILD_TPMLOG_FILE, 0); + build_header(linker, table_data, + (void *)tpm2_ptr, "TPM2", sizeof(*tpm2_ptr), 4, NULL, NULL); +} + /* ACPI 5.0: 6.4.3.8.2 Serial Bus Connection Descriptors */ static Aml *aml_serial_bus_device(uint8_t serial_bus_type, uint8_t flags, uint16_t type_flags, diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c index b1cbdd86b6..1cb34111e5 100644 --- a/hw/acpi/generic_event_device.c +++ b/hw/acpi/generic_event_device.c @@ -142,7 +142,7 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev, } /* Memory read by the GED _EVT AML dynamic method */ -static uint64_t ged_read(void *opaque, hwaddr addr, unsigned size) +static uint64_t ged_evt_read(void *opaque, hwaddr addr, unsigned size) { uint64_t val = 0; GEDState *ged_st = opaque; @@ -161,14 +161,14 @@ static uint64_t ged_read(void *opaque, hwaddr addr, unsigned size) } /* Nothing is expected to be written to the GED memory region */ -static void ged_write(void *opaque, hwaddr addr, uint64_t data, - unsigned int size) +static void ged_evt_write(void *opaque, hwaddr addr, uint64_t data, + unsigned int size) { } -static const MemoryRegionOps ged_ops = { - .read = ged_read, - .write = ged_write, +static const MemoryRegionOps ged_evt_ops = { + .read = ged_evt_read, + .write = ged_evt_write, .endianness = DEVICE_LITTLE_ENDIAN, .valid = { .min_access_size = 4, @@ -287,9 +287,9 @@ static void acpi_ged_initfn(Object *obj) SysBusDevice *sbd = SYS_BUS_DEVICE(obj); GEDState *ged_st = &s->ged_state; - memory_region_init_io(&ged_st->io, obj, &ged_ops, ged_st, + memory_region_init_io(&ged_st->evt, obj, &ged_evt_ops, ged_st, TYPE_ACPI_GED, ACPI_GED_EVT_SEL_LEN); - sysbus_init_mmio(sbd, &ged_st->io); + sysbus_init_mmio(sbd, &ged_st->evt); sysbus_init_irq(sbd, &s->irq); diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index 9316d12b70..8f7cc16add 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -28,6 +28,7 @@ #include "qemu/osdep.h" #include "qemu/uuid.h" +#include "qapi/error.h" #include "hw/acpi/acpi.h" #include "hw/acpi/aml-build.h" #include "hw/acpi/bios-linker-loader.h" @@ -1334,6 +1335,28 @@ static void nvdimm_build_ssdt(GArray *table_offsets, GArray *table_data, free_aml_allocator(); } +void nvdimm_build_srat(GArray *table_data) +{ + GSList *device_list = nvdimm_get_device_list(); + + for (; device_list; device_list = device_list->next) { + AcpiSratMemoryAffinity *numamem = NULL; + DeviceState *dev = device_list->data; + Object *obj = OBJECT(dev); + uint64_t addr, size; + int node; + + node = object_property_get_int(obj, PC_DIMM_NODE_PROP, &error_abort); + addr = object_property_get_uint(obj, PC_DIMM_ADDR_PROP, &error_abort); + size = object_property_get_uint(obj, PC_DIMM_SIZE_PROP, &error_abort); + + numamem = acpi_data_push(table_data, sizeof *numamem); + build_srat_memory(numamem, addr, size, node, + MEM_AFFINITY_ENABLED | MEM_AFFINITY_NON_VOLATILE); + } + g_slist_free(device_list); +} + void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data, BIOSLinker *linker, NVDIMMState *state, uint32_t ram_slots) diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 1b0a584c7b..ca31f70f7f 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -41,12 +41,14 @@ #include "hw/acpi/pci.h" #include "hw/acpi/memory_hotplug.h" #include "hw/acpi/generic_event_device.h" +#include "hw/acpi/tpm.h" #include "hw/pci/pcie_host.h" #include "hw/pci/pci.h" #include "hw/arm/virt.h" #include "hw/mem/nvdimm.h" #include "sysemu/numa.h" #include "sysemu/reset.h" +#include "sysemu/tpm.h" #include "kvm_arm.h" #include "migration/vmstate.h" #include "hw/acpi/ghes.h" @@ -539,6 +541,10 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) } } + if (ms->nvdimms_state->is_enabled) { + nvdimm_build_srat(table_data); + } + if (ms->device_memory) { numamem = acpi_data_push(table_data, sizeof *numamem); build_srat_memory(numamem, ms->device_memory->base, @@ -844,6 +850,11 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) build_iort(tables_blob, tables->linker, vms); } + if (tpm_get_version(tpm_find()) == TPM_VERSION_2_0) { + acpi_add_table(table_offsets, tables_blob); + build_tpm2(tables_blob, tables->linker, tables->tcpalog); + } + /* XSDT is pointed to by RSDP */ xsdt = tables_blob->len; build_xsdt(tables_blob, tables->linker, table_offsets, NULL, NULL); diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 9d8c0b3909..76838e76d3 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -349,6 +349,19 @@ static void vhost_user_blk_disconnect(DeviceState *dev) vhost_dev_cleanup(&s->dev); } +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event); + +static void vhost_user_blk_chr_closed_bh(void *opaque) +{ + DeviceState *dev = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(vdev); + + vhost_user_blk_disconnect(dev); + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, + NULL, opaque, NULL, true); +} + static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) { DeviceState *dev = opaque; @@ -363,7 +376,30 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) } break; case CHR_EVENT_CLOSED: - vhost_user_blk_disconnect(dev); + /* + * A close event may happen during a read/write, but vhost + * code assumes the vhost_dev remains setup, so delay the + * stop & clear. There are two possible paths to hit this + * disconnect event: + * 1. When VM is in the RUN_STATE_PRELAUNCH state. The + * vhost_user_blk_device_realize() is a caller. + * 2. In tha main loop phase after VM start. + * + * For p2 the disconnect event will be delayed. We can't + * do the same for p1, because we are not running the loop + * at this moment. So just skip this step and perform + * disconnect in the caller function. + * + * TODO: maybe it is a good idea to make the same fix + * for other vhost-user devices. + */ + if (runstate_is_running()) { + AioContext *ctx = qemu_get_current_aio_context(); + + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, NULL, + NULL, NULL, false); + aio_bh_schedule_oneshot(ctx, vhost_user_blk_chr_closed_bh, opaque); + } break; case CHR_EVENT_BREAK: case CHR_EVENT_MUX_IN: diff --git a/hw/char/parallel.c b/hw/char/parallel.c index 8dd67d1375..c0f34bf924 100644 --- a/hw/char/parallel.c +++ b/hw/char/parallel.c @@ -28,6 +28,7 @@ #include "qemu/module.h" #include "chardev/char-parallel.h" #include "chardev/char-fe.h" +#include "hw/acpi/aml-build.h" #include "hw/irq.h" #include "hw/isa/isa.h" #include "hw/qdev-properties.h" @@ -568,6 +569,25 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp) s, "parallel"); } +static void parallel_isa_build_aml(ISADevice *isadev, Aml *scope) +{ + ISAParallelState *isa = ISA_PARALLEL(isadev); + Aml *dev; + Aml *crs; + + crs = aml_resource_template(); + aml_append(crs, aml_io(AML_DECODE16, isa->iobase, isa->iobase, 0x08, 0x08)); + aml_append(crs, aml_irq_no_flags(isa->isairq)); + + dev = aml_device("LPT%d", isa->index + 1); + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0400"))); + aml_append(dev, aml_name_decl("_UID", aml_int(isa->index + 1))); + aml_append(dev, aml_name_decl("_STA", aml_int(0xf))); + aml_append(dev, aml_name_decl("_CRS", crs)); + + aml_append(scope, dev); +} + /* Memory mapped interface */ static uint64_t parallel_mm_readfn(void *opaque, hwaddr addr, unsigned size) { @@ -624,9 +644,11 @@ static Property parallel_isa_properties[] = { static void parallel_isa_class_initfn(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ISADeviceClass *isa = ISA_DEVICE_CLASS(klass); dc->realize = parallel_isa_realizefn; dc->vmsd = &vmstate_parallel_isa; + isa->build_aml = parallel_isa_build_aml; device_class_set_props(dc, parallel_isa_properties); set_bit(DEVICE_CATEGORY_INPUT, dc->categories); } diff --git a/hw/char/serial-isa.c b/hw/char/serial-isa.c index f9b6eed783..165e320e65 100644 --- a/hw/char/serial-isa.c +++ b/hw/char/serial-isa.c @@ -27,6 +27,7 @@ #include "qapi/error.h" #include "qemu/module.h" #include "sysemu/sysemu.h" +#include "hw/acpi/aml-build.h" #include "hw/char/serial.h" #include "hw/isa/isa.h" #include "hw/qdev-properties.h" @@ -81,6 +82,25 @@ static void serial_isa_realizefn(DeviceState *dev, Error **errp) isa_register_ioport(isadev, &s->io, isa->iobase); } +static void serial_isa_build_aml(ISADevice *isadev, Aml *scope) +{ + ISASerialState *isa = ISA_SERIAL(isadev); + Aml *dev; + Aml *crs; + + crs = aml_resource_template(); + aml_append(crs, aml_io(AML_DECODE16, isa->iobase, isa->iobase, 0x00, 0x08)); + aml_append(crs, aml_irq_no_flags(isa->isairq)); + + dev = aml_device("COM%d", isa->index + 1); + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0501"))); + aml_append(dev, aml_name_decl("_UID", aml_int(isa->index + 1))); + aml_append(dev, aml_name_decl("_STA", aml_int(0xf))); + aml_append(dev, aml_name_decl("_CRS", crs)); + + aml_append(scope, dev); +} + static const VMStateDescription vmstate_isa_serial = { .name = "serial", .version_id = 3, @@ -103,9 +123,11 @@ static Property serial_isa_properties[] = { static void serial_isa_class_initfn(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ISADeviceClass *isa = ISA_DEVICE_CLASS(klass); dc->realize = serial_isa_realizefn; dc->vmsd = &vmstate_isa_serial; + isa->build_aml = serial_isa_build_aml; device_class_set_props(dc, serial_isa_properties); set_bit(DEVICE_CATEGORY_INPUT, dc->categories); } diff --git a/hw/core/machine.c b/hw/core/machine.c index bb3a7b18b1..9eca7d8c9b 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -28,7 +28,9 @@ #include "hw/mem/nvdimm.h" #include "migration/vmstate.h" -GlobalProperty hw_compat_5_0[] = {}; +GlobalProperty hw_compat_5_0[] = { + { "virtio-balloon-device", "page-poison", "false" }, +}; const size_t hw_compat_5_0_len = G_N_ELEMENTS(hw_compat_5_0); GlobalProperty hw_compat_4_2[] = { diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs index 8ce1b26533..6abc74551a 100644 --- a/hw/i386/Makefile.objs +++ b/hw/i386/Makefile.objs @@ -16,4 +16,5 @@ obj-$(CONFIG_VMMOUSE) += vmmouse.o obj-$(CONFIG_PC) += port92.o obj-y += kvmvapic.o +obj-$(CONFIG_ACPI) += acpi-common.o obj-$(CONFIG_PC) += acpi-build.o diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 2e15f6848e..473cbdfffd 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -24,6 +24,7 @@ #include "qapi/error.h" #include "qapi/qmp/qnum.h" #include "acpi-build.h" +#include "acpi-common.h" #include "qemu/bitmap.h" #include "qemu/error-report.h" #include "hw/pci/pci.h" @@ -89,9 +90,6 @@ #define ACPI_BUILD_DPRINTF(fmt, ...) #endif -/* Default IOAPIC ID */ -#define ACPI_BUILD_IOAPIC_ID 0x0 - typedef struct AcpiPmInfo { bool s3_disabled; bool s4_disabled; @@ -327,125 +325,6 @@ build_facs(GArray *table_data) facs->length = cpu_to_le32(sizeof(*facs)); } -void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, - const CPUArchIdList *apic_ids, GArray *entry) -{ - uint32_t apic_id = apic_ids->cpus[uid].arch_id; - - /* ACPI spec says that LAPIC entry for non present - * CPU may be omitted from MADT or it must be marked - * as disabled. However omitting non present CPU from - * MADT breaks hotplug on linux. So possible CPUs - * should be put in MADT but kept disabled. - */ - if (apic_id < 255) { - AcpiMadtProcessorApic *apic = acpi_data_push(entry, sizeof *apic); - - apic->type = ACPI_APIC_PROCESSOR; - apic->length = sizeof(*apic); - apic->processor_id = uid; - apic->local_apic_id = apic_id; - if (apic_ids->cpus[uid].cpu != NULL) { - apic->flags = cpu_to_le32(1); - } else { - apic->flags = cpu_to_le32(0); - } - } else { - AcpiMadtProcessorX2Apic *apic = acpi_data_push(entry, sizeof *apic); - - apic->type = ACPI_APIC_LOCAL_X2APIC; - apic->length = sizeof(*apic); - apic->uid = cpu_to_le32(uid); - apic->x2apic_id = cpu_to_le32(apic_id); - if (apic_ids->cpus[uid].cpu != NULL) { - apic->flags = cpu_to_le32(1); - } else { - apic->flags = cpu_to_le32(0); - } - } -} - -static void -build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms) -{ - MachineClass *mc = MACHINE_GET_CLASS(pcms); - X86MachineState *x86ms = X86_MACHINE(pcms); - const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(pcms)); - int madt_start = table_data->len; - AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(pcms->acpi_dev); - AcpiDeviceIf *adev = ACPI_DEVICE_IF(pcms->acpi_dev); - bool x2apic_mode = false; - - AcpiMultipleApicTable *madt; - AcpiMadtIoApic *io_apic; - AcpiMadtIntsrcovr *intsrcovr; - int i; - - madt = acpi_data_push(table_data, sizeof *madt); - madt->local_apic_address = cpu_to_le32(APIC_DEFAULT_ADDRESS); - madt->flags = cpu_to_le32(1); - - for (i = 0; i < apic_ids->len; i++) { - adevc->madt_cpu(adev, i, apic_ids, table_data); - if (apic_ids->cpus[i].arch_id > 254) { - x2apic_mode = true; - } - } - - io_apic = acpi_data_push(table_data, sizeof *io_apic); - io_apic->type = ACPI_APIC_IO; - io_apic->length = sizeof(*io_apic); - io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID; - io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS); - io_apic->interrupt = cpu_to_le32(0); - - if (x86ms->apic_xrupt_override) { - intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); - intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; - intsrcovr->length = sizeof(*intsrcovr); - intsrcovr->source = 0; - intsrcovr->gsi = cpu_to_le32(2); - intsrcovr->flags = cpu_to_le16(0); /* conforms to bus specifications */ - } - for (i = 1; i < 16; i++) { -#define ACPI_BUILD_PCI_IRQS ((1<<5) | (1<<9) | (1<<10) | (1<<11)) - if (!(ACPI_BUILD_PCI_IRQS & (1 << i))) { - /* No need for a INT source override structure. */ - continue; - } - intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); - intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; - intsrcovr->length = sizeof(*intsrcovr); - intsrcovr->source = i; - intsrcovr->gsi = cpu_to_le32(i); - intsrcovr->flags = cpu_to_le16(0xd); /* active high, level triggered */ - } - - if (x2apic_mode) { - AcpiMadtLocalX2ApicNmi *local_nmi; - - local_nmi = acpi_data_push(table_data, sizeof *local_nmi); - local_nmi->type = ACPI_APIC_LOCAL_X2APIC_NMI; - local_nmi->length = sizeof(*local_nmi); - local_nmi->uid = 0xFFFFFFFF; /* all processors */ - local_nmi->flags = cpu_to_le16(0); - local_nmi->lint = 1; /* ACPI_LINT1 */ - } else { - AcpiMadtLocalNmi *local_nmi; - - local_nmi = acpi_data_push(table_data, sizeof *local_nmi); - local_nmi->type = ACPI_APIC_LOCAL_NMI; - local_nmi->length = sizeof(*local_nmi); - local_nmi->processor_id = 0xff; /* all processors */ - local_nmi->flags = cpu_to_le16(0); - local_nmi->lint = 1; /* ACPI_LINT1 */ - } - - build_header(linker, table_data, - (void *)(table_data->data + madt_start), "APIC", - table_data->len - madt_start, 1, NULL, NULL); -} - static void build_append_pcihp_notify_entry(Aml *method, int slot) { Aml *if_ctx; @@ -1137,22 +1016,6 @@ static Aml *build_fdc_device_aml(ISADevice *fdc) return dev; } -static Aml *build_rtc_device_aml(void) -{ - Aml *dev; - Aml *crs; - - dev = aml_device("RTC"); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0B00"))); - crs = aml_resource_template(); - aml_append(crs, aml_io(AML_DECODE16, 0x0070, 0x0070, 0x10, 0x02)); - aml_append(crs, aml_irq_no_flags(8)); - aml_append(crs, aml_io(AML_DECODE16, 0x0072, 0x0072, 0x02, 0x06)); - aml_append(dev, aml_name_decl("_CRS", crs)); - - return dev; -} - static Aml *build_kbd_device_aml(void) { Aml *dev; @@ -1189,87 +1052,6 @@ static Aml *build_mouse_device_aml(void) return dev; } -static Aml *build_lpt_device_aml(void) -{ - Aml *dev; - Aml *crs; - Aml *method; - Aml *if_ctx; - Aml *else_ctx; - Aml *zero = aml_int(0); - Aml *is_present = aml_local(0); - - dev = aml_device("LPT"); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0400"))); - - method = aml_method("_STA", 0, AML_NOTSERIALIZED); - aml_append(method, aml_store(aml_name("LPEN"), is_present)); - if_ctx = aml_if(aml_equal(is_present, zero)); - { - aml_append(if_ctx, aml_return(aml_int(0x00))); - } - aml_append(method, if_ctx); - else_ctx = aml_else(); - { - aml_append(else_ctx, aml_return(aml_int(0x0f))); - } - aml_append(method, else_ctx); - aml_append(dev, method); - - crs = aml_resource_template(); - aml_append(crs, aml_io(AML_DECODE16, 0x0378, 0x0378, 0x08, 0x08)); - aml_append(crs, aml_irq_no_flags(7)); - aml_append(dev, aml_name_decl("_CRS", crs)); - - return dev; -} - -static Aml *build_com_device_aml(uint8_t uid) -{ - Aml *dev; - Aml *crs; - Aml *method; - Aml *if_ctx; - Aml *else_ctx; - Aml *zero = aml_int(0); - Aml *is_present = aml_local(0); - const char *enabled_field = "CAEN"; - uint8_t irq = 4; - uint16_t io_port = 0x03F8; - - assert(uid == 1 || uid == 2); - if (uid == 2) { - enabled_field = "CBEN"; - irq = 3; - io_port = 0x02F8; - } - - dev = aml_device("COM%d", uid); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0501"))); - aml_append(dev, aml_name_decl("_UID", aml_int(uid))); - - method = aml_method("_STA", 0, AML_NOTSERIALIZED); - aml_append(method, aml_store(aml_name("%s", enabled_field), is_present)); - if_ctx = aml_if(aml_equal(is_present, zero)); - { - aml_append(if_ctx, aml_return(aml_int(0x00))); - } - aml_append(method, if_ctx); - else_ctx = aml_else(); - { - aml_append(else_ctx, aml_return(aml_int(0x0f))); - } - aml_append(method, else_ctx); - aml_append(dev, method); - - crs = aml_resource_template(); - aml_append(crs, aml_io(AML_DECODE16, io_port, io_port, 0x00, 0x08)); - aml_append(crs, aml_irq_no_flags(irq)); - aml_append(dev, aml_name_decl("_CRS", crs)); - - return dev; -} - static void build_isa_devices_aml(Aml *table) { ISADevice *fdc = pc_find_fdc0(); @@ -1278,15 +1060,11 @@ static void build_isa_devices_aml(Aml *table) Aml *scope = aml_scope("_SB.PCI0.ISA"); Object *obj = object_resolve_path_type("", TYPE_ISA_BUS, &ambiguous); - aml_append(scope, build_rtc_device_aml()); aml_append(scope, build_kbd_device_aml()); aml_append(scope, build_mouse_device_aml()); if (fdc) { aml_append(scope, build_fdc_device_aml(fdc)); } - aml_append(scope, build_lpt_device_aml()); - aml_append(scope, build_com_device_aml(1)); - aml_append(scope, build_com_device_aml(2)); if (ambiguous) { error_report("Multiple ISA busses, unable to define IPMI ACPI data"); @@ -2295,36 +2073,6 @@ build_tpm_tcpa(GArray *table_data, BIOSLinker *linker, GArray *tcpalog) (void *)tcpa, "TCPA", sizeof(*tcpa), 2, NULL, NULL); } -static void -build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog) -{ - Acpi20TPM2 *tpm2_ptr = acpi_data_push(table_data, sizeof *tpm2_ptr); - unsigned log_addr_size = sizeof(tpm2_ptr->log_area_start_address); - unsigned log_addr_offset = - (char *)&tpm2_ptr->log_area_start_address - table_data->data; - - tpm2_ptr->platform_class = cpu_to_le16(TPM2_ACPI_CLASS_CLIENT); - if (TPM_IS_TIS_ISA(tpm_find())) { - tpm2_ptr->control_area_address = cpu_to_le64(0); - tpm2_ptr->start_method = cpu_to_le32(TPM2_START_METHOD_MMIO); - } else if (TPM_IS_CRB(tpm_find())) { - tpm2_ptr->control_area_address = cpu_to_le64(TPM_CRB_ADDR_CTRL); - tpm2_ptr->start_method = cpu_to_le32(TPM2_START_METHOD_CRB); - } else { - g_warn_if_reached(); - } - - tpm2_ptr->log_area_minimum_length = - cpu_to_le32(TPM_LOG_AREA_MINIMUM_SIZE); - - /* log area start address to be filled by Guest linker */ - bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, - log_addr_offset, log_addr_size, - ACPI_BUILD_TPMLOG_FILE, 0); - build_header(linker, table_data, - (void *)tpm2_ptr, "TPM2", sizeof(*tpm2_ptr), 4, NULL, NULL); -} - #define HOLE_640K_START (640 * KiB) #define HOLE_640K_END (1 * MiB) @@ -2428,6 +2176,11 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) MEM_AFFINITY_ENABLED); } } + + if (machine->nvdimms_state->is_enabled) { + nvdimm_build_srat(table_data); + } + slots = (table_data->len - numa_start) / sizeof *numamem; for (; slots < pcms->numa_nodes + 2; slots++) { numamem = acpi_data_push(table_data, sizeof *numamem); @@ -2834,7 +2587,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) aml_len += tables_blob->len - fadt; acpi_add_table(table_offsets, tables_blob); - build_madt(tables_blob, tables->linker, pcms); + acpi_build_madt(tables_blob, tables->linker, x86ms, + ACPI_DEVICE_IF(pcms->acpi_dev), true); vmgenid_dev = find_vmgenid_dev(); if (vmgenid_dev) { @@ -2848,10 +2602,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) build_hpet(tables_blob, tables->linker); } if (misc.tpm_version != TPM_VERSION_UNSPEC) { - acpi_add_table(table_offsets, tables_blob); - build_tpm_tcpa(tables_blob, tables->linker, tables->tcpalog); - - if (misc.tpm_version == TPM_VERSION_2_0) { + if (misc.tpm_version == TPM_VERSION_1_2) { + acpi_add_table(table_offsets, tables_blob); + build_tpm_tcpa(tables_blob, tables->linker, tables->tcpalog); + } else { /* TPM_VERSION_2_0 */ acpi_add_table(table_offsets, tables_blob); build_tpm2(tables_blob, tables->linker, tables->tcpalog); } diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c new file mode 100644 index 0000000000..ab9b00581a --- /dev/null +++ b/hw/i386/acpi-common.c @@ -0,0 +1,156 @@ +/* Support for generating ACPI tables and passing them to Guests + * + * Copyright (C) 2008-2010 Kevin O'Connor <kevin@koconnor.net> + * Copyright (C) 2006 Fabrice Bellard + * Copyright (C) 2013 Red Hat Inc + * + * Author: Michael S. Tsirkin <mst@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" + +#include "exec/memory.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/utils.h" +#include "hw/i386/pc.h" +#include "target/i386/cpu.h" + +#include "acpi-build.h" +#include "acpi-common.h" + +void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, + const CPUArchIdList *apic_ids, GArray *entry) +{ + uint32_t apic_id = apic_ids->cpus[uid].arch_id; + + /* ACPI spec says that LAPIC entry for non present + * CPU may be omitted from MADT or it must be marked + * as disabled. However omitting non present CPU from + * MADT breaks hotplug on linux. So possible CPUs + * should be put in MADT but kept disabled. + */ + if (apic_id < 255) { + AcpiMadtProcessorApic *apic = acpi_data_push(entry, sizeof *apic); + + apic->type = ACPI_APIC_PROCESSOR; + apic->length = sizeof(*apic); + apic->processor_id = uid; + apic->local_apic_id = apic_id; + if (apic_ids->cpus[uid].cpu != NULL) { + apic->flags = cpu_to_le32(1); + } else { + apic->flags = cpu_to_le32(0); + } + } else { + AcpiMadtProcessorX2Apic *apic = acpi_data_push(entry, sizeof *apic); + + apic->type = ACPI_APIC_LOCAL_X2APIC; + apic->length = sizeof(*apic); + apic->uid = cpu_to_le32(uid); + apic->x2apic_id = cpu_to_le32(apic_id); + if (apic_ids->cpus[uid].cpu != NULL) { + apic->flags = cpu_to_le32(1); + } else { + apic->flags = cpu_to_le32(0); + } + } +} + +void acpi_build_madt(GArray *table_data, BIOSLinker *linker, + X86MachineState *x86ms, AcpiDeviceIf *adev, + bool has_pci) +{ + MachineClass *mc = MACHINE_GET_CLASS(x86ms); + const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(x86ms)); + int madt_start = table_data->len; + AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(adev); + bool x2apic_mode = false; + + AcpiMultipleApicTable *madt; + AcpiMadtIoApic *io_apic; + AcpiMadtIntsrcovr *intsrcovr; + int i; + + madt = acpi_data_push(table_data, sizeof *madt); + madt->local_apic_address = cpu_to_le32(APIC_DEFAULT_ADDRESS); + madt->flags = cpu_to_le32(1); + + for (i = 0; i < apic_ids->len; i++) { + adevc->madt_cpu(adev, i, apic_ids, table_data); + if (apic_ids->cpus[i].arch_id > 254) { + x2apic_mode = true; + } + } + + io_apic = acpi_data_push(table_data, sizeof *io_apic); + io_apic->type = ACPI_APIC_IO; + io_apic->length = sizeof(*io_apic); + io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID; + io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS); + io_apic->interrupt = cpu_to_le32(0); + + if (x86ms->apic_xrupt_override) { + intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); + intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; + intsrcovr->length = sizeof(*intsrcovr); + intsrcovr->source = 0; + intsrcovr->gsi = cpu_to_le32(2); + intsrcovr->flags = cpu_to_le16(0); /* conforms to bus specifications */ + } + + if (has_pci) { + for (i = 1; i < 16; i++) { +#define ACPI_BUILD_PCI_IRQS ((1<<5) | (1<<9) | (1<<10) | (1<<11)) + if (!(ACPI_BUILD_PCI_IRQS & (1 << i))) { + /* No need for a INT source override structure. */ + continue; + } + intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); + intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; + intsrcovr->length = sizeof(*intsrcovr); + intsrcovr->source = i; + intsrcovr->gsi = cpu_to_le32(i); + intsrcovr->flags = cpu_to_le16(0xd); /* active high, level triggered */ + } + } + + if (x2apic_mode) { + AcpiMadtLocalX2ApicNmi *local_nmi; + + local_nmi = acpi_data_push(table_data, sizeof *local_nmi); + local_nmi->type = ACPI_APIC_LOCAL_X2APIC_NMI; + local_nmi->length = sizeof(*local_nmi); + local_nmi->uid = 0xFFFFFFFF; /* all processors */ + local_nmi->flags = cpu_to_le16(0); + local_nmi->lint = 1; /* ACPI_LINT1 */ + } else { + AcpiMadtLocalNmi *local_nmi; + + local_nmi = acpi_data_push(table_data, sizeof *local_nmi); + local_nmi->type = ACPI_APIC_LOCAL_NMI; + local_nmi->length = sizeof(*local_nmi); + local_nmi->processor_id = 0xff; /* all processors */ + local_nmi->flags = cpu_to_le16(0); + local_nmi->lint = 1; /* ACPI_LINT1 */ + } + + build_header(linker, table_data, + (void *)(table_data->data + madt_start), "APIC", + table_data->len - madt_start, 1, NULL, NULL); +} + diff --git a/hw/i386/acpi-common.h b/hw/i386/acpi-common.h new file mode 100644 index 0000000000..9cac18dddf --- /dev/null +++ b/hw/i386/acpi-common.h @@ -0,0 +1,15 @@ +#ifndef HW_I386_ACPI_COMMON_H +#define HW_I386_ACPI_COMMON_H +#include "include/hw/acpi/acpi_dev_interface.h" + +#include "include/hw/acpi/bios-linker-loader.h" +#include "include/hw/i386/x86.h" + +/* Default IOAPIC ID */ +#define ACPI_BUILD_IOAPIC_ID 0x0 + +void acpi_build_madt(GArray *table_data, BIOSLinker *linker, + X86MachineState *x86ms, AcpiDeviceIf *adev, + bool has_pci); + +#endif diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index 0adbd77553..aefb416c8f 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -23,6 +23,7 @@ */ #include "qemu/osdep.h" +#include "qemu/units.h" #include "qemu/range.h" #include "hw/i386/pc.h" #include "hw/pci/pci.h" @@ -301,7 +302,7 @@ PCIBus *i440fx_init(const char *host_type, const char *pci_type, memory_region_set_enabled(&f->smram_region, true); /* smram, as seen by SMM CPUs */ - memory_region_init(&f->smram, OBJECT(d), "smram", 1ull << 32); + memory_region_init(&f->smram, OBJECT(d), "smram", 4 * GiB); memory_region_set_enabled(&f->smram, true); memory_region_init_alias(&f->low_smram, OBJECT(d), "smram-low", f->ram_memory, 0xa0000, 0x20000); diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c index 1a02e9a670..88e2fc66a9 100644 --- a/hw/pci-host/prep.c +++ b/hw/pci-host/prep.c @@ -294,7 +294,7 @@ static void raven_pcihost_initfn(Object *obj) &s->pci_memory, &s->pci_io, 0, TYPE_PCI_BUS); /* Bus master address space */ - memory_region_init(&s->bm, obj, "bm-raven", UINT32_MAX); + memory_region_init(&s->bm, obj, "bm-raven", 4 * GiB); memory_region_init_alias(&s->bm_pci_memory_alias, obj, "bm-pci-memory", &s->pci_memory, 0, memory_region_size(&s->pci_memory)); diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 352aeecfa7..b788f17b2c 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -589,7 +589,7 @@ static void mch_realize(PCIDevice *d, Error **errp) memory_region_set_enabled(&mch->open_high_smram, false); /* smram, as seen by SMM CPUs */ - memory_region_init(&mch->smram, OBJECT(mch), "smram", 1ull << 32); + memory_region_init(&mch->smram, OBJECT(mch), "smram", 4 * GiB); memory_region_set_enabled(&mch->smram, true); memory_region_init_alias(&mch->low_smram, OBJECT(mch), "smram-low", mch->ram_memory, MCH_HOST_BRIDGE_SMRAM_C_BASE, diff --git a/hw/pci-host/versatile.c b/hw/pci-host/versatile.c index cfb9a78ea6..8ddfb8772a 100644 --- a/hw/pci-host/versatile.c +++ b/hw/pci-host/versatile.c @@ -8,6 +8,7 @@ */ #include "qemu/osdep.h" +#include "qemu/units.h" #include "hw/sysbus.h" #include "migration/vmstate.h" #include "hw/irq.h" @@ -399,8 +400,8 @@ static void pci_vpb_realize(DeviceState *dev, Error **errp) pci_map_irq_fn mapfn; int i; - memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 1ULL << 32); - memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 1ULL << 32); + memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 4 * GiB); + memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 4 * GiB); pci_root_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), dev, "pci", &s->pci_mem_space, &s->pci_io_space, diff --git a/hw/pci/msix.c b/hw/pci/msix.c index 29187898f2..e6a5559038 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -199,6 +199,9 @@ static const MemoryRegionOps msix_table_mmio_ops = { .endianness = DEVICE_LITTLE_ENDIAN, .valid = { .min_access_size = 4, + .max_access_size = 8, + }, + .impl = { .max_access_size = 4, }, }; @@ -227,6 +230,9 @@ static const MemoryRegionOps msix_pba_mmio_ops = { .endianness = DEVICE_LITTLE_ENDIAN, .valid = { .min_access_size = 4, + .max_access_size = 8, + }, + .impl = { .max_access_size = 4, }, }; diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 70c66965f5..a60cf3ae3b 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1381,6 +1381,8 @@ uint32_t pci_default_read_config(PCIDevice *d, { uint32_t val = 0; + assert(address + len <= pci_config_size(d)); + if (pci_is_express_downstream_port(d) && ranges_overlap(address, len, d->exp.exp_cap + PCI_EXP_LNKSTA, 2)) { pcie_sync_bridge_lnk(d); @@ -1394,6 +1396,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int int i, was_irq_disabled = pci_irq_disabled(d); uint32_t val = val_in; + assert(addr + l <= pci_config_size(d)); + for (i = 0; i < l; val >>= 8, ++i) { uint8_t wmask = d->wmask[addr + i]; uint8_t w1cmask = d->w1cmask[addr + i]; @@ -1772,6 +1776,7 @@ static PciDeviceInfo *qmp_query_pci_device(PCIDevice *dev, PCIBus *bus, info->regions = qmp_query_pci_regions(dev); info->qdev_id = g_strdup(dev->qdev.id ? dev->qdev.id : ""); + info->irq_pin = dev->config[PCI_INTERRUPT_PIN]; if (dev->config[PCI_INTERRUPT_PIN] != 0) { info->has_irq = true; info->irq = dev->config[PCI_INTERRUPT_LINE]; @@ -1887,7 +1892,18 @@ PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus, if (test_bit(DEVICE_CATEGORY_NETWORK, dc->categories) && dc->user_creatable) { const char *name = object_class_get_name(list->data); - g_ptr_array_add(pci_nic_models, (gpointer)name); + /* + * A network device might also be something else than a NIC, see + * e.g. the "rocker" device. Thus we have to look for the "netdev" + * property, too. Unfortunately, some devices like virtio-net only + * create this property during instance_init, so we have to create + * a temporary instance here to be able to check it. + */ + Object *obj = object_new_with_class(OBJECT_CLASS(dc)); + if (object_property_find(obj, "netdev", NULL)) { + g_ptr_array_add(pci_nic_models, (gpointer)name); + } + object_unref(obj); } next = list->next; g_slist_free_1(list); diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c index 97967d12eb..3789c17edc 100644 --- a/hw/pci/pci_bridge.c +++ b/hw/pci/pci_bridge.c @@ -30,6 +30,7 @@ */ #include "qemu/osdep.h" +#include "qemu/units.h" #include "hw/pci/pci_bridge.h" #include "hw/pci/pci_bus.h" #include "qemu/module.h" @@ -381,7 +382,7 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename) memory_region_init(&br->address_space_mem, OBJECT(br), "pci_bridge_pci", UINT64_MAX); sec_bus->address_space_io = &br->address_space_io; memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", - UINT32_MAX); + 4 * GiB); br->windows = pci_bridge_region_init(br); QLIST_INIT(&sec_bus->child); QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling); @@ -422,14 +423,14 @@ int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int cap_offset, } if (res_reserve.mem_non_pref != (uint64_t)-1 && - res_reserve.mem_non_pref >= (1ULL << 32)) { + res_reserve.mem_non_pref >= 4 * GiB) { error_setg(errp, "PCI resource reserve cap: mem-reserve must be less than 4G"); return -EINVAL; } if (res_reserve.mem_pref_32 != (uint64_t)-1 && - res_reserve.mem_pref_32 >= (1ULL << 32)) { + res_reserve.mem_pref_32 >= 4 * GiB) { error_setg(errp, "PCI resource reserve cap: pref32-reserve must be less than 4G"); return -EINVAL; diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index f50e10b8fb..5b9c022d91 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -407,6 +407,17 @@ static void pcie_cap_slot_plug_common(PCIDevice *hotplug_dev, DeviceState *dev, void pcie_cap_slot_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { + PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); + uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; + uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); + + /* Check if hot-plug is disabled on the slot */ + if (dev->hotplugged && (sltcap & PCI_EXP_SLTCAP_HPC) == 0) { + error_setg(errp, "Hot-plug failed: unsupported by the port device '%s'", + DEVICE(hotplug_pdev)->id); + return; + } + pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, errp); } @@ -415,7 +426,6 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, { PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; - uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); PCIDevice *pci_dev = PCI_DEVICE(dev); /* Don't send event when device is enabled during qemu machine creation: @@ -431,13 +441,6 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, return; } - /* Check if hot-plug is disabled on the slot */ - if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { - error_setg(errp, "Hot-plug failed: unsupported by the port device '%s'", - DEVICE(hotplug_pdev)->id); - return; - } - /* To enable multifunction hot-plug, we just ensure the function * 0 added last. When function 0 is added, we set the sltsta and * inform OS via event notification. diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c index 9c30cbdcd7..1e9fa0f33f 100644 --- a/hw/rtc/mc146818rtc.c +++ b/hw/rtc/mc146818rtc.c @@ -27,6 +27,7 @@ #include "qemu/cutils.h" #include "qemu/module.h" #include "qemu/bcd.h" +#include "hw/acpi/aml-build.h" #include "hw/irq.h" #include "hw/qdev-properties.h" #include "qemu/timer.h" @@ -1007,13 +1008,36 @@ static void rtc_resetdev(DeviceState *d) } } +static void rtc_build_aml(ISADevice *isadev, Aml *scope) +{ + Aml *dev; + Aml *crs; + + /* + * Reserving 8 io ports here, following what physical hardware + * does, even though qemu only responds to the first two ports. + */ + crs = aml_resource_template(); + aml_append(crs, aml_io(AML_DECODE16, RTC_ISA_BASE, RTC_ISA_BASE, + 0x01, 0x08)); + aml_append(crs, aml_irq_no_flags(RTC_ISA_IRQ)); + + dev = aml_device("RTC"); + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0B00"))); + aml_append(dev, aml_name_decl("_CRS", crs)); + + aml_append(scope, dev); +} + static void rtc_class_initfn(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ISADeviceClass *isa = ISA_DEVICE_CLASS(klass); dc->realize = rtc_realizefn; dc->reset = rtc_resetdev; dc->vmsd = &vmstate_rtc; + isa->build_aml = rtc_build_aml; device_class_set_props(dc, mc146818rtc_properties); } diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs index 4e4d39a0a4..13e75f171f 100644 --- a/hw/virtio/Makefile.objs +++ b/hw/virtio/Makefile.objs @@ -17,10 +17,12 @@ obj-$(CONFIG_VIRTIO_PMEM) += virtio-pmem.o common-obj-$(call land,$(CONFIG_VIRTIO_PMEM),$(CONFIG_VIRTIO_PCI)) += virtio-pmem-pci.o obj-$(call land,$(CONFIG_VHOST_USER_FS),$(CONFIG_VIRTIO_PCI)) += vhost-user-fs-pci.o obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o -obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock.o +obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock-common.o vhost-vsock.o +obj-$(CONFIG_VHOST_USER_VSOCK) += vhost-vsock-common.o vhost-user-vsock.o ifeq ($(CONFIG_VIRTIO_PCI),y) obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock-pci.o +obj-$(CONFIG_VHOST_USER_VSOCK) += vhost-user-vsock-pci.o obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk-pci.o obj-$(CONFIG_VHOST_USER_INPUT) += vhost-user-input-pci.o obj-$(CONFIG_VHOST_USER_SCSI) += vhost-user-scsi-pci.o diff --git a/hw/virtio/vhost-user-vsock-pci.c b/hw/virtio/vhost-user-vsock-pci.c new file mode 100644 index 0000000000..0a6847e6fc --- /dev/null +++ b/hw/virtio/vhost-user-vsock-pci.c @@ -0,0 +1,84 @@ +/* + * Vhost-user vsock PCI Bindings + * + * Copyright 2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#include "qemu/osdep.h" + +#include "virtio-pci.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-user-vsock.h" + +typedef struct VHostUserVSockPCI VHostUserVSockPCI; + +/* + * vhost-user-vsock-pci: This extends VirtioPCIProxy. + */ +#define TYPE_VHOST_USER_VSOCK_PCI "vhost-user-vsock-pci-base" +#define VHOST_USER_VSOCK_PCI(obj) \ + OBJECT_CHECK(VHostUserVSockPCI, (obj), TYPE_VHOST_USER_VSOCK_PCI) + +struct VHostUserVSockPCI { + VirtIOPCIProxy parent_obj; + VHostUserVSock vdev; +}; + +/* vhost-user-vsock-pci */ + +static Property vhost_user_vsock_pci_properties[] = { + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_user_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserVSockPCI *dev = VHOST_USER_VSOCK_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); + object_property_set_bool(OBJECT(vdev), true, "realized", errp); +} + +static void vhost_user_vsock_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_user_vsock_pci_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + device_class_set_props(dc, vhost_user_vsock_pci_properties); + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_VSOCK; + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_COMMUNICATION_OTHER; +} + +static void vhost_user_vsock_pci_instance_init(Object *obj) +{ + VHostUserVSockPCI *dev = VHOST_USER_VSOCK_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_USER_VSOCK); +} + +static const VirtioPCIDeviceTypeInfo vhost_user_vsock_pci_info = { + .base_name = TYPE_VHOST_USER_VSOCK_PCI, + .generic_name = "vhost-user-vsock-pci", + .transitional_name = "vhost-user-vsock-pci-transitional", + .non_transitional_name = "vhost-user-vsock-pci-non-transitional", + .instance_size = sizeof(VHostUserVSockPCI), + .instance_init = vhost_user_vsock_pci_instance_init, + .class_init = vhost_user_vsock_pci_class_init, +}; + +static void virtio_pci_vhost_register(void) +{ + virtio_pci_types_register(&vhost_user_vsock_pci_info); +} + +type_init(virtio_pci_vhost_register) diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c new file mode 100644 index 0000000000..3534a39d62 --- /dev/null +++ b/hw/virtio/vhost-user-vsock.c @@ -0,0 +1,181 @@ +/* + * Vhost-user vsock virtio device + * + * Copyright 2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#include "qemu/osdep.h" + +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-user-vsock.h" + +static const int user_feature_bits[] = { + VIRTIO_F_VERSION_1, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_F_NOTIFY_ON_EMPTY, + VHOST_INVALID_FEATURE_BIT +}; + +static void vuv_get_config(VirtIODevice *vdev, uint8_t *config) +{ + VHostUserVSock *vsock = VHOST_USER_VSOCK(vdev); + + memcpy(config, &vsock->vsockcfg, sizeof(struct virtio_vsock_config)); +} + +static int vuv_handle_config_change(struct vhost_dev *dev) +{ + VHostUserVSock *vsock = VHOST_USER_VSOCK(dev->vdev); + int ret = vhost_dev_get_config(dev, (uint8_t *)&vsock->vsockcfg, + sizeof(struct virtio_vsock_config)); + if (ret < 0) { + error_report("get config space failed"); + return -1; + } + + virtio_notify_config(dev->vdev); + + return 0; +} + +const VhostDevConfigOps vsock_ops = { + .vhost_dev_config_notifier = vuv_handle_config_change, +}; + +static void vuv_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + + if (!vdev->vm_running) { + should_start = false; + } + + if (vvc->vhost_dev.started == should_start) { + return; + } + + if (should_start) { + int ret = vhost_vsock_common_start(vdev); + if (ret < 0) { + return; + } + } else { + vhost_vsock_common_stop(vdev); + } +} + +static uint64_t vuv_get_features(VirtIODevice *vdev, + uint64_t features, + Error **errp) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + return vhost_get_features(&vvc->vhost_dev, user_feature_bits, features); +} + +static const VMStateDescription vuv_vmstate = { + .name = "vhost-user-vsock", + .unmigratable = 1, +}; + +static void vuv_device_realize(DeviceState *dev, Error **errp) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(dev); + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserVSock *vsock = VHOST_USER_VSOCK(dev); + int ret; + + if (!vsock->conf.chardev.chr) { + error_setg(errp, "missing chardev"); + return; + } + + if (!vhost_user_init(&vsock->vhost_user, &vsock->conf.chardev, errp)) { + return; + } + + vhost_vsock_common_realize(vdev, "vhost-user-vsock"); + + vhost_dev_set_config_notifier(&vvc->vhost_dev, &vsock_ops); + + ret = vhost_dev_init(&vvc->vhost_dev, &vsock->vhost_user, + VHOST_BACKEND_TYPE_USER, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "vhost_dev_init failed"); + goto err_virtio; + } + + ret = vhost_dev_get_config(&vvc->vhost_dev, (uint8_t *)&vsock->vsockcfg, + sizeof(struct virtio_vsock_config)); + if (ret < 0) { + error_setg_errno(errp, -ret, "get config space failed"); + goto err_vhost_dev; + } + + return; + +err_vhost_dev: + vhost_dev_cleanup(&vvc->vhost_dev); +err_virtio: + vhost_vsock_common_unrealize(vdev); + vhost_user_cleanup(&vsock->vhost_user); + return; +} + +static void vuv_device_unrealize(DeviceState *dev) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(dev); + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserVSock *vsock = VHOST_USER_VSOCK(dev); + + /* This will stop vhost backend if appropriate. */ + vuv_set_status(vdev, 0); + + vhost_dev_cleanup(&vvc->vhost_dev); + + vhost_vsock_common_unrealize(vdev); + + vhost_user_cleanup(&vsock->vhost_user); + +} + +static Property vuv_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserVSock, conf.chardev), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vuv_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + device_class_set_props(dc, vuv_properties); + dc->vmsd = &vuv_vmstate; + vdc->realize = vuv_device_realize; + vdc->unrealize = vuv_device_unrealize; + vdc->get_features = vuv_get_features; + vdc->get_config = vuv_get_config; + vdc->set_status = vuv_set_status; +} + +static const TypeInfo vuv_info = { + .name = TYPE_VHOST_USER_VSOCK, + .parent = TYPE_VHOST_VSOCK_COMMON, + .instance_size = sizeof(VHostUserVSock), + .class_init = vuv_class_init, +}; + +static void vuv_register_types(void) +{ + type_register_static(&vuv_info); +} + +type_init(vuv_register_types) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index ec21e8fbe8..4d6cd4e58a 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -35,11 +35,29 @@ #include <linux/userfaultfd.h> #endif -#define VHOST_MEMORY_MAX_NREGIONS 8 +#define VHOST_MEMORY_BASELINE_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 #define VHOST_USER_SLAVE_MAX_FDS 8 /* + * Set maximum number of RAM slots supported to + * the maximum number supported by the target + * hardware plaform. + */ +#if defined(TARGET_X86) || defined(TARGET_X86_64) || \ + defined(TARGET_ARM) || defined(TARGET_ARM_64) +#include "hw/acpi/acpi.h" +#define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS + +#elif defined(TARGET_PPC) || defined(TARGET_PPC_64) +#include "hw/ppc/spapr.h" +#define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS + +#else +#define VHOST_USER_MAX_RAM_SLOTS 512 +#endif + +/* * Maximum size of virtio device config space */ #define VHOST_USER_MAX_CONFIG_SIZE 256 @@ -59,6 +77,8 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, + /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */ + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, VHOST_USER_PROTOCOL_F_MAX }; @@ -100,6 +120,10 @@ typedef enum VhostUserRequest { VHOST_USER_SET_INFLIGHT_FD = 32, VHOST_USER_GPU_SET_SOCKET = 33, VHOST_USER_RESET_DEVICE = 34, + /* Message number 35 reserved for VHOST_USER_VRING_KICK. */ + VHOST_USER_GET_MAX_MEM_SLOTS = 36, + VHOST_USER_ADD_MEM_REG = 37, + VHOST_USER_REM_MEM_REG = 38, VHOST_USER_MAX } VhostUserRequest; @@ -121,9 +145,14 @@ typedef struct VhostUserMemoryRegion { typedef struct VhostUserMemory { uint32_t nregions; uint32_t padding; - VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; + VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; } VhostUserMemory; +typedef struct VhostUserMemRegMsg { + uint32_t padding; + VhostUserMemoryRegion region; +} VhostUserMemRegMsg; + typedef struct VhostUserLog { uint64_t mmap_size; uint64_t mmap_offset; @@ -182,6 +211,7 @@ typedef union { struct vhost_vring_state state; struct vhost_vring_addr addr; VhostUserMemory memory; + VhostUserMemRegMsg mem_reg; VhostUserLog log; struct vhost_iotlb_msg iotlb; VhostUserConfig config; @@ -210,7 +240,7 @@ struct vhost_user { int slave_fd; NotifierWithReturn postcopy_notifier; struct PostCopyFD postcopy_fd; - uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS]; + uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; /* Length of the region_rb and region_rb_offset arrays */ size_t region_rb_len; /* RAMBlock associated with a given region */ @@ -222,6 +252,16 @@ struct vhost_user { /* True once we've entered postcopy_listen */ bool postcopy_listen; + + /* Our current regions */ + int num_shadow_regions; + struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS]; +}; + +struct scrub_regions { + struct vhost_memory_region *region; + int reg_idx; + int fd_idx; }; static bool ioeventfd_enabled(void) @@ -370,7 +410,7 @@ int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd) static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, struct vhost_log *log) { - int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fds[VHOST_USER_MAX_RAM_SLOTS]; size_t fd_num = 0; bool shmfd = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_LOG_SHMFD); @@ -407,6 +447,27 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, return 0; } +static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, + int *fd) +{ + MemoryRegion *mr; + + assert((uintptr_t)addr == addr); + mr = memory_region_from_host((void *)(uintptr_t)addr, offset); + *fd = memory_region_get_fd(mr); + + return mr; +} + +static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst, + struct vhost_memory_region *src) +{ + assert(src != NULL && dst != NULL); + dst->userspace_addr = src->userspace_addr; + dst->memory_size = src->memory_size; + dst->guest_phys_addr = src->guest_phys_addr; +} + static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, struct vhost_dev *dev, VhostUserMsg *msg, @@ -417,19 +478,17 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, ram_addr_t offset; MemoryRegion *mr; struct vhost_memory_region *reg; + VhostUserMemoryRegion region_buffer; msg->hdr.request = VHOST_USER_SET_MEM_TABLE; for (i = 0; i < dev->mem->nregions; ++i) { reg = dev->mem->regions + i; - assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); - mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, - &offset); - fd = memory_region_get_fd(mr); + mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); if (fd > 0) { if (track_ramblocks) { - assert(*fd_num < VHOST_MEMORY_MAX_NREGIONS); + assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, reg->memory_size, reg->guest_phys_addr, @@ -437,16 +496,12 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, offset); u->region_rb_offset[i] = offset; u->region_rb[i] = mr->ram_block; - } else if (*fd_num == VHOST_MEMORY_MAX_NREGIONS) { + } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { error_report("Failed preparing vhost-user memory table msg"); return -1; } - msg->payload.memory.regions[*fd_num].userspace_addr = - reg->userspace_addr; - msg->payload.memory.regions[*fd_num].memory_size = - reg->memory_size; - msg->payload.memory.regions[*fd_num].guest_phys_addr = - reg->guest_phys_addr; + vhost_user_fill_msg_region(®ion_buffer, reg); + msg->payload.memory.regions[*fd_num] = region_buffer; msg->payload.memory.regions[*fd_num].mmap_offset = offset; fds[(*fd_num)++] = fd; } else if (track_ramblocks) { @@ -470,11 +525,335 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, return 1; } +static inline bool reg_equal(struct vhost_memory_region *shadow_reg, + struct vhost_memory_region *vdev_reg) +{ + return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr && + shadow_reg->userspace_addr == vdev_reg->userspace_addr && + shadow_reg->memory_size == vdev_reg->memory_size; +} + +static void scrub_shadow_regions(struct vhost_dev *dev, + struct scrub_regions *add_reg, + int *nr_add_reg, + struct scrub_regions *rem_reg, + int *nr_rem_reg, uint64_t *shadow_pcb, + bool track_ramblocks) +{ + struct vhost_user *u = dev->opaque; + bool found[VHOST_USER_MAX_RAM_SLOTS] = {}; + struct vhost_memory_region *reg, *shadow_reg; + int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0; + ram_addr_t offset; + MemoryRegion *mr; + bool matching; + + /* + * Find memory regions present in our shadow state which are not in + * the device's current memory state. + * + * Mark regions in both the shadow and device state as "found". + */ + for (i = 0; i < u->num_shadow_regions; i++) { + shadow_reg = &u->shadow_regions[i]; + matching = false; + + for (j = 0; j < dev->mem->nregions; j++) { + reg = &dev->mem->regions[j]; + + mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); + + if (reg_equal(shadow_reg, reg)) { + matching = true; + found[j] = true; + if (track_ramblocks) { + /* + * Reset postcopy client bases, region_rb, and + * region_rb_offset in case regions are removed. + */ + if (fd > 0) { + u->region_rb_offset[j] = offset; + u->region_rb[j] = mr->ram_block; + shadow_pcb[j] = u->postcopy_client_bases[i]; + } else { + u->region_rb_offset[j] = 0; + u->region_rb[j] = NULL; + } + } + break; + } + } + + /* + * If the region was not found in the current device memory state + * create an entry for it in the removed list. + */ + if (!matching) { + rem_reg[rm_idx].region = shadow_reg; + rem_reg[rm_idx++].reg_idx = i; + } + } + + /* + * For regions not marked "found", create entries in the added list. + * + * Note their indexes in the device memory state and the indexes of their + * file descriptors. + */ + for (i = 0; i < dev->mem->nregions; i++) { + reg = &dev->mem->regions[i]; + mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); + if (fd > 0) { + ++fd_num; + } + + /* + * If the region was in both the shadow and device state we don't + * need to send a VHOST_USER_ADD_MEM_REG message for it. + */ + if (found[i]) { + continue; + } + + add_reg[add_idx].region = reg; + add_reg[add_idx].reg_idx = i; + add_reg[add_idx++].fd_idx = fd_num; + } + *nr_rem_reg = rm_idx; + *nr_add_reg = add_idx; + + return; +} + +static int send_remove_regions(struct vhost_dev *dev, + struct scrub_regions *remove_reg, + int nr_rem_reg, VhostUserMsg *msg, + bool reply_supported) +{ + struct vhost_user *u = dev->opaque; + struct vhost_memory_region *shadow_reg; + int i, fd, shadow_reg_idx, ret; + ram_addr_t offset; + VhostUserMemoryRegion region_buffer; + + /* + * The regions in remove_reg appear in the same order they do in the + * shadow table. Therefore we can minimize memory copies by iterating + * through remove_reg backwards. + */ + for (i = nr_rem_reg - 1; i >= 0; i--) { + shadow_reg = remove_reg[i].region; + shadow_reg_idx = remove_reg[i].reg_idx; + + vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd); + + if (fd > 0) { + msg->hdr.request = VHOST_USER_REM_MEM_REG; + vhost_user_fill_msg_region(®ion_buffer, shadow_reg); + msg->payload.mem_reg.region = region_buffer; + + if (vhost_user_write(dev, msg, &fd, 1) < 0) { + return -1; + } + + if (reply_supported) { + ret = process_message_reply(dev, msg); + if (ret) { + return ret; + } + } + } + + /* + * At this point we know the backend has unmapped the region. It is now + * safe to remove it from the shadow table. + */ + memmove(&u->shadow_regions[shadow_reg_idx], + &u->shadow_regions[shadow_reg_idx + 1], + sizeof(struct vhost_memory_region) * + (u->num_shadow_regions - shadow_reg_idx)); + u->num_shadow_regions--; + } + + return 0; +} + +static int send_add_regions(struct vhost_dev *dev, + struct scrub_regions *add_reg, int nr_add_reg, + VhostUserMsg *msg, uint64_t *shadow_pcb, + bool reply_supported, bool track_ramblocks) +{ + struct vhost_user *u = dev->opaque; + int i, fd, ret, reg_idx, reg_fd_idx; + struct vhost_memory_region *reg; + MemoryRegion *mr; + ram_addr_t offset; + VhostUserMsg msg_reply; + VhostUserMemoryRegion region_buffer; + + for (i = 0; i < nr_add_reg; i++) { + reg = add_reg[i].region; + reg_idx = add_reg[i].reg_idx; + reg_fd_idx = add_reg[i].fd_idx; + + mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); + + if (fd > 0) { + if (track_ramblocks) { + trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name, + reg->memory_size, + reg->guest_phys_addr, + reg->userspace_addr, + offset); + u->region_rb_offset[reg_idx] = offset; + u->region_rb[reg_idx] = mr->ram_block; + } + msg->hdr.request = VHOST_USER_ADD_MEM_REG; + vhost_user_fill_msg_region(®ion_buffer, reg); + msg->payload.mem_reg.region = region_buffer; + msg->payload.mem_reg.region.mmap_offset = offset; + + if (vhost_user_write(dev, msg, &fd, 1) < 0) { + return -1; + } + + if (track_ramblocks) { + uint64_t reply_gpa; + + if (vhost_user_read(dev, &msg_reply) < 0) { + return -1; + } + + reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr; + + if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) { + error_report("%s: Received unexpected msg type." + "Expected %d received %d", __func__, + VHOST_USER_ADD_MEM_REG, + msg_reply.hdr.request); + return -1; + } + + /* + * We're using the same structure, just reusing one of the + * fields, so it should be the same size. + */ + if (msg_reply.hdr.size != msg->hdr.size) { + error_report("%s: Unexpected size for postcopy reply " + "%d vs %d", __func__, msg_reply.hdr.size, + msg->hdr.size); + return -1; + } + + /* Get the postcopy client base from the backend's reply. */ + if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) { + shadow_pcb[reg_idx] = + msg_reply.payload.mem_reg.region.userspace_addr; + trace_vhost_user_set_mem_table_postcopy( + msg_reply.payload.mem_reg.region.userspace_addr, + msg->payload.mem_reg.region.userspace_addr, + reg_fd_idx, reg_idx); + } else { + error_report("%s: invalid postcopy reply for region. " + "Got guest physical address %" PRIX64 ", expected " + "%" PRIX64, __func__, reply_gpa, + dev->mem->regions[reg_idx].guest_phys_addr); + return -1; + } + } else if (reply_supported) { + ret = process_message_reply(dev, msg); + if (ret) { + return ret; + } + } + } else if (track_ramblocks) { + u->region_rb_offset[reg_idx] = 0; + u->region_rb[reg_idx] = NULL; + } + + /* + * At this point, we know the backend has mapped in the new + * region, if the region has a valid file descriptor. + * + * The region should now be added to the shadow table. + */ + u->shadow_regions[u->num_shadow_regions].guest_phys_addr = + reg->guest_phys_addr; + u->shadow_regions[u->num_shadow_regions].userspace_addr = + reg->userspace_addr; + u->shadow_regions[u->num_shadow_regions].memory_size = + reg->memory_size; + u->num_shadow_regions++; + } + + return 0; +} + +static int vhost_user_add_remove_regions(struct vhost_dev *dev, + VhostUserMsg *msg, + bool reply_supported, + bool track_ramblocks) +{ + struct vhost_user *u = dev->opaque; + struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS]; + struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS]; + uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {}; + int nr_add_reg, nr_rem_reg; + + msg->hdr.size = sizeof(msg->payload.mem_reg.padding) + + sizeof(VhostUserMemoryRegion); + + /* Find the regions which need to be removed or added. */ + scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg, + shadow_pcb, track_ramblocks); + + if (nr_rem_reg && send_remove_regions(dev, rem_reg, nr_rem_reg, msg, + reply_supported) < 0) + { + goto err; + } + + if (nr_add_reg && send_add_regions(dev, add_reg, nr_add_reg, msg, + shadow_pcb, reply_supported, track_ramblocks) < 0) + { + goto err; + } + + if (track_ramblocks) { + memcpy(u->postcopy_client_bases, shadow_pcb, + sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); + /* + * Now we've registered this with the postcopy code, we ack to the + * client, because now we're in the position to be able to deal with + * any faults it generates. + */ + /* TODO: Use this for failure cases as well with a bad value. */ + msg->hdr.size = sizeof(msg->payload.u64); + msg->payload.u64 = 0; /* OK */ + + if (vhost_user_write(dev, msg, NULL, 0) < 0) { + return -1; + } + } + + return 0; + +err: + if (track_ramblocks) { + memcpy(u->postcopy_client_bases, shadow_pcb, + sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); + } + + return -1; +} + static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, - struct vhost_memory *mem) + struct vhost_memory *mem, + bool reply_supported, + bool config_mem_slots) { struct vhost_user *u = dev->opaque; - int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fds[VHOST_MEMORY_BASELINE_NREGIONS]; size_t fd_num = 0; VhostUserMsg msg_reply; int region_i, msg_i; @@ -494,71 +873,84 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, u->region_rb_len = dev->mem->nregions; } - if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, + if (config_mem_slots) { + if (vhost_user_add_remove_regions(dev, &msg, reply_supported, true) < 0) { - return -1; - } + return -1; + } + } else { + if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, + true) < 0) { + return -1; + } - if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { - return -1; - } + if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { + return -1; + } - if (vhost_user_read(dev, &msg_reply) < 0) { - return -1; - } + if (vhost_user_read(dev, &msg_reply) < 0) { + return -1; + } - if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { - error_report("%s: Received unexpected msg type." - "Expected %d received %d", __func__, - VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); - return -1; - } - /* We're using the same structure, just reusing one of the - * fields, so it should be the same size. - */ - if (msg_reply.hdr.size != msg.hdr.size) { - error_report("%s: Unexpected size for postcopy reply " - "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size); - return -1; - } - - memset(u->postcopy_client_bases, 0, - sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS); - - /* They're in the same order as the regions that were sent - * but some of the regions were skipped (above) if they - * didn't have fd's - */ - for (msg_i = 0, region_i = 0; - region_i < dev->mem->nregions; - region_i++) { - if (msg_i < fd_num && - msg_reply.payload.memory.regions[msg_i].guest_phys_addr == - dev->mem->regions[region_i].guest_phys_addr) { - u->postcopy_client_bases[region_i] = - msg_reply.payload.memory.regions[msg_i].userspace_addr; - trace_vhost_user_set_mem_table_postcopy( - msg_reply.payload.memory.regions[msg_i].userspace_addr, - msg.payload.memory.regions[msg_i].userspace_addr, - msg_i, region_i); - msg_i++; + if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { + error_report("%s: Received unexpected msg type." + "Expected %d received %d", __func__, + VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); + return -1; + } + + /* + * We're using the same structure, just reusing one of the + * fields, so it should be the same size. + */ + if (msg_reply.hdr.size != msg.hdr.size) { + error_report("%s: Unexpected size for postcopy reply " + "%d vs %d", __func__, msg_reply.hdr.size, + msg.hdr.size); + return -1; + } + + memset(u->postcopy_client_bases, 0, + sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); + + /* + * They're in the same order as the regions that were sent + * but some of the regions were skipped (above) if they + * didn't have fd's + */ + for (msg_i = 0, region_i = 0; + region_i < dev->mem->nregions; + region_i++) { + if (msg_i < fd_num && + msg_reply.payload.memory.regions[msg_i].guest_phys_addr == + dev->mem->regions[region_i].guest_phys_addr) { + u->postcopy_client_bases[region_i] = + msg_reply.payload.memory.regions[msg_i].userspace_addr; + trace_vhost_user_set_mem_table_postcopy( + msg_reply.payload.memory.regions[msg_i].userspace_addr, + msg.payload.memory.regions[msg_i].userspace_addr, + msg_i, region_i); + msg_i++; + } + } + if (msg_i != fd_num) { + error_report("%s: postcopy reply not fully consumed " + "%d vs %zd", + __func__, msg_i, fd_num); + return -1; + } + + /* + * Now we've registered this with the postcopy code, we ack to the + * client, because now we're in the position to be able to deal + * with any faults it generates. + */ + /* TODO: Use this for failure cases as well with a bad value. */ + msg.hdr.size = sizeof(msg.payload.u64); + msg.payload.u64 = 0; /* OK */ + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; } - } - if (msg_i != fd_num) { - error_report("%s: postcopy reply not fully consumed " - "%d vs %zd", - __func__, msg_i, fd_num); - return -1; - } - /* Now we've registered this with the postcopy code, we ack to the client, - * because now we're in the position to be able to deal with any faults - * it generates. - */ - /* TODO: Use this for failure cases as well with a bad value */ - msg.hdr.size = sizeof(msg.payload.u64); - msg.payload.u64 = 0; /* OK */ - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; } return 0; @@ -568,17 +960,22 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, struct vhost_memory *mem) { struct vhost_user *u = dev->opaque; - int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fds[VHOST_MEMORY_BASELINE_NREGIONS]; size_t fd_num = 0; bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; bool reply_supported = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK); + bool config_mem_slots = + virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); if (do_postcopy) { - /* Postcopy has enough differences that it's best done in it's own + /* + * Postcopy has enough differences that it's best done in it's own * version */ - return vhost_user_set_mem_table_postcopy(dev, mem); + return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported, + config_mem_slots); } VhostUserMsg msg = { @@ -589,17 +986,23 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; } - if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, + if (config_mem_slots) { + if (vhost_user_add_remove_regions(dev, &msg, reply_supported, false) < 0) { - return -1; - } - - if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { - return -1; - } + return -1; + } + } else { + if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, + false) < 0) { + return -1; + } + if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { + return -1; + } - if (reply_supported) { - return process_message_reply(dev, &msg); + if (reply_supported) { + return process_message_reply(dev, &msg); + } } return 0; @@ -764,7 +1167,7 @@ static int vhost_set_vring_file(struct vhost_dev *dev, VhostUserRequest request, struct vhost_vring_file *file) { - int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fds[VHOST_USER_MAX_RAM_SLOTS]; size_t fd_num = 0; VhostUserMsg msg = { .hdr.request = request, @@ -880,6 +1283,23 @@ static int vhost_user_set_owner(struct vhost_dev *dev) return 0; } +static int vhost_user_get_max_memslots(struct vhost_dev *dev, + uint64_t *max_memslots) +{ + uint64_t backend_max_memslots; + int err; + + err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS, + &backend_max_memslots); + if (err < 0) { + return err; + } + + *max_memslots = backend_max_memslots; + + return 0; +} + static int vhost_user_reset_device(struct vhost_dev *dev) { VhostUserMsg msg = { @@ -1377,7 +1797,7 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) { - uint64_t features, protocol_features; + uint64_t features, protocol_features, ram_slots; struct vhost_user *u; int err; @@ -1439,6 +1859,27 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) "slave-req protocol features."); return -1; } + + /* get max memory regions if backend supports configurable RAM slots */ + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { + u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; + } else { + err = vhost_user_get_max_memslots(dev, &ram_slots); + if (err < 0) { + return err; + } + + if (ram_slots < u->user->memory_slots) { + error_report("The backend specified a max ram slots limit " + "of %" PRIu64", when the prior validated limit was %d. " + "This limit should never decrease.", ram_slots, + u->user->memory_slots); + return -1; + } + + u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); + } } if (dev->migration_blocker == NULL && @@ -1504,7 +1945,9 @@ static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) static int vhost_user_memslots_limit(struct vhost_dev *dev) { - return VHOST_MEMORY_MAX_NREGIONS; + struct vhost_user *u = dev->opaque; + + return u->user->memory_slots; } static bool vhost_user_requires_shm_log(struct vhost_dev *dev) @@ -1545,13 +1988,9 @@ static bool vhost_user_can_merge(struct vhost_dev *dev, { ram_addr_t offset; int mfd, rfd; - MemoryRegion *mr; - - mr = memory_region_from_host((void *)(uintptr_t)start1, &offset); - mfd = memory_region_get_fd(mr); - mr = memory_region_from_host((void *)(uintptr_t)start2, &offset); - rfd = memory_region_get_fd(mr); + (void)vhost_user_get_mr_data(start1, &offset, &mfd); + (void)vhost_user_get_mr_data(start2, &offset, &rfd); return mfd == rfd; } @@ -1893,6 +2332,7 @@ bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) return false; } user->chr = chr; + user->memory_slots = 0; return true; } diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c new file mode 100644 index 0000000000..5b2ebf3496 --- /dev/null +++ b/hw/virtio/vhost-vsock-common.c @@ -0,0 +1,258 @@ +/* + * Parent class for vhost-vsock devices + * + * Copyright 2015-2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#include "qemu/osdep.h" +#include "standard-headers/linux/virtio_vsock.h" +#include "qapi/error.h" +#include "hw/virtio/virtio-access.h" +#include "qemu/error-report.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-vsock.h" +#include "qemu/iov.h" +#include "monitor/monitor.h" + +int vhost_vsock_common_start(VirtIODevice *vdev) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + int i; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return -ENOSYS; + } + + ret = vhost_dev_enable_notifiers(&vvc->vhost_dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", -ret); + return ret; + } + + ret = k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier: %d", -ret); + goto err_host_notifiers; + } + + vvc->vhost_dev.acked_features = vdev->guest_features; + ret = vhost_dev_start(&vvc->vhost_dev, vdev); + if (ret < 0) { + error_report("Error starting vhost: %d", -ret); + goto err_guest_notifiers; + } + + /* + * guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. + */ + for (i = 0; i < vvc->vhost_dev.nvqs; i++) { + vhost_virtqueue_mask(&vvc->vhost_dev, vdev, i, false); + } + + return 0; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&vvc->vhost_dev, vdev); + return ret; +} + +void vhost_vsock_common_stop(VirtIODevice *vdev) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + vhost_dev_stop(&vvc->vhost_dev, vdev); + + ret = k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(&vvc->vhost_dev, vdev); +} + + +static void vhost_vsock_common_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + /* Do nothing */ +} + +static void vhost_vsock_common_guest_notifier_mask(VirtIODevice *vdev, int idx, + bool mask) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + vhost_virtqueue_mask(&vvc->vhost_dev, vdev, idx, mask); +} + +static bool vhost_vsock_common_guest_notifier_pending(VirtIODevice *vdev, + int idx) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + return vhost_virtqueue_pending(&vvc->vhost_dev, idx); +} + +static void vhost_vsock_common_send_transport_reset(VHostVSockCommon *vvc) +{ + VirtQueueElement *elem; + VirtQueue *vq = vvc->event_vq; + struct virtio_vsock_event event = { + .id = cpu_to_le32(VIRTIO_VSOCK_EVENT_TRANSPORT_RESET), + }; + + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!elem) { + error_report("vhost-vsock missed transport reset event"); + return; + } + + if (elem->out_num) { + error_report("invalid vhost-vsock event virtqueue element with " + "out buffers"); + goto out; + } + + if (iov_from_buf(elem->in_sg, elem->in_num, 0, + &event, sizeof(event)) != sizeof(event)) { + error_report("vhost-vsock event virtqueue element is too short"); + goto out; + } + + virtqueue_push(vq, elem, sizeof(event)); + virtio_notify(VIRTIO_DEVICE(vvc), vq); + +out: + g_free(elem); +} + +static void vhost_vsock_common_post_load_timer_cleanup(VHostVSockCommon *vvc) +{ + if (!vvc->post_load_timer) { + return; + } + + timer_del(vvc->post_load_timer); + timer_free(vvc->post_load_timer); + vvc->post_load_timer = NULL; +} + +static void vhost_vsock_common_post_load_timer_cb(void *opaque) +{ + VHostVSockCommon *vvc = opaque; + + vhost_vsock_common_post_load_timer_cleanup(vvc); + vhost_vsock_common_send_transport_reset(vvc); +} + +int vhost_vsock_common_pre_save(void *opaque) +{ + VHostVSockCommon *vvc = opaque; + + /* + * At this point, backend must be stopped, otherwise + * it might keep writing to memory. + */ + assert(!vvc->vhost_dev.started); + + return 0; +} + +int vhost_vsock_common_post_load(void *opaque, int version_id) +{ + VHostVSockCommon *vvc = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(vvc); + + if (virtio_queue_get_addr(vdev, 2)) { + /* + * Defer transport reset event to a vm clock timer so that virtqueue + * changes happen after migration has completed. + */ + assert(!vvc->post_load_timer); + vvc->post_load_timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, + vhost_vsock_common_post_load_timer_cb, + vvc); + timer_mod(vvc->post_load_timer, 1); + } + return 0; +} + +void vhost_vsock_common_realize(VirtIODevice *vdev, const char *name) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + virtio_init(vdev, name, VIRTIO_ID_VSOCK, + sizeof(struct virtio_vsock_config)); + + /* Receive and transmit queues belong to vhost */ + vvc->recv_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, + vhost_vsock_common_handle_output); + vvc->trans_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, + vhost_vsock_common_handle_output); + + /* The event queue belongs to QEMU */ + vvc->event_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, + vhost_vsock_common_handle_output); + + vvc->vhost_dev.nvqs = ARRAY_SIZE(vvc->vhost_vqs); + vvc->vhost_dev.vqs = vvc->vhost_vqs; + + vvc->post_load_timer = NULL; +} + +void vhost_vsock_common_unrealize(VirtIODevice *vdev) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + vhost_vsock_common_post_load_timer_cleanup(vvc); + + virtio_delete_queue(vvc->recv_vq); + virtio_delete_queue(vvc->trans_vq); + virtio_delete_queue(vvc->event_vq); + virtio_cleanup(vdev); +} + +static void vhost_vsock_common_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + vdc->guest_notifier_mask = vhost_vsock_common_guest_notifier_mask; + vdc->guest_notifier_pending = vhost_vsock_common_guest_notifier_pending; +} + +static const TypeInfo vhost_vsock_common_info = { + .name = TYPE_VHOST_VSOCK_COMMON, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostVSockCommon), + .class_init = vhost_vsock_common_class_init, + .abstract = true, +}; + +static void vhost_vsock_common_register_types(void) +{ + type_register_static(&vhost_vsock_common_info); +} + +type_init(vhost_vsock_common_register_types) diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index 4a228f5168..c8f0699b4f 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -12,24 +12,14 @@ */ #include "qemu/osdep.h" -#include <sys/ioctl.h> #include "standard-headers/linux/virtio_vsock.h" #include "qapi/error.h" -#include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" #include "qemu/error-report.h" #include "hw/qdev-properties.h" #include "hw/virtio/vhost-vsock.h" -#include "qemu/iov.h" -#include "qemu/module.h" #include "monitor/monitor.h" -enum { - VHOST_VSOCK_SAVEVM_VERSION = 0, - - VHOST_VSOCK_QUEUE_SIZE = 128, -}; - static void vhost_vsock_get_config(VirtIODevice *vdev, uint8_t *config) { VHostVSock *vsock = VHOST_VSOCK(vdev); @@ -39,16 +29,18 @@ static void vhost_vsock_get_config(VirtIODevice *vdev, uint8_t *config) memcpy(config, &vsockcfg, sizeof(vsockcfg)); } -static int vhost_vsock_set_guest_cid(VHostVSock *vsock) +static int vhost_vsock_set_guest_cid(VirtIODevice *vdev) { - const VhostOps *vhost_ops = vsock->vhost_dev.vhost_ops; + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + VHostVSock *vsock = VHOST_VSOCK(vdev); + const VhostOps *vhost_ops = vvc->vhost_dev.vhost_ops; int ret; if (!vhost_ops->vhost_vsock_set_guest_cid) { return -ENOSYS; } - ret = vhost_ops->vhost_vsock_set_guest_cid(&vsock->vhost_dev, + ret = vhost_ops->vhost_vsock_set_guest_cid(&vvc->vhost_dev, vsock->conf.guest_cid); if (ret < 0) { return -errno; @@ -56,123 +48,58 @@ static int vhost_vsock_set_guest_cid(VHostVSock *vsock) return 0; } -static int vhost_vsock_set_running(VHostVSock *vsock, int start) +static int vhost_vsock_set_running(VirtIODevice *vdev, int start) { - const VhostOps *vhost_ops = vsock->vhost_dev.vhost_ops; + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + const VhostOps *vhost_ops = vvc->vhost_dev.vhost_ops; int ret; if (!vhost_ops->vhost_vsock_set_running) { return -ENOSYS; } - ret = vhost_ops->vhost_vsock_set_running(&vsock->vhost_dev, start); + ret = vhost_ops->vhost_vsock_set_running(&vvc->vhost_dev, start); if (ret < 0) { return -errno; } return 0; } -static void vhost_vsock_start(VirtIODevice *vdev) -{ - VHostVSock *vsock = VHOST_VSOCK(vdev); - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - int ret; - int i; - - if (!k->set_guest_notifiers) { - error_report("binding does not support guest notifiers"); - return; - } - - ret = vhost_dev_enable_notifiers(&vsock->vhost_dev, vdev); - if (ret < 0) { - error_report("Error enabling host notifiers: %d", -ret); - return; - } - - ret = k->set_guest_notifiers(qbus->parent, vsock->vhost_dev.nvqs, true); - if (ret < 0) { - error_report("Error binding guest notifier: %d", -ret); - goto err_host_notifiers; - } - - vsock->vhost_dev.acked_features = vdev->guest_features; - ret = vhost_dev_start(&vsock->vhost_dev, vdev); - if (ret < 0) { - error_report("Error starting vhost: %d", -ret); - goto err_guest_notifiers; - } - - ret = vhost_vsock_set_running(vsock, 1); - if (ret < 0) { - error_report("Error starting vhost vsock: %d", -ret); - goto err_dev_start; - } - - /* guest_notifier_mask/pending not used yet, so just unmask - * everything here. virtio-pci will do the right thing by - * enabling/disabling irqfd. - */ - for (i = 0; i < vsock->vhost_dev.nvqs; i++) { - vhost_virtqueue_mask(&vsock->vhost_dev, vdev, i, false); - } - - return; - -err_dev_start: - vhost_dev_stop(&vsock->vhost_dev, vdev); -err_guest_notifiers: - k->set_guest_notifiers(qbus->parent, vsock->vhost_dev.nvqs, false); -err_host_notifiers: - vhost_dev_disable_notifiers(&vsock->vhost_dev, vdev); -} - -static void vhost_vsock_stop(VirtIODevice *vdev) -{ - VHostVSock *vsock = VHOST_VSOCK(vdev); - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - int ret; - - if (!k->set_guest_notifiers) { - return; - } - - ret = vhost_vsock_set_running(vsock, 0); - if (ret < 0) { - error_report("vhost vsock set running failed: %d", ret); - return; - } - - vhost_dev_stop(&vsock->vhost_dev, vdev); - - ret = k->set_guest_notifiers(qbus->parent, vsock->vhost_dev.nvqs, false); - if (ret < 0) { - error_report("vhost guest notifier cleanup failed: %d", ret); - return; - } - - vhost_dev_disable_notifiers(&vsock->vhost_dev, vdev); -} static void vhost_vsock_set_status(VirtIODevice *vdev, uint8_t status) { - VHostVSock *vsock = VHOST_VSOCK(vdev); + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + int ret; if (!vdev->vm_running) { should_start = false; } - if (vsock->vhost_dev.started == should_start) { + if (vvc->vhost_dev.started == should_start) { return; } if (should_start) { - vhost_vsock_start(vdev); + ret = vhost_vsock_common_start(vdev); + if (ret < 0) { + return; + } + + ret = vhost_vsock_set_running(vdev, 1); + if (ret < 0) { + vhost_vsock_common_stop(vdev); + error_report("Error starting vhost vsock: %d", -ret); + return; + } } else { - vhost_vsock_stop(vdev); + ret = vhost_vsock_set_running(vdev, 0); + if (ret < 0) { + error_report("vhost vsock set running failed: %d", ret); + return; + } + + vhost_vsock_common_stop(vdev); } } @@ -184,108 +111,6 @@ static uint64_t vhost_vsock_get_features(VirtIODevice *vdev, return requested_features; } -static void vhost_vsock_handle_output(VirtIODevice *vdev, VirtQueue *vq) -{ - /* Do nothing */ -} - -static void vhost_vsock_guest_notifier_mask(VirtIODevice *vdev, int idx, - bool mask) -{ - VHostVSock *vsock = VHOST_VSOCK(vdev); - - vhost_virtqueue_mask(&vsock->vhost_dev, vdev, idx, mask); -} - -static bool vhost_vsock_guest_notifier_pending(VirtIODevice *vdev, int idx) -{ - VHostVSock *vsock = VHOST_VSOCK(vdev); - - return vhost_virtqueue_pending(&vsock->vhost_dev, idx); -} - -static void vhost_vsock_send_transport_reset(VHostVSock *vsock) -{ - VirtQueueElement *elem; - VirtQueue *vq = vsock->event_vq; - struct virtio_vsock_event event = { - .id = cpu_to_le32(VIRTIO_VSOCK_EVENT_TRANSPORT_RESET), - }; - - elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); - if (!elem) { - error_report("vhost-vsock missed transport reset event"); - return; - } - - if (elem->out_num) { - error_report("invalid vhost-vsock event virtqueue element with " - "out buffers"); - goto out; - } - - if (iov_from_buf(elem->in_sg, elem->in_num, 0, - &event, sizeof(event)) != sizeof(event)) { - error_report("vhost-vsock event virtqueue element is too short"); - goto out; - } - - virtqueue_push(vq, elem, sizeof(event)); - virtio_notify(VIRTIO_DEVICE(vsock), vq); - -out: - g_free(elem); -} - -static void vhost_vsock_post_load_timer_cleanup(VHostVSock *vsock) -{ - if (!vsock->post_load_timer) { - return; - } - - timer_del(vsock->post_load_timer); - timer_free(vsock->post_load_timer); - vsock->post_load_timer = NULL; -} - -static void vhost_vsock_post_load_timer_cb(void *opaque) -{ - VHostVSock *vsock = opaque; - - vhost_vsock_post_load_timer_cleanup(vsock); - vhost_vsock_send_transport_reset(vsock); -} - -static int vhost_vsock_pre_save(void *opaque) -{ - VHostVSock *vsock = opaque; - - /* At this point, backend must be stopped, otherwise - * it might keep writing to memory. */ - assert(!vsock->vhost_dev.started); - - return 0; -} - -static int vhost_vsock_post_load(void *opaque, int version_id) -{ - VHostVSock *vsock = opaque; - VirtIODevice *vdev = VIRTIO_DEVICE(vsock); - - if (virtio_queue_get_addr(vdev, 2)) { - /* Defer transport reset event to a vm clock timer so that virtqueue - * changes happen after migration has completed. - */ - assert(!vsock->post_load_timer); - vsock->post_load_timer = - timer_new_ns(QEMU_CLOCK_VIRTUAL, - vhost_vsock_post_load_timer_cb, - vsock); - timer_mod(vsock->post_load_timer, 1); - } - return 0; -} - static const VMStateDescription vmstate_virtio_vhost_vsock = { .name = "virtio-vhost_vsock", .minimum_version_id = VHOST_VSOCK_SAVEVM_VERSION, @@ -294,12 +119,13 @@ static const VMStateDescription vmstate_virtio_vhost_vsock = { VMSTATE_VIRTIO_DEVICE, VMSTATE_END_OF_LIST() }, - .pre_save = vhost_vsock_pre_save, - .post_load = vhost_vsock_post_load, + .pre_save = vhost_vsock_common_pre_save, + .post_load = vhost_vsock_common_post_load, }; static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) { + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(dev); VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostVSock *vsock = VHOST_VSOCK(dev); int vhostfd; @@ -331,46 +157,29 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) } } - virtio_init(vdev, "vhost-vsock", VIRTIO_ID_VSOCK, - sizeof(struct virtio_vsock_config)); - - /* Receive and transmit queues belong to vhost */ - vsock->recv_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, - vhost_vsock_handle_output); - vsock->trans_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, - vhost_vsock_handle_output); + vhost_vsock_common_realize(vdev, "vhost-vsock"); - /* The event queue belongs to QEMU */ - vsock->event_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, - vhost_vsock_handle_output); - - vsock->vhost_dev.nvqs = ARRAY_SIZE(vsock->vhost_vqs); - vsock->vhost_dev.vqs = vsock->vhost_vqs; - ret = vhost_dev_init(&vsock->vhost_dev, (void *)(uintptr_t)vhostfd, + ret = vhost_dev_init(&vvc->vhost_dev, (void *)(uintptr_t)vhostfd, VHOST_BACKEND_TYPE_KERNEL, 0); if (ret < 0) { error_setg_errno(errp, -ret, "vhost-vsock: vhost_dev_init failed"); goto err_virtio; } - ret = vhost_vsock_set_guest_cid(vsock); + ret = vhost_vsock_set_guest_cid(vdev); if (ret < 0) { error_setg_errno(errp, -ret, "vhost-vsock: unable to set guest cid"); goto err_vhost_dev; } - vsock->post_load_timer = NULL; return; err_vhost_dev: - vhost_dev_cleanup(&vsock->vhost_dev); + vhost_dev_cleanup(&vvc->vhost_dev); /* vhost_dev_cleanup() closes the vhostfd passed to vhost_dev_init() */ vhostfd = -1; err_virtio: - virtio_delete_queue(vsock->recv_vq); - virtio_delete_queue(vsock->trans_vq); - virtio_delete_queue(vsock->event_vq); - virtio_cleanup(vdev); + vhost_vsock_common_unrealize(vdev); if (vhostfd >= 0) { close(vhostfd); } @@ -379,19 +188,14 @@ err_virtio: static void vhost_vsock_device_unrealize(DeviceState *dev) { + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(dev); VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostVSock *vsock = VHOST_VSOCK(dev); - - vhost_vsock_post_load_timer_cleanup(vsock); /* This will stop vhost backend if appropriate. */ vhost_vsock_set_status(vdev, 0); - vhost_dev_cleanup(&vsock->vhost_dev); - virtio_delete_queue(vsock->recv_vq); - virtio_delete_queue(vsock->trans_vq); - virtio_delete_queue(vsock->event_vq); - virtio_cleanup(vdev); + vhost_dev_cleanup(&vvc->vhost_dev); + vhost_vsock_common_unrealize(vdev); } static Property vhost_vsock_properties[] = { @@ -407,19 +211,16 @@ static void vhost_vsock_class_init(ObjectClass *klass, void *data) device_class_set_props(dc, vhost_vsock_properties); dc->vmsd = &vmstate_virtio_vhost_vsock; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); vdc->realize = vhost_vsock_device_realize; vdc->unrealize = vhost_vsock_device_unrealize; vdc->get_features = vhost_vsock_get_features; vdc->get_config = vhost_vsock_get_config; vdc->set_status = vhost_vsock_set_status; - vdc->guest_notifier_mask = vhost_vsock_guest_notifier_mask; - vdc->guest_notifier_pending = vhost_vsock_guest_notifier_pending; } static const TypeInfo vhost_vsock_info = { .name = TYPE_VHOST_VSOCK, - .parent = TYPE_VIRTIO_DEVICE, + .parent = TYPE_VHOST_VSOCK_COMMON, .instance_size = sizeof(VHostVSock), .class_init = vhost_vsock_class_init, }; diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 065cd450f1..10507b2a43 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -321,6 +321,67 @@ static void balloon_stats_set_poll_interval(Object *obj, Visitor *v, balloon_stats_change_timer(s, 0); } +static void virtio_balloon_handle_report(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); + VirtQueueElement *elem; + + while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) { + unsigned int i; + + /* + * When we discard the page it has the effect of removing the page + * from the hypervisor itself and causing it to be zeroed when it + * is returned to us. So we must not discard the page if it is + * accessible by another device or process, or if the guest is + * expecting it to retain a non-zero value. + */ + if (qemu_balloon_is_inhibited() || dev->poison_val) { + goto skip_element; + } + + for (i = 0; i < elem->in_num; i++) { + void *addr = elem->in_sg[i].iov_base; + size_t size = elem->in_sg[i].iov_len; + ram_addr_t ram_offset; + RAMBlock *rb; + + /* + * There is no need to check the memory section to see if + * it is ram/readonly/romd like there is for handle_output + * below. If the region is not meant to be written to then + * address_space_map will have allocated a bounce buffer + * and it will be freed in address_space_unmap and trigger + * and unassigned_mem_write before failing to copy over the + * buffer. If more than one bad descriptor is provided it + * will return NULL after the first bounce buffer and fail + * to map any resources. + */ + rb = qemu_ram_block_from_host(addr, false, &ram_offset); + if (!rb) { + trace_virtio_balloon_bad_addr(elem->in_addr[i]); + continue; + } + + /* + * For now we will simply ignore unaligned memory regions, or + * regions that overrun the end of the RAMBlock. + */ + if (!QEMU_IS_ALIGNED(ram_offset | size, qemu_ram_pagesize(rb)) || + (ram_offset + size) > qemu_ram_get_used_length(rb)) { + continue; + } + + ram_block_discard_range(rb, ram_offset, size); + } + +skip_element: + virtqueue_push(vq, elem, 0); + virtio_notify(vdev, vq); + g_free(elem); + } +} + static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) { VirtIOBalloon *s = VIRTIO_BALLOON(vdev); @@ -634,6 +695,7 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) config.num_pages = cpu_to_le32(dev->num_pages); config.actual = cpu_to_le32(dev->actual); + config.poison_val = cpu_to_le32(dev->poison_val); if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) { config.free_page_report_cmd_id = @@ -683,6 +745,14 @@ static ram_addr_t get_current_ram_size(void) return size; } +static bool virtio_balloon_page_poison_support(void *opaque) +{ + VirtIOBalloon *s = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(s); + + return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON); +} + static void virtio_balloon_set_config(VirtIODevice *vdev, const uint8_t *config_data) { @@ -697,6 +767,10 @@ static void virtio_balloon_set_config(VirtIODevice *vdev, qapi_event_send_balloon_change(vm_ram_size - ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT)); } + dev->poison_val = 0; + if (virtio_balloon_page_poison_support(dev)) { + dev->poison_val = le32_to_cpu(config.poison_val); + } trace_virtio_balloon_set_config(dev->actual, oldactual); } @@ -755,6 +829,17 @@ static const VMStateDescription vmstate_virtio_balloon_free_page_report = { } }; +static const VMStateDescription vmstate_virtio_balloon_page_poison = { + .name = "vitio-balloon-device/page-poison", + .version_id = 1, + .minimum_version_id = 1, + .needed = virtio_balloon_page_poison_support, + .fields = (VMStateField[]) { + VMSTATE_UINT32(poison_val, VirtIOBalloon), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_virtio_balloon_device = { .name = "virtio-balloon-device", .version_id = 1, @@ -767,6 +852,7 @@ static const VMStateDescription vmstate_virtio_balloon_device = { }, .subsections = (const VMStateDescription * []) { &vmstate_virtio_balloon_free_page_report, + &vmstate_virtio_balloon_page_poison, NULL } }; @@ -789,6 +875,13 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) return; } + if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_FREE_PAGE_HINT) && + !s->iothread) { + error_setg(errp, "'free-page-hint' requires 'iothread' to be set"); + virtio_cleanup(vdev); + return; + } + s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output); s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output); s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats); @@ -797,25 +890,18 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE, virtio_balloon_handle_free_page_vq); - s->free_page_report_status = FREE_PAGE_REPORT_S_STOP; - s->free_page_report_cmd_id = - VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; - s->free_page_report_notify.notify = - virtio_balloon_free_page_report_notify; precopy_add_notifier(&s->free_page_report_notify); - if (s->iothread) { - object_ref(OBJECT(s->iothread)); - s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), - virtio_ballloon_get_free_page_hints, s); - qemu_mutex_init(&s->free_page_lock); - qemu_cond_init(&s->free_page_cond); - s->block_iothread = false; - } else { - /* Simply disable this feature if the iothread wasn't created. */ - s->host_features &= ~(1 << VIRTIO_BALLOON_F_FREE_PAGE_HINT); - virtio_error(vdev, "iothread is missing"); - } + + object_ref(OBJECT(s->iothread)); + s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), + virtio_ballloon_get_free_page_hints, s); } + + if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) { + s->reporting_vq = virtio_add_queue(vdev, 32, + virtio_balloon_handle_report); + } + reset_stats(s); } @@ -824,8 +910,9 @@ static void virtio_balloon_device_unrealize(DeviceState *dev) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOBalloon *s = VIRTIO_BALLOON(dev); - if (virtio_balloon_free_page_support(s)) { + if (s->free_page_bh) { qemu_bh_delete(s->free_page_bh); + object_unref(OBJECT(s->iothread)); virtio_balloon_free_page_stop(s); precopy_remove_notifier(&s->free_page_report_notify); } @@ -838,6 +925,9 @@ static void virtio_balloon_device_unrealize(DeviceState *dev) if (s->free_page_vq) { virtio_delete_queue(s->free_page_vq); } + if (s->reporting_vq) { + virtio_delete_queue(s->reporting_vq); + } virtio_cleanup(vdev); } @@ -854,6 +944,8 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev) g_free(s->stats_vq_elem); s->stats_vq_elem = NULL; } + + s->poison_val = 0; } static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) @@ -892,6 +984,11 @@ static void virtio_balloon_instance_init(Object *obj) { VirtIOBalloon *s = VIRTIO_BALLOON(obj); + qemu_mutex_init(&s->free_page_lock); + qemu_cond_init(&s->free_page_cond); + s->free_page_report_cmd_id = VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; + s->free_page_report_notify.notify = virtio_balloon_free_page_report_notify; + object_property_add(obj, "guest-stats", "guest statistics", balloon_stats_get_all, NULL, NULL, s); @@ -916,6 +1013,10 @@ static Property virtio_balloon_properties[] = { VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false), DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features, VIRTIO_BALLOON_F_FREE_PAGE_HINT, false), + DEFINE_PROP_BIT("page-poison", VirtIOBalloon, host_features, + VIRTIO_BALLOON_F_PAGE_POISON, true), + DEFINE_PROP_BIT("free-page-reporting", VirtIOBalloon, host_features, + VIRTIO_BALLOON_F_REPORTING, false), /* QEMU 4.0 accidentally changed the config size even when free-page-hint * is disabled, resulting in QEMU 3.1 migration incompatibility. This * property retains this quirk for QEMU 4.1 machine types. diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index d028c17c24..7bc8c1c056 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1273,16 +1273,20 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, virtio_queue_set_vector(vdev, vdev->queue_sel, val); break; case VIRTIO_PCI_COMMON_Q_ENABLE: - virtio_queue_set_num(vdev, vdev->queue_sel, - proxy->vqs[vdev->queue_sel].num); - virtio_queue_set_rings(vdev, vdev->queue_sel, + if (val == 1) { + virtio_queue_set_num(vdev, vdev->queue_sel, + proxy->vqs[vdev->queue_sel].num); + virtio_queue_set_rings(vdev, vdev->queue_sel, ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 | proxy->vqs[vdev->queue_sel].desc[0], ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 | proxy->vqs[vdev->queue_sel].avail[0], ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 | proxy->vqs[vdev->queue_sel].used[0]); - proxy->vqs[vdev->queue_sel].enabled = 1; + proxy->vqs[vdev->queue_sel].enabled = 1; + } else { + virtio_error(vdev, "wrong value for queue_enable %"PRIx64, val); + } break; case VIRTIO_PCI_COMMON_Q_DESCLO: proxy->vqs[vdev->queue_sel].desc[0] = val; diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h index c13327fa78..3be9ab5049 100644 --- a/include/hw/acpi/acpi-defs.h +++ b/include/hw/acpi/acpi-defs.h @@ -88,6 +88,8 @@ typedef struct AcpiFadtData { struct AcpiGenericAddress pm_tmr; /* PM_TMR_BLK */ struct AcpiGenericAddress gpe0_blk; /* GPE0_BLK */ struct AcpiGenericAddress reset_reg; /* RESET_REG */ + struct AcpiGenericAddress sleep_ctl; /* SLEEP_CONTROL_REG */ + struct AcpiGenericAddress sleep_sts; /* SLEEP_STATUS_REG */ uint8_t reset_val; /* RESET_VALUE */ uint8_t rev; /* Revision */ uint32_t flags; /* Flags */ diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index ed7c89309e..d27da03d64 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -437,4 +437,6 @@ void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms); void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, const char *oem_id, const char *oem_table_id); + +void build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog); #endif diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h index 83917de024..90a9180db5 100644 --- a/include/hw/acpi/generic_event_device.h +++ b/include/hw/acpi/generic_event_device.h @@ -86,7 +86,7 @@ #define ACPI_GED_NVDIMM_HOTPLUG_EVT 0x4 typedef struct GEDState { - MemoryRegion io; + MemoryRegion evt; uint32_t sel; } GEDState; diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h index a3c08955e8..b67a1aedf6 100644 --- a/include/hw/mem/nvdimm.h +++ b/include/hw/mem/nvdimm.h @@ -155,6 +155,7 @@ typedef struct NVDIMMState NVDIMMState; void nvdimm_init_acpi_state(NVDIMMState *state, MemoryRegion *io, struct AcpiGenericAddress dsm_io, FWCfgState *fw_cfg, Object *owner); +void nvdimm_build_srat(GArray *table_data); void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data, BIOSLinker *linker, NVDIMMState *state, uint32_t ram_slots); diff --git a/include/hw/virtio/vhost-user-vsock.h b/include/hw/virtio/vhost-user-vsock.h new file mode 100644 index 0000000000..4e128a4b9f --- /dev/null +++ b/include/hw/virtio/vhost-user-vsock.h @@ -0,0 +1,36 @@ +/* + * Vhost-user vsock virtio device + * + * Copyright 2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#ifndef _QEMU_VHOST_USER_VSOCK_H +#define _QEMU_VHOST_USER_VSOCK_H + +#include "hw/virtio/vhost-vsock-common.h" +#include "hw/virtio/vhost-user.h" +#include "standard-headers/linux/virtio_vsock.h" + +#define TYPE_VHOST_USER_VSOCK "vhost-user-vsock-device" +#define VHOST_USER_VSOCK(obj) \ + OBJECT_CHECK(VHostUserVSock, (obj), TYPE_VHOST_USER_VSOCK) + +typedef struct { + CharBackend chardev; +} VHostUserVSockConf; + +typedef struct { + /*< private >*/ + VHostVSockCommon parent; + VhostUserState vhost_user; + VHostUserVSockConf conf; + struct virtio_vsock_config vsockcfg; + + /*< public >*/ +} VHostUserVSock; + +#endif /* _QEMU_VHOST_USER_VSOCK_H */ diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h index 811e325f42..a9abca3288 100644 --- a/include/hw/virtio/vhost-user.h +++ b/include/hw/virtio/vhost-user.h @@ -20,6 +20,7 @@ typedef struct VhostUserHostNotifier { typedef struct VhostUserState { CharBackend *chr; VhostUserHostNotifier notifier[VIRTIO_QUEUE_MAX]; + int memory_slots; } VhostUserState; bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp); diff --git a/include/hw/virtio/vhost-vsock-common.h b/include/hw/virtio/vhost-vsock-common.h new file mode 100644 index 0000000000..f8b4aaae00 --- /dev/null +++ b/include/hw/virtio/vhost-vsock-common.h @@ -0,0 +1,47 @@ +/* + * Parent class for vhost-vsock devices + * + * Copyright 2015-2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#ifndef _QEMU_VHOST_VSOCK_COMMON_H +#define _QEMU_VHOST_VSOCK_COMMON_H + +#include "hw/virtio/virtio.h" +#include "hw/virtio/vhost.h" + +#define TYPE_VHOST_VSOCK_COMMON "vhost-vsock-common" +#define VHOST_VSOCK_COMMON(obj) \ + OBJECT_CHECK(VHostVSockCommon, (obj), TYPE_VHOST_VSOCK_COMMON) + +enum { + VHOST_VSOCK_SAVEVM_VERSION = 0, + + VHOST_VSOCK_QUEUE_SIZE = 128, +}; + +typedef struct { + VirtIODevice parent; + + struct vhost_virtqueue vhost_vqs[2]; + struct vhost_dev vhost_dev; + + VirtQueue *event_vq; + VirtQueue *recv_vq; + VirtQueue *trans_vq; + + QEMUTimer *post_load_timer; +} VHostVSockCommon; + +int vhost_vsock_common_start(VirtIODevice *vdev); +void vhost_vsock_common_stop(VirtIODevice *vdev); +int vhost_vsock_common_pre_save(void *opaque); +int vhost_vsock_common_post_load(void *opaque, int version_id); +void vhost_vsock_common_realize(VirtIODevice *vdev, const char *name); +void vhost_vsock_common_unrealize(VirtIODevice *vdev); + +#endif /* _QEMU_VHOST_VSOCK_COMMON_H */ diff --git a/include/hw/virtio/vhost-vsock.h b/include/hw/virtio/vhost-vsock.h index bc5a988ee5..8cbb7b90f9 100644 --- a/include/hw/virtio/vhost-vsock.h +++ b/include/hw/virtio/vhost-vsock.h @@ -14,8 +14,7 @@ #ifndef QEMU_VHOST_VSOCK_H #define QEMU_VHOST_VSOCK_H -#include "hw/virtio/virtio.h" -#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-vsock-common.h" #define TYPE_VHOST_VSOCK "vhost-vsock-device" #define VHOST_VSOCK(obj) \ @@ -28,14 +27,8 @@ typedef struct { typedef struct { /*< private >*/ - VirtIODevice parent; + VHostVSockCommon parent; VHostVSockConf conf; - struct vhost_virtqueue vhost_vqs[2]; - struct vhost_dev vhost_dev; - VirtQueue *event_vq; - VirtQueue *recv_vq; - VirtQueue *trans_vq; - QEMUTimer *post_load_timer; /*< public >*/ } VHostVSock; diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h index d1c968d237..d49fef00ce 100644 --- a/include/hw/virtio/virtio-balloon.h +++ b/include/hw/virtio/virtio-balloon.h @@ -42,7 +42,7 @@ enum virtio_balloon_free_page_report_status { typedef struct VirtIOBalloon { VirtIODevice parent_obj; - VirtQueue *ivq, *dvq, *svq, *free_page_vq; + VirtQueue *ivq, *dvq, *svq, *free_page_vq, *reporting_vq; uint32_t free_page_report_status; uint32_t num_pages; uint32_t actual; @@ -70,6 +70,7 @@ typedef struct VirtIOBalloon { uint32_t host_features; bool qemu_4_0_config_size; + uint32_t poison_val; } VirtIOBalloon; #endif diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h index f37851b1aa..03fb25941c 100644 --- a/include/sysemu/tpm.h +++ b/include/sysemu/tpm.h @@ -50,6 +50,8 @@ typedef struct TPMIfClass { #define TPM_IS_TIS_ISA(chr) \ object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS_ISA) +#define TPM_IS_TIS_SYSBUS(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS_SYSBUS) #define TPM_IS_CRB(chr) \ object_dynamic_cast(OBJECT(chr), TYPE_TPM_CRB) #define TPM_IS_SPAPR(chr) \ diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index 9c61e769ca..e03adf0d4d 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -688,7 +688,8 @@ static void hmp_info_pci_device(Monitor *mon, const PciDeviceInfo *dev) } if (dev->has_irq) { - monitor_printf(mon, " IRQ %" PRId64 ".\n", dev->irq); + monitor_printf(mon, " IRQ %" PRId64 ", pin %c\n", + dev->irq, (char)('A' + dev->irq_pin - 1)); } if (dev->has_pci_bridge) { diff --git a/qapi/misc.json b/qapi/misc.json index 99b90ac80b..a5a0beb902 100644 --- a/qapi/misc.json +++ b/qapi/misc.json @@ -403,6 +403,8 @@ # # @irq: if an IRQ is assigned to the device, the IRQ number # +# @irq_pin: the IRQ pin, zero means no IRQ (since 5.1) +# # @qdev_id: the device name of the PCI device # # @pci_bridge: if the device is a PCI bridge, the bridge information @@ -417,8 +419,8 @@ { 'struct': 'PciDeviceInfo', 'data': {'bus': 'int', 'slot': 'int', 'function': 'int', 'class_info': 'PciDeviceClass', 'id': 'PciDeviceId', - '*irq': 'int', 'qdev_id': 'str', '*pci_bridge': 'PciBridgeInfo', - 'regions': ['PciMemoryRegion']} } + '*irq': 'int', 'irq_pin': 'int', 'qdev_id': 'str', + '*pci_bridge': 'PciBridgeInfo', 'regions': ['PciMemoryRegion'] }} ## # @PciInfo: diff --git a/tests/data/acpi/pc/DSDT b/tests/data/acpi/pc/DSDT Binary files differindex ad4b2d46cc..384a82dbb3 100644 --- a/tests/data/acpi/pc/DSDT +++ b/tests/data/acpi/pc/DSDT diff --git a/tests/data/acpi/pc/DSDT.acpihmat b/tests/data/acpi/pc/DSDT.acpihmat Binary files differindex eff7aadfab..47ddfdb027 100644 --- a/tests/data/acpi/pc/DSDT.acpihmat +++ b/tests/data/acpi/pc/DSDT.acpihmat diff --git a/tests/data/acpi/pc/DSDT.bridge b/tests/data/acpi/pc/DSDT.bridge Binary files differindex 92ae808e2e..d1e2fa9fb8 100644 --- a/tests/data/acpi/pc/DSDT.bridge +++ b/tests/data/acpi/pc/DSDT.bridge diff --git a/tests/data/acpi/pc/DSDT.cphp b/tests/data/acpi/pc/DSDT.cphp Binary files differindex f357235851..54f481faf1 100644 --- a/tests/data/acpi/pc/DSDT.cphp +++ b/tests/data/acpi/pc/DSDT.cphp diff --git a/tests/data/acpi/pc/DSDT.dimmpxm b/tests/data/acpi/pc/DSDT.dimmpxm Binary files differindex 7fa09463c1..5d98016ae5 100644 --- a/tests/data/acpi/pc/DSDT.dimmpxm +++ b/tests/data/acpi/pc/DSDT.dimmpxm diff --git a/tests/data/acpi/pc/DSDT.ipmikcs b/tests/data/acpi/pc/DSDT.ipmikcs Binary files differindex 469d13e1f6..57b7835874 100644 --- a/tests/data/acpi/pc/DSDT.ipmikcs +++ b/tests/data/acpi/pc/DSDT.ipmikcs diff --git a/tests/data/acpi/pc/DSDT.memhp b/tests/data/acpi/pc/DSDT.memhp Binary files differindex aee75bea27..8cb90ef14e 100644 --- a/tests/data/acpi/pc/DSDT.memhp +++ b/tests/data/acpi/pc/DSDT.memhp diff --git a/tests/data/acpi/pc/DSDT.numamem b/tests/data/acpi/pc/DSDT.numamem Binary files differindex 9a747f6f08..f194bc6394 100644 --- a/tests/data/acpi/pc/DSDT.numamem +++ b/tests/data/acpi/pc/DSDT.numamem diff --git a/tests/data/acpi/pc/SRAT.dimmpxm b/tests/data/acpi/pc/SRAT.dimmpxm Binary files differindex f5c0267ea2..5a13c61b90 100644 --- a/tests/data/acpi/pc/SRAT.dimmpxm +++ b/tests/data/acpi/pc/SRAT.dimmpxm diff --git a/tests/data/acpi/q35/DSDT b/tests/data/acpi/q35/DSDT Binary files differindex 9fa4d5a405..6a5e4dd85a 100644 --- a/tests/data/acpi/q35/DSDT +++ b/tests/data/acpi/q35/DSDT diff --git a/tests/data/acpi/q35/DSDT.acpihmat b/tests/data/acpi/q35/DSDT.acpihmat Binary files differindex 2d834a854c..c1dd7773f3 100644 --- a/tests/data/acpi/q35/DSDT.acpihmat +++ b/tests/data/acpi/q35/DSDT.acpihmat diff --git a/tests/data/acpi/q35/DSDT.bridge b/tests/data/acpi/q35/DSDT.bridge Binary files differindex b75122b24a..2ef1e894a3 100644 --- a/tests/data/acpi/q35/DSDT.bridge +++ b/tests/data/acpi/q35/DSDT.bridge diff --git a/tests/data/acpi/q35/DSDT.cphp b/tests/data/acpi/q35/DSDT.cphp Binary files differindex c59c19ff46..74e86176e5 100644 --- a/tests/data/acpi/q35/DSDT.cphp +++ b/tests/data/acpi/q35/DSDT.cphp diff --git a/tests/data/acpi/q35/DSDT.dimmpxm b/tests/data/acpi/q35/DSDT.dimmpxm Binary files differindex 9edc104ee6..4bf8e9d64b 100644 --- a/tests/data/acpi/q35/DSDT.dimmpxm +++ b/tests/data/acpi/q35/DSDT.dimmpxm diff --git a/tests/data/acpi/q35/DSDT.ipmibt b/tests/data/acpi/q35/DSDT.ipmibt Binary files differindex 3910e9b767..38723daef8 100644 --- a/tests/data/acpi/q35/DSDT.ipmibt +++ b/tests/data/acpi/q35/DSDT.ipmibt diff --git a/tests/data/acpi/q35/DSDT.memhp b/tests/data/acpi/q35/DSDT.memhp Binary files differindex 8461e984c9..98328d1c41 100644 --- a/tests/data/acpi/q35/DSDT.memhp +++ b/tests/data/acpi/q35/DSDT.memhp diff --git a/tests/data/acpi/q35/DSDT.mmio64 b/tests/data/acpi/q35/DSDT.mmio64 Binary files differindex fc0cc096ba..5916c0e9ce 100644 --- a/tests/data/acpi/q35/DSDT.mmio64 +++ b/tests/data/acpi/q35/DSDT.mmio64 diff --git a/tests/data/acpi/q35/DSDT.numamem b/tests/data/acpi/q35/DSDT.numamem Binary files differindex 498c843be1..cf3fde3449 100644 --- a/tests/data/acpi/q35/DSDT.numamem +++ b/tests/data/acpi/q35/DSDT.numamem diff --git a/tests/data/acpi/q35/DSDT.tis b/tests/data/acpi/q35/DSDT.tis Binary files differnew file mode 100644 index 0000000000..56b6fb0c32 --- /dev/null +++ b/tests/data/acpi/q35/DSDT.tis diff --git a/tests/data/acpi/q35/SRAT.dimmpxm b/tests/data/acpi/q35/SRAT.dimmpxm Binary files differindex f5c0267ea2..5a13c61b90 100644 --- a/tests/data/acpi/q35/SRAT.dimmpxm +++ b/tests/data/acpi/q35/SRAT.dimmpxm diff --git a/tests/data/acpi/q35/TPM2.tis b/tests/data/acpi/q35/TPM2.tis Binary files differnew file mode 100644 index 0000000000..7878a6e79a --- /dev/null +++ b/tests/data/acpi/q35/TPM2.tis diff --git a/tests/data/acpi/virt/SRAT.memhp b/tests/data/acpi/virt/SRAT.memhp Binary files differindex 1b57db2072..9a35adb40c 100644 --- a/tests/data/acpi/virt/SRAT.memhp +++ b/tests/data/acpi/virt/SRAT.memhp diff --git a/tests/qtest/Makefile.include b/tests/qtest/Makefile.include index 9e5a51d033..5023fa413d 100644 --- a/tests/qtest/Makefile.include +++ b/tests/qtest/Makefile.include @@ -262,6 +262,7 @@ tests/qtest/hd-geo-test$(EXESUF): tests/qtest/hd-geo-test.o $(libqos-obj-y) tests/qtest/boot-order-test$(EXESUF): tests/qtest/boot-order-test.o $(libqos-obj-y) tests/qtest/boot-serial-test$(EXESUF): tests/qtest/boot-serial-test.o $(libqos-obj-y) tests/qtest/bios-tables-test$(EXESUF): tests/qtest/bios-tables-test.o \ + tests/qtest/tpm-emu.o $(test-io-obj-y) \ tests/qtest/boot-sector.o tests/qtest/acpi-utils.o $(libqos-obj-y) tests/qtest/pxe-test$(EXESUF): tests/qtest/pxe-test.o tests/qtest/boot-sector.o $(libqos-obj-y) tests/qtest/microbit-test$(EXESUF): tests/qtest/microbit-test.o diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index c9843829b3..53f104a9c5 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -57,6 +57,9 @@ #include "qemu/bitmap.h" #include "acpi-utils.h" #include "boot-sector.h" +#include "tpm-emu.h" +#include "hw/acpi/tpm.h" + #define MACHINE_PC "pc" #define MACHINE_Q35 "q35" @@ -874,6 +877,60 @@ static void test_acpi_piix4_tcg_numamem(void) free_test_data(&data); } +uint64_t tpm_tis_base_addr; + +static void test_acpi_tcg_tpm(const char *machine, const char *tpm_if, + uint64_t base) +{ + gchar *tmp_dir_name = g_strdup_printf("qemu-test_acpi_%s_tcg_%s.XXXXXX", + machine, tpm_if); + char *tmp_path = g_dir_make_tmp(tmp_dir_name, NULL); + TestState test; + test_data data; + GThread *thread; + char *args, *variant = g_strdup_printf(".%s", tpm_if); + + tpm_tis_base_addr = base; + + module_call_init(MODULE_INIT_QOM); + + test.addr = g_new0(SocketAddress, 1); + test.addr->type = SOCKET_ADDRESS_TYPE_UNIX; + test.addr->u.q_unix.path = g_build_filename(tmp_path, "sock", NULL); + g_mutex_init(&test.data_mutex); + g_cond_init(&test.data_cond); + test.data_cond_signal = false; + + thread = g_thread_new(NULL, tpm_emu_ctrl_thread, &test); + tpm_emu_test_wait_cond(&test); + + memset(&data, 0, sizeof(data)); + data.machine = machine; + data.variant = variant; + + args = g_strdup_printf( + " -chardev socket,id=chr,path=%s" + " -tpmdev emulator,id=dev,chardev=chr" + " -device tpm-%s,tpmdev=dev", + test.addr->u.q_unix.path, tpm_if); + + test_acpi_one(args, &data); + + g_thread_join(thread); + g_unlink(test.addr->u.q_unix.path); + qapi_free_SocketAddress(test.addr); + g_rmdir(tmp_path); + g_free(variant); + g_free(tmp_path); + g_free(tmp_dir_name); + free_test_data(&data); +} + +static void test_acpi_q35_tcg_tpm_tis(void) +{ + test_acpi_tcg_tpm("q35", "tis", 0xFED40000); +} + static void test_acpi_tcg_dimm_pxm(const char *machine) { test_data data; @@ -1037,6 +1094,7 @@ int main(int argc, char *argv[]) return ret; } + qtest_add_func("acpi/q35/tpm-tis", test_acpi_q35_tcg_tpm_tis); qtest_add_func("acpi/piix4", test_acpi_piix4_tcg); qtest_add_func("acpi/piix4/bridge", test_acpi_piix4_tcg_bridge); qtest_add_func("acpi/q35", test_acpi_q35_tcg); diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c index c43ac4aef8..298d0eec74 100644 --- a/tests/qtest/tpm-emu.c +++ b/tests/qtest/tpm-emu.c @@ -49,7 +49,6 @@ static void *tpm_emu_tpm_thread(void *data) s->tpm_msg->tag = be16_to_cpu(s->tpm_msg->tag); s->tpm_msg->len = be32_to_cpu(s->tpm_msg->len); g_assert_cmpint(s->tpm_msg->len, >=, minhlen); - g_assert_cmpint(s->tpm_msg->tag, ==, TPM2_ST_NO_SESSIONS); s->tpm_msg = g_realloc(s->tpm_msg, s->tpm_msg->len); qio_channel_read(ioc, (char *)&s->tpm_msg->code, diff --git a/tests/qtest/tpm-emu.h b/tests/qtest/tpm-emu.h index a4f1d64226..73f3bed0c4 100644 --- a/tests/qtest/tpm-emu.h +++ b/tests/qtest/tpm-emu.h @@ -16,6 +16,9 @@ #define TPM_RC_FAILURE 0x101 #define TPM2_ST_NO_SESSIONS 0x8001 +#include "qemu/sockets.h" +#include "io/channel.h" + struct tpm_hdr { uint16_t tag; uint32_t len; |