diff options
author | Stefan Hajnoczi <stefanha@redhat.com> | 2023-11-07 18:59:40 +0800 |
---|---|---|
committer | Stefan Hajnoczi <stefanha@redhat.com> | 2023-11-07 18:59:41 +0800 |
commit | f6b615b52d1d92f02103596a30df95f31138a2e4 (patch) | |
tree | b8d13f1b7e485177a8b6b470df30eaac268b3466 /hw/virtio | |
parent | 7eee58ae3bb15a2bceb368997ce1a48fd3c607e7 (diff) | |
parent | 94cd94f1c0137b56000c01208e03d0907ad34910 (diff) |
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
virtio,pc,pci: features, fixes
virtio sound card support
vhost-user: back-end state migration
cxl:
line length reduction
enabling fabric management
vhost-vdpa:
shadow virtqueue hash calculation support
shadow virtqueue RSS support
tests:
CPU topology related smbios test cases
Fixes, cleanups all over the place
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
# -----BEGIN PGP SIGNATURE-----
#
# iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmVKDDoPHG1zdEByZWRo
# YXQuY29tAAoJECgfDbjSjVRpF08H/0Zts8uvkHbgiOEJw4JMHU6/VaCipfIYsp01
# GSfwYOyEsXJ7GIxKWaCiMnWXEm7tebNCPKf3DoUtcAojQj3vuF9XbWBKw/bfRn83
# nGO/iiwbYViSKxkwqUI+Up5YiN9o0M8gBFrY0kScPezbnYmo5u2bcADdEEq6gH68
# D0Ea8i+WmszL891ypvgCDBL2ObDk3qX3vA5Q6J2I+HKX2ofJM59BwaKwS5ghw+IG
# BmbKXUZJNjUQfN9dQ7vJuiuqdknJ2xUzwW2Vn612ffarbOZB1DZ6ruWlrHty5TjX
# 0w4IXEJPBgZYbX9oc6zvTQnbLDBJbDU89mnme0TcmNMKWmQKTtc=
# =vEv+
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 07 Nov 2023 18:06:50 HKT
# gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg: issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full]
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [full]
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
# Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469
* tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (63 commits)
acpi/tests/avocado/bits: enable console logging from bits VM
acpi/tests/avocado/bits: enforce 32-bit SMBIOS entry point
hw/cxl: Add tunneled command support to mailbox for switch cci.
hw/cxl: Add dummy security state get
hw/cxl/type3: Cleanup multiple CXL_TYPE3() calls in read/write functions
hw/cxl/mbox: Add Get Background Operation Status Command
hw/cxl: Add support for device sanitation
hw/cxl/mbox: Wire up interrupts for background completion
hw/cxl/mbox: Add support for background operations
hw/cxl: Implement Physical Ports status retrieval
hw/pci-bridge/cxl_downstream: Set default link width and link speed
hw/cxl/mbox: Add Physical Switch Identify command.
hw/cxl/mbox: Add Information and Status / Identify command
hw/cxl: Add a switch mailbox CCI function
hw/pci-bridge/cxl_upstream: Move definition of device to header.
hw/cxl/mbox: Generalize the CCI command processing
hw/cxl/mbox: Pull the CCI definition out of the CXLDeviceState
hw/cxl/mbox: Split mailbox command payload into separate input and output
hw/cxl/mbox: Pull the payload out of struct cxl_cmd and make instances constant
hw/cxl: Fix a QEMU_BUILD_BUG_ON() in switch statement scope issue.
...
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw/virtio')
-rw-r--r-- | hw/virtio/vhost-user-fs.c | 101 | ||||
-rw-r--r-- | hw/virtio/vhost-user.c | 146 | ||||
-rw-r--r-- | hw/virtio/vhost.c | 241 |
3 files changed, 487 insertions, 1 deletions
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c index 49d699ffc2..eb91723855 100644 --- a/hw/virtio/vhost-user-fs.c +++ b/hw/virtio/vhost-user-fs.c @@ -298,9 +298,108 @@ static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev) return &fs->vhost_dev; } +/** + * Fetch the internal state from virtiofsd and save it to `f`. + */ +static int vuf_save_state(QEMUFile *f, void *pv, size_t size, + const VMStateField *field, JSONWriter *vmdesc) +{ + VirtIODevice *vdev = pv; + VHostUserFS *fs = VHOST_USER_FS(vdev); + Error *local_error = NULL; + int ret; + + ret = vhost_save_backend_state(&fs->vhost_dev, f, &local_error); + if (ret < 0) { + error_reportf_err(local_error, + "Error saving back-end state of %s device %s " + "(tag: \"%s\"): ", + vdev->name, vdev->parent_obj.canonical_path, + fs->conf.tag ?: "<none>"); + return ret; + } + + return 0; +} + +/** + * Load virtiofsd's internal state from `f` and send it over to virtiofsd. + */ +static int vuf_load_state(QEMUFile *f, void *pv, size_t size, + const VMStateField *field) +{ + VirtIODevice *vdev = pv; + VHostUserFS *fs = VHOST_USER_FS(vdev); + Error *local_error = NULL; + int ret; + + ret = vhost_load_backend_state(&fs->vhost_dev, f, &local_error); + if (ret < 0) { + error_reportf_err(local_error, + "Error loading back-end state of %s device %s " + "(tag: \"%s\"): ", + vdev->name, vdev->parent_obj.canonical_path, + fs->conf.tag ?: "<none>"); + return ret; + } + + return 0; +} + +static bool vuf_is_internal_migration(void *opaque) +{ + /* TODO: Return false when an external migration is requested */ + return true; +} + +static int vuf_check_migration_support(void *opaque) +{ + VirtIODevice *vdev = opaque; + VHostUserFS *fs = VHOST_USER_FS(vdev); + + if (!vhost_supports_device_state(&fs->vhost_dev)) { + error_report("Back-end of %s device %s (tag: \"%s\") does not support " + "migration through qemu", + vdev->name, vdev->parent_obj.canonical_path, + fs->conf.tag ?: "<none>"); + return -ENOTSUP; + } + + 
return 0; +} + +static const VMStateDescription vuf_backend_vmstate; + static const VMStateDescription vuf_vmstate = { .name = "vhost-user-fs", - .unmigratable = 1, + .version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &vuf_backend_vmstate, + NULL, + } +}; + +static const VMStateDescription vuf_backend_vmstate = { + .name = "vhost-user-fs-backend", + .version_id = 0, + .needed = vuf_is_internal_migration, + .pre_load = vuf_check_migration_support, + .pre_save = vuf_check_migration_support, + .fields = (VMStateField[]) { + { + .name = "back-end", + .info = &(const VMStateInfo) { + .name = "virtio-fs back-end state", + .get = vuf_load_state, + .put = vuf_save_state, + }, + }, + VMSTATE_END_OF_LIST() + }, }; static Property vuf_properties[] = { diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 7b42ae8aae..f214df804b 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -103,6 +103,8 @@ typedef enum VhostUserRequest { VHOST_USER_SET_STATUS = 39, VHOST_USER_GET_STATUS = 40, VHOST_USER_GET_SHARED_OBJECT = 41, + VHOST_USER_SET_DEVICE_STATE_FD = 42, + VHOST_USER_CHECK_DEVICE_STATE = 43, VHOST_USER_MAX } VhostUserRequest; @@ -201,6 +203,12 @@ typedef struct { uint32_t size; /* the following payload size */ } QEMU_PACKED VhostUserHeader; +/* Request payload of VHOST_USER_SET_DEVICE_STATE_FD */ +typedef struct VhostUserTransferDeviceState { + uint32_t direction; + uint32_t phase; +} VhostUserTransferDeviceState; + typedef union { #define VHOST_USER_VRING_IDX_MASK (0xff) #define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) @@ -216,6 +224,7 @@ typedef union { VhostUserVringArea area; VhostUserInflight inflight; VhostUserShared object; + VhostUserTransferDeviceState transfer_state; } VhostUserPayload; typedef struct VhostUserMsg { @@ -2855,6 +2864,140 @@ static void vhost_user_reset_status(struct vhost_dev *dev) } } +static bool 
vhost_user_supports_device_state(struct vhost_dev *dev) +{ + return virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_DEVICE_STATE); +} + +static int vhost_user_set_device_state_fd(struct vhost_dev *dev, + VhostDeviceStateDirection direction, + VhostDeviceStatePhase phase, + int fd, + int *reply_fd, + Error **errp) +{ + int ret; + struct vhost_user *vu = dev->opaque; + VhostUserMsg msg = { + .hdr = { + .request = VHOST_USER_SET_DEVICE_STATE_FD, + .flags = VHOST_USER_VERSION, + .size = sizeof(msg.payload.transfer_state), + }, + .payload.transfer_state = { + .direction = direction, + .phase = phase, + }, + }; + + *reply_fd = -1; + + if (!vhost_user_supports_device_state(dev)) { + close(fd); + error_setg(errp, "Back-end does not support migration state transfer"); + return -ENOTSUP; + } + + ret = vhost_user_write(dev, &msg, &fd, 1); + close(fd); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to send SET_DEVICE_STATE_FD message"); + return ret; + } + + ret = vhost_user_read(dev, &msg); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to receive SET_DEVICE_STATE_FD reply"); + return ret; + } + + if (msg.hdr.request != VHOST_USER_SET_DEVICE_STATE_FD) { + error_setg(errp, + "Received unexpected message type, expected %d, received %d", + VHOST_USER_SET_DEVICE_STATE_FD, msg.hdr.request); + return -EPROTO; + } + + if (msg.hdr.size != sizeof(msg.payload.u64)) { + error_setg(errp, + "Received bad message size, expected %zu, received %" PRIu32, + sizeof(msg.payload.u64), msg.hdr.size); + return -EPROTO; + } + + if ((msg.payload.u64 & 0xff) != 0) { + error_setg(errp, "Back-end did not accept migration state transfer"); + return -EIO; + } + + if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK)) { + *reply_fd = qemu_chr_fe_get_msgfd(vu->user->chr); + if (*reply_fd < 0) { + error_setg(errp, + "Failed to get back-end-provided transfer pipe FD"); + *reply_fd = -1; + return -EIO; + } + } + + return 0; +} + +static int 
vhost_user_check_device_state(struct vhost_dev *dev, Error **errp) +{ + int ret; + VhostUserMsg msg = { + .hdr = { + .request = VHOST_USER_CHECK_DEVICE_STATE, + .flags = VHOST_USER_VERSION, + .size = 0, + }, + }; + + if (!vhost_user_supports_device_state(dev)) { + error_setg(errp, "Back-end does not support migration state transfer"); + return -ENOTSUP; + } + + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to send CHECK_DEVICE_STATE message"); + return ret; + } + + ret = vhost_user_read(dev, &msg); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to receive CHECK_DEVICE_STATE reply"); + return ret; + } + + if (msg.hdr.request != VHOST_USER_CHECK_DEVICE_STATE) { + error_setg(errp, + "Received unexpected message type, expected %d, received %d", + VHOST_USER_CHECK_DEVICE_STATE, msg.hdr.request); + return -EPROTO; + } + + if (msg.hdr.size != sizeof(msg.payload.u64)) { + error_setg(errp, + "Received bad message size, expected %zu, received %" PRIu32, + sizeof(msg.payload.u64), msg.hdr.size); + return -EPROTO; + } + + if (msg.payload.u64 != 0) { + error_setg(errp, "Back-end failed to process its internal state"); + return -EIO; + } + + return 0; +} + const VhostOps user_ops = { .backend_type = VHOST_BACKEND_TYPE_USER, .vhost_backend_init = vhost_user_backend_init, @@ -2890,4 +3033,7 @@ const VhostOps user_ops = { .vhost_set_inflight_fd = vhost_user_set_inflight_fd, .vhost_dev_start = vhost_user_dev_start, .vhost_reset_status = vhost_user_reset_status, + .vhost_supports_device_state = vhost_user_supports_device_state, + .vhost_set_device_state_fd = vhost_user_set_device_state_fd, + .vhost_check_device_state = vhost_user_check_device_state, }; diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 9c9ae7109e..2c9ac79468 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -2159,3 +2159,244 @@ int vhost_reset_device(struct vhost_dev *hdev) return -ENOSYS; } + +bool vhost_supports_device_state(struct 
vhost_dev *dev) +{ + if (dev->vhost_ops->vhost_supports_device_state) { + return dev->vhost_ops->vhost_supports_device_state(dev); + } + + return false; +} + +int vhost_set_device_state_fd(struct vhost_dev *dev, + VhostDeviceStateDirection direction, + VhostDeviceStatePhase phase, + int fd, + int *reply_fd, + Error **errp) +{ + if (dev->vhost_ops->vhost_set_device_state_fd) { + return dev->vhost_ops->vhost_set_device_state_fd(dev, direction, phase, + fd, reply_fd, errp); + } + + error_setg(errp, + "vhost transport does not support migration state transfer"); + return -ENOSYS; +} + +int vhost_check_device_state(struct vhost_dev *dev, Error **errp) +{ + if (dev->vhost_ops->vhost_check_device_state) { + return dev->vhost_ops->vhost_check_device_state(dev, errp); + } + + error_setg(errp, + "vhost transport does not support migration state transfer"); + return -ENOSYS; +} + +int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp) +{ + /* Maximum chunk size in which to transfer the state */ + const size_t chunk_size = 1 * 1024 * 1024; + g_autofree void *transfer_buf = NULL; + g_autoptr(GError) g_err = NULL; + int pipe_fds[2], read_fd = -1, write_fd = -1, reply_fd = -1; + int ret; + + /* [0] for reading (our end), [1] for writing (back-end's end) */ + if (!g_unix_open_pipe(pipe_fds, FD_CLOEXEC, &g_err)) { + error_setg(errp, "Failed to set up state transfer pipe: %s", + g_err->message); + ret = -EINVAL; + goto fail; + } + + read_fd = pipe_fds[0]; + write_fd = pipe_fds[1]; + + /* + * VHOST_TRANSFER_STATE_PHASE_STOPPED means the device must be stopped. + * Ideally, it is suspended, but SUSPEND/RESUME currently do not exist for + * vhost-user, so just check that it is stopped at all. 
+ */ + assert(!dev->started); + + /* Transfer ownership of write_fd to the back-end */ + ret = vhost_set_device_state_fd(dev, + VHOST_TRANSFER_STATE_DIRECTION_SAVE, + VHOST_TRANSFER_STATE_PHASE_STOPPED, + write_fd, + &reply_fd, + errp); + if (ret < 0) { + error_prepend(errp, "Failed to initiate state transfer: "); + goto fail; + } + + /* If the back-end wishes to use a different pipe, switch over */ + if (reply_fd >= 0) { + close(read_fd); + read_fd = reply_fd; + } + + transfer_buf = g_malloc(chunk_size); + + while (true) { + ssize_t read_ret; + + read_ret = RETRY_ON_EINTR(read(read_fd, transfer_buf, chunk_size)); + if (read_ret < 0) { + ret = -errno; + error_setg_errno(errp, -ret, "Failed to receive state"); + goto fail; + } + + assert(read_ret <= chunk_size); + qemu_put_be32(f, read_ret); + + if (read_ret == 0) { + /* EOF */ + break; + } + + qemu_put_buffer(f, transfer_buf, read_ret); + } + + /* + * Back-end will not really care, but be clean and close our end of the pipe + * before inquiring the back-end about whether transfer was successful + */ + close(read_fd); + read_fd = -1; + + /* Also, verify that the device is still stopped */ + assert(!dev->started); + + ret = vhost_check_device_state(dev, errp); + if (ret < 0) { + goto fail; + } + + ret = 0; +fail: + if (read_fd >= 0) { + close(read_fd); + } + + return ret; +} + +int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp) +{ + size_t transfer_buf_size = 0; + g_autofree void *transfer_buf = NULL; + g_autoptr(GError) g_err = NULL; + int pipe_fds[2], read_fd = -1, write_fd = -1, reply_fd = -1; + int ret; + + /* [0] for reading (back-end's end), [1] for writing (our end) */ + if (!g_unix_open_pipe(pipe_fds, FD_CLOEXEC, &g_err)) { + error_setg(errp, "Failed to set up state transfer pipe: %s", + g_err->message); + ret = -EINVAL; + goto fail; + } + + read_fd = pipe_fds[0]; + write_fd = pipe_fds[1]; + + /* + * VHOST_TRANSFER_STATE_PHASE_STOPPED means the device must be stopped. 
+ * Ideally, it is suspended, but SUSPEND/RESUME currently do not exist for + * vhost-user, so just check that it is stopped at all. + */ + assert(!dev->started); + + /* Transfer ownership of read_fd to the back-end */ + ret = vhost_set_device_state_fd(dev, + VHOST_TRANSFER_STATE_DIRECTION_LOAD, + VHOST_TRANSFER_STATE_PHASE_STOPPED, + read_fd, + &reply_fd, + errp); + if (ret < 0) { + error_prepend(errp, "Failed to initiate state transfer: "); + goto fail; + } + + /* If the back-end wishes to use a different pipe, switch over */ + if (reply_fd >= 0) { + close(write_fd); + write_fd = reply_fd; + } + + while (true) { + size_t this_chunk_size = qemu_get_be32(f); + ssize_t write_ret; + const uint8_t *transfer_pointer; + + if (this_chunk_size == 0) { + /* End of state */ + break; + } + + if (transfer_buf_size < this_chunk_size) { + transfer_buf = g_realloc(transfer_buf, this_chunk_size); + transfer_buf_size = this_chunk_size; + } + + if (qemu_get_buffer(f, transfer_buf, this_chunk_size) < + this_chunk_size) + { + error_setg(errp, "Failed to read state"); + ret = -EINVAL; + goto fail; + } + + transfer_pointer = transfer_buf; + while (this_chunk_size > 0) { + write_ret = RETRY_ON_EINTR( + write(write_fd, transfer_pointer, this_chunk_size) + ); + if (write_ret < 0) { + ret = -errno; + error_setg_errno(errp, -ret, "Failed to send state"); + goto fail; + } else if (write_ret == 0) { + error_setg(errp, "Failed to send state: Connection is closed"); + ret = -ECONNRESET; + goto fail; + } + + assert(write_ret <= this_chunk_size); + this_chunk_size -= write_ret; + transfer_pointer += write_ret; + } + } + + /* + * Close our end, thus ending transfer, before inquiring the back-end about + * whether transfer was successful + */ + close(write_fd); + write_fd = -1; + + /* Also, verify that the device is still stopped */ + assert(!dev->started); + + ret = vhost_check_device_state(dev, errp); + if (ret < 0) { + goto fail; + } + + ret = 0; +fail: + if (write_fd >= 0) { + close(write_fd); 
+ } + + return ret; +} |