aboutsummaryrefslogtreecommitdiff
path: root/hw/virtio
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2023-11-07 18:59:40 +0800
committerStefan Hajnoczi <stefanha@redhat.com>2023-11-07 18:59:41 +0800
commitf6b615b52d1d92f02103596a30df95f31138a2e4 (patch)
treeb8d13f1b7e485177a8b6b470df30eaac268b3466 /hw/virtio
parent7eee58ae3bb15a2bceb368997ce1a48fd3c607e7 (diff)
parent94cd94f1c0137b56000c01208e03d0907ad34910 (diff)
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
virtio,pc,pci: features, fixes virtio sound card support vhost-user: back-end state migration cxl: line length reduction enabling fabric management vhost-vdpa: shadow virtqueue hash calculation Support shadow virtqueue RSS Support tests: CPU topology related smbios test cases Fixes, cleanups all over the place Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # -----BEGIN PGP SIGNATURE----- # # iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmVKDDoPHG1zdEByZWRo # YXQuY29tAAoJECgfDbjSjVRpF08H/0Zts8uvkHbgiOEJw4JMHU6/VaCipfIYsp01 # GSfwYOyEsXJ7GIxKWaCiMnWXEm7tebNCPKf3DoUtcAojQj3vuF9XbWBKw/bfRn83 # nGO/iiwbYViSKxkwqUI+Up5YiN9o0M8gBFrY0kScPezbnYmo5u2bcADdEEq6gH68 # D0Ea8i+WmszL891ypvgCDBL2ObDk3qX3vA5Q6J2I+HKX2ofJM59BwaKwS5ghw+IG # BmbKXUZJNjUQfN9dQ7vJuiuqdknJ2xUzwW2Vn612ffarbOZB1DZ6ruWlrHty5TjX # 0w4IXEJPBgZYbX9oc6zvTQnbLDBJbDU89mnme0TcmNMKWmQKTtc= # =vEv+ # -----END PGP SIGNATURE----- # gpg: Signature made Tue 07 Nov 2023 18:06:50 HKT # gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469 # gpg: issuer "mst@redhat.com" # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full] # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [full] # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (63 commits) acpi/tests/avocado/bits: enable console logging from bits VM acpi/tests/avocado/bits: enforce 32-bit SMBIOS entry point hw/cxl: Add tunneled command support to mailbox for switch cci. 
hw/cxl: Add dummy security state get hw/cxl/type3: Cleanup multiple CXL_TYPE3() calls in read/write functions hw/cxl/mbox: Add Get Background Operation Status Command hw/cxl: Add support for device sanitation hw/cxl/mbox: Wire up interrupts for background completion hw/cxl/mbox: Add support for background operations hw/cxl: Implement Physical Ports status retrieval hw/pci-bridge/cxl_downstream: Set default link width and link speed hw/cxl/mbox: Add Physical Switch Identify command. hw/cxl/mbox: Add Information and Status / Identify command hw/cxl: Add a switch mailbox CCI function hw/pci-bridge/cxl_upstream: Move defintion of device to header. hw/cxl/mbox: Generalize the CCI command processing hw/cxl/mbox: Pull the CCI definition out of the CXLDeviceState hw/cxl/mbox: Split mailbox command payload into separate input and output hw/cxl/mbox: Pull the payload out of struct cxl_cmd and make instances constant hw/cxl: Fix a QEMU_BUILD_BUG_ON() in switch statement scope issue. ... Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw/virtio')
-rw-r--r--hw/virtio/vhost-user-fs.c101
-rw-r--r--hw/virtio/vhost-user.c146
-rw-r--r--hw/virtio/vhost.c241
3 files changed, 487 insertions, 1 deletions
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index 49d699ffc2..eb91723855 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -298,9 +298,108 @@ static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev)
return &fs->vhost_dev;
}
+/**
+ * VMState "put" handler for the back-end field: obtain the back-end's
+ * internal state via vhost_save_backend_state() and store it in `f`.
+ *
+ * @f: migration stream that receives the state
+ * @pv: the VirtIODevice whose back-end state is saved
+ * @size, @field, @vmdesc: not used here
+ *
+ * Returns 0 on success.  On failure the error is reported (prefixed with
+ * the device name, canonical path and tag) and the negative value from
+ * vhost_save_backend_state() is returned.
+ */
+static int vuf_save_state(QEMUFile *f, void *pv, size_t size,
+                          const VMStateField *field, JSONWriter *vmdesc)
+{
+    VirtIODevice *vdev = pv;
+    VHostUserFS *fs = VHOST_USER_FS(vdev);
+    Error *err = NULL;
+    int rc = vhost_save_backend_state(&fs->vhost_dev, f, &err);
+
+    if (rc >= 0) {
+        return 0;
+    }
+
+    /* error_reportf_err() consumes err */
+    error_reportf_err(err,
+                      "Error saving back-end state of %s device %s "
+                      "(tag: \"%s\"): ",
+                      vdev->name, vdev->parent_obj.canonical_path,
+                      fs->conf.tag ?: "<none>");
+    return rc;
+}
+
+/**
+ * VMState "get" handler for the back-end field: read the back-end's
+ * internal state from `f` and pass it on via vhost_load_backend_state().
+ *
+ * @f: migration stream that holds the state
+ * @pv: the VirtIODevice whose back-end state is restored
+ * @size, @field: not used here
+ *
+ * Returns 0 on success.  On failure the error is reported (prefixed with
+ * the device name, canonical path and tag) and the negative value from
+ * vhost_load_backend_state() is returned.
+ */
+static int vuf_load_state(QEMUFile *f, void *pv, size_t size,
+                          const VMStateField *field)
+{
+    VirtIODevice *vdev = pv;
+    VHostUserFS *fs = VHOST_USER_FS(vdev);
+    Error *err = NULL;
+    int rc = vhost_load_backend_state(&fs->vhost_dev, f, &err);
+
+    if (rc >= 0) {
+        return 0;
+    }
+
+    /* error_reportf_err() consumes err */
+    error_reportf_err(err,
+                      "Error loading back-end state of %s device %s "
+                      "(tag: \"%s\"): ",
+                      vdev->name, vdev->parent_obj.canonical_path,
+                      fs->conf.tag ?: "<none>");
+    return rc;
+}
+
+/*
+ * .needed callback of the "vhost-user-fs-backend" subsection.
+ * Currently returns true unconditionally; @opaque is not inspected.
+ */
+static bool vuf_is_internal_migration(void *opaque)
+{
+ /* TODO: Return false when an external migration is requested */
+ return true;
+}
+
+/*
+ * Used as both .pre_load and .pre_save of the back-end subsection.
+ *
+ * @opaque: the VirtIODevice being migrated
+ *
+ * Returns 0 if vhost_supports_device_state() says the back-end can
+ * transfer its state; otherwise reports an error and returns -ENOTSUP.
+ */
+static int vuf_check_migration_support(void *opaque)
+{
+    VirtIODevice *vdev = opaque;
+    VHostUserFS *fs = VHOST_USER_FS(vdev);
+
+    if (vhost_supports_device_state(&fs->vhost_dev)) {
+        return 0;
+    }
+
+    error_report("Back-end of %s device %s (tag: \"%s\") does not support "
+                 "migration through qemu",
+                 vdev->name, vdev->parent_obj.canonical_path,
+                 fs->conf.tag ?: "<none>");
+    return -ENOTSUP;
+}
+
+static const VMStateDescription vuf_backend_vmstate;
+
+/*
+ * Migration description of the vhost-user-fs device itself: the generic
+ * virtio device state, plus the back-end state as a subsection.
+ */
static const VMStateDescription vuf_vmstate = {
.name = "vhost-user-fs",
- .unmigratable = 1,
+ .version_id = 0,
+ .fields = (VMStateField[]) {
+ VMSTATE_VIRTIO_DEVICE,
+ VMSTATE_END_OF_LIST()
+ },
+ /* NULL-terminated list; vuf_backend_vmstate has its own .needed check */
+ .subsections = (const VMStateDescription * []) {
+ &vuf_backend_vmstate,
+ NULL,
+ }
+};
+
+/*
+ * Subsection carrying the back-end's internal state.  It is included only
+ * when vuf_is_internal_migration() returns true.
+ */
+static const VMStateDescription vuf_backend_vmstate = {
+ .name = "vhost-user-fs-backend",
+ .version_id = 0,
+ .needed = vuf_is_internal_migration,
+ /* The same support check runs before loading and before saving */
+ .pre_load = vuf_check_migration_support,
+ .pre_save = vuf_check_migration_support,
+ .fields = (VMStateField[]) {
+ {
+ /* Single field whose contents are produced/consumed by callbacks */
+ .name = "back-end",
+ .info = &(const VMStateInfo) {
+ .name = "virtio-fs back-end state",
+ .get = vuf_load_state,
+ .put = vuf_save_state,
+ },
+ },
+ VMSTATE_END_OF_LIST()
+ },
};
static Property vuf_properties[] = {
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 7b42ae8aae..f214df804b 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -103,6 +103,8 @@ typedef enum VhostUserRequest {
VHOST_USER_SET_STATUS = 39,
VHOST_USER_GET_STATUS = 40,
VHOST_USER_GET_SHARED_OBJECT = 41,
+ VHOST_USER_SET_DEVICE_STATE_FD = 42,
+ VHOST_USER_CHECK_DEVICE_STATE = 43,
VHOST_USER_MAX
} VhostUserRequest;
@@ -201,6 +203,12 @@ typedef struct {
uint32_t size; /* the following payload size */
} QEMU_PACKED VhostUserHeader;
+/* Request payload of VHOST_USER_SET_DEVICE_STATE_FD */
+typedef struct VhostUserTransferDeviceState {
+ /* Filled from a VhostDeviceStateDirection value by the sender */
+ uint32_t direction;
+ /* Filled from a VhostDeviceStatePhase value by the sender */
+ uint32_t phase;
+} VhostUserTransferDeviceState;
+
typedef union {
#define VHOST_USER_VRING_IDX_MASK (0xff)
#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
@@ -216,6 +224,7 @@ typedef union {
VhostUserVringArea area;
VhostUserInflight inflight;
VhostUserShared object;
+ VhostUserTransferDeviceState transfer_state;
} VhostUserPayload;
typedef struct VhostUserMsg {
@@ -2855,6 +2864,140 @@ static void vhost_user_reset_status(struct vhost_dev *dev)
}
}
+/*
+ * Report whether VHOST_USER_PROTOCOL_F_DEVICE_STATE is set in
+ * dev->protocol_features.
+ */
+static bool vhost_user_supports_device_state(struct vhost_dev *dev)
+{
+    const bool has_device_state =
+        virtio_has_feature(dev->protocol_features,
+                           VHOST_USER_PROTOCOL_F_DEVICE_STATE);
+
+    return has_device_state;
+}
+
+/*
+ * Send VHOST_USER_SET_DEVICE_STATE_FD with @fd attached, then read and
+ * validate the back-end's reply.
+ *
+ * @direction, @phase: copied into the request payload
+ * @fd: FD sent along with the request; it is closed here on every path
+ * @reply_fd: set to -1 up front; receives an FD fetched from the chardev
+ *            only when the reply does not have VHOST_USER_VRING_NOFD_MASK set
+ * @errp: error destination
+ *
+ * Returns 0 on success; -ENOTSUP if the device-state protocol feature is
+ * missing; -EPROTO on an unexpected reply type or size; -EIO if the
+ * back-end rejects the transfer or the reply FD cannot be obtained; or the
+ * negative value from vhost_user_write()/vhost_user_read().
+ */
+static int vhost_user_set_device_state_fd(struct vhost_dev *dev,
+ VhostDeviceStateDirection direction,
+ VhostDeviceStatePhase phase,
+ int fd,
+ int *reply_fd,
+ Error **errp)
+{
+ int ret;
+ struct vhost_user *vu = dev->opaque;
+ VhostUserMsg msg = {
+ .hdr = {
+ .request = VHOST_USER_SET_DEVICE_STATE_FD,
+ .flags = VHOST_USER_VERSION,
+ .size = sizeof(msg.payload.transfer_state),
+ },
+ .payload.transfer_state = {
+ .direction = direction,
+ .phase = phase,
+ },
+ };
+
+ *reply_fd = -1;
+
+ if (!vhost_user_supports_device_state(dev)) {
+ close(fd);
+ error_setg(errp, "Back-end does not support migration state transfer");
+ return -ENOTSUP;
+ }
+
+ ret = vhost_user_write(dev, &msg, &fd, 1);
+ /* Our copy of @fd is dropped whether or not the send succeeded */
+ close(fd);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "Failed to send SET_DEVICE_STATE_FD message");
+ return ret;
+ }
+
+ ret = vhost_user_read(dev, &msg);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "Failed to receive SET_DEVICE_STATE_FD reply");
+ return ret;
+ }
+
+ if (msg.hdr.request != VHOST_USER_SET_DEVICE_STATE_FD) {
+ error_setg(errp,
+ "Received unexpected message type, expected %d, received %d",
+ VHOST_USER_SET_DEVICE_STATE_FD, msg.hdr.request);
+ return -EPROTO;
+ }
+
+ if (msg.hdr.size != sizeof(msg.payload.u64)) {
+ error_setg(errp,
+ "Received bad message size, expected %zu, received %" PRIu32,
+ sizeof(msg.payload.u64), msg.hdr.size);
+ return -EPROTO;
+ }
+
+ /* A non-zero low byte of the reply is treated as a refusal */
+ if ((msg.payload.u64 & 0xff) != 0) {
+ error_setg(errp, "Back-end did not accept migration state transfer");
+ return -EIO;
+ }
+
+ /* Fetch a reply FD only if the "no FD" flag is clear in the reply */
+ if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
+ *reply_fd = qemu_chr_fe_get_msgfd(vu->user->chr);
+ if (*reply_fd < 0) {
+ error_setg(errp,
+ "Failed to get back-end-provided transfer pipe FD");
+ *reply_fd = -1;
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Send VHOST_USER_CHECK_DEVICE_STATE (no payload) and validate the reply.
+ *
+ * Returns 0 if the reply carries a u64 payload equal to 0; -ENOTSUP if the
+ * device-state protocol feature is missing; -EPROTO on an unexpected reply
+ * type or size; -EIO if the reply payload is non-zero; or the negative
+ * value from vhost_user_write()/vhost_user_read().
+ */
+static int vhost_user_check_device_state(struct vhost_dev *dev, Error **errp)
+{
+    VhostUserMsg msg = {
+        .hdr = {
+            .request = VHOST_USER_CHECK_DEVICE_STATE,
+            .flags = VHOST_USER_VERSION,
+            .size = 0,
+        },
+    };
+    const size_t reply_size = sizeof(msg.payload.u64);
+    int rc;
+
+    if (!vhost_user_supports_device_state(dev)) {
+        error_setg(errp, "Back-end does not support migration state transfer");
+        return -ENOTSUP;
+    }
+
+    rc = vhost_user_write(dev, &msg, NULL, 0);
+    if (rc < 0) {
+        error_setg_errno(errp, -rc,
+                         "Failed to send CHECK_DEVICE_STATE message");
+        return rc;
+    }
+
+    /* The reply overwrites the request buffer */
+    rc = vhost_user_read(dev, &msg);
+    if (rc < 0) {
+        error_setg_errno(errp, -rc,
+                         "Failed to receive CHECK_DEVICE_STATE reply");
+        return rc;
+    }
+
+    if (msg.hdr.request != VHOST_USER_CHECK_DEVICE_STATE) {
+        error_setg(errp,
+                   "Received unexpected message type, expected %d, received %d",
+                   VHOST_USER_CHECK_DEVICE_STATE, msg.hdr.request);
+        return -EPROTO;
+    }
+
+    if (msg.hdr.size != reply_size) {
+        error_setg(errp,
+                   "Received bad message size, expected %zu, received %" PRIu32,
+                   reply_size, msg.hdr.size);
+        return -EPROTO;
+    }
+
+    if (msg.payload.u64 == 0) {
+        return 0;
+    }
+
+    error_setg(errp, "Back-end failed to process its internal state");
+    return -EIO;
+}
+
const VhostOps user_ops = {
.backend_type = VHOST_BACKEND_TYPE_USER,
.vhost_backend_init = vhost_user_backend_init,
@@ -2890,4 +3033,7 @@ const VhostOps user_ops = {
.vhost_set_inflight_fd = vhost_user_set_inflight_fd,
.vhost_dev_start = vhost_user_dev_start,
.vhost_reset_status = vhost_user_reset_status,
+ .vhost_supports_device_state = vhost_user_supports_device_state,
+ .vhost_set_device_state_fd = vhost_user_set_device_state_fd,
+ .vhost_check_device_state = vhost_user_check_device_state,
};
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 9c9ae7109e..2c9ac79468 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -2159,3 +2159,244 @@ int vhost_reset_device(struct vhost_dev *hdev)
return -ENOSYS;
}
+
+/*
+ * Ask the transport whether device state transfer is supported.
+ * Transports that do not provide the op are reported as unsupported.
+ */
+bool vhost_supports_device_state(struct vhost_dev *dev)
+{
+    if (!dev->vhost_ops->vhost_supports_device_state) {
+        return false;
+    }
+
+    return dev->vhost_ops->vhost_supports_device_state(dev);
+}
+
+/*
+ * Forward a device state transfer request to the transport.
+ *
+ * @direction, @phase: passed through to the transport op
+ * @fd: pipe end handed over by the caller; ownership moves to this call,
+ *      so it is never left open for the caller to clean up
+ * @reply_fd: receives an FD from the transport op, or -1
+ * @errp: error destination
+ *
+ * Returns the transport op's result, or -ENOSYS (with @errp set) when the
+ * transport does not provide the op.
+ */
+int vhost_set_device_state_fd(struct vhost_dev *dev,
+                              VhostDeviceStateDirection direction,
+                              VhostDeviceStatePhase phase,
+                              int fd,
+                              int *reply_fd,
+                              Error **errp)
+{
+    if (dev->vhost_ops->vhost_set_device_state_fd) {
+        return dev->vhost_ops->vhost_set_device_state_fd(dev, direction, phase,
+                                                         fd, reply_fd, errp);
+    }
+
+    /*
+     * Callers give up @fd and do not close it on failure, so it must be
+     * closed here as well or it would leak.  Also leave *reply_fd in a
+     * defined state, matching what the vhost-user op does on failure.
+     */
+    close(fd);
+    *reply_fd = -1;
+
+    error_setg(errp,
+               "vhost transport does not support migration state transfer");
+    return -ENOSYS;
+}
+
+/*
+ * Ask the transport to verify the outcome of a device state transfer.
+ *
+ * Returns the transport op's result, or -ENOSYS (with @errp set) when the
+ * transport does not provide the op.
+ */
+int vhost_check_device_state(struct vhost_dev *dev, Error **errp)
+{
+    if (!dev->vhost_ops->vhost_check_device_state) {
+        error_setg(errp,
+                   "vhost transport does not support migration state transfer");
+        return -ENOSYS;
+    }
+
+    return dev->vhost_ops->vhost_check_device_state(dev, errp);
+}
+
+/*
+ * Read the back-end's state through a pipe and write it to @f.
+ *
+ * The data is written to @f as a sequence of chunks, each preceded by its
+ * length as a big-endian 32-bit value; a length of 0 marks the end.  Chunks
+ * are at most chunk_size (1 MiB) bytes.
+ *
+ * The device must not be started (asserted before and after the transfer).
+ *
+ * Returns 0 on success, or a negative errno value with @errp set.
+ */
+int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp)
+{
+ /* Maximum chunk size in which to transfer the state */
+ const size_t chunk_size = 1 * 1024 * 1024;
+ g_autofree void *transfer_buf = NULL;
+ g_autoptr(GError) g_err = NULL;
+ int pipe_fds[2], read_fd = -1, write_fd = -1, reply_fd = -1;
+ int ret;
+
+ /* [0] for reading (our end), [1] for writing (back-end's end) */
+ if (!g_unix_open_pipe(pipe_fds, FD_CLOEXEC, &g_err)) {
+ error_setg(errp, "Failed to set up state transfer pipe: %s",
+ g_err->message);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ read_fd = pipe_fds[0];
+ write_fd = pipe_fds[1];
+
+ /*
+ * VHOST_TRANSFER_STATE_PHASE_STOPPED means the device must be stopped.
+ * Ideally, it is suspended, but SUSPEND/RESUME currently do not exist for
+ * vhost-user, so just check that it is stopped at all.
+ */
+ assert(!dev->started);
+
+ /* Transfer ownership of write_fd to the back-end */
+ ret = vhost_set_device_state_fd(dev,
+ VHOST_TRANSFER_STATE_DIRECTION_SAVE,
+ VHOST_TRANSFER_STATE_PHASE_STOPPED,
+ write_fd,
+ &reply_fd,
+ errp);
+ if (ret < 0) {
+ error_prepend(errp, "Failed to initiate state transfer: ");
+ goto fail;
+ }
+
+ /* If the back-end wishes to use a different pipe, switch over */
+ if (reply_fd >= 0) {
+ close(read_fd);
+ read_fd = reply_fd;
+ }
+
+ transfer_buf = g_malloc(chunk_size);
+
+ while (true) {
+ ssize_t read_ret;
+
+ read_ret = RETRY_ON_EINTR(read(read_fd, transfer_buf, chunk_size));
+ if (read_ret < 0) {
+ ret = -errno;
+ error_setg_errno(errp, -ret, "Failed to receive state");
+ goto fail;
+ }
+
+ assert(read_ret <= chunk_size);
+ /* The length is written even for EOF, as the 0 end marker */
+ qemu_put_be32(f, read_ret);
+
+ if (read_ret == 0) {
+ /* EOF */
+ break;
+ }
+
+ qemu_put_buffer(f, transfer_buf, read_ret);
+ }
+
+ /*
+ * Back-end will not really care, but be clean and close our end of the pipe
+ * before inquiring the back-end about whether transfer was successful
+ */
+ close(read_fd);
+ read_fd = -1;
+
+ /* Also, verify that the device is still stopped */
+ assert(!dev->started);
+
+ ret = vhost_check_device_state(dev, errp);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = 0;
+fail:
+ /* Only read_fd is ours to close; write_fd was handed over above */
+ if (read_fd >= 0) {
+ close(read_fd);
+ }
+
+ return ret;
+}
+
+/*
+ * Read back-end state from @f and send it to the back-end through a pipe.
+ *
+ * @f is expected to hold a sequence of chunks, each preceded by its length
+ * as a big-endian 32-bit value, terminated by a length of 0.
+ *
+ * The device must not be started (asserted before and after the transfer).
+ *
+ * Returns 0 on success, or a negative errno value with @errp set.
+ */
+int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp)
+{
+ size_t transfer_buf_size = 0;
+ g_autofree void *transfer_buf = NULL;
+ g_autoptr(GError) g_err = NULL;
+ int pipe_fds[2], read_fd = -1, write_fd = -1, reply_fd = -1;
+ int ret;
+
+ /* [0] for reading (back-end's end), [1] for writing (our end) */
+ if (!g_unix_open_pipe(pipe_fds, FD_CLOEXEC, &g_err)) {
+ error_setg(errp, "Failed to set up state transfer pipe: %s",
+ g_err->message);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ read_fd = pipe_fds[0];
+ write_fd = pipe_fds[1];
+
+ /*
+ * VHOST_TRANSFER_STATE_PHASE_STOPPED means the device must be stopped.
+ * Ideally, it is suspended, but SUSPEND/RESUME currently do not exist for
+ * vhost-user, so just check that it is stopped at all.
+ */
+ assert(!dev->started);
+
+ /* Transfer ownership of read_fd to the back-end */
+ ret = vhost_set_device_state_fd(dev,
+ VHOST_TRANSFER_STATE_DIRECTION_LOAD,
+ VHOST_TRANSFER_STATE_PHASE_STOPPED,
+ read_fd,
+ &reply_fd,
+ errp);
+ if (ret < 0) {
+ error_prepend(errp, "Failed to initiate state transfer: ");
+ goto fail;
+ }
+
+ /* If the back-end wishes to use a different pipe, switch over */
+ if (reply_fd >= 0) {
+ close(write_fd);
+ write_fd = reply_fd;
+ }
+
+ while (true) {
+ /*
+ * NOTE(review): the chunk length comes straight from the stream and is
+ * not bounded here -- confirm whether a cap is wanted
+ */
+ size_t this_chunk_size = qemu_get_be32(f);
+ ssize_t write_ret;
+ const uint8_t *transfer_pointer;
+
+ if (this_chunk_size == 0) {
+ /* End of state */
+ break;
+ }
+
+ /* Grow the buffer on demand; it is reused across chunks */
+ if (transfer_buf_size < this_chunk_size) {
+ transfer_buf = g_realloc(transfer_buf, this_chunk_size);
+ transfer_buf_size = this_chunk_size;
+ }
+
+ if (qemu_get_buffer(f, transfer_buf, this_chunk_size) <
+ this_chunk_size)
+ {
+ error_setg(errp, "Failed to read state");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* write() may be partial, so loop until the whole chunk is sent */
+ transfer_pointer = transfer_buf;
+ while (this_chunk_size > 0) {
+ write_ret = RETRY_ON_EINTR(
+ write(write_fd, transfer_pointer, this_chunk_size)
+ );
+ if (write_ret < 0) {
+ ret = -errno;
+ error_setg_errno(errp, -ret, "Failed to send state");
+ goto fail;
+ } else if (write_ret == 0) {
+ error_setg(errp, "Failed to send state: Connection is closed");
+ ret = -ECONNRESET;
+ goto fail;
+ }
+
+ assert(write_ret <= this_chunk_size);
+ this_chunk_size -= write_ret;
+ transfer_pointer += write_ret;
+ }
+ }
+
+ /*
+ * Close our end, thus ending transfer, before inquiring the back-end about
+ * whether transfer was successful
+ */
+ close(write_fd);
+ write_fd = -1;
+
+ /* Also, verify that the device is still stopped */
+ assert(!dev->started);
+
+ ret = vhost_check_device_state(dev, errp);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = 0;
+fail:
+ /* Only write_fd is ours to close; read_fd was handed over above */
+ if (write_fd >= 0) {
+ close(write_fd);
+ }
+
+ return ret;
+}