Diffstat (limited to 'hw/virtio/vhost-vdpa.c')
-rw-r--r--  hw/virtio/vhost-vdpa.c | 522
1 file changed, 507 insertions(+), 15 deletions(-)
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 6c67d5f034..c5ed7a3779 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -17,12 +17,14 @@
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/vhost-backend.h"
 #include "hw/virtio/virtio-net.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/vhost-vdpa.h"
 #include "exec/address-spaces.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
 #include "trace.h"
 #include "qemu-common.h"
+#include "qapi/error.h"
 
 /*
  * Return one past the end of the end of section. Be careful with uint64_t
@@ -207,6 +209,21 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                          vaddr, section->readonly);
 
     llsize = int128_sub(llend, int128_make64(iova));
+    if (v->shadow_vqs_enabled) {
+        DMAMap mem_region = {
+            .translated_addr = (hwaddr)(uintptr_t)vaddr,
+            .size = int128_get64(llsize) - 1,
+            .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
+        };
+
+        int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
+        if (unlikely(r != IOVA_OK)) {
+            error_report("Can't allocate a mapping (%d)", r);
+            goto fail;
+        }
+
+        iova = mem_region.iova;
+    }
 
     vhost_vdpa_iotlb_batch_begin_once(v);
     ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
@@ -259,6 +276,20 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
 
     llsize = int128_sub(llend, int128_make64(iova));
 
+    if (v->shadow_vqs_enabled) {
+        const DMAMap *result;
+        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
+            section->offset_within_region +
+            (iova - section->offset_within_address_space);
+        DMAMap mem_region = {
+            .translated_addr = (hwaddr)(uintptr_t)vaddr,
+            .size = int128_get64(llsize) - 1,
+        };
+
+        result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
+        iova = result->iova;
+        vhost_iova_tree_remove(v->iova_tree, &mem_region);
+    }
     vhost_vdpa_iotlb_batch_begin_once(v);
     ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
     if (ret) {
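The two listener hunks above are the heart of the GPA-to-IOVA indirection: with SVQ enabled, region_add allocates a device IOVA for each guest memory section instead of reusing the guest physical address, and region_del looks the mapping up by host vaddr before unmapping. The toy allocator below models only that indirection; ToyMap, toy_map_alloc and toy_map_find are illustrative names, a simplified stand-in for QEMU's VhostIOVATree and DMAMap, not its API:

/* Toy model of the SVQ address indirection: a linear IOVA allocator. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint64_t iova;        /* device-visible address, chosen by the allocator */
    uint64_t translated;  /* host virtual address backing the region */
    uint64_t size;        /* mapping length in bytes */
} ToyMap;

static ToyMap maps[16];
static unsigned n_maps;
static uint64_t next_iova = 0x1000;

/* region_add path: pick a free IOVA range for a host-vaddr region */
static int toy_map_alloc(uint64_t translated, uint64_t size, uint64_t *iova)
{
    if (n_maps == sizeof(maps) / sizeof(maps[0])) {
        return -1; /* mirrors the IOVA_OK check after the allocation call */
    }
    maps[n_maps++] = (ToyMap) { .iova = next_iova,
                                .translated = translated, .size = size };
    *iova = next_iova;
    next_iova += size;
    return 0;
}

/* region_del path: the listener only knows the vaddr, so look the IOVA up */
static const ToyMap *toy_map_find(uint64_t translated)
{
    for (unsigned i = 0; i < n_maps; i++) {
        if (translated >= maps[i].translated &&
            translated < maps[i].translated + maps[i].size) {
            return &maps[i];
        }
    }
    return NULL;
}

int main(void)
{
    uint64_t iova;

    if (toy_map_alloc(0x7f2a00000000ULL, 0x10000, &iova) == 0) {
        printf("map:   vaddr 0x7f2a00000000 -> iova 0x%" PRIx64 "\n", iova);
    }
    /* the lookup is guaranteed to hit here, since we just mapped it */
    printf("unmap: iova for vaddr+0x42 is 0x%" PRIx64 "\n",
           toy_map_find(0x7f2a00000042ULL)->iova);
    return 0;
}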
@@ -342,6 +373,55 @@ static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
     return v->index != 0;
 }
 
+static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
+                                       uint64_t *features)
+{
+    int ret;
+
+    ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
+    trace_vhost_vdpa_get_features(dev, *features);
+    return ret;
+}
+
+static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
+                               Error **errp)
+{
+    g_autoptr(GPtrArray) shadow_vqs = NULL;
+    uint64_t dev_features, svq_features;
+    int r;
+    bool ok;
+
+    if (!v->shadow_vqs_enabled) {
+        return 0;
+    }
+
+    r = vhost_vdpa_get_dev_features(hdev, &dev_features);
+    if (r != 0) {
+        error_setg_errno(errp, -r, "Can't get vdpa device features");
+        return r;
+    }
+
+    svq_features = dev_features;
+    ok = vhost_svq_valid_features(svq_features, errp);
+    if (unlikely(!ok)) {
+        return -1;
+    }
+
+    shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
+    for (unsigned n = 0; n < hdev->nvqs; ++n) {
+        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
+
+        if (unlikely(!svq)) {
+            error_setg(errp, "Cannot create svq %u", n);
+            return -1;
+        }
+        g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq));
+    }
+
+    v->shadow_vqs = g_steal_pointer(&shadow_vqs);
+    return 0;
+}
+
 static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
 {
     struct vhost_vdpa *v;
@@ -364,6 +444,10 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
     dev->opaque = opaque ;
     v->listener = vhost_vdpa_memory_listener;
     v->msg_type = VHOST_IOTLB_MSG_V2;
+    ret = vhost_vdpa_init_svq(dev, v, errp);
+    if (ret) {
+        goto err;
+    }
 
     vhost_vdpa_get_iova_range(v);
 
@@ -375,6 +459,10 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
                                VIRTIO_CONFIG_S_DRIVER);
 
     return 0;
+
+err:
+    ram_block_discard_disable(false);
+    return ret;
 }
 
 static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
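vhost_vdpa_init_svq() builds the shadow virtqueue array with an all-or-nothing ownership pattern: the array lives in a g_autoptr() so any early return frees the partially built contents, and g_steal_pointer() transfers ownership only on full success. Below is a standalone GLib sketch of the same pattern; make_elem and build_all are hypothetical stand-ins for vhost_svq_new() and the construction loop:

/* All-or-nothing construction with g_autoptr ownership transfer. */
#include <glib.h>

/* stand-in for a constructor that can fail, such as vhost_svq_new() */
static char *make_elem(unsigned i)
{
    return i < 100 ? g_strdup_printf("element %u", i) : NULL;
}

static GPtrArray *build_all(unsigned n)
{
    g_autoptr(GPtrArray) out = g_ptr_array_new_full(n, g_free);

    for (unsigned i = 0; i < n; i++) {
        char *elem = make_elem(i);
        if (elem == NULL) {
            return NULL;          /* g_autoptr frees 'out' and its elements */
        }
        g_ptr_array_add(out, elem);
    }

    return g_steal_pointer(&out); /* success: caller now owns the array */
}

int main(void)
{
    g_autoptr(GPtrArray) arr = build_all(4);

    g_print("built %u elements\n", arr->len);
    return 0;
}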
@@ -445,8 +533,14 @@ static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
 
 static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
 {
+    struct vhost_vdpa *v = dev->opaque;
     int i;
 
+    if (v->shadow_vqs_enabled) {
+        /* FIXME SVQ is not compatible with host notifiers mr */
+        return;
+    }
+
     for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
         if (vhost_vdpa_host_notifier_init(dev, i)) {
             goto err;
@@ -460,6 +554,21 @@ err:
     return;
 }
 
+static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    size_t idx;
+
+    if (!v->shadow_vqs) {
+        return;
+    }
+
+    for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
+        vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
+    }
+    g_ptr_array_free(v->shadow_vqs, true);
+}
+
 static int vhost_vdpa_cleanup(struct vhost_dev *dev)
 {
     struct vhost_vdpa *v;
@@ -468,6 +577,7 @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev)
     trace_vhost_vdpa_cleanup(dev, v);
     vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
     memory_listener_unregister(&v->listener);
+    vhost_vdpa_svq_cleanup(dev);
 
     dev->opaque = NULL;
     ram_block_discard_disable(false);
@@ -510,12 +620,29 @@ static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
 
 static int vhost_vdpa_set_features(struct vhost_dev *dev, uint64_t features)
 {
+    struct vhost_vdpa *v = dev->opaque;
     int ret;
 
     if (vhost_vdpa_one_time_request(dev)) {
         return 0;
     }
 
+    if (v->shadow_vqs_enabled) {
+        if ((v->acked_features ^ features) == BIT_ULL(VHOST_F_LOG_ALL)) {
+            /*
+             * QEMU is just trying to enable or disable logging. SVQ handles
+             * this separately, so no need to forward this.
+             */
+            v->acked_features = features;
+            return 0;
+        }
+
+        v->acked_features = features;
+
+        /* We must not ack _F_LOG if SVQ is enabled */
+        features &= ~BIT_ULL(VHOST_F_LOG_ALL);
+    }
+
     trace_vhost_vdpa_set_features(dev, features);
     ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
     if (ret) {
@@ -559,11 +686,26 @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
     return ret;
 }
 
+static void vhost_vdpa_reset_svq(struct vhost_vdpa *v)
+{
+    if (!v->shadow_vqs_enabled) {
+        return;
+    }
+
+    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
+        vhost_svq_stop(svq);
+    }
+}
+
 static int vhost_vdpa_reset_device(struct vhost_dev *dev)
 {
+    struct vhost_vdpa *v = dev->opaque;
     int ret;
     uint8_t status = 0;
 
+    vhost_vdpa_reset_svq(v);
+
     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
     trace_vhost_vdpa_reset_device(dev, status);
     return ret;
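The set_features hunk relies on an XOR idiom: (acked ^ requested) equals BIT_ULL(VHOST_F_LOG_ALL) exactly when the logging bit is the only bit that changed, which lets SVQ absorb dirty-log toggling without forwarding anything to the device. A self-contained demonstration, assuming only that VHOST_F_LOG_ALL is bit 26 as in the vhost UAPI headers:

/* Detecting a single-bit feature toggle with XOR. */
#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(nr)     (1ULL << (nr))
#define VHOST_F_LOG_ALL 26

int main(void)
{
    uint64_t acked = BIT_ULL(32) | BIT_ULL(27);      /* features acked so far */
    uint64_t req = acked | BIT_ULL(VHOST_F_LOG_ALL); /* same, plus logging */

    if ((acked ^ req) == BIT_ULL(VHOST_F_LOG_ALL)) {
        puts("only logging toggled: SVQ handles it, nothing is forwarded");
    }

    /* Either way, the device must never be asked to ack _F_LOG_ALL: */
    req &= ~BIT_ULL(VHOST_F_LOG_ALL);
    printf("features forwarded to the device: 0x%llx\n",
           (unsigned long long)req);
    return 0;
}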
@@ -647,15 +789,311 @@ static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
     return ret;
 }
 
+static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
+                                         struct vhost_vring_state *ring)
+{
+    trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
+}
+
+static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
+                                         struct vhost_vring_file *file)
+{
+    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
+}
+
+static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
+                                         struct vhost_vring_file *file)
+{
+    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
+}
+
+static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
+                                         struct vhost_vring_addr *addr)
+{
+    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
+                                    addr->desc_user_addr, addr->used_user_addr,
+                                    addr->avail_user_addr,
+                                    addr->log_guest_addr);
+
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
+
+}
+
+/**
+ * Set the shadow virtqueue descriptors to the device
+ *
+ * @dev: The vhost device model
+ * @svq: The shadow virtqueue
+ * @idx: The index of the virtqueue in the vhost device
+ * @errp: Error
+ *
+ * Note that this function does not rewind the kick file descriptor if it
+ * cannot set the call one.
+ */
+static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
+                                  VhostShadowVirtqueue *svq, unsigned idx,
+                                  Error **errp)
+{
+    struct vhost_vring_file file = {
+        .index = dev->vq_index + idx,
+    };
+    const EventNotifier *event_notifier = &svq->hdev_kick;
+    int r;
+
+    file.fd = event_notifier_get_fd(event_notifier);
+    r = vhost_vdpa_set_vring_dev_kick(dev, &file);
+    if (unlikely(r != 0)) {
+        error_setg_errno(errp, -r, "Can't set device kick fd");
+        return r;
+    }
+
+    event_notifier = &svq->hdev_call;
+    file.fd = event_notifier_get_fd(event_notifier);
+    r = vhost_vdpa_set_vring_dev_call(dev, &file);
+    if (unlikely(r != 0)) {
+        error_setg_errno(errp, -r, "Can't set device call fd");
+    }
+
+    return r;
+}
+
+/**
+ * Unmap a SVQ area in the device
+ */
+static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
+                                      const DMAMap *needle)
+{
+    const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
+    hwaddr size;
+    int r;
+
+    if (unlikely(!result)) {
+        error_report("Unable to find SVQ address to unmap");
+        return false;
+    }
+
+    size = ROUND_UP(result->size, qemu_real_host_page_size);
+    r = vhost_vdpa_dma_unmap(v, result->iova, size);
+    return r == 0;
+}
+
+static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
+                                       const VhostShadowVirtqueue *svq)
+{
+    DMAMap needle = {};
+    struct vhost_vdpa *v = dev->opaque;
+    struct vhost_vring_addr svq_addr;
+    bool ok;
+
+    vhost_svq_get_vring_addr(svq, &svq_addr);
+
+    needle.translated_addr = svq_addr.desc_user_addr;
+    ok = vhost_vdpa_svq_unmap_ring(v, &needle);
+    if (unlikely(!ok)) {
+        return false;
+    }
+
+    needle.translated_addr = svq_addr.used_user_addr;
+    return vhost_vdpa_svq_unmap_ring(v, &needle);
+}
+
+/**
+ * Map the SVQ area in the device
+ *
+ * @v: Vhost-vdpa device
+ * @needle: The area to search iova
+ * @errp: Error pointer
+ */
+static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
+                                    Error **errp)
+{
+    int r;
+
+    r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
+    if (unlikely(r != IOVA_OK)) {
+        error_setg(errp, "Cannot allocate iova (%d)", r);
+        return false;
+    }
+
+    r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1,
+                           (void *)(uintptr_t)needle->translated_addr,
+                           needle->perm == IOMMU_RO);
+    if (unlikely(r != 0)) {
+        error_setg_errno(errp, -r, "Cannot map region to device");
+        vhost_iova_tree_remove(v->iova_tree, needle);
+    }
+
+    return r == 0;
+}
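vhost_vdpa_svq_unmap_ring() rounds the stored size up to the host page size before unmapping, matching the page granularity the rings were mapped with. A minimal standalone check of the power-of-two ROUND_UP idiom (the macro below is a local sketch, not QEMU's exact definition):

/* Rounding a size up to a power-of-two alignment. */
#include <stdint.h>
#include <stdio.h>

#define ROUND_UP(n, d) (((n) + (d) - 1) & -(0ULL + (d)))

int main(void)
{
    uint64_t page = 4096;

    /* a 6 KiB ring occupies two 4 KiB pages */
    printf("ROUND_UP(6144, 4096) = %llu\n",
           (unsigned long long)ROUND_UP(6144ULL, page));
    /* already aligned sizes are unchanged */
    printf("ROUND_UP(8192, 4096) = %llu\n",
           (unsigned long long)ROUND_UP(8192ULL, page));
    return 0;
}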
+
+/**
+ * Map the shadow virtqueue rings in the device
+ *
+ * @dev: The vhost device
+ * @svq: The shadow virtqueue
+ * @addr: Assigned IOVA addresses
+ * @errp: Error pointer
+ */
+static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
+                                     const VhostShadowVirtqueue *svq,
+                                     struct vhost_vring_addr *addr,
+                                     Error **errp)
+{
+    DMAMap device_region, driver_region;
+    struct vhost_vring_addr svq_addr;
+    struct vhost_vdpa *v = dev->opaque;
+    size_t device_size = vhost_svq_device_area_size(svq);
+    size_t driver_size = vhost_svq_driver_area_size(svq);
+    size_t avail_offset;
+    bool ok;
+
+    ERRP_GUARD();
+    vhost_svq_get_vring_addr(svq, &svq_addr);
+
+    driver_region = (DMAMap) {
+        .translated_addr = svq_addr.desc_user_addr,
+        .size = driver_size - 1,
+        .perm = IOMMU_RO,
+    };
+    ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
+    if (unlikely(!ok)) {
+        error_prepend(errp, "Cannot create vq driver region: ");
+        return false;
+    }
+    addr->desc_user_addr = driver_region.iova;
+    avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
+    addr->avail_user_addr = driver_region.iova + avail_offset;
+
+    device_region = (DMAMap) {
+        .translated_addr = svq_addr.used_user_addr,
+        .size = device_size - 1,
+        .perm = IOMMU_RW,
+    };
+    ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
+    if (unlikely(!ok)) {
+        error_prepend(errp, "Cannot create vq device region: ");
+        vhost_vdpa_svq_unmap_ring(v, &driver_region);
+    }
+    addr->used_user_addr = device_region.iova;
+
+    return ok;
+}
+
+static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
+                                 VhostShadowVirtqueue *svq, unsigned idx,
+                                 Error **errp)
+{
+    uint16_t vq_index = dev->vq_index + idx;
+    struct vhost_vring_state s = {
+        .index = vq_index,
+    };
+    int r;
+
+    r = vhost_vdpa_set_dev_vring_base(dev, &s);
+    if (unlikely(r)) {
+        error_setg_errno(errp, -r, "Cannot set vring base");
+        return false;
+    }
+
+    r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp);
+    return r == 0;
+}
+
+static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    Error *err = NULL;
+    unsigned i;
+
+    if (!v->shadow_vqs) {
+        return true;
+    }
+
+    for (i = 0; i < v->shadow_vqs->len; ++i) {
+        VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
+        struct vhost_vring_addr addr = {
+            .index = i,
+        };
+        int r;
+        bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
+        if (unlikely(!ok)) {
+            goto err;
+        }
+
+        vhost_svq_start(svq, dev->vdev, vq);
+        ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
+        if (unlikely(!ok)) {
+            goto err_map;
+        }
+
+        /* Override vring GPA set by vhost subsystem */
+        r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
+        if (unlikely(r != 0)) {
+            error_setg_errno(&err, -r, "Cannot set device address");
+            goto err_set_addr;
+        }
+    }
+
+    return true;
+
+err_set_addr:
+    vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));
+
+err_map:
+    vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));
+
+err:
+    error_reportf_err(err, "Cannot setup SVQ %u: ", i);
+    for (unsigned j = 0; j < i; ++j) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
+        vhost_vdpa_svq_unmap_rings(dev, svq);
+        vhost_svq_stop(svq);
+    }
+
+    return false;
+}
+
+static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+
+    if (!v->shadow_vqs) {
+        return true;
+    }
+
+    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
+        bool ok = vhost_vdpa_svq_unmap_rings(dev, svq);
+        if (unlikely(!ok)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
 static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
 {
     struct vhost_vdpa *v = dev->opaque;
+    bool ok;
     trace_vhost_vdpa_dev_start(dev, started);
 
     if (started) {
         vhost_vdpa_host_notifiers_init(dev);
+        ok = vhost_vdpa_svqs_start(dev);
+        if (unlikely(!ok)) {
+            return -1;
+        }
         vhost_vdpa_set_vring_ready(dev);
     } else {
+        ok = vhost_vdpa_svqs_stop(dev);
+        if (unlikely(!ok)) {
+            return -1;
+        }
         vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
     }
 
@@ -679,7 +1117,8 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
 static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                    struct vhost_log *log)
 {
-    if (vhost_vdpa_one_time_request(dev)) {
+    struct vhost_vdpa *v = dev->opaque;
+    if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) {
         return 0;
     }
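vhost_vdpa_svqs_start() unwinds failure in two stages: the goto labels undo the partially completed steps of the queue that failed, and the loop under err: tears down every queue that had already fully started. A stripped-down model of that shape; setup_queue, map_queue, unmap_queue and stop_queue are hypothetical stand-ins for the real helpers:

/* Two-stage unwind: per-iteration labels, then a loop over prior items. */
#include <stdbool.h>
#include <stdio.h>

static bool setup_queue(unsigned i) { return true; }
static bool map_queue(unsigned i)   { return i != 2; } /* queue 2 fails */
static void unmap_queue(unsigned i) { printf("unmap %u\n", i); }
static void stop_queue(unsigned i)  { printf("stop  %u\n", i); }

static bool start_all(unsigned n)
{
    unsigned i;

    for (i = 0; i < n; i++) {
        if (!setup_queue(i)) {
            goto err;
        }
        if (!map_queue(i)) {
            goto err_map;   /* setup succeeded: only stop this queue */
        }
    }
    return true;

err_map:
    stop_queue(i);          /* undo the failing iteration's progress */
err:
    for (unsigned j = 0; j < i; j++) {
        unmap_queue(j);     /* fully started queues: undo everything */
        stop_queue(j);
    }
    return false;
}

int main(void)
{
    printf("start_all: %s\n", start_all(4) ? "ok" : "failed");
    return 0;
}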
@@ -691,11 +1130,17 @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
 static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                        struct vhost_vring_addr *addr)
 {
-    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
-                                    addr->desc_user_addr, addr->used_user_addr,
-                                    addr->avail_user_addr,
-                                    addr->log_guest_addr);
-    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
+    struct vhost_vdpa *v = dev->opaque;
+
+    if (v->shadow_vqs_enabled) {
+        /*
+         * Device vring addr was set at device start. SVQ base is handled by
+         * VirtQueue code.
+         */
+        return 0;
+    }
+
+    return vhost_vdpa_set_vring_dev_addr(dev, addr);
 }
 
 static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
@@ -708,15 +1153,41 @@ static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
 static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                      struct vhost_vring_state *ring)
 {
-    trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
-    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
+    struct vhost_vdpa *v = dev->opaque;
+
+    if (v->shadow_vqs_enabled) {
+        /*
+         * Device vring base was set at device start. SVQ base is handled by
+         * VirtQueue code.
+         */
+        return 0;
+    }
+
+    return vhost_vdpa_set_dev_vring_base(dev, ring);
 }
 
 static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                      struct vhost_vring_state *ring)
 {
+    struct vhost_vdpa *v = dev->opaque;
     int ret;
 
+    if (v->shadow_vqs_enabled) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs,
+                                                      ring->index);
+
+        /*
+         * Setting base as last used idx, so destination will see as available
+         * all the entries that the device did not use, including the in-flight
+         * processing ones.
+         *
+         * TODO: This is ok for networking, but other kinds of devices might
+         * have problems with these retransmissions.
+         */
+        ring->num = svq->last_used_idx;
+        return 0;
+    }
+
     ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
     trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
     return ret;
@@ -725,24 +1196,45 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
 static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                      struct vhost_vring_file *file)
 {
-    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
-    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
+    struct vhost_vdpa *v = dev->opaque;
+    int vdpa_idx = file->index - dev->vq_index;
+
+    if (v->shadow_vqs_enabled) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
+        vhost_svq_set_svq_kick_fd(svq, file->fd);
+        return 0;
+    } else {
+        return vhost_vdpa_set_vring_dev_kick(dev, file);
+    }
 }
 
 static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                      struct vhost_vring_file *file)
 {
-    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
-    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
+    struct vhost_vdpa *v = dev->opaque;
+
+    if (v->shadow_vqs_enabled) {
+        int vdpa_idx = file->index - dev->vq_index;
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
+
+        vhost_svq_set_svq_call_fd(svq, file->fd);
+        return 0;
+    } else {
+        return vhost_vdpa_set_vring_dev_call(dev, file);
+    }
 }
 
 static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                    uint64_t *features)
 {
-    int ret;
+    struct vhost_vdpa *v = dev->opaque;
+    int ret = vhost_vdpa_get_dev_features(dev, features);
+
+    if (ret == 0 && v->shadow_vqs_enabled) {
+        /* Add SVQ logging capabilities */
+        *features |= BIT_ULL(VHOST_F_LOG_ALL);
+    }
 
-    ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
-    trace_vhost_vdpa_get_features(dev, *features);
     return ret;
 }
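Under SVQ, get_vring_base above reports the shadow queue's last_used_idx rather than querying the device: entries between the used and avail indexes are still in flight, and restarting the destination from used_idx re-exposes them as available, trading retransmission for completeness. A toy illustration of the index arithmetic (free-running uint16_t counters, as in split virtqueues):

/* Why reporting used_idx as the vring base preserves in-flight buffers. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint16_t avail_idx = 10;  /* guest made 10 buffers available */
    uint16_t used_idx = 7;    /* device consumed only 7 of them */

    /* Report used_idx as the migration base ... */
    uint16_t base = used_idx;

    /* ... so the destination sees [used_idx, avail_idx) as available again
     * and the 3 in-flight buffers are retransmitted instead of lost. */
    printf("base=%u, re-exposed in-flight entries=%u\n",
           base, (uint16_t)(avail_idx - used_idx));
    return 0;
}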