diff options
author | Stefan Hajnoczi <stefanha@redhat.com> | 2022-09-02 13:23:32 -0400 |
---|---|---|
committer | Stefan Hajnoczi <stefanha@redhat.com> | 2022-09-02 13:23:32 -0400 |
commit | fccffd53719255cafd4bc89f5b0bda1cd37924f4 (patch) | |
tree | 1f43f28083615001d6f3b1aeafd6b0be88f60de7 | |
parent | 9fd704da6809f3e01d0283f0d6d619022d481fb9 (diff) | |
parent | 36a894aeb64a2e02871016da1c37d4a4ca109182 (diff) |
Merge tag 'net-pull-request' of https://github.com/jasowang/qemu into staging
# -----BEGIN PGP SIGNATURE-----
# Version: GnuPG v1
#
# iQEcBAABAgAGBQJjEaMLAAoJEO8Ells5jWIRoRwIAJpwefLgH/+lkd1mtWqxBhuS
# KLa0bkcS6nIGnjQzNX/XWipu/5tMbBLzbaKw0myodvoK6Yx0MFog1cWf6gLHuvWH
# Jy3ONUrF9umHYuOa9sJJtXv/aP7neNJSB3RW67BaiLCLkaetDj9lLciA/KKMvb/I
# JNFtuLVTPibZ5iVTjvifFWmJD/Yk0P8mlrH5yfrA3B2EaaWf1es0GWobGIwwLu9s
# ZSqjhMDAhfOW2E1sBh7jFRh4lJX1t1jRhyIGx2bOXevPx2hFHq6FSq+yuJ9OsZvO
# wC8mC4DD+fovypDWbv3WLslIejM0+THD8KuBQnZtKX5Mbhc+0cELpIFLUdH95TM=
# =eMUT
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 02 Sep 2022 02:30:35 EDT
# gpg: using RSA key EF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>" [full]
# Primary key fingerprint: 215D 46F4 8246 689E C77F 3562 EF04 965B 398D 6211
* tag 'net-pull-request' of https://github.com/jasowang/qemu: (21 commits)
net: tulip: Restrict DMA engine to memories
net/colo.c: Fix the pointer issue reported by Coverity.
vdpa: Delete CVQ migration blocker
vdpa: Add virtio-net mac address via CVQ at start
vhost_net: add NetClientState->load() callback
vdpa: extract vhost_vdpa_net_cvq_add from vhost_vdpa_net_handle_ctrl_avail
vdpa: Move command buffers map to start of net device
vdpa: add net_vhost_vdpa_cvq_info NetClientInfo
vhost_net: Add NetClientInfo stop callback
vhost_net: Add NetClientInfo start callback
vhost: Do not depend on !NULL VirtQueueElement on vhost_svq_flush
vhost: Delete useless read memory barrier
vhost: use SVQ element ndescs instead of opaque data for desc validation
vhost: stop transfer elem ownership in vhost_handle_guest_kick
vdpa: Use ring hwaddr at vhost_vdpa_svq_unmap_ring
vhost: Always store new kick fd on vhost_svq_set_svq_kick_fd
vdpa: Make SVQ vring unmapping return void
vdpa: Remove SVQ vring from iova_tree at shutdown
util: accept iova_tree_remove_parameter by value
vdpa: do not save failed dma maps in SVQ iova tree
...
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
-rw-r--r-- | hw/i386/intel_iommu.c | 6 | ||||
-rw-r--r-- | hw/net/tulip.c | 4 | ||||
-rw-r--r-- | hw/net/vhost_net.c | 17 | ||||
-rw-r--r-- | hw/virtio/vhost-iova-tree.c | 2 | ||||
-rw-r--r-- | hw/virtio/vhost-iova-tree.h | 2 | ||||
-rw-r--r-- | hw/virtio/vhost-shadow-virtqueue.c | 31 | ||||
-rw-r--r-- | hw/virtio/vhost-vdpa.c | 90 | ||||
-rw-r--r-- | include/hw/virtio/vhost-vdpa.h | 1 | ||||
-rw-r--r-- | include/net/net.h | 6 | ||||
-rw-r--r-- | include/qemu/iova-tree.h | 2 | ||||
-rw-r--r-- | net/colo.c | 25 | ||||
-rw-r--r-- | net/colo.h | 1 | ||||
-rw-r--r-- | net/trace-events | 2 | ||||
-rw-r--r-- | net/vhost-vdpa.c | 228 | ||||
-rw-r--r-- | util/iova-tree.c | 4 |
15 files changed, 248 insertions, 173 deletions
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 2162394e08..05d53a1aa9 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -1187,7 +1187,7 @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) return ret; } /* Drop any existing mapping */ - iova_tree_remove(as->iova_tree, &target); + iova_tree_remove(as->iova_tree, target); /* Recover the correct type */ event->type = IOMMU_NOTIFIER_MAP; entry->perm = cache_perm; @@ -1200,7 +1200,7 @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask); return 0; } - iova_tree_remove(as->iova_tree, &target); + iova_tree_remove(as->iova_tree, target); } trace_vtd_page_walk_one(info->domain_id, entry->iova, @@ -3563,7 +3563,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) map.iova = n->start; map.size = size; - iova_tree_remove(as->iova_tree, &map); + iova_tree_remove(as->iova_tree, map); } static void vtd_address_space_unmap_all(IntelIOMMUState *s) diff --git a/hw/net/tulip.c b/hw/net/tulip.c index 097e905bec..b9e42c322a 100644 --- a/hw/net/tulip.c +++ b/hw/net/tulip.c @@ -70,7 +70,7 @@ static const VMStateDescription vmstate_pci_tulip = { static void tulip_desc_read(TULIPState *s, hwaddr p, struct tulip_descriptor *desc) { - const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; + const MemTxAttrs attrs = { .memory = true }; if (s->csr[0] & CSR0_DBO) { ldl_be_pci_dma(&s->dev, p, &desc->status, attrs); @@ -88,7 +88,7 @@ static void tulip_desc_read(TULIPState *s, hwaddr p, static void tulip_desc_write(TULIPState *s, hwaddr p, struct tulip_descriptor *desc) { - const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; + const MemTxAttrs attrs = { .memory = true }; if (s->csr[0] & CSR0_DBO) { stl_be_pci_dma(&s->dev, p, desc->status, attrs); diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index ccac5b7a64..d28f8b974b 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -244,6 +244,13 @@ static int vhost_net_start_one(struct vhost_net *net, struct vhost_vring_file file = { }; int r; + if (net->nc->info->start) { + r = net->nc->info->start(net->nc); + if (r < 0) { + return r; + } + } + r = vhost_dev_enable_notifiers(&net->dev, dev); if (r < 0) { goto fail_notifiers; @@ -274,6 +281,13 @@ static int vhost_net_start_one(struct vhost_net *net, } } } + + if (net->nc->info->load) { + r = net->nc->info->load(net->nc); + if (r < 0) { + goto fail; + } + } return 0; fail: file.fd = -1; @@ -313,6 +327,9 @@ static void vhost_net_stop_one(struct vhost_net *net, net->nc->info->poll(net->nc, true); } vhost_dev_stop(&net->dev, dev); + if (net->nc->info->stop) { + net->nc->info->stop(net->nc); + } vhost_dev_disable_notifiers(&net->dev, dev); } diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c index 67bf6d57ab..3d03395a77 100644 --- a/hw/virtio/vhost-iova-tree.c +++ b/hw/virtio/vhost-iova-tree.c @@ -104,7 +104,7 @@ int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map) * @iova_tree: The vhost iova tree * @map: The map to remove */ -void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map) +void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map) { iova_tree_remove(iova_tree->iova_taddr_map, map); } diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h index 6a4f24e0f9..4adfd79ff0 100644 --- a/hw/virtio/vhost-iova-tree.h +++ b/hw/virtio/vhost-iova-tree.h @@ -22,6 +22,6 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete); const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree, const DMAMap *map); int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map); -void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map); +void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map); #endif diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index e4956728dd..e8e5bbc368 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -233,9 +233,6 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) /** * Add an element to a SVQ. * - * The caller must check that there is enough slots for the new element. It - * takes ownership of the element: In case of failure not ENOSPC, it is free. - * * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full */ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, @@ -252,7 +249,6 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head); if (unlikely(!ok)) { - g_free(elem); return -EINVAL; } @@ -293,7 +289,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) virtio_queue_set_notification(svq->vq, false); while (true) { - VirtQueueElement *elem; + g_autofree VirtQueueElement *elem; int r; if (svq->next_guest_avail_elem) { @@ -324,12 +320,14 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) * queue the current guest descriptor and ignore kicks * until some elements are used. */ - svq->next_guest_avail_elem = elem; + svq->next_guest_avail_elem = g_steal_pointer(&elem); } /* VQ is full or broken, just return and ignore kicks */ return; } + /* elem belongs to SVQ or external caller now */ + elem = NULL; } virtio_queue_set_notification(svq->vq, true); @@ -416,7 +414,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, return NULL; } - if (unlikely(!svq->desc_state[used_elem.id].elem)) { + if (unlikely(!svq->desc_state[used_elem.id].ndescs)) { qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used, but it was not available", svq->vdev->name, used_elem.id); @@ -424,6 +422,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, } num = svq->desc_state[used_elem.id].ndescs; + svq->desc_state[used_elem.id].ndescs = 0; last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); svq->desc_next[last_used_chain] = svq->free_head; svq->free_head = used_elem.id; @@ -500,20 +499,20 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, size_t vhost_svq_poll(VhostShadowVirtqueue *svq) { int64_t start_us = g_get_monotonic_time(); + uint32_t len; + do { - uint32_t len; - VirtQueueElement *elem = vhost_svq_get_buf(svq, &len); - if (elem) { - return len; + if (vhost_svq_more_used(svq)) { + break; } if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { return 0; } - - /* Make sure we read new used_idx */ - smp_rmb(); } while (true); + + vhost_svq_get_buf(svq, &len); + return len; } /** @@ -602,13 +601,13 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) event_notifier_set_handler(svq_kick, NULL); } + event_notifier_init_fd(svq_kick, svq_kick_fd); /* * event_notifier_set_handler already checks for guest's notifications if * they arrive at the new file descriptor in the switch, so there is no * need to explicitly check for them. */ if (poll_start) { - event_notifier_init_fd(svq_kick, svq_kick_fd); event_notifier_set(svq_kick); event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier); } @@ -655,7 +654,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, */ void vhost_svq_stop(VhostShadowVirtqueue *svq) { - event_notifier_set_handler(&svq->svq_kick, NULL); + vhost_svq_set_svq_kick_fd(svq, VHOST_FILE_UNBIND); g_autofree VirtQueueElement *next_avail_elem = NULL; if (!svq->vq) { diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 3ff9ce3501..7468e44b87 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -176,6 +176,7 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener) static void vhost_vdpa_listener_region_add(MemoryListener *listener, MemoryRegionSection *section) { + DMAMap mem_region = {}; struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); hwaddr iova; Int128 llend, llsize; @@ -212,13 +213,13 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, llsize = int128_sub(llend, int128_make64(iova)); if (v->shadow_vqs_enabled) { - DMAMap mem_region = { - .translated_addr = (hwaddr)(uintptr_t)vaddr, - .size = int128_get64(llsize) - 1, - .perm = IOMMU_ACCESS_FLAG(true, section->readonly), - }; + int r; - int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); + mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr, + mem_region.size = int128_get64(llsize) - 1, + mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly), + + r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); if (unlikely(r != IOVA_OK)) { error_report("Can't allocate a mapping (%d)", r); goto fail; @@ -232,11 +233,16 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, vaddr, section->readonly); if (ret) { error_report("vhost vdpa map fail!"); - goto fail; + goto fail_map; } return; +fail_map: + if (v->shadow_vqs_enabled) { + vhost_iova_tree_remove(v->iova_tree, mem_region); + } + fail: /* * On the initfn path, store the first error in the container so we @@ -289,8 +295,12 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, }; result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region); + if (!result) { + /* The memory listener map wasn't mapped */ + return; + } iova = result->iova; - vhost_iova_tree_remove(v->iova_tree, result); + vhost_iova_tree_remove(v->iova_tree, *result); } vhost_vdpa_iotlb_batch_begin_once(v); ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); @@ -874,41 +884,41 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, /** * Unmap a SVQ area in the device */ -static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, - const DMAMap *needle) +static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr) { - const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle); + const DMAMap needle = { + .translated_addr = addr, + }; + const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, &needle); hwaddr size; int r; if (unlikely(!result)) { error_report("Unable to find SVQ address to unmap"); - return false; + return; } size = ROUND_UP(result->size, qemu_real_host_page_size()); r = vhost_vdpa_dma_unmap(v, result->iova, size); - return r == 0; + if (unlikely(r < 0)) { + error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); + return; + } + + vhost_iova_tree_remove(v->iova_tree, *result); } -static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, +static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, const VhostShadowVirtqueue *svq) { - DMAMap needle = {}; struct vhost_vdpa *v = dev->opaque; struct vhost_vring_addr svq_addr; - bool ok; vhost_svq_get_vring_addr(svq, &svq_addr); - needle.translated_addr = svq_addr.desc_user_addr; - ok = vhost_vdpa_svq_unmap_ring(v, &needle); - if (unlikely(!ok)) { - return false; - } + vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr); - needle.translated_addr = svq_addr.used_user_addr; - return vhost_vdpa_svq_unmap_ring(v, &needle); + vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr); } /** @@ -934,7 +944,7 @@ static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, needle->perm == IOMMU_RO); if (unlikely(r != 0)) { error_setg_errno(errp, -r, "Cannot map region to device"); - vhost_iova_tree_remove(v->iova_tree, needle); + vhost_iova_tree_remove(v->iova_tree, *needle); } return r == 0; @@ -986,7 +996,7 @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev, ok = vhost_vdpa_svq_map_ring(v, &device_region, errp); if (unlikely(!ok)) { error_prepend(errp, "Cannot create vq device region: "); - vhost_vdpa_svq_unmap_ring(v, &driver_region); + vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr); } addr->used_user_addr = device_region.iova; @@ -1023,13 +1033,6 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) return true; } - if (v->migration_blocker) { - int r = migrate_add_blocker(v->migration_blocker, &err); - if (unlikely(r < 0)) { - return false; - } - } - for (i = 0; i < v->shadow_vqs->len; ++i) { VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); @@ -1072,33 +1075,21 @@ err: vhost_svq_stop(svq); } - if (v->migration_blocker) { - migrate_del_blocker(v->migration_blocker); - } - return false; } -static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev) +static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) { struct vhost_vdpa *v = dev->opaque; if (!v->shadow_vqs) { - return true; + return; } for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); - bool ok = vhost_vdpa_svq_unmap_rings(dev, svq); - if (unlikely(!ok)) { - return false; - } - } - - if (v->migration_blocker) { - migrate_del_blocker(v->migration_blocker); + vhost_vdpa_svq_unmap_rings(dev, svq); } - return true; } static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) @@ -1115,10 +1106,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) } vhost_vdpa_set_vring_ready(dev); } else { - ok = vhost_vdpa_svqs_stop(dev); - if (unlikely(!ok)) { - return -1; - } + vhost_vdpa_svqs_stop(dev); vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); } diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index d10a89303e..1111d85643 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -35,7 +35,6 @@ typedef struct vhost_vdpa { bool shadow_vqs_enabled; /* IOVA mapping used by the Shadow Virtqueue */ VhostIOVATree *iova_tree; - Error *migration_blocker; GPtrArray *shadow_vqs; const VhostShadowVirtqueueOps *shadow_vq_ops; void *shadow_vq_ops_opaque; diff --git a/include/net/net.h b/include/net/net.h index 523136c7ac..81d0b21def 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -44,6 +44,9 @@ typedef struct NICConf { typedef void (NetPoll)(NetClientState *, bool enable); typedef bool (NetCanReceive)(NetClientState *); +typedef int (NetStart)(NetClientState *); +typedef int (NetLoad)(NetClientState *); +typedef void (NetStop)(NetClientState *); typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); typedef void (NetCleanup) (NetClientState *); @@ -71,6 +74,9 @@ typedef struct NetClientInfo { NetReceive *receive_raw; NetReceiveIOV *receive_iov; NetCanReceive *can_receive; + NetStart *start; + NetLoad *load; + NetStop *stop; NetCleanup *cleanup; LinkStatusChanged *link_status_changed; QueryRxFilter *query_rx_filter; diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h index 16bbfdf5f8..8528e5c98f 100644 --- a/include/qemu/iova-tree.h +++ b/include/qemu/iova-tree.h @@ -73,7 +73,7 @@ int iova_tree_insert(IOVATree *tree, const DMAMap *map); * all the mappings that are included in the provided range will be * removed from the tree. Here map->translated_addr is meaningless. */ -void iova_tree_remove(IOVATree *tree, const DMAMap *map); +void iova_tree_remove(IOVATree *tree, DMAMap map); /** * iova_tree_find: diff --git a/net/colo.c b/net/colo.c index 6b0ff562ad..fb2c36a026 100644 --- a/net/colo.c +++ b/net/colo.c @@ -44,21 +44,28 @@ int parse_packet_early(Packet *pkt) { int network_length; static const uint8_t vlan[] = {0x81, 0x00}; - uint8_t *data = pkt->data + pkt->vnet_hdr_len; + uint8_t *data = pkt->data; uint16_t l3_proto; ssize_t l2hdr_len; - if (data == NULL) { - trace_colo_proxy_main_vnet_info("This packet is not parsed correctly, " - "pkt->vnet_hdr_len", pkt->vnet_hdr_len); + assert(data); + + /* Check the received vnet_hdr_len then add the offset */ + if ((pkt->vnet_hdr_len > sizeof(struct virtio_net_hdr_v1_hash)) || + (pkt->size < sizeof(struct eth_header) + sizeof(struct vlan_header) + + pkt->vnet_hdr_len)) { + /* + * The received remote packet maybe misconfiguration here, + * Please enable/disable filter module's the vnet_hdr flag at + * the same time. + */ + trace_colo_proxy_main_vnet_info("This received packet load wrong ", + pkt->vnet_hdr_len, pkt->size); return 1; } - l2hdr_len = eth_get_l2_hdr_length(data); + data += pkt->vnet_hdr_len; - if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) { - trace_colo_proxy_main("pkt->size < ETH_HLEN"); - return 1; - } + l2hdr_len = eth_get_l2_hdr_length(data); /* * TODO: support vlan. diff --git a/net/colo.h b/net/colo.h index 8b3e8d5a83..22fc3031f7 100644 --- a/net/colo.h +++ b/net/colo.h @@ -18,6 +18,7 @@ #include "qemu/jhash.h" #include "qemu/timer.h" #include "net/eth.h" +#include "standard-headers/linux/virtio_net.h" #define HASHTABLE_MAX_SIZE 16384 diff --git a/net/trace-events b/net/trace-events index 6af927b4b9..823a071bdc 100644 --- a/net/trace-events +++ b/net/trace-events @@ -9,7 +9,7 @@ vhost_user_event(const char *chr, int event) "chr: %s got event: %d" # colo.c colo_proxy_main(const char *chr) ": %s" -colo_proxy_main_vnet_info(const char *sta, int size) ": %s = %d" +colo_proxy_main_vnet_info(const char *sta, uint32_t vnet_hdr, int size) ": %s pkt->vnet_hdr_len = %u, pkt->size = %d" # colo-compare.c colo_compare_main(const char *chr) ": %s" diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 303447a68e..6ce68fcd3f 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -244,7 +244,7 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); } - vhost_iova_tree_remove(tree, map); + vhost_iova_tree_remove(tree, *map); } static size_t vhost_vdpa_net_cvq_cmd_len(void) @@ -263,29 +263,20 @@ static size_t vhost_vdpa_net_cvq_cmd_page_len(void) return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size()); } -/** Copy and map a guest buffer. */ -static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, - const struct iovec *out_data, - size_t out_num, size_t data_len, void *buf, - size_t *written, bool write) +/** Map CVQ buffer. */ +static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, + bool write) { DMAMap map = {}; int r; - if (unlikely(!data_len)) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n", - __func__, write ? "in" : "out"); - return false; - } - - *written = iov_to_buf(out_data, out_num, 0, buf, data_len); map.translated_addr = (hwaddr)(uintptr_t)buf; - map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1; + map.size = size - 1; map.perm = write ? IOMMU_RW : IOMMU_RO, r = vhost_iova_tree_map_alloc(v->iova_tree, &map); if (unlikely(r != IOVA_OK)) { error_report("Cannot map injected element"); - return false; + return r; } r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, @@ -294,63 +285,151 @@ static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, goto dma_map_err; } - return true; + return 0; dma_map_err: - vhost_iova_tree_remove(v->iova_tree, &map); - return false; + vhost_iova_tree_remove(v->iova_tree, map); + return r; } -/** - * Copy the guest element into a dedicated buffer suitable to be sent to NIC - * - * @iov: [0] is the out buffer, [1] is the in one - */ -static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, - VirtQueueElement *elem, - struct iovec *iov) +static int vhost_vdpa_net_cvq_start(NetClientState *nc) { - size_t in_copied; - bool ok; + VhostVDPAState *s; + int r; - iov[0].iov_base = s->cvq_cmd_out_buffer; - ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num, - vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base, - &iov[0].iov_len, false); - if (unlikely(!ok)) { - return false; + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); + + s = DO_UPCAST(VhostVDPAState, nc, nc); + if (!s->vhost_vdpa.shadow_vqs_enabled) { + return 0; } - iov[1].iov_base = s->cvq_cmd_in_buffer; - ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0, - sizeof(virtio_net_ctrl_ack), iov[1].iov_base, - &in_copied, true); - if (unlikely(!ok)) { + r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer, + vhost_vdpa_net_cvq_cmd_page_len(), false); + if (unlikely(r < 0)) { + return r; + } + + r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer, + vhost_vdpa_net_cvq_cmd_page_len(), true); + if (unlikely(r < 0)) { vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); - return false; } - iov[1].iov_len = sizeof(virtio_net_ctrl_ack); - return true; + return r; +} + +static void vhost_vdpa_net_cvq_stop(NetClientState *nc) +{ + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); + + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); + + if (s->vhost_vdpa.shadow_vqs_enabled) { + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer); + } +} + +static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, + size_t in_len) +{ + /* Buffers for the device */ + const struct iovec out = { + .iov_base = s->cvq_cmd_out_buffer, + .iov_len = out_len, + }; + const struct iovec in = { + .iov_base = s->cvq_cmd_in_buffer, + .iov_len = sizeof(virtio_net_ctrl_ack), + }; + VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0); + int r; + + r = vhost_svq_add(svq, &out, 1, &in, 1, NULL); + if (unlikely(r != 0)) { + if (unlikely(r == -ENOSPC)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", + __func__); + } + return r; + } + + /* + * We can poll here since we've had BQL from the time we sent the + * descriptor. Also, we need to take the answer before SVQ pulls by itself, + * when BQL is released + */ + return vhost_svq_poll(svq); } +static int vhost_vdpa_net_load(NetClientState *nc) +{ + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); + const struct vhost_vdpa *v = &s->vhost_vdpa; + const VirtIONet *n; + uint64_t features; + + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); + + if (!v->shadow_vqs_enabled) { + return 0; + } + + n = VIRTIO_NET(v->dev->vdev); + features = n->parent_obj.guest_features; + if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) { + const struct virtio_net_ctrl_hdr ctrl = { + .class = VIRTIO_NET_CTRL_MAC, + .cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET, + }; + char *cursor = s->cvq_cmd_out_buffer; + ssize_t dev_written; + + memcpy(cursor, &ctrl, sizeof(ctrl)); + cursor += sizeof(ctrl); + memcpy(cursor, n->mac, sizeof(n->mac)); + + dev_written = vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + sizeof(n->mac), + sizeof(virtio_net_ctrl_ack)); + if (unlikely(dev_written < 0)) { + return dev_written; + } + + return *((virtio_net_ctrl_ack *)s->cvq_cmd_in_buffer) != VIRTIO_NET_OK; + } + + return 0; +} + +static NetClientInfo net_vhost_vdpa_cvq_info = { + .type = NET_CLIENT_DRIVER_VHOST_VDPA, + .size = sizeof(VhostVDPAState), + .receive = vhost_vdpa_receive, + .start = vhost_vdpa_net_cvq_start, + .load = vhost_vdpa_net_load, + .stop = vhost_vdpa_net_cvq_stop, + .cleanup = vhost_vdpa_cleanup, + .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, + .has_ufo = vhost_vdpa_has_ufo, + .check_peer_type = vhost_vdpa_check_peer_type, +}; + /** * Do not forward commands not supported by SVQ. Otherwise, the device could * accept it and qemu would not know how to update the device model. */ -static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out, - size_t out_num) +static bool vhost_vdpa_net_cvq_validate_cmd(const void *out_buf, size_t len) { struct virtio_net_ctrl_hdr ctrl; - size_t n; - n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl)); - if (unlikely(n < sizeof(ctrl))) { + if (unlikely(len < sizeof(ctrl))) { qemu_log_mask(LOG_GUEST_ERROR, - "%s: invalid legnth of out buffer %zu\n", __func__, n); + "%s: invalid legnth of out buffer %zu\n", __func__, len); return false; } + memcpy(&ctrl, out_buf, sizeof(ctrl)); switch (ctrl.class) { case VIRTIO_NET_CTRL_MAC: switch (ctrl.cmd) { @@ -380,58 +459,45 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, void *opaque) { VhostVDPAState *s = opaque; - size_t in_len, dev_written; + size_t in_len; virtio_net_ctrl_ack status = VIRTIO_NET_ERR; - /* out and in buffers sent to the device */ - struct iovec dev_buffers[2] = { - { .iov_base = s->cvq_cmd_out_buffer }, - { .iov_base = s->cvq_cmd_in_buffer }, + /* Out buffer sent to both the vdpa device and the device model */ + struct iovec out = { + .iov_base = s->cvq_cmd_out_buffer, }; /* in buffer used for device model */ const struct iovec in = { .iov_base = &status, .iov_len = sizeof(status), }; - int r = -EINVAL; + ssize_t dev_written = -EINVAL; bool ok; - ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers); + out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, + s->cvq_cmd_out_buffer, + vhost_vdpa_net_cvq_cmd_len()); + ok = vhost_vdpa_net_cvq_validate_cmd(s->cvq_cmd_out_buffer, out.iov_len); if (unlikely(!ok)) { goto out; } - ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1); - if (unlikely(!ok)) { + dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); + if (unlikely(dev_written < 0)) { goto out; } - r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem); - if (unlikely(r != 0)) { - if (unlikely(r == -ENOSPC)) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", - __func__); - } - goto out; - } - - /* - * We can poll here since we've had BQL from the time we sent the - * descriptor. Also, we need to take the answer before SVQ pulls by itself, - * when BQL is released - */ - dev_written = vhost_svq_poll(svq); if (unlikely(dev_written < sizeof(status))) { error_report("Insufficient written data (%zu)", dev_written); goto out; } - memcpy(&status, dev_buffers[1].iov_base, sizeof(status)); + memcpy(&status, s->cvq_cmd_in_buffer, sizeof(status)); if (status != VIRTIO_NET_OK) { - goto out; + return VIRTIO_NET_ERR; } status = VIRTIO_NET_ERR; - virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1); + virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1); if (status != VIRTIO_NET_OK) { error_report("Bad CVQ processing in model"); } @@ -444,13 +510,7 @@ out: } vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); g_free(elem); - if (dev_buffers[0].iov_base) { - vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base); - } - if (dev_buffers[1].iov_base) { - vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base); - } - return r; + return dev_written < 0 ? dev_written : 0; } static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { @@ -475,7 +535,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, name); } else { - nc = qemu_new_net_control_client(&net_vhost_vdpa_info, peer, + nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, device, name); } snprintf(nc->info_str, sizeof(nc->info_str), TYPE_VHOST_VDPA); @@ -495,8 +555,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; s->vhost_vdpa.shadow_vq_ops_opaque = s; - error_setg(&s->vhost_vdpa.migration_blocker, - "Migration disabled: vhost-vdpa uses CVQ."); } ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); if (ret) { diff --git a/util/iova-tree.c b/util/iova-tree.c index fee530a579..536789797e 100644 --- a/util/iova-tree.c +++ b/util/iova-tree.c @@ -164,11 +164,11 @@ void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator) g_tree_foreach(tree->tree, iova_tree_traverse, iterator); } -void iova_tree_remove(IOVATree *tree, const DMAMap *map) +void iova_tree_remove(IOVATree *tree, DMAMap map) { const DMAMap *overlap; - while ((overlap = iova_tree_find(tree, map))) { + while ((overlap = iova_tree_find(tree, &map))) { g_tree_remove(tree->tree, overlap); } } |