27 files changed, 1416 insertions, 152 deletions
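The user-visible knob in this series is the experimental @x-svq property added to the vhost-vdpa netdev (see the qapi/net.json hunk below). As an illustration only (the device node path is a placeholder, not something from this patch), shadow virtqueue support would be requested with: -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,x-svq=on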
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index 9096fb302b..1610472cfc 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -390,8 +390,8 @@ bool float32_is_signaling_nan(float32 a_, float_status *status) static int pickNaN(FloatClass a_cls, FloatClass b_cls, bool aIsLargerSignificand, float_status *status) { -#if defined(TARGET_ARM) || defined(TARGET_MIPS) || defined(TARGET_HPPA) \ - || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_MIPS) || defined(TARGET_HPPA) || \ + defined(TARGET_LOONGARCH64) || defined(TARGET_S390X) /* ARM mandated NaN propagation rules (see FPProcessNaNs()), take * the first of: * 1. A if it is signaling diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 7ad948ee7c..dd0d056fde 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -49,7 +49,6 @@ #define VIRTIO_NET_VM_VERSION 11 -#define MAC_TABLE_ENTRIES 64 #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ /* previously fixed value */ @@ -1434,57 +1433,71 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, return VIRTIO_NET_OK; } -static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) +size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, + const struct iovec *in_sg, unsigned in_num, + const struct iovec *out_sg, + unsigned out_num) { VirtIONet *n = VIRTIO_NET(vdev); struct virtio_net_ctrl_hdr ctrl; virtio_net_ctrl_ack status = VIRTIO_NET_ERR; - VirtQueueElement *elem; size_t s; struct iovec *iov, *iov2; - unsigned int iov_cnt; + + if (iov_size(in_sg, in_num) < sizeof(status) || + iov_size(out_sg, out_num) < sizeof(ctrl)) { + virtio_error(vdev, "virtio-net ctrl missing headers"); + return 0; + } + + iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num); + s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl)); + iov_discard_front(&iov, &out_num, sizeof(ctrl)); + if (s != sizeof(ctrl)) { + status = VIRTIO_NET_ERR; + } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { + status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num); + } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { + status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num); + } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { + status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num); + } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { + status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num); + } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { + status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num); + } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { + status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num); + } + + s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status)); + assert(s == sizeof(status)); + + g_free(iov2); + return sizeof(status); +} + +static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtQueueElement *elem; for (;;) { + size_t written; elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); if (!elem) { break; } - if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) || - iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) { - virtio_error(vdev, "virtio-net ctrl missing headers"); + + written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num, + elem->out_sg, elem->out_num); + if (written > 0) { + virtqueue_push(vq, elem, written); + virtio_notify(vdev, vq); + g_free(elem); + } else { virtqueue_detach_element(vq, elem, 0); g_free(elem); break; } - - iov_cnt = elem->out_num; - iov2 = iov = g_memdup2(elem->out_sg, - sizeof(struct 
iovec) * elem->out_num); - s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl)); - iov_discard_front(&iov, &iov_cnt, sizeof(ctrl)); - if (s != sizeof(ctrl)) { - status = VIRTIO_NET_ERR; - } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { - status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt); - } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { - status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt); - } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { - status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt); - } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { - status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt); - } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { - status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt); - } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { - status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt); - } - - s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status)); - assert(s == sizeof(status)); - - virtqueue_push(vq, elem, sizeof(status)); - virtio_notify(vdev, vq); - g_free(iov2); - g_free(elem); } } diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 56c96ebd13..e4956728dd 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -122,17 +122,35 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, return true; } -static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, - const struct iovec *iovec, size_t num, - bool more_descs, bool write) +/** + * Write descriptors to SVQ vring + * + * @svq: The shadow virtqueue + * @sg: Cache for hwaddr + * @iovec: The iovec from the guest + * @num: iovec length + * @more_descs: True if more descriptors come in the chain + * @write: True if they are writeable descriptors + * + * Return true if success, false otherwise and print error. + */ +static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, + const struct iovec *iovec, size_t num, + bool more_descs, bool write) { uint16_t i = svq->free_head, last = svq->free_head; unsigned n; uint16_t flags = write ? 
cpu_to_le16(VRING_DESC_F_WRITE) : 0; vring_desc_t *descs = svq->vring.desc; + bool ok; if (num == 0) { - return; + return true; + } + + ok = vhost_svq_translate_addr(svq, sg, iovec, num); + if (unlikely(!ok)) { + return false; } for (n = 0; n < num; n++) { @@ -150,40 +168,39 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, } svq->free_head = le16_to_cpu(svq->desc_next[last]); + return true; } static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, - VirtQueueElement *elem, unsigned *head) + const struct iovec *out_sg, size_t out_num, + const struct iovec *in_sg, size_t in_num, + unsigned *head) { unsigned avail_idx; vring_avail_t *avail = svq->vring.avail; bool ok; - g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num)); + g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num)); *head = svq->free_head; /* We need some descriptors here */ - if (unlikely(!elem->out_num && !elem->in_num)) { + if (unlikely(!out_num && !in_num)) { qemu_log_mask(LOG_GUEST_ERROR, "Guest provided element with no descriptors"); return false; } - ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num); + ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0, + false); if (unlikely(!ok)) { return false; } - vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, - elem->in_num > 0, false); - - ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num); + ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true); if (unlikely(!ok)) { return false; } - vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true); - /* * Put the entry in the available array (but don't update avail->idx until * they do sync). @@ -199,38 +216,58 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, return true; } +static void vhost_svq_kick(VhostShadowVirtqueue *svq) +{ + /* + * We need to expose the available array entries before checking the used + * flags + */ + smp_mb(); + if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { + return; + } + + event_notifier_set(&svq->hdev_kick); +} + /** * Add an element to a SVQ. * * The caller must check that there are enough slots for the new element. It - * takes ownership of the element: In case of failure, it is free and the SVQ - * is considered broken. + * takes ownership of the element: in case of a failure other than ENOSPC, it is freed.
+ * + * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full */ -static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) +int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, + size_t out_num, const struct iovec *in_sg, size_t in_num, + VirtQueueElement *elem) { unsigned qemu_head; - bool ok = vhost_svq_add_split(svq, elem, &qemu_head); + unsigned ndescs = in_num + out_num; + bool ok; + + if (unlikely(ndescs > vhost_svq_available_slots(svq))) { + return -ENOSPC; + } + + ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head); if (unlikely(!ok)) { g_free(elem); - return false; + return -EINVAL; } - svq->ring_id_maps[qemu_head] = elem; - return true; + svq->desc_state[qemu_head].elem = elem; + svq->desc_state[qemu_head].ndescs = ndescs; + vhost_svq_kick(svq); + return 0; } -static void vhost_svq_kick(VhostShadowVirtqueue *svq) +/* Convenience wrapper to add a guest's element to SVQ */ +static int vhost_svq_add_element(VhostShadowVirtqueue *svq, + VirtQueueElement *elem) { - /* - * We need to expose the available array entries before checking the used - * flags - */ - smp_mb(); - if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { - return; - } - - event_notifier_set(&svq->hdev_kick); + return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg, + elem->in_num, elem); } /** @@ -257,7 +294,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) while (true) { VirtQueueElement *elem; - bool ok; + int r; if (svq->next_guest_avail_elem) { elem = g_steal_pointer(&svq->next_guest_avail_elem); @@ -269,28 +306,30 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) break; } - if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) { - /* - * This condition is possible since a contiguous buffer in GPA - * does not imply a contiguous buffer in qemu's VA - * scatter-gather segments. If that happens, the buffer exposed - * to the device needs to be a chain of descriptors at this - * moment. - * - * SVQ cannot hold more available buffers if we are here: - * queue the current guest descriptor and ignore further kicks - * until some elements are used. - */ - svq->next_guest_avail_elem = elem; - return; + if (svq->ops) { + r = svq->ops->avail_handler(svq, elem, svq->ops_opaque); + } else { + r = vhost_svq_add_element(svq, elem); } - - ok = vhost_svq_add(svq, elem); - if (unlikely(!ok)) { - /* VQ is broken, just return and ignore any other kicks */ + if (unlikely(r != 0)) { + if (r == -ENOSPC) { + /* + * This condition is possible since a contiguous buffer in + * GPA does not imply a contiguous buffer in qemu's VA + * scatter-gather segments. If that happens, the buffer + * exposed to the device needs to be a chain of descriptors + * at this moment. + * + * SVQ cannot hold more available buffers if we are here: + * queue the current guest descriptor and ignore kicks + * until some elements are used. 
+ */ + svq->next_guest_avail_elem = elem; + } + + /* VQ is full or broken, just return and ignore kicks */ return; } - vhost_svq_kick(svq); } virtio_queue_set_notification(svq->vq, true); @@ -311,11 +350,12 @@ static void vhost_handle_guest_kick_notifier(EventNotifier *n) static bool vhost_svq_more_used(VhostShadowVirtqueue *svq) { + uint16_t *used_idx = &svq->vring.used->idx; if (svq->last_used_idx != svq->shadow_used_idx) { return true; } - svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx); + svq->shadow_used_idx = cpu_to_le16(*(volatile uint16_t *)used_idx); return svq->last_used_idx != svq->shadow_used_idx; } @@ -376,21 +416,36 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, return NULL; } - if (unlikely(!svq->ring_id_maps[used_elem.id])) { + if (unlikely(!svq->desc_state[used_elem.id].elem)) { qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used, but it was not available", svq->vdev->name, used_elem.id); return NULL; } - num = svq->ring_id_maps[used_elem.id]->in_num + - svq->ring_id_maps[used_elem.id]->out_num; + num = svq->desc_state[used_elem.id].ndescs; last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); svq->desc_next[last_used_chain] = svq->free_head; svq->free_head = used_elem.id; *len = used_elem.len; - return g_steal_pointer(&svq->ring_id_maps[used_elem.id]); + return g_steal_pointer(&svq->desc_state[used_elem.id].elem); +} + +/** + * Push an element to SVQ, returning it to the guest. + */ +void vhost_svq_push_elem(VhostShadowVirtqueue *svq, + const VirtQueueElement *elem, uint32_t len) +{ + virtqueue_push(svq->vq, elem, len); + if (svq->next_guest_avail_elem) { + /* + * Avail ring was full when vhost_svq_flush was called, so it's a + * good moment to make more descriptors available if possible. + */ + vhost_handle_guest_kick(svq); + } } static void vhost_svq_flush(VhostShadowVirtqueue *svq, @@ -435,6 +490,33 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, } /** + * Poll the SVQ for one device used buffer. + * + * This function races with the main event loop SVQ polling, so extra + * synchronization is needed. + * + * Return the length written by the device. + */ +size_t vhost_svq_poll(VhostShadowVirtqueue *svq) +{ + int64_t start_us = g_get_monotonic_time(); + do { + uint32_t len; + VirtQueueElement *elem = vhost_svq_get_buf(svq, &len); + if (elem) { + return len; + } + + if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { + return 0; + } + + /* Make sure we read new used_idx */ + smp_rmb(); + } while (true); +} + +/** * Forward used buffers. * * @n: hdev call event notifier, the one that the device set to notify svq.
@@ -560,7 +642,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, memset(svq->vring.desc, 0, driver_size); svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size); memset(svq->vring.used, 0, device_size); - svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); + svq->desc_state = g_new0(SVQDescState, svq->vring.num); svq->desc_next = g_new0(uint16_t, svq->vring.num); for (unsigned i = 0; i < svq->vring.num - 1; i++) { svq->desc_next[i] = cpu_to_le16(i + 1); @@ -585,7 +667,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) for (unsigned i = 0; i < svq->vring.num; ++i) { g_autofree VirtQueueElement *elem = NULL; - elem = g_steal_pointer(&svq->ring_id_maps[i]); + elem = g_steal_pointer(&svq->desc_state[i].elem); if (elem) { virtqueue_detach_element(svq->vq, elem, 0); } @@ -597,7 +679,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) } svq->vq = NULL; g_free(svq->desc_next); - g_free(svq->ring_id_maps); + g_free(svq->desc_state); qemu_vfree(svq->vring.desc); qemu_vfree(svq->vring.used); } @@ -607,12 +689,16 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) * shadow methods and file descriptors. * * @iova_tree: Tree to perform descriptors translations + * @ops: SVQ owner callbacks + * @ops_opaque: ops opaque pointer * * Returns the new virtqueue or NULL. * * In case of error, reason is reported through error_report. */ -VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree) +VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, + const VhostShadowVirtqueueOps *ops, + void *ops_opaque) { g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); int r; @@ -634,6 +720,8 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree) event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); svq->iova_tree = iova_tree; + svq->ops = ops; + svq->ops_opaque = ops_opaque; return g_steal_pointer(&svq); err_init_hdev_call: diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h index c132c994e9..d04c34a589 100644 --- a/hw/virtio/vhost-shadow-virtqueue.h +++ b/hw/virtio/vhost-shadow-virtqueue.h @@ -15,6 +15,37 @@ #include "standard-headers/linux/vhost_types.h" #include "hw/virtio/vhost-iova-tree.h" +typedef struct SVQDescState { + VirtQueueElement *elem; + + /* + * Number of descriptors exposed to the device. May or may not match + * guest's + */ + unsigned int ndescs; +} SVQDescState; + +typedef struct VhostShadowVirtqueue VhostShadowVirtqueue; + +/** + * Callback to handle an avail buffer. + * + * @svq: Shadow virtqueue + * @elem: Element placed in the queue by the guest + * @vq_callback_opaque: Opaque + * + * Returns 0 if the vq is running as expected. + * + * Note that ownership of elem is transferred to the callback. 
+ */ +typedef int (*VirtQueueAvailCallback)(VhostShadowVirtqueue *svq, + VirtQueueElement *elem, + void *vq_callback_opaque); + +typedef struct VhostShadowVirtqueueOps { + VirtQueueAvailCallback avail_handler; +} VhostShadowVirtqueueOps; + /* Shadow virtqueue to relay notifications */ typedef struct VhostShadowVirtqueue { /* Shadow vring */ @@ -47,8 +78,8 @@ typedef struct VhostShadowVirtqueue { /* IOVA mapping */ VhostIOVATree *iova_tree; - /* Map for use the guest's descriptors */ - VirtQueueElement **ring_id_maps; + /* SVQ vring descriptors state */ + SVQDescState *desc_state; /* Next VirtQueue element that guest made available */ VirtQueueElement *next_guest_avail_elem; @@ -59,6 +90,12 @@ typedef struct VhostShadowVirtqueue { */ uint16_t *desc_next; + /* Caller callbacks */ + const VhostShadowVirtqueueOps *ops; + + /* Caller callbacks opaque */ + void *ops_opaque; + /* Next head to expose to the device */ uint16_t shadow_avail_idx; @@ -74,6 +111,13 @@ typedef struct VhostShadowVirtqueue { bool vhost_svq_valid_features(uint64_t features, Error **errp); +void vhost_svq_push_elem(VhostShadowVirtqueue *svq, + const VirtQueueElement *elem, uint32_t len); +int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, + size_t out_num, const struct iovec *in_sg, size_t in_num, + VirtQueueElement *elem); +size_t vhost_svq_poll(VhostShadowVirtqueue *svq); + void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, @@ -85,7 +129,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, VirtQueue *vq); void vhost_svq_stop(VhostShadowVirtqueue *svq); -VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree); +VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, + const VhostShadowVirtqueueOps *ops, + void *ops_opaque); void vhost_svq_free(gpointer vq); G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free); diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 66f054a12c..291cd19054 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -20,6 +20,7 @@ #include "hw/virtio/vhost-shadow-virtqueue.h" #include "hw/virtio/vhost-vdpa.h" #include "exec/address-spaces.h" +#include "migration/blocker.h" #include "qemu/cutils.h" #include "qemu/main-loop.h" #include "cpu.h" @@ -71,8 +72,8 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, return false; } -static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, - void *vaddr, bool readonly) +int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, + void *vaddr, bool readonly) { struct vhost_msg_v2 msg = {}; int fd = v->device_fd; @@ -97,8 +98,7 @@ static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, return ret; } -static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, - hwaddr size) +int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size) { struct vhost_msg_v2 msg = {}; int fd = v->device_fd; @@ -418,8 +418,10 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); for (unsigned n = 0; n < hdev->nvqs; ++n) { - g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree); + g_autoptr(VhostShadowVirtqueue) svq; + svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops, + v->shadow_vq_ops_opaque); if (unlikely(!svq)) { 
error_setg(errp, "Cannot create svq %u", n); return -1; @@ -1021,6 +1023,13 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) return true; } + if (v->migration_blocker) { + int r = migrate_add_blocker(v->migration_blocker, &err); + if (unlikely(r < 0)) { + return false; + } + } + for (i = 0; i < v->shadow_vqs->len; ++i) { VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); @@ -1063,6 +1072,10 @@ err: vhost_svq_stop(svq); } + if (v->migration_blocker) { + migrate_del_blocker(v->migration_blocker); + } + return false; } @@ -1082,6 +1095,9 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev) } } + if (v->migration_blocker) { + migrate_del_blocker(v->migration_blocker); + } return true; } diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index a29dbb3f53..d10a89303e 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -15,6 +15,7 @@ #include <gmodule.h> #include "hw/virtio/vhost-iova-tree.h" +#include "hw/virtio/vhost-shadow-virtqueue.h" #include "hw/virtio/virtio.h" #include "standard-headers/linux/vhost_types.h" @@ -34,9 +35,16 @@ typedef struct vhost_vdpa { bool shadow_vqs_enabled; /* IOVA mapping used by the Shadow Virtqueue */ VhostIOVATree *iova_tree; + Error *migration_blocker; GPtrArray *shadow_vqs; + const VhostShadowVirtqueueOps *shadow_vq_ops; + void *shadow_vq_ops_opaque; struct vhost_dev *dev; VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; } VhostVDPA; +int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, + void *vaddr, bool readonly); +int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size); + #endif diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index eb87032627..ef234ffe7e 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -35,6 +35,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET) * and latency. */ #define TX_BURST 256 +/* Maximum VIRTIO_NET_CTRL_MAC_TABLE_SET unicast + multicast entries. 
*/ +#define MAC_TABLE_ENTRIES 64 + typedef struct virtio_net_conf { uint32_t txtimer; @@ -218,6 +221,10 @@ struct VirtIONet { struct EBPFRSSContext ebpf_rss; }; +size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, + const struct iovec *in_sg, unsigned in_num, + const struct iovec *out_sg, + unsigned out_num); void virtio_net_set_netclient_name(VirtIONet *n, const char *name, const char *type); diff --git a/net/colo-compare.c b/net/colo-compare.c index d5d0965805..787c740f14 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -1323,7 +1323,7 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) s->connection_track_table = g_hash_table_new_full(connection_key_hash, connection_key_equal, g_free, - connection_destroy); + NULL); colo_compare_iothread(s); diff --git a/net/colo.c b/net/colo.c index 1f8162f59f..6b0ff562ad 100644 --- a/net/colo.c +++ b/net/colo.c @@ -46,7 +46,14 @@ int parse_packet_early(Packet *pkt) static const uint8_t vlan[] = {0x81, 0x00}; uint8_t *data = pkt->data + pkt->vnet_hdr_len; uint16_t l3_proto; - ssize_t l2hdr_len = eth_get_l2_hdr_length(data); + ssize_t l2hdr_len; + + if (data == NULL) { + trace_colo_proxy_main_vnet_info("This packet is not parsed correctly, " + "pkt->vnet_hdr_len", pkt->vnet_hdr_len); + return 1; + } + l2hdr_len = eth_get_l2_hdr_length(data); if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) { trace_colo_proxy_main("pkt->size < ETH_HLEN"); @@ -218,7 +225,7 @@ Connection *connection_get(GHashTable *connection_track_table, /* * clear the conn_list */ - while (!g_queue_is_empty(conn_list)) { + while (conn_list && !g_queue_is_empty(conn_list)) { connection_destroy(g_queue_pop_head(conn_list)); } } diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c index bf05023dc3..c18c4c2019 100644 --- a/net/filter-rewriter.c +++ b/net/filter-rewriter.c @@ -383,7 +383,7 @@ static void colo_rewriter_setup(NetFilterState *nf, Error **errp) s->connection_track_table = g_hash_table_new_full(connection_key_hash, connection_key_equal, g_free, - connection_destroy); + NULL); s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf); } diff --git a/net/meson.build b/net/meson.build index 754e2d1d40..d1be76daf3 100644 --- a/net/meson.build +++ b/net/meson.build @@ -41,7 +41,8 @@ endif softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix)) softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c')) if have_vhost_net_vdpa - softmmu_ss.add(files('vhost-vdpa.c')) + softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-vdpa.c'), if_false: files('vhost-vdpa-stub.c')) + softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-vdpa-stub.c')) endif vmnet_files = files( diff --git a/net/trace-events b/net/trace-events index d7a17256cc..6af927b4b9 100644 --- a/net/trace-events +++ b/net/trace-events @@ -9,6 +9,7 @@ vhost_user_event(const char *chr, int event) "chr: %s got event: %d" # colo.c colo_proxy_main(const char *chr) ": %s" +colo_proxy_main_vnet_info(const char *sta, int size) ": %s = %d" # colo-compare.c colo_compare_main(const char *chr) ": %s" diff --git a/net/vhost-vdpa-stub.c b/net/vhost-vdpa-stub.c new file mode 100644 index 0000000000..1732ed2443 --- /dev/null +++ b/net/vhost-vdpa-stub.c @@ -0,0 +1,21 @@ +/* + * vhost-vdpa-stub.c + * + * Copyright (c) 2022 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include "clients.h" +#include "net/vhost-vdpa.h" +#include "qapi/error.h" + +int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp) +{ + error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*"); + return -1; +} diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index df1e69ee72..6abad276a6 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -11,11 +11,14 @@ #include "qemu/osdep.h" #include "clients.h" +#include "hw/virtio/virtio-net.h" #include "net/vhost_net.h" #include "net/vhost-vdpa.h" #include "hw/virtio/vhost-vdpa.h" #include "qemu/config-file.h" #include "qemu/error-report.h" +#include "qemu/log.h" +#include "qemu/memalign.h" #include "qemu/option.h" #include "qapi/error.h" #include <linux/vhost.h> @@ -30,6 +33,9 @@ typedef struct VhostVDPAState { NetClientState nc; struct vhost_vdpa vhost_vdpa; VHostNetState *vhost_net; + + /* Control commands shadow buffers */ + void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer; bool started; } VhostVDPAState; @@ -69,6 +75,28 @@ const int vdpa_feature_bits[] = { VHOST_INVALID_FEATURE_BIT }; +/** Supported device specific feature bits with SVQ */ +static const uint64_t vdpa_svq_device_features = + BIT_ULL(VIRTIO_NET_F_CSUM) | + BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | + BIT_ULL(VIRTIO_NET_F_MTU) | + BIT_ULL(VIRTIO_NET_F_MAC) | + BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | + BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | + BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | + BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | + BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | + BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | + BIT_ULL(VIRTIO_NET_F_HOST_ECN) | + BIT_ULL(VIRTIO_NET_F_HOST_UFO) | + BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | + BIT_ULL(VIRTIO_NET_F_STATUS) | + BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | + BIT_ULL(VIRTIO_F_ANY_LAYOUT) | + BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | + BIT_ULL(VIRTIO_NET_F_RSC_EXT) | + BIT_ULL(VIRTIO_NET_F_STANDBY); + VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); @@ -127,7 +155,13 @@ err_init: static void vhost_vdpa_cleanup(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); + struct vhost_dev *dev = &s->vhost_net->dev; + qemu_vfree(s->cvq_cmd_out_buffer); + qemu_vfree(s->cvq_cmd_in_buffer); + if (dev->vq_index + dev->nvqs == dev->vq_index_end) { + g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); + } if (s->vhost_net) { vhost_net_cleanup(s->vhost_net); g_free(s->vhost_net); @@ -187,13 +221,251 @@ static NetClientInfo net_vhost_vdpa_info = { .check_peer_type = vhost_vdpa_check_peer_type, }; +static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) +{ + VhostIOVATree *tree = v->iova_tree; + DMAMap needle = { + /* + * No need to specify size or to look for more translations since + * this contiguous chunk was allocated by us. + */ + .translated_addr = (hwaddr)(uintptr_t)addr, + }; + const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle); + int r; + + if (unlikely(!map)) { + error_report("Cannot locate expected map"); + return; + } + + r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1); + if (unlikely(r != 0)) { + error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); + } + + vhost_iova_tree_remove(tree, map); +} + +static size_t vhost_vdpa_net_cvq_cmd_len(void) +{ + /* + * MAC_TABLE_SET is the ctrl command that produces the longer out buffer. 
* In buffer is always 1 byte, so it should fit here + */ + return sizeof(struct virtio_net_ctrl_hdr) + + 2 * sizeof(struct virtio_net_ctrl_mac) + + MAC_TABLE_ENTRIES * ETH_ALEN; +} + +static size_t vhost_vdpa_net_cvq_cmd_page_len(void) +{ + return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size()); +} + +/** Copy and map a guest buffer. */ +static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, + const struct iovec *out_data, + size_t out_num, size_t data_len, void *buf, + size_t *written, bool write) +{ + DMAMap map = {}; + int r; + + if (unlikely(!data_len)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid length of %s buffer\n", + __func__, write ? "in" : "out"); + return false; + } + + *written = iov_to_buf(out_data, out_num, 0, buf, data_len); + map.translated_addr = (hwaddr)(uintptr_t)buf; + map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1; + map.perm = write ? IOMMU_RW : IOMMU_RO, + r = vhost_iova_tree_map_alloc(v->iova_tree, &map); + if (unlikely(r != IOVA_OK)) { + error_report("Cannot map injected element"); + return false; + } + + r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, + !write); + if (unlikely(r < 0)) { + goto dma_map_err; + } + + return true; + +dma_map_err: + vhost_iova_tree_remove(v->iova_tree, &map); + return false; +} + +/** + * Copy the guest element into a dedicated buffer suitable to be sent to the NIC + * + * @iov: [0] is the out buffer, [1] is the in one + */ +static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, + VirtQueueElement *elem, + struct iovec *iov) +{ + size_t in_copied; + bool ok; + + iov[0].iov_base = s->cvq_cmd_out_buffer; + ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num, + vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base, + &iov[0].iov_len, false); + if (unlikely(!ok)) { + return false; + } + + iov[1].iov_base = s->cvq_cmd_in_buffer; + ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0, + sizeof(virtio_net_ctrl_ack), iov[1].iov_base, + &in_copied, true); + if (unlikely(!ok)) { + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); + return false; + } + + iov[1].iov_len = sizeof(virtio_net_ctrl_ack); + return true; +} + +/** + * Do not forward commands not supported by SVQ. Otherwise, the device could + * accept them and qemu would not know how to update the device model. + */ +static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out, + size_t out_num) +{ + struct virtio_net_ctrl_hdr ctrl; + size_t n; + + n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl)); + if (unlikely(n < sizeof(ctrl))) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid length of out buffer %zu\n", __func__, n); + return false; + } + + switch (ctrl.class) { + case VIRTIO_NET_CTRL_MAC: + switch (ctrl.cmd) { + case VIRTIO_NET_CTRL_MAC_ADDR_SET: + return true; + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n", + __func__, ctrl.cmd); + }; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n", + __func__, ctrl.class); + }; + + return false; +} + +/** + * Validate and copy control virtqueue commands. + * + * Following QEMU guidelines, we offer a copy of the buffers to the device to + * prevent TOCTOU bugs.
+ */ +static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + VirtQueueElement *elem, + void *opaque) +{ + VhostVDPAState *s = opaque; + size_t in_len, dev_written; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + /* out and in buffers sent to the device */ + struct iovec dev_buffers[2] = { + { .iov_base = s->cvq_cmd_out_buffer }, + { .iov_base = s->cvq_cmd_in_buffer }, + }; + /* in buffer used for device model */ + const struct iovec in = { + .iov_base = &status, + .iov_len = sizeof(status), + }; + int r = -EINVAL; + bool ok; + + ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers); + if (unlikely(!ok)) { + goto out; + } + + ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1); + if (unlikely(!ok)) { + goto out; + } + + r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem); + if (unlikely(r != 0)) { + if (unlikely(r == -ENOSPC)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", + __func__); + } + goto out; + } + + /* + * We can poll here since we've had BQL from the time we sent the + * descriptor. Also, we need to take the answer before SVQ pulls by itself, + * when BQL is released + */ + dev_written = vhost_svq_poll(svq); + if (unlikely(dev_written < sizeof(status))) { + error_report("Insufficient written data (%zu)", dev_written); + goto out; + } + + memcpy(&status, dev_buffers[1].iov_base, sizeof(status)); + if (status != VIRTIO_NET_OK) { + goto out; + } + + status = VIRTIO_NET_ERR; + virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1); + if (status != VIRTIO_NET_OK) { + error_report("Bad CVQ processing in model"); + } + +out: + in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, + sizeof(status)); + if (unlikely(in_len < sizeof(status))) { + error_report("Bad device CVQ written length"); + } + vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); + g_free(elem); + if (dev_buffers[0].iov_base) { + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base); + } + if (dev_buffers[1].iov_base) { + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base); + } + return r; +} + +static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { + .avail_handler = vhost_vdpa_net_handle_ctrl_avail, +}; + static NetClientState *net_vhost_vdpa_init(NetClientState *peer, const char *device, const char *name, int vdpa_device_fd, int queue_pair_index, int nvqs, - bool is_datapath) + bool is_datapath, + bool svq, + VhostIOVATree *iova_tree) { NetClientState *nc = NULL; VhostVDPAState *s; @@ -211,6 +483,21 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->vhost_vdpa.device_fd = vdpa_device_fd; s->vhost_vdpa.index = queue_pair_index; + s->vhost_vdpa.shadow_vqs_enabled = svq; + s->vhost_vdpa.iova_tree = iova_tree; + if (!is_datapath) { + s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), + vhost_vdpa_net_cvq_cmd_page_len()); + memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); + s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size(), + vhost_vdpa_net_cvq_cmd_page_len()); + memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); + + s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; + s->vhost_vdpa.shadow_vq_ops_opaque = s; + error_setg(&s->vhost_vdpa.migration_blocker, + "Migration disabled: vhost-vdpa uses CVQ."); + } ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); if (ret) { qemu_del_net_client(nc); @@ -219,20 +506,32 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, return nc; } 
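As a worked cross-check of the vhost_vdpa_net_cvq_cmd_len() arithmetic a few hunks above, here is a minimal, self-contained sketch (not part of the patch), assuming the standard virtio-net control header layouts: struct virtio_net_ctrl_hdr is 2 bytes (class + cmd) and struct virtio_net_ctrl_mac starts with a 4-byte entry count.

#include <stdio.h>

#define MAC_TABLE_ENTRIES 64 /* from include/hw/virtio/virtio-net.h above */
#define ETH_ALEN 6           /* bytes per MAC address */
#define CTRL_HDR_LEN 2       /* assumed sizeof(struct virtio_net_ctrl_hdr) */
#define CTRL_MAC_LEN 4       /* assumed sizeof(struct virtio_net_ctrl_mac) */

int main(void)
{
    /* header + unicast table + multicast table, one entry count each */
    size_t len = CTRL_HDR_LEN + 2 * CTRL_MAC_LEN +
                 MAC_TABLE_ENTRIES * ETH_ALEN;
    printf("cvq cmd len = %zu bytes\n", len); /* prints 394 */
    return 0;
}

ROUND_UP() then pads this to one host page, so a single page per shadow buffer is always enough.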
-static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp) +static int vhost_vdpa_get_iova_range(int fd, + struct vhost_vdpa_iova_range *iova_range) +{ + int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range); + + return ret < 0 ? -errno : 0; +} + +static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp) +{ + int ret = ioctl(fd, VHOST_GET_FEATURES, features); + if (unlikely(ret < 0)) { + error_setg_errno(errp, errno, + "Fail to query features from vhost-vDPA device"); + } + return ret; +} + +static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features, + int *has_cvq, Error **errp) { unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); g_autofree struct vhost_vdpa_config *config = NULL; __virtio16 *max_queue_pairs; - uint64_t features; int ret; - ret = ioctl(fd, VHOST_GET_FEATURES, &features); - if (ret) { - error_setg(errp, "Fail to query features from vhost-vDPA device"); - return ret; - } - if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) { *has_cvq = 1; } else { @@ -262,10 +561,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp) { const NetdevVhostVDPAOptions *opts; + uint64_t features; int vdpa_device_fd; g_autofree NetClientState **ncs = NULL; + g_autoptr(VhostIOVATree) iova_tree = NULL; NetClientState *nc; - int queue_pairs, i, has_cvq = 0; + int queue_pairs, r, i, has_cvq = 0; assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); opts = &netdev->u.vhost_vdpa; @@ -279,29 +580,57 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, return -errno; } - queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, + r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp); + if (unlikely(r < 0)) { + return r; + } + + queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features, &has_cvq, errp); if (queue_pairs < 0) { qemu_close(vdpa_device_fd); return queue_pairs; } + if (opts->x_svq) { + struct vhost_vdpa_iova_range iova_range; + + uint64_t invalid_dev_features = + features & ~vdpa_svq_device_features & + /* Transport are all accepted at this point */ + ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, + VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); + + if (invalid_dev_features) { + error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, + invalid_dev_features); + goto err_svq; + } + + vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); + iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last); + } + ncs = g_malloc0(sizeof(*ncs) * queue_pairs); for (i = 0; i < queue_pairs; i++) { ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 2, true); + vdpa_device_fd, i, 2, true, opts->x_svq, + iova_tree); if (!ncs[i]) goto err; } if (has_cvq) { nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 1, false); + vdpa_device_fd, i, 1, false, + opts->x_svq, iova_tree); if (!nc) goto err; } + /* iova_tree ownership belongs to last NetClientState */ + g_steal_pointer(&iova_tree); return 0; err: @@ -310,6 +639,8 @@ err: qemu_del_net_client(ncs[i]); } } + +err_svq: qemu_close(vdpa_device_fd); return -1; diff --git a/qapi/net.json b/qapi/net.json index 9af11e9a3b..75ba2cb989 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -445,12 +445,19 @@ # @queues: number of queues to be created for multiqueue vhost-vdpa # (default: 1) # +# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1) +# (default: false) +# +# Features: +# @unstable: Member @x-svq is experimental. 
+# # Since: 5.1 ## { 'struct': 'NetdevVhostVDPAOptions', 'data': { '*vhostdev': 'str', - '*queues': 'int' } } + '*queues': 'int', + '*x-svq': {'type': 'bool', 'features' : [ 'unstable'] } } } ## # @NetdevVmnetHostOptions: diff --git a/softmmu/runstate.c b/softmmu/runstate.c index fac7b63259..168e1b78a0 100644 --- a/softmmu/runstate.c +++ b/softmmu/runstate.c @@ -126,6 +126,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH }, { RUN_STATE_COLO, RUN_STATE_RUNNING }, + { RUN_STATE_COLO, RUN_STATE_PRELAUNCH }, { RUN_STATE_COLO, RUN_STATE_SHUTDOWN}, { RUN_STATE_RUNNING, RUN_STATE_DEBUG }, diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index c6f0879b6e..50634ac459 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -339,12 +339,13 @@ do { \ TCGv LSB = tcg_temp_local_new(); \ TCGLabel *label = gen_new_label(); \ - GET_EA; \ + tcg_gen_movi_tl(EA, 0); \ PRED; \ + CHECK_NOSHUF_PRED(GET_EA, SIZE, LSB); \ PRED_LOAD_CANCEL(LSB, EA); \ tcg_gen_movi_tl(RdV, 0); \ tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, label); \ - fLOAD(1, SIZE, SIGN, EA, RdV); \ + fLOAD(1, SIZE, SIGN, EA, RdV); \ gen_set_label(label); \ tcg_temp_free(LSB); \ } while (0) @@ -398,12 +399,13 @@ do { \ TCGv LSB = tcg_temp_local_new(); \ TCGLabel *label = gen_new_label(); \ - GET_EA; \ + tcg_gen_movi_tl(EA, 0); \ PRED; \ + CHECK_NOSHUF_PRED(GET_EA, 8, LSB); \ PRED_LOAD_CANCEL(LSB, EA); \ tcg_gen_movi_i64(RddV, 0); \ tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, label); \ - fLOAD(1, 8, u, EA, RddV); \ + fLOAD(1, 8, u, EA, RddV); \ gen_set_label(label); \ tcg_temp_free(LSB); \ } while (0) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index cd6af4bceb..8a334ba07b 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -638,5 +638,12 @@ static void vec_to_qvec(size_t size, intptr_t dstoff, intptr_t srcoff) tcg_temp_free_i64(mask); } +static void probe_noshuf_load(TCGv va, int s, int mi) +{ + TCGv size = tcg_constant_tl(s); + TCGv mem_idx = tcg_constant_tl(mi); + gen_helper_probe_noshuf_load(cpu_env, va, size, mem_idx); +} + #include "tcg_funcs_generated.c.inc" #include "tcg_func_table_generated.c.inc" diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index c89aa4ed4d..368f0b5708 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -104,6 +104,7 @@ DEF_HELPER_1(vwhist128q, void, env) DEF_HELPER_2(vwhist128m, void, env, s32) DEF_HELPER_2(vwhist128qm, void, env, s32) +DEF_HELPER_4(probe_noshuf_load, void, env, i32, int, int) DEF_HELPER_2(probe_pkt_scalar_store_s0, void, env, int) DEF_HELPER_2(probe_hvx_stores, void, env, int) DEF_HELPER_3(probe_pkt_scalar_hvx_stores, void, env, int, int) diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index a78e84faa4..92eb8bbf05 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -87,49 +87,66 @@ * * * For qemu, we look for a load in slot 0 when there is a store in slot 1 - * in the same packet. When we see this, we call a helper that merges the - * bytes from the store buffer with the value loaded from memory. + * in the same packet. When we see this, we call a helper that probes the + * load to make sure it doesn't fault. Then, we process the store ahead of + * the actual load. 
+ */ -#define CHECK_NOSHUF \ +#define CHECK_NOSHUF(VA, SIZE) \ do { \ if (insn->slot == 0 && pkt->pkt_has_store_s1) { \ + probe_noshuf_load(VA, SIZE, ctx->mem_idx); \ + process_store(ctx, pkt, 1); \ + } \ + } while (0) + +#define CHECK_NOSHUF_PRED(GET_EA, SIZE, PRED) \ + do { \ + TCGLabel *label = gen_new_label(); \ + tcg_gen_brcondi_tl(TCG_COND_EQ, PRED, 0, label); \ + GET_EA; \ + if (insn->slot == 0 && pkt->pkt_has_store_s1) { \ + probe_noshuf_load(EA, SIZE, ctx->mem_idx); \ + } \ + gen_set_label(label); \ + if (insn->slot == 0 && pkt->pkt_has_store_s1) { \ process_store(ctx, pkt, 1); \ } \ } while (0) #define MEM_LOAD1s(DST, VA) \ do { \ - CHECK_NOSHUF; \ + CHECK_NOSHUF(VA, 1); \ tcg_gen_qemu_ld8s(DST, VA, ctx->mem_idx); \ } while (0) #define MEM_LOAD1u(DST, VA) \ do { \ - CHECK_NOSHUF; \ + CHECK_NOSHUF(VA, 1); \ tcg_gen_qemu_ld8u(DST, VA, ctx->mem_idx); \ } while (0) #define MEM_LOAD2s(DST, VA) \ do { \ - CHECK_NOSHUF; \ + CHECK_NOSHUF(VA, 2); \ tcg_gen_qemu_ld16s(DST, VA, ctx->mem_idx); \ } while (0) #define MEM_LOAD2u(DST, VA) \ do { \ - CHECK_NOSHUF; \ + CHECK_NOSHUF(VA, 2); \ tcg_gen_qemu_ld16u(DST, VA, ctx->mem_idx); \ } while (0) #define MEM_LOAD4s(DST, VA) \ do { \ - CHECK_NOSHUF; \ + CHECK_NOSHUF(VA, 4); \ tcg_gen_qemu_ld32s(DST, VA, ctx->mem_idx); \ } while (0) #define MEM_LOAD4u(DST, VA) \ do { \ - CHECK_NOSHUF; \ + CHECK_NOSHUF(VA, 4); \ tcg_gen_qemu_ld32s(DST, VA, ctx->mem_idx); \ } while (0) #define MEM_LOAD8u(DST, VA) \ do { \ - CHECK_NOSHUF; \ + CHECK_NOSHUF(VA, 8); \ tcg_gen_qemu_ld64(DST, VA, ctx->mem_idx); \ } while (0) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index a5ed819c04..085afc3274 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -442,6 +442,17 @@ static void probe_store(CPUHexagonState *env, int slot, int mmu_idx) } } +/* + * Called from a mem_noshuf packet to make sure the load doesn't + * raise an exception + */ +void HELPER(probe_noshuf_load)(CPUHexagonState *env, target_ulong va, + int size, int mmu_idx) +{ + uintptr_t retaddr = GETPC(); + probe_read(env, va, size, mmu_idx, retaddr); +} + /* Called during packet commit when there are two scalar stores */ void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int mmu_idx) { @@ -514,10 +525,12 @@ void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask, * If the load is in slot 0 and there is a store in slot1 (that * wasn't cancelled), we have to do the store first. 
*/ -static void check_noshuf(CPUHexagonState *env, uint32_t slot) +static void check_noshuf(CPUHexagonState *env, uint32_t slot, + target_ulong vaddr, int size) { if (slot == 0 && env->pkt_has_store_s1 && ((env->slot_cancelled & (1 << 1)) == 0)) { + HELPER(probe_noshuf_load)(env, vaddr, size, MMU_USER_IDX); HELPER(commit_store)(env, 1); } } @@ -526,7 +539,7 @@ static uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr) { uintptr_t ra = GETPC(); - check_noshuf(env, slot); + check_noshuf(env, slot, vaddr, 1); return cpu_ldub_data_ra(env, vaddr, ra); } @@ -534,7 +547,7 @@ static uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, target_ulong vaddr) { uintptr_t ra = GETPC(); - check_noshuf(env, slot); + check_noshuf(env, slot, vaddr, 2); return cpu_lduw_data_ra(env, vaddr, ra); } @@ -542,7 +555,7 @@ static uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, target_ulong vaddr) { uintptr_t ra = GETPC(); - check_noshuf(env, slot); + check_noshuf(env, slot, vaddr, 4); return cpu_ldl_data_ra(env, vaddr, ra); } @@ -550,7 +563,7 @@ static uint64_t mem_load8(CPUHexagonState *env, uint32_t slot, target_ulong vaddr) { uintptr_t ra = GETPC(); - check_noshuf(env, slot); + check_noshuf(env, slot, vaddr, 8); return cpu_ldq_data_ra(env, vaddr, ra); } diff --git a/target/s390x/tcg/vec_fpu_helper.c b/target/s390x/tcg/vec_fpu_helper.c index 2a618a1093..75cf605b9f 100644 --- a/target/s390x/tcg/vec_fpu_helper.c +++ b/target/s390x/tcg/vec_fpu_helper.c @@ -824,7 +824,7 @@ static S390MinMaxRes vfmin_res(uint16_t dcmask_a, uint16_t dcmask_b, default: g_assert_not_reached(); } - } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) { + } else if (unlikely((dcmask_a & DCMASK_ZERO) && (dcmask_b & DCMASK_ZERO))) { switch (type) { case S390_MINMAX_TYPE_JAVA: return neg_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; @@ -874,7 +874,7 @@ static S390MinMaxRes vfmax_res(uint16_t dcmask_a, uint16_t dcmask_b, default: g_assert_not_reached(); } - } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) { + } else if (unlikely((dcmask_a & DCMASK_ZERO) && (dcmask_b & DCMASK_ZERO))) { const bool neg_a = dcmask_a & DCMASK_NEGATIVE; switch (type) { diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index 23b9870534..96a4d7a614 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -35,6 +35,7 @@ HEX_TESTS += preg_alias HEX_TESTS += dual_stores HEX_TESTS += multi_result HEX_TESTS += mem_noshuf +HEX_TESTS += mem_noshuf_exception HEX_TESTS += circ HEX_TESTS += brev HEX_TESTS += load_unpack diff --git a/tests/tcg/hexagon/mem_noshuf.c b/tests/tcg/hexagon/mem_noshuf.c index dd714d5e98..0f4064e700 100644 --- a/tests/tcg/hexagon/mem_noshuf.c +++ b/tests/tcg/hexagon/mem_noshuf.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -84,6 +84,70 @@ MEM_NOSHUF32(mem_noshuf_sd_luh, long long, unsigned short, memd, memuh) MEM_NOSHUF32(mem_noshuf_sd_lw, long long, signed int, memd, memw) MEM_NOSHUF64(mem_noshuf_sd_ld, long long, signed long long, memd, memd) +static inline int pred_lw_sw(int pred, int *p, int *q, int x, int y) +{ + int ret; + asm volatile("p0 = cmp.eq(%5, #0)\n\t" + "%0 = %3\n\t" + "{\n\t" + " memw(%1) = %4\n\t" + " if (!p0) %0 = memw(%2)\n\t" + "}:mem_noshuf\n" + : "=&r"(ret) + : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred) + : "p0", "memory"); + return ret; +} + +static inline int pred_lw_sw_pi(int pred, int *p, int *q, int x, int y) +{ + int ret; + asm volatile("p0 = cmp.eq(%5, #0)\n\t" + "%0 = %3\n\t" + "r7 = %2\n\t" + "{\n\t" + " memw(%1) = %4\n\t" + " if (!p0) %0 = memw(r7++#4)\n\t" + "}:mem_noshuf\n" + : "=&r"(ret) + : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred) + : "r7", "p0", "memory"); + return ret; +} + +static inline long long pred_ld_sd(int pred, long long *p, long long *q, + long long x, long long y) +{ + unsigned long long ret; + asm volatile("p0 = cmp.eq(%5, #0)\n\t" + "%0 = %3\n\t" + "{\n\t" + " memd(%1) = %4\n\t" + " if (!p0) %0 = memd(%2)\n\t" + "}:mem_noshuf\n" + : "=&r"(ret) + : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred) + : "p0", "memory"); + return ret; +} + +static inline long long pred_ld_sd_pi(int pred, long long *p, long long *q, + long long x, long long y) +{ + long long ret; + asm volatile("p0 = cmp.eq(%5, #0)\n\t" + "%0 = %3\n\t" + "r7 = %2\n\t" + "{\n\t" + " memd(%1) = %4\n\t" + " if (!p0) %0 = memd(r7++#8)\n\t" + "}:mem_noshuf\n" + : "=&r"(ret) + : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred) + : "p0", "memory"); + return ret; +} + static inline unsigned int cancel_sw_lb(int pred, int *p, signed char *q, int x) { unsigned int ret; @@ -126,18 +190,22 @@ typedef union { int err; -static void check32(int n, int expect) +#define check32(n, expect) check32_(n, expect, __LINE__) + +static void check32_(int n, int expect, int line) { if (n != expect) { - printf("ERROR: 0x%08x != 0x%08x\n", n, expect); + printf("ERROR: 0x%08x != 0x%08x, line %d\n", n, expect, line); err++; } } -static void check64(long long n, long long expect) +#define check64(n, expect) check64_(n, expect, __LINE__) + +static void check64_(long long n, long long expect, int line) { if (n != expect) { - printf("ERROR: 0x%08llx != 0x%08llx\n", n, expect); + printf("ERROR: 0x%08llx != 0x%08llx, line %d\n", n, expect, line); err++; } } @@ -323,6 +391,50 @@ int main() res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[1], 0x123456789abcdef0LL); check64(res64, 0xffffffffffffffffLL); + n.w[0] = ~0; + res32 = pred_lw_sw(0, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda); + check32(res32, 0x12345678); + check32(n.w[0], 0xc0ffeeda); + + n.w[0] = ~0; + res32 = pred_lw_sw(1, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda); + check32(res32, 0xc0ffeeda); + check32(n.w[0], 0xc0ffeeda); + + n.w[0] = ~0; + res32 = pred_lw_sw_pi(0, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda); + check32(res32, 0x12345678); + check32(n.w[0], 0xc0ffeeda); + + n.w[0] = ~0; + res32 = pred_lw_sw_pi(1, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda); + check32(res32, 0xc0ffeeda); + check32(n.w[0], 0xc0ffeeda); + + n.d[0] = ~0LL; + res64 = pred_ld_sd(0, &n.d[0], &n.d[0], + 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL); + check64(res64, 0x1234567812345678LL); + check64(n.d[0], 0xc0ffeedac0ffeedaLL); + + n.d[0] = ~0LL; + res64 = pred_ld_sd(1, &n.d[0], 
&n.d[0], + 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL); + check64(res64, 0xc0ffeedac0ffeedaLL); + check64(n.d[0], 0xc0ffeedac0ffeedaLL); + + n.d[0] = ~0LL; + res64 = pred_ld_sd_pi(0, &n.d[0], &n.d[0], + 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL); + check64(res64, 0x1234567812345678LL); + check64(n.d[0], 0xc0ffeedac0ffeedaLL); + + n.d[0] = ~0LL; + res64 = pred_ld_sd_pi(1, &n.d[0], &n.d[0], + 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL); + check64(res64, 0xc0ffeedac0ffeedaLL); + check64(n.d[0], 0xc0ffeedac0ffeedaLL); + puts(err ? "FAIL" : "PASS"); return err; } diff --git a/tests/tcg/hexagon/mem_noshuf_exception.c b/tests/tcg/hexagon/mem_noshuf_exception.c new file mode 100644 index 0000000000..08660ea3e1 --- /dev/null +++ b/tests/tcg/hexagon/mem_noshuf_exception.c @@ -0,0 +1,146 @@ +/* + * Copyright(c) 2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Test the VLIW semantics of exceptions with mem_noshuf + * + * When a packet has the :mem_noshuf attribute, the semantics dictate + * that the load will get the data from the store if the addresses overlap. + * To accomplish this, we perform the store first. However, we have to + * handle the case where the store raises an exception. In that case, the + * store should not alter the machine state. + * + * We test this with a mem_noshuf packet with a store to a global variable, + * "should_not_change" and a load from NULL. After the SIGSEGV is caught, + * we check + * that the "should_not_change" value is the same. + * + * We also check that a predicated load where the predicate is false doesn't + * raise an exception and allows the store to happen.
+ */ + +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <fcntl.h> +#include <setjmp.h> +#include <signal.h> + +int err; +int segv_caught; + +#define SHOULD_NOT_CHANGE_VAL 5 +int should_not_change = SHOULD_NOT_CHANGE_VAL; + +#define OK_TO_CHANGE_VAL 13 +int ok_to_change = OK_TO_CHANGE_VAL; + +static void __check(const char *filename, int line, int x, int expect) +{ + if (x != expect) { + printf("ERROR %s:%d - %d != %d\n", + filename, line, x, expect); + err++; + } +} + +#define check(x, expect) __check(__FILE__, __LINE__, (x), (expect)) + +static void __chk_error(const char *filename, int line, int ret) +{ + if (ret < 0) { + printf("ERROR %s:%d - %d\n", filename, line, ret); + err++; + } +} + +#define chk_error(ret) __chk_error(__FILE__, __LINE__, (ret)) + +jmp_buf jmp_env; + +static void sig_segv(int sig, siginfo_t *info, void *puc) +{ + check(sig, SIGSEGV); + segv_caught = 1; + longjmp(jmp_env, 1); +} + +int main() +{ + struct sigaction act; + int dummy32; + long long dummy64; + void *p; + + /* SIGSEGV test */ + act.sa_sigaction = sig_segv; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + chk_error(sigaction(SIGSEGV, &act, NULL)); + if (setjmp(jmp_env) == 0) { + asm volatile("r18 = ##should_not_change\n\t" + "r19 = #0\n\t" + "{\n\t" + " memw(r18) = #7\n\t" + " %0 = memw(r19)\n\t" + "}:mem_noshuf\n\t" + : "=r"(dummy32) : : "r18", "r19", "memory"); + } + + act.sa_handler = SIG_DFL; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + chk_error(sigaction(SIGSEGV, &act, NULL)); + + check(segv_caught, 1); + check(should_not_change, SHOULD_NOT_CHANGE_VAL); + + /* + * Check that a predicated load where the predicate is false doesn't + * raise an exception and allows the store to happen. + */ + asm volatile("r18 = ##ok_to_change\n\t" + "r19 = #0\n\t" + "p0 = cmp.gt(r0, r0)\n\t" + "{\n\t" + " memw(r18) = #7\n\t" + " if (p0) %0 = memw(r19)\n\t" + "}:mem_noshuf\n\t" + : "=r"(dummy32) : : "r18", "r19", "p0", "memory"); + + check(ok_to_change, 7); + + /* + * Also check that the post-increment doesn't happen when the + * predicate is false. + */ + ok_to_change = OK_TO_CHANGE_VAL; + p = NULL; + asm volatile("r18 = ##ok_to_change\n\t" + "p0 = cmp.gt(r0, r0)\n\t" + "{\n\t" + " memw(r18) = #9\n\t" + " if (p0) %1 = memd(%0 ++ #8)\n\t" + "}:mem_noshuf\n\t" + : "+r"(p), "=r"(dummy64) : : "r18", "p0", "memory"); + + check(ok_to_change, 9); + check((int)p, (int)NULL); + + puts(err ? "FAIL" : "PASS"); + return err ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target index 3124172736..1a7a4a2f59 100644 --- a/tests/tcg/s390x/Makefile.target +++ b/tests/tcg/s390x/Makefile.target @@ -17,6 +17,13 @@ TESTS+=trap TESTS+=signals-s390x TESTS+=branch-relative-long +Z14_TESTS=vfminmax +vfminmax: LDFLAGS+=-lm +$(Z14_TESTS): CFLAGS+=-march=z14 -O2 + +TESTS+=$(if $(shell $(CC) -march=z14 -S -o /dev/null -xc /dev/null \ + >/dev/null 2>&1 && echo OK),$(Z14_TESTS)) + VECTOR_TESTS=vxeh2_vs VECTOR_TESTS+=vxeh2_vcvt VECTOR_TESTS+=vxeh2_vlstr diff --git a/tests/tcg/s390x/vfminmax.c b/tests/tcg/s390x/vfminmax.c new file mode 100644 index 0000000000..22629df160 --- /dev/null +++ b/tests/tcg/s390x/vfminmax.c @@ -0,0 +1,411 @@ +#define _GNU_SOURCE +#include <fenv.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> + +/* + * vfmin/vfmax instruction execution. 
+ */
+#define VFMIN 0xEE
+#define VFMAX 0xEF
+
+extern char insn[6];
+asm(".pushsection .rwx,\"awx\",@progbits\n"
+    ".globl insn\n"
+    /* e7 89 a0 00 2e ef */
+    "insn: vfmaxsb %v24,%v25,%v26,0\n"
+    ".popsection\n");
+
+static void vfminmax(unsigned int op,
+                     unsigned int m4, unsigned int m5, unsigned int m6,
+                     void *v1, const void *v2, const void *v3)
+{
+    insn[3] = (m6 << 4) | m5;
+    insn[4] = (m4 << 4) | 0x0e;
+    insn[5] = op;
+
+    asm("vl %%v25,%[v2]\n"
+        "vl %%v26,%[v3]\n"
+        "ex 0,%[insn]\n"
+        "vst %%v24,%[v1]\n"
+        : [v1] "=m" (*(char (*)[16])v1)
+        : [v2] "m" (*(char (*)[16])v2)
+        , [v3] "m" (*(char (*)[16])v3)
+        , [insn] "m"(insn)
+        : "v24", "v25", "v26");
+}
+
+/*
+ * Floating-point value classes.
+ */
+#define N_FORMATS 3
+#define N_SIGNED_CLASSES 8
+static const size_t float_sizes[N_FORMATS] = {
+    /* M4 == 2: short */ 4,
+    /* M4 == 3: long */ 8,
+    /* M4 == 4: extended */ 16,
+};
+static const size_t e_bits[N_FORMATS] = {
+    /* M4 == 2: short */ 8,
+    /* M4 == 3: long */ 11,
+    /* M4 == 4: extended */ 15,
+};
+static const unsigned char signed_floats[N_FORMATS][N_SIGNED_CLASSES][2][16] = {
+    /* M4 == 2: short */
+    {
+        /* -inf */ {{0xff, 0x80, 0x00, 0x00},
+                    {0xff, 0x80, 0x00, 0x00}},
+        /* -Fn */ {{0xc2, 0x28, 0x00, 0x00},
+                   {0xc2, 0x29, 0x00, 0x00}},
+        /* -0 */ {{0x80, 0x00, 0x00, 0x00},
+                  {0x80, 0x00, 0x00, 0x00}},
+        /* +0 */ {{0x00, 0x00, 0x00, 0x00},
+                  {0x00, 0x00, 0x00, 0x00}},
+        /* +Fn */ {{0x42, 0x28, 0x00, 0x00},
+                   {0x42, 0x2a, 0x00, 0x00}},
+        /* +inf */ {{0x7f, 0x80, 0x00, 0x00},
+                    {0x7f, 0x80, 0x00, 0x00}},
+        /* QNaN */ {{0x7f, 0xff, 0xff, 0xff},
+                    {0x7f, 0xff, 0xff, 0xfe}},
+        /* SNaN */ {{0x7f, 0xbf, 0xff, 0xff},
+                    {0x7f, 0xbf, 0xff, 0xfd}},
+    },
+
+    /* M4 == 3: long */
+    {
+        /* -inf */ {{0xff, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+                    {0xff, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
+        /* -Fn */ {{0xc0, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+                   {0xc0, 0x46, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
+        /* -0 */ {{0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+                  {0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
+        /* +0 */ {{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+                  {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
+        /* +Fn */ {{0x40, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+                   {0x40, 0x47, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
+        /* +inf */ {{0x7f, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+                    {0x7f, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
+        /* QNaN */ {{0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+                    {0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}},
+        /* SNaN */ {{0x7f, 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+                    {0x7f, 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfd}},
+    },
+
+    /* M4 == 4: extended */
+    {
+        /* -inf */ {{0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+                    {0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
+        /* -Fn */ {{0xc0, 0x04, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+                   {0xc0, 0x04, 0x51, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
+        /* -0 */ {{0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+                  {0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
+        /* +0 */ {{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+                  {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
+        /* +Fn */ {{0x40, 0x04, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+                   {0x40, 0x04, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
+        /* +inf */ {{0x7f, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+                    {0x7f, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
+        /* QNaN */ {{0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+                    {0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}},
+        /* SNaN */ {{0x7f, 0xff, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+                    {0x7f, 0xff, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfd}},
+    },
+};
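+
+/*
+ * Where two members of a class can differ (the finite numbers and the
+ * NaN payloads), the second representative above is deliberately
+ * distinct, so a test can tell from the result which operand was
+ * transferred.
+ */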
+
+/*
+ * PoP (Principles of Operation) tables, as close to the original as
+ * possible.
+ *
+ * Notation: T(x) - the result is x; M(a,b) - the minimum or maximum of
+ * a and b; x* - x quieted (SNaN converted to the corresponding QNaN);
+ * Xi - the IEEE invalid operation exception is raised.
+ */
+struct signed_test {
+    int op;
+    int m6;
+    const char *m6_desc;
+    const char *table[N_SIGNED_CLASSES][N_SIGNED_CLASSES];
+} signed_tests[] = {
+    {
+        .op = VFMIN,
+        .m6 = 0,
+        .m6_desc = "IEEE MinNum",
+        .table = {
+            /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */
+            {/* -inf */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b*)"},
+            {/* -Fn */ "T(b)", "T(M(a,b))", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b*)"},
+            {/* -0 */ "T(b)", "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b*)"},
+            {/* +0 */ "T(b)", "T(b)", "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b*)"},
+            {/* +Fn */ "T(b)", "T(b)", "T(b)", "T(b)", "T(M(a,b))", "T(a)", "T(a)", "Xi: T(b*)"},
+            {/* +inf */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "T(a)", "Xi: T(b*)"},
+            {/* QNaN */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(b*)"},
+            {/* SNaN */ "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)"},
+        },
+    },
+    {
+        .op = VFMIN,
+        .m6 = 1,
+        .m6_desc = "JAVA Math.Min()",
+        .table = {
+            /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */
+            {/* -inf */ "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "Xi: T(b*)"},
+            {/* -Fn */ "T(b)", "T(M(a,b))", "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "Xi: T(b*)"},
+            {/* -0 */ "T(b)", "T(b)", "T(b)", "T(a)", "T(a)", "T(a)", "T(b)", "Xi: T(b*)"},
+            {/* +0 */ "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "T(a)", "T(b)", "Xi: T(b*)"},
+            {/* +Fn */ "T(b)", "T(b)", "T(b)", "T(b)", "T(M(a,b))", "T(a)", "T(b)", "Xi: T(b*)"},
+            {/* +inf */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(b*)"},
+            {/* QNaN */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b*)"},
+            {/* SNaN */ "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)"},
+        },
+    },
+    {
+        .op = VFMIN,
+        .m6 = 2,
+        .m6_desc = "C-style Min Macro",
+        .table = {
+            /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */
+            {/* -inf */ "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b)", "Xi: T(b)"},
+            {/* -Fn */ "T(b)", "T(M(a,b))", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b)", "Xi: T(b)"},
+            {/* -0 */ "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "T(a)", "Xi: T(b)", "Xi: T(b)"},
+            {/* +0 */ "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "T(a)", "Xi: T(b)", "Xi: T(b)"},
+            {/* +Fn */ "T(b)", "T(b)", "T(b)", "T(b)", "T(M(a,b))", "T(a)", "Xi: T(b)", "Xi: T(b)"},
+            {/* +inf */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(b)", "Xi: T(b)"},
+            {/* QNaN */ "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)"},
+            {/* SNaN */ "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)"},
+        },
+    },
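+    /*
+     * Note: a C-style min macro, (a < b ? a : b), compares false for any
+     * NaN operand, which is why the M6 == 2 entries above select operand
+     * b whenever a NaN is involved.
+     */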
+    {
+        .op = VFMIN,
+        .m6 = 3,
+        .m6_desc = "C++ algorithm.min()",
+        .table = {
+            /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */
+            {/* -inf */ "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)", "Xi: T(a)"},
+            {/* -Fn */ "T(b)", "T(M(a,b))", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)", "Xi: T(a)"},
+            {/* -0 */ "T(b)", "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)", "Xi: T(a)"},
+            {/* +0 */ "T(b)", "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)", "Xi: T(a)"},
+            {/* +Fn */ "T(b)", "T(b)", "T(b)", "T(b)", "T(M(a,b))", "T(a)", "Xi: T(a)", "Xi: T(a)"},
+            {/* +inf */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(a)", "Xi: T(a)"},
+            {/* QNaN */ "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)"},
+            {/* SNaN */ "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)"},
+        },
+    },
+    {
+        .op = VFMIN,
+        .m6 = 4,
+        .m6_desc = "fmin()",
+        .table = {
+            /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */
+            {/* -inf */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)"},
+            {/* -Fn */ "T(b)", "T(M(a,b))", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)"},
+            {/* -0 */ "T(b)", "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)"},
+            {/* +0 */ "T(b)", "T(b)", "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)"},
+            {/* +Fn */ "T(b)", "T(b)", "T(b)", "T(b)", "T(M(a,b))", "T(a)", "T(a)", "Xi: T(a)"},
+            {/* +inf */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "T(a)", "Xi: T(a)"},
+            {/* QNaN */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(a)"},
+            {/* SNaN */ "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(a)", "Xi: T(a)"},
+        },
+    },
+
+    {
+        .op = VFMAX,
+        .m6 = 0,
+        .m6_desc = "IEEE MaxNum",
+        .table = {
+            /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */
+            {/* -inf */ "T(a)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(b*)"},
+            {/* -Fn */ "T(a)", "T(M(a,b))", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(b*)"},
+            {/* -0 */ "T(a)", "T(a)", "T(a)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(b*)"},
+            {/* +0 */ "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "T(b)", "T(a)", "Xi: T(b*)"},
+            {/* +Fn */ "T(a)", "T(a)", "T(a)", "T(a)", "T(M(a,b))", "T(b)", "T(a)", "Xi: T(b*)"},
+            {/* +inf */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b*)"},
+            {/* QNaN */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(b*)"},
+            {/* SNaN */ "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)"},
+        },
+    },
+    {
+        .op = VFMAX,
+        .m6 = 1,
+        .m6_desc = "JAVA Math.Max()",
+        .table = {
+            /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */
+            {/* -inf */ "T(a)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(b*)"},
+            {/* -Fn */ "T(a)", "T(M(a,b))", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(b*)"},
+            {/* -0 */ "T(a)", "T(a)", "T(a)", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(b*)"},
+            {/* +0 */ "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "T(b)", "T(b)", "Xi: T(b*)"},
+            {/* +Fn */ "T(a)", "T(a)", "T(a)", "T(a)", "T(M(a,b))", "T(b)", "T(b)", "Xi: T(b*)"},
+            {/* +inf */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "Xi: T(b*)"},
+            {/* QNaN */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b*)"},
+            {/* SNaN */ "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)"},
+        },
+    },
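+    /*
+     * Java's Math.min()/Math.max() propagate NaNs: a quiet NaN operand
+     * is returned unchanged, hence the QNaN rows and columns in the
+     * M6 == 1 tables transfer the NaN operand itself.
+     */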
"T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(b)", "Xi: T(b)"}, + {/* -Fn */ "T(a)", "T(M(a,b))", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(b)", "Xi: T(b)"}, + {/* -0 */ "T(a)", "T(a)", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(b)", "Xi: T(b)"}, + {/* +0 */ "T(a)", "T(a)", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(b)", "Xi: T(b)"}, + {/* +Fn */ "T(a)", "T(a)", "T(a)", "T(a)", "T(M(a,b))", "T(b)", "Xi: T(b)", "Xi: T(b)"}, + {/* +inf */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "Xi: T(b)", "Xi: T(b)"}, + {/* QNaN */ "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)"}, + {/* SNaN */ "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)"}, + }, + }, + { + .op = VFMAX, + .m6 = 3, + .m6_desc = "C++ algorithm.max()", + .table = { + /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */ + {/* -inf */ "T(a)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(a)", "Xi: T(a)"}, + {/* -Fn */ "T(a)", "T(M(a,b))", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(a)", "Xi: T(a)"}, + {/* -0 */ "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "T(b)", "Xi: T(a)", "Xi: T(a)"}, + {/* +0 */ "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "T(b)", "Xi: T(a)", "Xi: T(a)"}, + {/* +Fn */ "T(a)", "T(a)", "T(a)", "T(a)", "T(M(a,b))", "T(b)", "Xi: T(a)", "Xi: T(a)"}, + {/* +inf */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)", "Xi: T(a)"}, + {/* QNaN */ "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)"}, + {/* SNaN */ "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)"}, + }, + }, + { + .op = VFMAX, + .m6 = 4, + .m6_desc = "fmax()", + .table = { + /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */ + {/* -inf */ "T(a)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(a)"}, + {/* -Fn */ "T(a)", "T(M(a,b))", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(a)"}, + {/* -0 */ "T(a)", "T(a)", "T(a)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(a)"}, + {/* +0 */ "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "T(b)", "T(a)", "Xi: T(a)"}, + {/* +Fn */ "T(a)", "T(a)", "T(a)", "T(a)", "T(M(a,b))", "T(b)", "T(a)", "Xi: T(a)"}, + {/* +inf */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)"}, + {/* QNaN */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(a)"}, + {/* SNaN */ "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(a)", "Xi: T(a)"}, + }, + }, +}; + +static void dump_v(FILE *f, const void *v, size_t n) +{ + for (int i = 0; i < n; i++) { + fprintf(f, "%02x", ((const unsigned char *)v)[i]); + } +} + +static int signed_test(struct signed_test *test, int m4, int m5, + const void *v1_exp, bool xi_exp, + const void *v2, const void *v3) +{ + size_t n = (m5 & 8) ? 
+static int signed_test(struct signed_test *test, int m4, int m5,
+                       const void *v1_exp, bool xi_exp,
+                       const void *v2, const void *v3)
+{
+    size_t n = (m5 & 8) ? float_sizes[m4 - 2] : 16;
+    char v1[16];
+    bool xi;
+
+    feclearexcept(FE_ALL_EXCEPT);
+    vfminmax(test->op, m4, m5, test->m6, v1, v2, v3);
+    xi = fetestexcept(FE_ALL_EXCEPT) == FE_INVALID;
+
+    if (memcmp(v1, v1_exp, n) != 0 || xi != xi_exp) {
+        fprintf(stderr, "[ FAILED ] %s ", test->m6_desc);
+        dump_v(stderr, v2, n);
+        fprintf(stderr, ", ");
+        dump_v(stderr, v3, n);
+        fprintf(stderr, ", %d, %d, %d: actual=", m4, m5, test->m6);
+        dump_v(stderr, v1, n);
+        fprintf(stderr, "/%d, expected=", (int)xi);
+        dump_v(stderr, v1_exp, n);
+        fprintf(stderr, "/%d\n", (int)xi_exp);
+        return 1;
+    }
+
+    return 0;
+}
+
+static void snan_to_qnan(char *v, int m4)
+{
+    size_t bit = 1 + e_bits[m4 - 2];
+    v[bit / 8] |= 1 << (7 - (bit % 8));
+}
+
+int main(void)
+{
+    int ret = 0;
+    size_t i;
+
+    for (i = 0; i < sizeof(signed_tests) / sizeof(signed_tests[0]); i++) {
+        struct signed_test *test = &signed_tests[i];
+        int m4;
+
+        for (m4 = 2; m4 <= 4; m4++) {
+            const unsigned char (*floats)[2][16] = signed_floats[m4 - 2];
+            size_t float_size = float_sizes[m4 - 2];
+            int m5;
+
+            for (m5 = 0; m5 <= 8; m5 += 8) {
+                char v1_exp[16], v2[16], v3[16];
+                bool xi_exp = false;
+                int pos = 0;
+                int i2;
+
+                for (i2 = 0; i2 < N_SIGNED_CLASSES * 2; i2++) {
+                    int i3;
+
+                    for (i3 = 0; i3 < N_SIGNED_CLASSES * 2; i3++) {
+                        const char *spec = test->table[i2 / 2][i3 / 2];
+
+                        memcpy(&v2[pos], floats[i2 / 2][i2 % 2], float_size);
+                        memcpy(&v3[pos], floats[i3 / 2][i3 % 2], float_size);
+                        if (strcmp(spec, "T(a)") == 0 ||
+                            strcmp(spec, "Xi: T(a)") == 0) {
+                            memcpy(&v1_exp[pos], &v2[pos], float_size);
+                        } else if (strcmp(spec, "T(b)") == 0 ||
+                                   strcmp(spec, "Xi: T(b)") == 0) {
+                            memcpy(&v1_exp[pos], &v3[pos], float_size);
+                        } else if (strcmp(spec, "Xi: T(a*)") == 0) {
+                            memcpy(&v1_exp[pos], &v2[pos], float_size);
+                            snan_to_qnan(&v1_exp[pos], m4);
+                        } else if (strcmp(spec, "Xi: T(b*)") == 0) {
+                            memcpy(&v1_exp[pos], &v3[pos], float_size);
+                            snan_to_qnan(&v1_exp[pos], m4);
+                        } else if (strcmp(spec, "T(M(a,b))") == 0) {
+                            /*
+                             * Comparing floats is risky, since the compiler
+                             * might generate the same instruction that we are
+                             * testing. Compare ints instead. This works,
+                             * because we get here only for +-Fn, and the
+                             * corresponding test values have identical
+                             * exponents.
+                             */
+                            int v2_int = *(int *)&v2[pos];
+                            int v3_int = *(int *)&v3[pos];
+
+                            if ((v2_int < v3_int) ==
+                                ((test->op == VFMIN) != (v2_int < 0))) {
+                                memcpy(&v1_exp[pos], &v2[pos], float_size);
+                            } else {
+                                memcpy(&v1_exp[pos], &v3[pos], float_size);
+                            }
+                        } else {
+                            fprintf(stderr, "Unexpected spec: %s\n", spec);
+                            return 1;
+                        }
+                        xi_exp |= spec[0] == 'X';
+                        pos += float_size;
+
+                        /*
+                         * Flush a full vector, or each element individually
+                         * when the single-element bit is set in M5.
+                         */
+                        if ((m5 & 8) || pos == 16) {
+                            ret |= signed_test(test, m4, m5,
+                                               v1_exp, xi_exp, v2, v3);
+                            pos = 0;
+                            xi_exp = false;
+                        }
+                    }
+                }
+
+                if (pos != 0) {
+                    ret |= signed_test(test, m4, m5, v1_exp, xi_exp, v2, v3);
+                }
+            }
+        }
+    }
+
+    return ret;
+}