82 files changed, 2138 insertions, 829 deletions
diff --git a/backends/hostmem.c b/backends/hostmem.c index 987f6f591e..81a72ce40b 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -20,6 +20,7 @@ #include "qom/object_interfaces.h" #include "qemu/mmap-alloc.h" #include "qemu/madvise.h" +#include "hw/qdev-core.h" #ifdef CONFIG_NUMA #include <numaif.h> @@ -237,7 +238,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value, uint64_t sz = memory_region_size(&backend->mr); if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads, - backend->prealloc_context, errp)) { + backend->prealloc_context, false, errp)) { return; } backend->prealloc = true; @@ -323,6 +324,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); void *ptr; uint64_t sz; + bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED); if (!bc->alloc) { return; @@ -402,7 +404,8 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) if (backend->prealloc && !qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz, backend->prealloc_threads, - backend->prealloc_context, errp)) { + backend->prealloc_context, + async, errp)) { return; } } diff --git a/block/blkio.c b/block/blkio.c index bc2f21784c..882e1c297b 100644 --- a/block/blkio.c +++ b/block/blkio.c @@ -89,6 +89,9 @@ static int blkio_resize_bounce_pool(BDRVBlkioState *s, int64_t bytes) /* Pad size to reduce frequency of resize calls */ bytes += 128 * 1024; + /* Align the pool size to avoid blkio_alloc_mem_region() failure */ + bytes = QEMU_ALIGN_UP(bytes, s->mem_region_alignment); + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { int ret; diff --git a/block/block-backend.c b/block/block-backend.c index 209eb07528..9c4de79e6b 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -44,7 +44,7 @@ struct BlockBackend { char *name; int refcnt; BdrvChild *root; - AioContext *ctx; + AioContext *ctx; /* access with atomic operations only */ DriveInfo *legacy_dinfo; /* null unless created by drive_new() */ QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */ QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */ @@ -2414,22 +2414,22 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason) } } +/** + * Return BB's current AioContext. Note that this context may change + * concurrently at any time, with one exception: If the BB has a root node + * attached, its context will only change through bdrv_try_change_aio_context(), + * which creates a drained section. Therefore, incrementing such a BB's + * in-flight counter will prevent its context from changing. 
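For context, the locking discipline this comment establishes reduces to a simple atomic access pattern; a minimal sketch using the real QEMU helpers (the surrounding rules — BQL and drained section on the writer side — are stated in the comment, not enforced by the calls themselves):

```c
/* Reader: any thread may sample the current context at any time. */
AioContext *ctx = qatomic_read(&blk->ctx);

/* Writer: only with a root node detached, or under the BQL inside
 * bdrv_try_change_aio_context()'s drained section. */
qatomic_set(&blk->ctx, new_context);
```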
+ */ AioContext *blk_get_aio_context(BlockBackend *blk) { - BlockDriverState *bs; IO_CODE(); if (!blk) { return qemu_get_aio_context(); } - bs = blk_bs(blk); - if (bs) { - AioContext *ctx = bdrv_get_aio_context(blk_bs(blk)); - assert(ctx == blk->ctx); - } - - return blk->ctx; + return qatomic_read(&blk->ctx); } int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, @@ -2442,7 +2442,7 @@ int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, GLOBAL_STATE_CODE(); if (!bs) { - blk->ctx = new_context; + qatomic_set(&blk->ctx, new_context); return 0; } @@ -2471,7 +2471,7 @@ static void blk_root_set_aio_ctx_commit(void *opaque) AioContext *new_context = s->new_ctx; ThrottleGroupMember *tgm = &blk->public.throttle_group_member; - blk->ctx = new_context; + qatomic_set(&blk->ctx, new_context); if (tgm->throttle_state) { throttle_group_detach_aio_context(tgm); throttle_group_attach_aio_context(tgm, new_context); diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index d4492b9460..c7b95e6068 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -183,6 +183,15 @@ Nios II CPU (since 8.2) The Nios II architecture is orphan. The ``nios2`` guest CPU support is deprecated and will be removed in a future version of QEMU. +``power5+`` and ``power7+`` CPU names (since 9.0) +''''''''''''''''''''''''''''''''''''''''''''''''' + +The character "+" in device (and thus also CPU) names is not allowed +in the QEMU object model anymore. ``power5+``, ``power5+_v2.1``, +``power7+`` and ``power7+_v2.1`` are currently still supported via +an alias, but for consistency these will get removed in a future +release, too. Use ``power5p_v2.1`` and ``power7p_v2.1`` instead. + System emulator machines ------------------------ diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 8ae59ea02b..d46b625e0e 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -253,6 +253,8 @@ Jumps/Labels | ``TCG_COND_GEU /* unsigned */`` | ``TCG_COND_LEU /* unsigned */`` | ``TCG_COND_GTU /* unsigned */`` + | ``TCG_COND_TSTEQ /* t1 & t2 == 0 */`` + | ``TCG_COND_TSTNE /* t1 & t2 != 0 */`` Arithmetic ---------- diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 227d83569f..738cb2ac36 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -37,6 +37,8 @@ #include "hw/virtio/virtio-blk-common.h" #include "qemu/coroutine.h" +static void virtio_blk_ioeventfd_attach(VirtIOBlock *s); + static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq, VirtIOBlockReq *req) { @@ -64,7 +66,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) iov_discard_undo(&req->inhdr_undo); iov_discard_undo(&req->outhdr_undo); virtqueue_push(req->vq, &req->elem, req->in_len); - if (s->ioeventfd_started && !s->ioeventfd_disabled) { + if (qemu_in_iothread()) { virtio_notify_irqfd(vdev, req->vq); } else { virtio_notify(vdev, req->vq); @@ -661,6 +663,9 @@ static void virtio_blk_zone_report_complete(void *opaque, int ret) int64_t zrp_size, n, j = 0; int64_t nz = data->zone_report_data.nr_zones; int8_t err_status = VIRTIO_BLK_S_OK; + struct virtio_blk_zone_report zrp_hdr = (struct virtio_blk_zone_report) { + .nr_zones = cpu_to_le64(nz), + }; trace_virtio_blk_zone_report_complete(vdev, req, nz, ret); if (ret) { @@ -668,9 +673,6 @@ static void virtio_blk_zone_report_complete(void *opaque, int ret) goto out; } - struct virtio_blk_zone_report zrp_hdr = (struct virtio_blk_zone_report) { - .nr_zones = cpu_to_le64(nz), - }; zrp_size = 
sizeof(struct virtio_blk_zone_report) + sizeof(struct virtio_blk_zone_descriptor) * nz; n = iov_from_buf(in_iov, in_num, 0, &zrp_hdr, sizeof(zrp_hdr)); @@ -898,13 +900,14 @@ static int virtio_blk_handle_zone_append(VirtIOBlockReq *req, int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS; int64_t len = iov_size(out_iov, out_num); + ZoneCmdData *data; trace_virtio_blk_handle_zone_append(vdev, req, offset >> BDRV_SECTOR_BITS); if (!check_zoned_request(s, offset, len, true, &err_status)) { goto out; } - ZoneCmdData *data = g_malloc(sizeof(ZoneCmdData)); + data = g_malloc(sizeof(ZoneCmdData)); data->req = req; data->in_iov = in_iov; data->in_num = in_num; @@ -1191,14 +1194,15 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running, { VirtIOBlock *s = opaque; uint16_t num_queues = s->conf.num_queues; + g_autofree VirtIOBlockReq **vq_rq = NULL; + VirtIOBlockReq *rq; if (!running) { return; } /* Split the device-wide s->rq request list into per-vq request lists */ - g_autofree VirtIOBlockReq **vq_rq = g_new0(VirtIOBlockReq *, num_queues); - VirtIOBlockReq *rq; + vq_rq = g_new0(VirtIOBlockReq *, num_queues); WITH_QEMU_LOCK_GUARD(&s->rq_lock) { rq = s->rq; @@ -1209,6 +1213,8 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running, VirtIOBlockReq *next = rq->next; uint16_t idx = virtio_get_queue_index(rq->vq); + /* Only num_queues vqs were created so vq_rq[idx] is within bounds */ + assert(idx < num_queues); rq->next = vq_rq[idx]; vq_rq[idx] = rq; rq = next; @@ -1485,68 +1491,6 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, return 0; } -static bool -validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list, - uint16_t num_queues, Error **errp) -{ - g_autofree unsigned long *vqs = bitmap_new(num_queues); - g_autoptr(GHashTable) iothreads = - g_hash_table_new(g_str_hash, g_str_equal); - - for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) { - const char *name = node->value->iothread; - uint16List *vq; - - if (!iothread_by_id(name)) { - error_setg(errp, "IOThread \"%s\" object does not exist", name); - return false; - } - - if (!g_hash_table_add(iothreads, (gpointer)name)) { - error_setg(errp, - "duplicate IOThread name \"%s\" in iothread-vq-mapping", - name); - return false; - } - - if (node != list) { - if (!!node->value->vqs != !!list->value->vqs) { - error_setg(errp, "either all items in iothread-vq-mapping " - "must have vqs or none of them must have it"); - return false; - } - } - - for (vq = node->value->vqs; vq; vq = vq->next) { - if (vq->value >= num_queues) { - error_setg(errp, "vq index %u for IOThread \"%s\" must be " - "less than num_queues %u in iothread-vq-mapping", - vq->value, name, num_queues); - return false; - } - - if (test_and_set_bit(vq->value, vqs)) { - error_setg(errp, "cannot assign vq %u to IOThread \"%s\" " - "because it is already assigned", vq->value, name); - return false; - } - } - } - - if (list->value->vqs) { - for (uint16_t i = 0; i < num_queues; i++) { - if (!test_bit(i, vqs)) { - error_setg(errp, - "missing vq %u IOThread assignment in iothread-vq-mapping", - i); - return false; - } - } - } - - return true; -} - static void virtio_resize_cb(void *opaque) { VirtIODevice *vdev = opaque; @@ -1613,15 +1557,95 @@ static const BlockDevOps virtio_block_ops = { .drained_end = virtio_blk_drained_end, }; -/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ -static void -apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, - 
AioContext **vq_aio_context, uint16_t num_queues) +static bool +validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list, + uint16_t num_queues, Error **errp) +{ + g_autofree unsigned long *vqs = bitmap_new(num_queues); + g_autoptr(GHashTable) iothreads = + g_hash_table_new(g_str_hash, g_str_equal); + + for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) { + const char *name = node->value->iothread; + uint16List *vq; + + if (!iothread_by_id(name)) { + error_setg(errp, "IOThread \"%s\" object does not exist", name); + return false; + } + + if (!g_hash_table_add(iothreads, (gpointer)name)) { + error_setg(errp, + "duplicate IOThread name \"%s\" in iothread-vq-mapping", + name); + return false; + } + + if (node != list) { + if (!!node->value->vqs != !!list->value->vqs) { + error_setg(errp, "either all items in iothread-vq-mapping " + "must have vqs or none of them must have it"); + return false; + } + } + + for (vq = node->value->vqs; vq; vq = vq->next) { + if (vq->value >= num_queues) { + error_setg(errp, "vq index %u for IOThread \"%s\" must be " + "less than num_queues %u in iothread-vq-mapping", + vq->value, name, num_queues); + return false; + } + + if (test_and_set_bit(vq->value, vqs)) { + error_setg(errp, "cannot assign vq %u to IOThread \"%s\" " + "because it is already assigned", vq->value, name); + return false; + } + } + } + + if (list->value->vqs) { + for (uint16_t i = 0; i < num_queues; i++) { + if (!test_bit(i, vqs)) { + error_setg(errp, + "missing vq %u IOThread assignment in iothread-vq-mapping", + i); + return false; + } + } + } + + return true; +} + +/** + * apply_iothread_vq_mapping: + * @iothread_vq_mapping_list: The mapping of virtqueues to IOThreads. + * @vq_aio_context: The array of AioContext pointers to fill in. + * @num_queues: The length of @vq_aio_context. + * @errp: If an error occurs, a pointer to the area to store the error. + * + * Fill in the AioContext for each virtqueue in the @vq_aio_context array given + * the iothread-vq-mapping parameter in @iothread_vq_mapping_list. + * + * Returns: %true on success, %false on failure. 
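Stripped of QEMU's QAPI list and bitmap helpers, the check above enforces that every vq index is in range, assigned at most once, and (when explicit lists are used) that none is left out. A stand-alone sketch with illustrative names:

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Illustrative reduction of validate_iothread_vq_mapping_list(): each vq
 * index must be < num_queues, assigned at most once, and none missing. */
static bool vqs_valid(const uint16_t *vqs, size_t n, uint16_t num_queues)
{
    bool seen[num_queues];  /* VLA for brevity; QEMU uses bitmap_new() */

    memset(seen, 0, sizeof(seen));
    for (size_t i = 0; i < n; i++) {
        if (vqs[i] >= num_queues || seen[vqs[i]]) {
            return false;   /* out of range, or assigned twice */
        }
        seen[vqs[i]] = true;
    }
    for (uint16_t q = 0; q < num_queues; q++) {
        if (!seen[q]) {
            return false;   /* vq left without an IOThread */
        }
    }
    return true;
}
```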
+ **/ +static bool apply_iothread_vq_mapping( + IOThreadVirtQueueMappingList *iothread_vq_mapping_list, + AioContext **vq_aio_context, + uint16_t num_queues, + Error **errp) { IOThreadVirtQueueMappingList *node; size_t num_iothreads = 0; size_t cur_iothread = 0; + if (!validate_iothread_vq_mapping_list(iothread_vq_mapping_list, + num_queues, errp)) { + return false; + } + for (node = iothread_vq_mapping_list; node; node = node->next) { num_iothreads++; } @@ -1638,6 +1662,7 @@ apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, /* Explicit vq:IOThread assignment */ for (vq = node->value->vqs; vq; vq = vq->next) { + assert(vq->value < num_queues); vq_aio_context[vq->value] = ctx; } } else { @@ -1650,6 +1675,8 @@ apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, cur_iothread++; } + + return true; } /* Context: BQL held */ @@ -1660,6 +1687,13 @@ static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp) BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + if (conf->iothread && conf->iothread_vq_mapping_list) { + error_setg(errp, + "iothread and iothread-vq-mapping properties cannot be set " + "at the same time"); + return false; + } + if (conf->iothread || conf->iothread_vq_mapping_list) { if (!k->set_guest_notifiers || !k->ioeventfd_assign) { error_setg(errp, @@ -1685,8 +1719,14 @@ static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp) s->vq_aio_context = g_new(AioContext *, conf->num_queues); if (conf->iothread_vq_mapping_list) { - apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, - conf->num_queues); + if (!apply_iothread_vq_mapping(conf->iothread_vq_mapping_list, + s->vq_aio_context, + conf->num_queues, + errp)) { + g_free(s->vq_aio_context); + s->vq_aio_context = NULL; + return false; + } } else if (conf->iothread) { AioContext *ctx = iothread_get_aio_context(conf->iothread); for (unsigned i = 0; i < conf->num_queues; i++) { @@ -1790,6 +1830,7 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) * Try to change the AioContext so that block jobs and other operations can * co-locate their activity in the same AioContext. If it fails, nevermind. */ + assert(nvqs > 0); /* enforced during ->realize() */ r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0], &local_err); if (r < 0) { @@ -1808,17 +1849,14 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) s->ioeventfd_started = true; smp_wmb(); /* paired with aio_notify_accept() on the read side */ - /* Get this show started by hooking up our callbacks */ - for (i = 0; i < nvqs; i++) { - VirtQueue *vq = virtio_get_queue(vdev, i); - AioContext *ctx = s->vq_aio_context[i]; - - /* Kick right away to begin processing requests already in vring */ - event_notifier_set(virtio_queue_get_host_notifier(vq)); - - if (!blk_in_drain(s->conf.conf.blk)) { - virtio_queue_aio_attach_host_notifier(vq, ctx); - } + /* + * Get this show started by hooking up our callbacks. If drained now, + * virtio_blk_drained_end() will do this later. + * Attaching the notifier also kicks the virtqueues, processing any requests + * they may already have. 
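When the mapping entries carry no explicit vqs lists, the function falls back to round-robin; the resulting assignment is equivalent to this stand-alone sketch (hypothetical function, AioContext treated as opaque):

```c
#include <stddef.h>
#include <stdint.h>

typedef struct AioContext AioContext;

/* Equivalent of the implicit arm of apply_iothread_vq_mapping():
 * virtqueue i is handled by IOThread i modulo the IOThread count. */
static void round_robin_assign(AioContext **vq_aio_context,
                               AioContext **iothread_ctx,
                               size_t num_iothreads, uint16_t num_queues)
{
    for (uint16_t i = 0; i < num_queues; i++) {
        vq_aio_context[i] = iothread_ctx[i % num_iothreads];
    }
}
```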
+ */ + if (!blk_in_drain(s->conf.conf.blk)) { + virtio_blk_ioeventfd_attach(s); } return 0; @@ -1924,6 +1962,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOBlock *s = VIRTIO_BLK(dev); VirtIOBlkConf *conf = &s->conf; + BlockDriverState *bs; Error *err = NULL; unsigned i; @@ -1969,7 +2008,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) return; } - BlockDriverState *bs = blk_bs(conf->conf.blk); + bs = blk_bs(conf->conf.blk); if (bs->bl.zoned != BLK_Z_NONE) { virtio_add_feature(&s->host_features, VIRTIO_BLK_F_ZONED); if (bs->bl.zoned == BLK_Z_HM) { @@ -1996,19 +2035,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) return; } - if (conf->iothread_vq_mapping_list) { - if (conf->iothread) { - error_setg(errp, "iothread and iothread-vq-mapping properties " - "cannot be set at the same time"); - return; - } - - if (!validate_iothread_vq_mapping_list(conf->iothread_vq_mapping_list, - conf->num_queues, errp)) { - return; - } - } - s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params, s->host_features); virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c index 0238365712..ade283335a 100644 --- a/hw/hyperv/hv-balloon.c +++ b/hw/hyperv/hv-balloon.c @@ -1477,22 +1477,7 @@ static void hv_balloon_ensure_mr(HvBalloon *balloon) balloon->mr = g_new0(MemoryRegion, 1); memory_region_init(balloon->mr, OBJECT(balloon), TYPE_HV_BALLOON, memory_region_size(hostmem_mr)); - - /* - * The VM can indicate an alignment up to 32 GiB. Memory device core can - * usually only handle/guarantee 1 GiB alignment. The user will have to - * specify a larger maxmem eventually. - * - * The memory device core will warn the user in case maxmem might have to be - * increased and will fail plugging the device if there is not sufficient - * space after alignment. - * - * TODO: we could do the alignment ourselves in a slightly bigger region. - * But this feels better, although the warning might be annoying. Maybe - * we can optimize that in the future (e.g., with such a device on the - * cmdline place/size the device memory region differently. - */ - balloon->mr->align = MAX(32 * GiB, memory_region_get_alignment(hostmem_mr)); + balloon->mr->align = memory_region_get_alignment(hostmem_mr); } static void hv_balloon_free_mr(HvBalloon *balloon) @@ -1654,6 +1639,25 @@ static MemoryRegion *hv_balloon_md_get_memory_region(MemoryDeviceState *md, return balloon->mr; } +static uint64_t hv_balloon_md_get_min_alignment(const MemoryDeviceState *md) +{ + /* + * The VM can indicate an alignment up to 32 GiB. Memory device core can + * usually only handle/guarantee 1 GiB alignment. The user will have to + * specify a larger maxmem eventually. + * + * The memory device core will warn the user in case maxmem might have to be + * increased and will fail plugging the device if there is not sufficient + * space after alignment. + * + * TODO: we could do the alignment ourselves in a slightly bigger region. + * But this feels better, although the warning might be annoying. Maybe + * we can optimize that in the future (e.g., with such a device on the + * cmdline place/size the device memory region differently. 
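Moving the 32 GiB requirement into a get_min_alignment() hook lets the memory device core combine it with the backend's natural alignment; conceptually, the effective alignment is simply the larger of the two (illustrative helper — the actual combination happens inside hw/mem/memory-device.c):

```c
#include <stdint.h>

#define GiB (1024ULL * 1024 * 1024)

/* Effective alignment = max(backend alignment, device minimum).
 * For hv-balloon the device minimum is 32 * GiB, per the hook above. */
static uint64_t effective_alignment(uint64_t backend_align,
                                    uint64_t device_min_align)
{
    return backend_align > device_min_align ? backend_align
                                            : device_min_align;
}
```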
+ */ + return 32 * GiB; +} + static void hv_balloon_md_fill_device_info(const MemoryDeviceState *md, MemoryDeviceInfo *info) { @@ -1766,5 +1770,6 @@ static void hv_balloon_class_init(ObjectClass *klass, void *data) mdc->get_memory_region = hv_balloon_md_get_memory_region; mdc->decide_memslots = hv_balloon_decide_memslots; mdc->get_memslots = hv_balloon_get_memslots; + mdc->get_min_alignment = hv_balloon_md_get_min_alignment; mdc->fill_device_info = hv_balloon_md_fill_device_info; } diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c index a1b1af26bc..e098585cda 100644 --- a/hw/mem/memory-device.c +++ b/hw/mem/memory-device.c @@ -374,6 +374,20 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms, goto out; } + /* + * We always want the memory region size to be multiples of the memory + * region alignment: for example, DIMMs with 1G+1byte size don't make + * any sense. Note that we don't check that the size is multiples + * of any additional alignment requirements the memory device might + * have when it comes to the address in physical address space. + */ + if (!QEMU_IS_ALIGNED(memory_region_size(mr), + memory_region_get_alignment(mr))) { + error_setg(errp, "backend memory size must be multiple of 0x%" + PRIx64, memory_region_get_alignment(mr)); + return; + } + if (legacy_align) { align = *legacy_align; } else { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 5aa1ed474a..0c0fb3f1b0 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -389,9 +389,9 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"), DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.0"), DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.1"), - DEFINE_SPAPR_CPU_CORE_TYPE("power5+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power5p_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), - DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power7p_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), DEFINE_SPAPR_CPU_CORE_TYPE("power8e_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8nvl_v1.0"), diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c index 34e3b89287..d607a5f9fb 100644 --- a/hw/scsi/lsi53c895a.c +++ b/hw/scsi/lsi53c895a.c @@ -1159,6 +1159,7 @@ again: lsi_script_scsi_interrupt(s, LSI_SIST0_UDC, 0); lsi_disconnect(s); trace_lsi_execute_script_stop(); + reentrancy_level--; return; } insn = read_dword(s, s->dsp); diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index 0a2eb11c56..9e40b0c920 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -120,17 +120,13 @@ static void scsi_device_for_each_req_async_bh(void *opaque) SCSIRequest *next; /* - * If the AioContext changed before this BH was called then reschedule into - * the new AioContext before accessing ->requests. This can happen when - * scsi_device_for_each_req_async() is called and then the AioContext is - * changed before BHs are run. + * The BB cannot have changed contexts between this BH being scheduled and + * now: BBs' AioContexts, when they have a node attached, can only be + * changed via bdrv_try_change_aio_context(), in a drained section. While + * we have the in-flight counter incremented, that drain must block. 
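The pattern this comment depends on, shown in isolation (QEMU APIs under their real names; my_bh() and schedule_work() are hypothetical):

```c
static void my_bh(void *opaque)
{
    BlockBackend *blk = opaque;

    /* The context cannot have changed since scheduling: with a root node
     * attached it only changes inside a drained section, and that drain
     * waits for the in-flight counter to drop to zero. */
    assert(blk_get_aio_context(blk) == qemu_get_current_aio_context());

    /* ... per-request work ... */

    blk_dec_in_flight(blk);    /* paired with the increment below */
}

static void schedule_work(BlockBackend *blk)
{
    blk_inc_in_flight(blk);    /* pins blk's AioContext until the BH runs */
    aio_bh_schedule_oneshot(blk_get_aio_context(blk), my_bh, blk);
}
```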
*/ ctx = blk_get_aio_context(s->conf.blk); - if (ctx != qemu_get_current_aio_context()) { - aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, - g_steal_pointer(&data)); - return; - } + assert(ctx == qemu_get_current_aio_context()); QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { data->fn(req, data->fn_opaque); @@ -138,11 +134,16 @@ static void scsi_device_for_each_req_async_bh(void *opaque) /* Drop the reference taken by scsi_device_for_each_req_async() */ object_unref(OBJECT(s)); + + /* Paired with blk_inc_in_flight() in scsi_device_for_each_req_async() */ + blk_dec_in_flight(s->conf.blk); } /* * Schedule @fn() to be invoked for each enqueued request in device @s. @fn() * runs in the AioContext that is executing the request. + * Keeps the BlockBackend's in-flight counter incremented until everything is + * done, so draining it will settle all scheduled @fn() calls. */ static void scsi_device_for_each_req_async(SCSIDevice *s, void (*fn)(SCSIRequest *, void *), @@ -163,6 +164,8 @@ static void scsi_device_for_each_req_async(SCSIDevice *s, */ object_ref(OBJECT(s)); + /* Paired with blk_dec_in_flight() in scsi_device_for_each_req_async_bh() */ + blk_inc_in_flight(s->conf.blk); aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk), scsi_device_for_each_req_async_bh, data); @@ -373,15 +376,13 @@ static void scsi_qdev_unrealize(DeviceState *qdev) /* handle legacy '-drive if=scsi,...' cmd line args */ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, - int unit, bool removable, int bootindex, - bool share_rw, - BlockdevOnError rerror, - BlockdevOnError werror, + int unit, bool removable, BlockConf *conf, const char *serial, Error **errp) { const char *driver; char *name; DeviceState *dev; + SCSIDevice *s; DriveInfo *dinfo; if (blk_is_sg(blk)) { @@ -399,11 +400,10 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, object_property_add_child(OBJECT(bus), name, OBJECT(dev)); g_free(name); + s = SCSI_DEVICE(dev); + s->conf = *conf; + qdev_prop_set_uint32(dev, "scsi-id", unit); - if (bootindex >= 0) { - object_property_set_int(OBJECT(dev), "bootindex", bootindex, - &error_abort); - } if (object_property_find(OBJECT(dev), "removable")) { qdev_prop_set_bit(dev, "removable", removable); } @@ -414,19 +414,12 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, object_unparent(OBJECT(dev)); return NULL; } - if (!object_property_set_bool(OBJECT(dev), "share-rw", share_rw, errp)) { - object_unparent(OBJECT(dev)); - return NULL; - } - - qdev_prop_set_enum(dev, "rerror", rerror); - qdev_prop_set_enum(dev, "werror", werror); if (!qdev_realize_and_unref(dev, &bus->qbus, errp)) { object_unparent(OBJECT(dev)); return NULL; } - return SCSI_DEVICE(dev); + return s; } void scsi_bus_legacy_handle_cmdline(SCSIBus *bus) @@ -434,6 +427,12 @@ void scsi_bus_legacy_handle_cmdline(SCSIBus *bus) Location loc; DriveInfo *dinfo; int unit; + BlockConf conf = { + .bootindex = -1, + .share_rw = false, + .rerror = BLOCKDEV_ON_ERROR_AUTO, + .werror = BLOCKDEV_ON_ERROR_AUTO, + }; loc_push_none(&loc); for (unit = 0; unit <= bus->info->max_target; unit++) { @@ -443,10 +442,7 @@ void scsi_bus_legacy_handle_cmdline(SCSIBus *bus) } qemu_opts_loc_restore(dinfo->opts); scsi_bus_legacy_add_drive(bus, blk_by_legacy_dinfo(dinfo), - unit, false, -1, false, - BLOCKDEV_ON_ERROR_AUTO, - BLOCKDEV_ON_ERROR_AUTO, - NULL, &error_fatal); + unit, false, &conf, NULL, &error_fatal); } loc_pop(&loc); } @@ -1728,11 +1724,20 @@ static void 
scsi_device_purge_one_req(SCSIRequest *req, void *opaque) scsi_req_cancel_async(req, NULL); } +/** + * Cancel all requests, and block until they are deleted. + */ void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) { scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL); + /* + * Await all the scsi_device_purge_one_req() calls scheduled by + * scsi_device_for_each_req_async(), and all I/O requests that were + * cancelled this way, but may still take a bit of time to settle. + */ blk_drain(sdev->conf.blk); + scsi_device_set_ua(sdev, sense); } diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 690aceec45..9f02ceea09 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -1149,6 +1149,7 @@ static void virtio_scsi_drained_begin(SCSIBus *bus) static void virtio_scsi_drained_end(SCSIBus *bus) { VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); VirtIODevice *vdev = VIRTIO_DEVICE(s); uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED + s->parent_obj.conf.num_queues; @@ -1166,7 +1167,11 @@ static void virtio_scsi_drained_end(SCSIBus *bus) for (uint32_t i = 0; i < total_queues; i++) { VirtQueue *vq = virtio_get_queue(vdev, i); - virtio_queue_aio_attach_host_notifier(vq, s->ctx); + if (vq == vs->event_vq) { + virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx); + } else { + virtio_queue_aio_attach_host_notifier(vq, s->ctx); + } } } diff --git a/hw/usb/dev-storage-classic.c b/hw/usb/dev-storage-classic.c index 84d19752b5..50a3ad6285 100644 --- a/hw/usb/dev-storage-classic.c +++ b/hw/usb/dev-storage-classic.c @@ -67,10 +67,7 @@ static void usb_msd_storage_realize(USBDevice *dev, Error **errp) scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(dev), &usb_msd_scsi_info_storage); scsi_dev = scsi_bus_legacy_add_drive(&s->bus, blk, 0, !!s->removable, - s->conf.bootindex, s->conf.share_rw, - s->conf.rerror, s->conf.werror, - dev->serial, - errp); + &s->conf, dev->serial, errp); blk_unref(blk); if (!scsi_dev) { return; diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 99ab989852..ffd119ebac 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -605,7 +605,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, int fd = memory_region_get_fd(&vmem->memdev->mr); Error *local_err = NULL; - if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) { + if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) { static bool warned; /* @@ -1248,7 +1248,7 @@ static int virtio_mem_prealloc_range_cb(VirtIOMEM *vmem, void *arg, int fd = memory_region_get_fd(&vmem->memdev->mr); Error *local_err = NULL; - if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) { + if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) { error_report_err(local_err); return -ENOMEM; } diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 7549094154..d229755eae 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -3556,6 +3556,17 @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) { + /* + * virtio_queue_aio_detach_host_notifier() can leave notifications disabled. + * Re-enable them. (And if detach has not been used before, notifications + * being enabled is still the default state while a notifier is attached; + * see virtio_queue_host_notifier_aio_poll_end(), which will always leave + * notifications enabled once the polling section is left.) 
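In isolation, the attach path described here has this shape (shown with the simpler no-poll variant; every call is an existing QEMU function):

```c
/* Re-enable guest notifications in case a poll_begin() disabled them
 * and no poll_end() ever ran while the notifier was detached. */
if (!virtio_queue_get_notification(vq)) {
    virtio_queue_set_notification(vq, 1);
}
aio_set_event_notifier(ctx, &vq->host_notifier,
                       virtio_queue_host_notifier_read, NULL, NULL);
/* Then kick once, so requests queued while detached are processed. */
event_notifier_set(&vq->host_notifier);
```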
+ */ + if (!virtio_queue_get_notification(vq)) { + virtio_queue_set_notification(vq, 1); + } + aio_set_event_notifier(ctx, &vq->host_notifier, virtio_queue_host_notifier_read, virtio_queue_host_notifier_aio_poll, @@ -3563,6 +3574,13 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) aio_set_event_notifier_poll(ctx, &vq->host_notifier, virtio_queue_host_notifier_aio_poll_begin, virtio_queue_host_notifier_aio_poll_end); + + /* + * We will have ignored notifications about new requests from the guest + * while no notifiers were attached, so "kick" the virt queue to process + * those requests now. + */ + event_notifier_set(&vq->host_notifier); } /* @@ -3573,14 +3591,38 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) */ void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx) { + /* See virtio_queue_aio_attach_host_notifier() */ + if (!virtio_queue_get_notification(vq)) { + virtio_queue_set_notification(vq, 1); + } + aio_set_event_notifier(ctx, &vq->host_notifier, virtio_queue_host_notifier_read, NULL, NULL); + + /* + * See virtio_queue_aio_attach_host_notifier(). + * Note that this may be unnecessary for the type of virtqueues this + * function is used for. Still, it will not hurt to have a quick look into + * whether we can/should process any of the virtqueue elements. + */ + event_notifier_set(&vq->host_notifier); } void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx) { aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL); + + /* + * aio_set_event_notifier_poll() does not guarantee whether io_poll_end() + * will run after io_poll_begin(), so by removing the notifier, we do not + * know whether virtio_queue_host_notifier_aio_poll_end() has run after a + * previous virtio_queue_host_notifier_aio_poll_begin(), i.e. whether + * notifications are enabled or disabled. It does not really matter anyway; + * we just removed the notifier, so we do not care about notifications until + * we potentially re-attach it. The attach_host_notifier functions will + * ensure that notifications are enabled again when they are needed. + */ } void virtio_queue_host_notifier_read(EventNotifier *n) diff --git a/include/block/aio.h b/include/block/aio.h index 5d0a114988..8378553eb9 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -480,9 +480,14 @@ void aio_set_event_notifier(AioContext *ctx, AioPollFn *io_poll, EventNotifierHandler *io_poll_ready); -/* Set polling begin/end callbacks for an event notifier that has already been +/* + * Set polling begin/end callbacks for an event notifier that has already been * registered with aio_set_event_notifier. Do nothing if the event notifier is * not registered. + * + * Note that if the io_poll_end() callback (or the entire notifier) is removed + * during polling, it will not be called, so an io_poll_begin() is not + * necessarily always followed by an io_poll_end(). */ void aio_set_event_notifier_poll(AioContext *ctx, EventNotifier *notifier, diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index d47536eadb..9228e96c87 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -1084,6 +1084,11 @@ typedef enum MachineInitPhase { PHASE_ACCEL_CREATED, /* + * Late backend objects have been created and initialized. + */ + PHASE_LATE_BACKENDS_CREATED, + + /* * machine_class->init has been called, thus creating any embedded * devices and validating machine properties. Devices created at * this time are considered to be cold-plugged. 
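Tying the pieces together, the new phase gates asynchronous preallocation roughly as follows (condensed from the hostmem.c and system/vl.c hunks in this diff):

```c
/* In host_memory_backend_memory_complete(): backends created during
 * startup may preallocate asynchronously, hot-plugged ones may not. */
bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED);

qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz,
                  backend->prealloc_threads, backend->prealloc_context,
                  async, errp);

/* In qemu_create_late_backends() / qemu_init(): settle all pending
 * preallocation, then advance the phase. */
if (!qemu_finish_async_prealloc_mem(&error_fatal)) {
    exit(1);
}
phase_advance(PHASE_LATE_BACKENDS_CREATED);
```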
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h index 10c4e8288d..c3d5e17e38 100644 --- a/include/hw/scsi/scsi.h +++ b/include/hw/scsi/scsi.h @@ -199,10 +199,7 @@ static inline SCSIBus *scsi_bus_from_device(SCSIDevice *d) } SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, - int unit, bool removable, int bootindex, - bool share_rw, - BlockdevOnError rerror, - BlockdevOnError werror, + int unit, bool removable, BlockConf *conf, const char *serial, Error **errp); void scsi_bus_set_ua(SCSIBus *bus, SCSISense sense); void scsi_bus_legacy_handle_cmdline(SCSIBus *bus); diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index 833a9a344f..5c14110c4b 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -55,7 +55,7 @@ struct VirtIOBlock { VirtIODevice parent_obj; BlockBackend *blk; QemuMutex rq_lock; - void *rq; /* protected by rq_lock */ + struct VirtIOBlockReq *rq; /* protected by rq_lock */ VirtIOBlkConf conf; unsigned short sector_mask; bool original_wce; diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index c9692cc314..7d359dabc4 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -680,6 +680,8 @@ typedef struct ThreadContext ThreadContext; * @area: start address of the are to preallocate * @sz: the size of the area to preallocate * @max_threads: maximum number of threads to use + * @tc: prealloc context threads pointer, NULL if not in use + * @async: request asynchronous preallocation, requires @tc * @errp: returns an error if this function fails * * Preallocate memory (populate/prefault page tables writable) for the virtual @@ -687,10 +689,24 @@ typedef struct ThreadContext ThreadContext; * each page in the area was faulted in writable at least once, for example, * after allocating file blocks for mapped files. * + * When setting @async, allocation might be performed asynchronously. + * qemu_finish_async_prealloc_mem() must be called to finish any asynchronous + * preallocation. + * * Return: true on success, else false setting @errp with error. */ bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, - ThreadContext *tc, Error **errp); + ThreadContext *tc, bool async, Error **errp); + +/** + * qemu_finish_async_prealloc_mem: + * @errp: returns an error if this function fails + * + * Finish all outstanding asynchronous memory preallocation. + * + * Return: true on success, else false setting @errp with error. + */ +bool qemu_finish_async_prealloc_mem(Error **errp); /** * qemu_get_pid_name: diff --git a/include/tcg/tcg-cond.h b/include/tcg/tcg-cond.h index 2a38a386d4..5cadbd6ff2 100644 --- a/include/tcg/tcg-cond.h +++ b/include/tcg/tcg-cond.h @@ -29,26 +29,34 @@ * Conditions. Note that these are laid out for easy manipulation by * the functions below: * bit 0 is used for inverting; - * bit 1 is signed, - * bit 2 is unsigned, - * bit 3 is used with bit 0 for swapping signed/unsigned. + * bit 1 is used for conditions that need swapping (signed/unsigned). + * bit 2 is used with bit 1 for swapping. + * bit 3 is used for unsigned conditions. */ typedef enum { /* non-signed */ TCG_COND_NEVER = 0 | 0 | 0 | 0, TCG_COND_ALWAYS = 0 | 0 | 0 | 1, + + /* equality */ TCG_COND_EQ = 8 | 0 | 0 | 0, TCG_COND_NE = 8 | 0 | 0 | 1, + + /* "test" i.e. 
and then compare vs 0 */ + TCG_COND_TSTEQ = 8 | 4 | 0 | 0, + TCG_COND_TSTNE = 8 | 4 | 0 | 1, + /* signed */ TCG_COND_LT = 0 | 0 | 2 | 0, TCG_COND_GE = 0 | 0 | 2 | 1, - TCG_COND_LE = 8 | 0 | 2 | 0, - TCG_COND_GT = 8 | 0 | 2 | 1, + TCG_COND_GT = 0 | 4 | 2 | 0, + TCG_COND_LE = 0 | 4 | 2 | 1, + /* unsigned */ - TCG_COND_LTU = 0 | 4 | 0 | 0, - TCG_COND_GEU = 0 | 4 | 0 | 1, - TCG_COND_LEU = 8 | 4 | 0 | 0, - TCG_COND_GTU = 8 | 4 | 0 | 1, + TCG_COND_LTU = 8 | 0 | 2 | 0, + TCG_COND_GEU = 8 | 0 | 2 | 1, + TCG_COND_GTU = 8 | 4 | 2 | 0, + TCG_COND_LEU = 8 | 4 | 2 | 1, } TCGCond; /* Invert the sense of the comparison. */ @@ -60,25 +68,49 @@ static inline TCGCond tcg_invert_cond(TCGCond c) /* Swap the operands in a comparison. */ static inline TCGCond tcg_swap_cond(TCGCond c) { - return c & 6 ? (TCGCond)(c ^ 9) : c; + return (TCGCond)(c ^ ((c & 2) << 1)); +} + +/* Must a comparison be considered signed? */ +static inline bool is_signed_cond(TCGCond c) +{ + return (c & (8 | 2)) == 2; +} + +/* Must a comparison be considered unsigned? */ +static inline bool is_unsigned_cond(TCGCond c) +{ + return (c & (8 | 2)) == (8 | 2); +} + +/* Must a comparison be considered a test? */ +static inline bool is_tst_cond(TCGCond c) +{ + return (c | 1) == TCG_COND_TSTNE; } /* Create an "unsigned" version of a "signed" comparison. */ static inline TCGCond tcg_unsigned_cond(TCGCond c) { - return c & 2 ? (TCGCond)(c ^ 6) : c; + return is_signed_cond(c) ? (TCGCond)(c + 8) : c; } /* Create a "signed" version of an "unsigned" comparison. */ static inline TCGCond tcg_signed_cond(TCGCond c) { - return c & 4 ? (TCGCond)(c ^ 6) : c; + return is_unsigned_cond(c) ? (TCGCond)(c - 8) : c; } -/* Must a comparison be considered unsigned? */ -static inline bool is_unsigned_cond(TCGCond c) +/* Create the eq/ne version of a tsteq/tstne comparison. */ +static inline TCGCond tcg_tst_eqne_cond(TCGCond c) +{ + return is_tst_cond(c) ? (TCGCond)(c - 4) : c; +} + +/* Create the lt/ge version of a tstne/tsteq comparison of the sign. */ +static inline TCGCond tcg_tst_ltge_cond(TCGCond c) { - return (c & 4) != 0; + return is_tst_cond(c) ? 
(TCGCond)(c ^ 0xf) : c; } /* @@ -92,7 +124,7 @@ static inline TCGCond tcg_high_cond(TCGCond c) case TCG_COND_LE: case TCG_COND_GEU: case TCG_COND_LEU: - return (TCGCond)(c ^ 8); + return (TCGCond)(c ^ (4 | 1)); default: return c; } diff --git a/meson.build b/meson.build index b5d6dc94a8..e5d6f2d057 100644 --- a/meson.build +++ b/meson.build @@ -2384,6 +2384,22 @@ else endif config_host_data.set('CONFIG_ASAN_IFACE_FIBER', have_asan_fiber) +have_inotify_init = cc.has_header_symbol('sys/inotify.h', 'inotify_init') +have_inotify_init1 = cc.has_header_symbol('sys/inotify.h', 'inotify_init1') +inotify = not_found +if (have_inotify_init or have_inotify_init1) and host_os == 'freebsd' + # libinotify-kqueue + inotify = cc.find_library('inotify') + if have_inotify_init + have_inotify_init = inotify.found() + endif + if have_inotify_init1 + have_inotify_init1 = inotify.found() + endif +endif +config_host_data.set('CONFIG_INOTIFY', have_inotify_init) +config_host_data.set('CONFIG_INOTIFY1', have_inotify_init1) + # has_header_symbol config_host_data.set('CONFIG_BLKZONED', cc.has_header_symbol('linux/blkzoned.h', 'BLKOPENZONE')) @@ -2400,10 +2416,6 @@ config_host_data.set('CONFIG_FIEMAP', config_host_data.set('CONFIG_GETRANDOM', cc.has_function('getrandom') and cc.has_header_symbol('sys/random.h', 'GRND_NONBLOCK')) -config_host_data.set('CONFIG_INOTIFY', - cc.has_header_symbol('sys/inotify.h', 'inotify_init')) -config_host_data.set('CONFIG_INOTIFY1', - cc.has_header_symbol('sys/inotify.h', 'inotify_init1')) config_host_data.set('CONFIG_PRCTL_PR_SET_TIMERSLACK', cc.has_header_symbol('sys/prctl.h', 'PR_SET_TIMERSLACK')) config_host_data.set('CONFIG_RTNETLINK', @@ -4407,6 +4419,9 @@ summary_info += {'libudev': libudev} summary_info += {'FUSE lseek': fuse_lseek.found()} summary_info += {'selinux': selinux} summary_info += {'libdw': libdw} +if host_os == 'freebsd' + summary_info += {'libinotify-kqueue': inotify} +endif summary(summary_info, bool_yn: true, section: 'Dependencies') if host_arch == 'unknown' diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c index 176b549473..f3488afeef 100644 --- a/qapi/qmp-dispatch.c +++ b/qapi/qmp-dispatch.c @@ -212,8 +212,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ * executing the command handler so that it can make progress if it * involves an AIO_WAIT_WHILE(). */ - aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self()); - qemu_coroutine_yield(); + aio_co_reschedule_self(qemu_get_aio_context()); } monitor_set_cur(qemu_coroutine_self(), cur_mon); @@ -227,9 +226,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ * Move back to iohandler_ctx so that nested event loops for * qemu_aio_context don't start new monitor commands. 
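The helper used in both qmp-dispatch hunks is a drop-in for the open-coded schedule-and-yield pair it replaces; both spellings are real QEMU coroutine APIs:

```c
/* new: */
aio_co_reschedule_self(iohandler_get_aio_context());

/* equivalent to the old: */
aio_co_schedule(iohandler_get_aio_context(), qemu_coroutine_self());
qemu_coroutine_yield();
```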
*/ - aio_co_schedule(iohandler_get_aio_context(), - qemu_coroutine_self()); - qemu_coroutine_yield(); + aio_co_reschedule_self(iohandler_get_aio_context()); } } else { /* diff --git a/qom/object.c b/qom/object.c index 654e1afaf2..2c4c64d2b6 100644 --- a/qom/object.c +++ b/qom/object.c @@ -160,10 +160,6 @@ static bool type_name_is_valid(const char *name) /* Allow some legacy names with '+' in it for compatibility reasons */ if (name[plen] == '+') { - if (plen == 6 && g_str_has_prefix(name, "power")) { - /* Allow "power5+" and "power7+" CPU names*/ - return true; - } if (plen >= 17 && g_str_has_prefix(name, "Sun-UltraSparc-I")) { /* Allow "Sun-UltraSparc-IV+" and "Sun-UltraSparc-IIIi+" */ return true; diff --git a/system/vl.c b/system/vl.c index bb959cbc44..2a0bd08ff1 100644 --- a/system/vl.c +++ b/system/vl.c @@ -2013,6 +2013,14 @@ static void qemu_create_late_backends(void) object_option_foreach_add(object_create_late); + /* + * Wait for any outstanding memory prealloc from created memory + * backends to complete. + */ + if (!qemu_finish_async_prealloc_mem(&error_fatal)) { + exit(1); + } + if (tpm_init() < 0) { exit(1); } @@ -3699,6 +3707,7 @@ void qemu_init(int argc, char **argv) * over memory-backend-file objects). */ qemu_create_late_backends(); + phase_advance(PHASE_LATE_BACKENDS_CREATED); /* * Note: creates a QOM object, must run only after global and diff --git a/target/alpha/translate.c b/target/alpha/translate.c index 4b464f8651..882cf6cea0 100644 --- a/target/alpha/translate.c +++ b/target/alpha/translate.c @@ -453,13 +453,13 @@ static DisasJumpType gen_bdirect(DisasContext *ctx, int ra, int32_t disp) } static DisasJumpType gen_bcond_internal(DisasContext *ctx, TCGCond cond, - TCGv cmp, int32_t disp) + TCGv cmp, uint64_t imm, int32_t disp) { uint64_t dest = ctx->base.pc_next + (disp << 2); TCGLabel *lab_true = gen_new_label(); if (use_goto_tb(ctx, dest)) { - tcg_gen_brcondi_i64(cond, cmp, 0, lab_true); + tcg_gen_brcondi_i64(cond, cmp, imm, lab_true); tcg_gen_goto_tb(0); tcg_gen_movi_i64(cpu_pc, ctx->base.pc_next); @@ -472,81 +472,71 @@ static DisasJumpType gen_bcond_internal(DisasContext *ctx, TCGCond cond, return DISAS_NORETURN; } else { - TCGv_i64 z = load_zero(ctx); + TCGv_i64 i = tcg_constant_i64(imm); TCGv_i64 d = tcg_constant_i64(dest); TCGv_i64 p = tcg_constant_i64(ctx->base.pc_next); - tcg_gen_movcond_i64(cond, cpu_pc, cmp, z, d, p); + tcg_gen_movcond_i64(cond, cpu_pc, cmp, i, d, p); return DISAS_PC_UPDATED; } } static DisasJumpType gen_bcond(DisasContext *ctx, TCGCond cond, int ra, - int32_t disp, int mask) + int32_t disp) { - if (mask) { - TCGv tmp = tcg_temp_new(); - DisasJumpType ret; - - tcg_gen_andi_i64(tmp, load_gpr(ctx, ra), 1); - ret = gen_bcond_internal(ctx, cond, tmp, disp); - return ret; - } - return gen_bcond_internal(ctx, cond, load_gpr(ctx, ra), disp); + return gen_bcond_internal(ctx, cond, load_gpr(ctx, ra), + is_tst_cond(cond), disp); } /* Fold -0.0 for comparison with COND. */ -static void gen_fold_mzero(TCGCond cond, TCGv dest, TCGv src) +static TCGv_i64 gen_fold_mzero(TCGCond *pcond, uint64_t *pimm, TCGv_i64 src) { - uint64_t mzero = 1ull << 63; + TCGv_i64 tmp; - switch (cond) { + *pimm = 0; + switch (*pcond) { case TCG_COND_LE: case TCG_COND_GT: /* For <= or >, the -0.0 value directly compares the way we want. */ - tcg_gen_mov_i64(dest, src); - break; + return src; case TCG_COND_EQ: case TCG_COND_NE: - /* For == or !=, we can simply mask off the sign bit and compare. 
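The == / != arm being rewritten here trades an explicit AND plus compare for one of the new test conditions; the two sequences below are equivalent (TCG ops under their real names; tmp and lab are illustrative):

```c
/* before: mask off the sign bit, then compare the result with zero */
tcg_gen_andi_i64(tmp, src, INT64_MAX);
tcg_gen_brcondi_i64(TCG_COND_EQ, tmp, 0, lab);

/* after: a single test-condition branch, no temporary needed */
tcg_gen_brcondi_i64(TCG_COND_TSTEQ, src, INT64_MAX, lab);
```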
*/ - tcg_gen_andi_i64(dest, src, mzero - 1); - break; + /* For == or !=, we can compare without the sign bit. */ + *pcond = *pcond == TCG_COND_EQ ? TCG_COND_TSTEQ : TCG_COND_TSTNE; + *pimm = INT64_MAX; + return src; case TCG_COND_GE: case TCG_COND_LT: /* For >= or <, map -0.0 to +0.0. */ - tcg_gen_movcond_i64(TCG_COND_NE, dest, src, tcg_constant_i64(mzero), - src, tcg_constant_i64(0)); - break; + tmp = tcg_temp_new_i64(); + tcg_gen_movcond_i64(TCG_COND_EQ, tmp, + src, tcg_constant_i64(INT64_MIN), + tcg_constant_i64(0), src); + return tmp; default: - abort(); + g_assert_not_reached(); } } static DisasJumpType gen_fbcond(DisasContext *ctx, TCGCond cond, int ra, int32_t disp) { - TCGv cmp_tmp = tcg_temp_new(); - DisasJumpType ret; - - gen_fold_mzero(cond, cmp_tmp, load_fpr(ctx, ra)); - ret = gen_bcond_internal(ctx, cond, cmp_tmp, disp); - return ret; + uint64_t imm; + TCGv_i64 tmp = gen_fold_mzero(&cond, &imm, load_fpr(ctx, ra)); + return gen_bcond_internal(ctx, cond, tmp, imm, disp); } static void gen_fcmov(DisasContext *ctx, TCGCond cond, int ra, int rb, int rc) { - TCGv_i64 va, vb, z; - - z = load_zero(ctx); - vb = load_fpr(ctx, rb); - va = tcg_temp_new(); - gen_fold_mzero(cond, va, load_fpr(ctx, ra)); - - tcg_gen_movcond_i64(cond, dest_fpr(ctx, rc), va, z, vb, load_fpr(ctx, rc)); + uint64_t imm; + TCGv_i64 tmp = gen_fold_mzero(&cond, &imm, load_fpr(ctx, ra)); + tcg_gen_movcond_i64(cond, dest_fpr(ctx, rc), + tmp, tcg_constant_i64(imm), + load_fpr(ctx, rb), load_fpr(ctx, rc)); } #define QUAL_RM_N 0x080 /* Round mode nearest even */ @@ -1683,16 +1673,12 @@ static DisasJumpType translate_one(DisasContext *ctx, uint32_t insn) break; case 0x14: /* CMOVLBS */ - tmp = tcg_temp_new(); - tcg_gen_andi_i64(tmp, va, 1); - tcg_gen_movcond_i64(TCG_COND_NE, vc, tmp, load_zero(ctx), + tcg_gen_movcond_i64(TCG_COND_TSTNE, vc, va, tcg_constant_i64(1), vb, load_gpr(ctx, rc)); break; case 0x16: /* CMOVLBC */ - tmp = tcg_temp_new(); - tcg_gen_andi_i64(tmp, va, 1); - tcg_gen_movcond_i64(TCG_COND_EQ, vc, tmp, load_zero(ctx), + tcg_gen_movcond_i64(TCG_COND_TSTEQ, vc, va, tcg_constant_i64(1), vb, load_gpr(ctx, rc)); break; case 0x20: @@ -2827,35 +2813,35 @@ static DisasJumpType translate_one(DisasContext *ctx, uint32_t insn) break; case 0x38: /* BLBC */ - ret = gen_bcond(ctx, TCG_COND_EQ, ra, disp21, 1); + ret = gen_bcond(ctx, TCG_COND_TSTEQ, ra, disp21); break; case 0x39: /* BEQ */ - ret = gen_bcond(ctx, TCG_COND_EQ, ra, disp21, 0); + ret = gen_bcond(ctx, TCG_COND_EQ, ra, disp21); break; case 0x3A: /* BLT */ - ret = gen_bcond(ctx, TCG_COND_LT, ra, disp21, 0); + ret = gen_bcond(ctx, TCG_COND_LT, ra, disp21); break; case 0x3B: /* BLE */ - ret = gen_bcond(ctx, TCG_COND_LE, ra, disp21, 0); + ret = gen_bcond(ctx, TCG_COND_LE, ra, disp21); break; case 0x3C: /* BLBS */ - ret = gen_bcond(ctx, TCG_COND_NE, ra, disp21, 1); + ret = gen_bcond(ctx, TCG_COND_TSTNE, ra, disp21); break; case 0x3D: /* BNE */ - ret = gen_bcond(ctx, TCG_COND_NE, ra, disp21, 0); + ret = gen_bcond(ctx, TCG_COND_NE, ra, disp21); break; case 0x3E: /* BGE */ - ret = gen_bcond(ctx, TCG_COND_GE, ra, disp21, 0); + ret = gen_bcond(ctx, TCG_COND_GE, ra, disp21); break; case 0x3F: /* BGT */ - ret = gen_bcond(ctx, TCG_COND_GT, ra, disp21, 0); + ret = gen_bcond(ctx, TCG_COND_GT, ra, disp21); break; invalid_opc: ret = gen_invalid(ctx); diff --git a/target/m68k/translate.c b/target/m68k/translate.c index f886190f88..d7d5ff4300 100644 --- a/target/m68k/translate.c +++ b/target/m68k/translate.c @@ -5129,46 +5129,44 @@ undef: static void gen_fcc_cond(DisasCompare *c, 
DisasContext *s, int cond) { TCGv fpsr; + int imm = 0; - c->v2 = tcg_constant_i32(0); /* TODO: Raise BSUN exception. */ fpsr = tcg_temp_new(); gen_load_fcr(s, fpsr, M68K_FPSR); + c->v1 = fpsr; + switch (cond) { case 0: /* False */ case 16: /* Signaling False */ - c->v1 = c->v2; c->tcond = TCG_COND_NEVER; break; case 1: /* EQual Z */ case 17: /* Signaling EQual Z */ - c->v1 = tcg_temp_new(); - tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_Z); - c->tcond = TCG_COND_NE; + imm = FPSR_CC_Z; + c->tcond = TCG_COND_TSTNE; break; case 2: /* Ordered Greater Than !(A || Z || N) */ case 18: /* Greater Than !(A || Z || N) */ - c->v1 = tcg_temp_new(); - tcg_gen_andi_i32(c->v1, fpsr, - FPSR_CC_A | FPSR_CC_Z | FPSR_CC_N); - c->tcond = TCG_COND_EQ; + imm = FPSR_CC_A | FPSR_CC_Z | FPSR_CC_N; + c->tcond = TCG_COND_TSTEQ; break; case 3: /* Ordered Greater than or Equal Z || !(A || N) */ case 19: /* Greater than or Equal Z || !(A || N) */ c->v1 = tcg_temp_new(); tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_A); tcg_gen_shli_i32(c->v1, c->v1, ctz32(FPSR_CC_N) - ctz32(FPSR_CC_A)); - tcg_gen_andi_i32(fpsr, fpsr, FPSR_CC_Z | FPSR_CC_N); tcg_gen_or_i32(c->v1, c->v1, fpsr); tcg_gen_xori_i32(c->v1, c->v1, FPSR_CC_N); - c->tcond = TCG_COND_NE; + imm = FPSR_CC_Z | FPSR_CC_N; + c->tcond = TCG_COND_TSTNE; break; case 4: /* Ordered Less Than !(!N || A || Z); */ case 20: /* Less Than !(!N || A || Z); */ c->v1 = tcg_temp_new(); tcg_gen_xori_i32(c->v1, fpsr, FPSR_CC_N); - tcg_gen_andi_i32(c->v1, c->v1, FPSR_CC_N | FPSR_CC_A | FPSR_CC_Z); - c->tcond = TCG_COND_EQ; + imm = FPSR_CC_N | FPSR_CC_A | FPSR_CC_Z; + c->tcond = TCG_COND_TSTEQ; break; case 5: /* Ordered Less than or Equal Z || (N && !A) */ case 21: /* Less than or Equal Z || (N && !A) */ @@ -5176,49 +5174,45 @@ static void gen_fcc_cond(DisasCompare *c, DisasContext *s, int cond) tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_A); tcg_gen_shli_i32(c->v1, c->v1, ctz32(FPSR_CC_N) - ctz32(FPSR_CC_A)); tcg_gen_andc_i32(c->v1, fpsr, c->v1); - tcg_gen_andi_i32(c->v1, c->v1, FPSR_CC_Z | FPSR_CC_N); - c->tcond = TCG_COND_NE; + imm = FPSR_CC_Z | FPSR_CC_N; + c->tcond = TCG_COND_TSTNE; break; case 6: /* Ordered Greater or Less than !(A || Z) */ case 22: /* Greater or Less than !(A || Z) */ - c->v1 = tcg_temp_new(); - tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_A | FPSR_CC_Z); - c->tcond = TCG_COND_EQ; + imm = FPSR_CC_A | FPSR_CC_Z; + c->tcond = TCG_COND_TSTEQ; break; case 7: /* Ordered !A */ case 23: /* Greater, Less or Equal !A */ - c->v1 = tcg_temp_new(); - tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_A); - c->tcond = TCG_COND_EQ; + imm = FPSR_CC_A; + c->tcond = TCG_COND_TSTEQ; break; case 8: /* Unordered A */ case 24: /* Not Greater, Less or Equal A */ - c->v1 = tcg_temp_new(); - tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_A); - c->tcond = TCG_COND_NE; + imm = FPSR_CC_A; + c->tcond = TCG_COND_TSTNE; break; case 9: /* Unordered or Equal A || Z */ case 25: /* Not Greater or Less then A || Z */ - c->v1 = tcg_temp_new(); - tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_A | FPSR_CC_Z); - c->tcond = TCG_COND_NE; + imm = FPSR_CC_A | FPSR_CC_Z; + c->tcond = TCG_COND_TSTNE; break; case 10: /* Unordered or Greater Than A || !(N || Z)) */ case 26: /* Not Less or Equal A || !(N || Z)) */ c->v1 = tcg_temp_new(); tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_Z); tcg_gen_shli_i32(c->v1, c->v1, ctz32(FPSR_CC_N) - ctz32(FPSR_CC_Z)); - tcg_gen_andi_i32(fpsr, fpsr, FPSR_CC_A | FPSR_CC_N); tcg_gen_or_i32(c->v1, c->v1, fpsr); tcg_gen_xori_i32(c->v1, c->v1, FPSR_CC_N); - c->tcond = TCG_COND_NE; + imm = FPSR_CC_A | FPSR_CC_N; + c->tcond = TCG_COND_TSTNE; 
break; case 11: /* Unordered or Greater or Equal A || Z || !N */ case 27: /* Not Less Than A || Z || !N */ c->v1 = tcg_temp_new(); - tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_A | FPSR_CC_Z | FPSR_CC_N); - tcg_gen_xori_i32(c->v1, c->v1, FPSR_CC_N); - c->tcond = TCG_COND_NE; + tcg_gen_xori_i32(c->v1, fpsr, FPSR_CC_N); + imm = FPSR_CC_A | FPSR_CC_Z | FPSR_CC_N; + c->tcond = TCG_COND_TSTNE; break; case 12: /* Unordered or Less Than A || (N && !Z) */ case 28: /* Not Greater than or Equal A || (N && !Z) */ @@ -5226,27 +5220,25 @@ static void gen_fcc_cond(DisasCompare *c, DisasContext *s, int cond) tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_Z); tcg_gen_shli_i32(c->v1, c->v1, ctz32(FPSR_CC_N) - ctz32(FPSR_CC_Z)); tcg_gen_andc_i32(c->v1, fpsr, c->v1); - tcg_gen_andi_i32(c->v1, c->v1, FPSR_CC_A | FPSR_CC_N); - c->tcond = TCG_COND_NE; + imm = FPSR_CC_A | FPSR_CC_N; + c->tcond = TCG_COND_TSTNE; break; case 13: /* Unordered or Less or Equal A || Z || N */ case 29: /* Not Greater Than A || Z || N */ - c->v1 = tcg_temp_new(); - tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_A | FPSR_CC_Z | FPSR_CC_N); - c->tcond = TCG_COND_NE; + imm = FPSR_CC_A | FPSR_CC_Z | FPSR_CC_N; + c->tcond = TCG_COND_TSTNE; break; case 14: /* Not Equal !Z */ case 30: /* Signaling Not Equal !Z */ - c->v1 = tcg_temp_new(); - tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_Z); - c->tcond = TCG_COND_EQ; + imm = FPSR_CC_Z; + c->tcond = TCG_COND_TSTEQ; break; case 15: /* True */ case 31: /* Signaling True */ - c->v1 = c->v2; c->tcond = TCG_COND_ALWAYS; break; } + c->v2 = tcg_constant_i32(imm); } static void gen_fjmpcc(DisasContext *s, int cond, TCGLabel *l1) diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c index 7dbb47de64..36e465b390 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -716,11 +716,11 @@ "PowerPC 970MP v1.0") POWERPC_DEF("970mp_v1.1", CPU_POWERPC_970MP_v11, 970, "PowerPC 970MP v1.1") - POWERPC_DEF("power5+_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, + POWERPC_DEF("power5p_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, "POWER5+ v2.1") POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, "POWER7 v2.3") - POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, + POWERPC_DEF("power7p_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, "POWER7+ v2.1") POWERPC_DEF("power8e_v2.1", CPU_POWERPC_POWER8E_v21, POWER8, "POWER8E v2.1") @@ -902,10 +902,12 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "970", "970_v2.2" }, { "970fx", "970fx_v3.1" }, { "970mp", "970mp_v1.1" }, - { "power5+", "power5+_v2.1" }, + { "power5+", "power5p_v2.1" }, + { "power5+_v2.1", "power5p_v2.1" }, { "power5gs", "power5+_v2.1" }, { "power7", "power7_v2.3" }, - { "power7+", "power7+_v2.1" }, + { "power7+", "power7p_v2.1" }, + { "power7+_v2.1", "power7p_v2.1" }, { "power8e", "power8e_v2.1" }, { "power8", "power8_v2.0" }, { "power8nvl", "power8nvl_v1.0" }, diff --git a/target/s390x/cpu-dump.c b/target/s390x/cpu-dump.c index ffa9e94d84..69cc9f7746 100644 --- a/target/s390x/cpu-dump.c +++ b/target/s390x/cpu-dump.c @@ -27,8 +27,7 @@ void s390_cpu_dump_state(CPUState *cs, FILE *f, int flags) { - S390CPU *cpu = S390_CPU(cs); - CPUS390XState *env = &cpu->env; + CPUS390XState *env = cpu_env(cs); int i; qemu_fprintf(f, "PSW=mask %016" PRIx64 " addr %016" PRIx64, diff --git a/target/s390x/gdbstub.c b/target/s390x/gdbstub.c index 6fbfd41bc8..f02fa316e5 100644 --- a/target/s390x/gdbstub.c +++ b/target/s390x/gdbstub.c @@ -30,8 +30,7 @@ int s390_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) { - S390CPU *cpu = S390_CPU(cs); - CPUS390XState *env = &cpu->env; + 
CPUS390XState *env = cpu_env(cs); switch (n) { case S390_PSWM_REGNUM: @@ -46,8 +45,7 @@ int s390_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) int s390_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) { - S390CPU *cpu = S390_CPU(cs); - CPUS390XState *env = &cpu->env; + CPUS390XState *env = cpu_env(cs); target_ulong tmpl = ldtul_p(mem_buf); switch (n) { diff --git a/target/s390x/helper.c b/target/s390x/helper.c index d76c06381b..00d5d403f3 100644 --- a/target/s390x/helper.c +++ b/target/s390x/helper.c @@ -139,8 +139,7 @@ void do_restart_interrupt(CPUS390XState *env) void s390_cpu_recompute_watchpoints(CPUState *cs) { const int wp_flags = BP_CPU | BP_MEM_WRITE | BP_STOP_BEFORE_ACCESS; - S390CPU *cpu = S390_CPU(cs); - CPUS390XState *env = &cpu->env; + CPUS390XState *env = cpu_env(cs); /* We are called when the watchpoints have changed. First remove them all. */ diff --git a/target/s390x/helper.h b/target/s390x/helper.h index 05102578fc..cc1c20e9e3 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -88,7 +88,10 @@ DEF_HELPER_FLAGS_3(tcxb, TCG_CALL_NO_RWG_SE, i32, env, i128, i64) DEF_HELPER_FLAGS_2(sqeb, TCG_CALL_NO_WG, i64, env, i64) DEF_HELPER_FLAGS_2(sqdb, TCG_CALL_NO_WG, i64, env, i64) DEF_HELPER_FLAGS_2(sqxb, TCG_CALL_NO_WG, i128, env, i128) +DEF_HELPER_3(cvb, void, env, i32, i64) +DEF_HELPER_FLAGS_2(cvbg, TCG_CALL_NO_WG, i64, env, i128) DEF_HELPER_FLAGS_1(cvd, TCG_CALL_NO_RWG_SE, i64, s32) +DEF_HELPER_FLAGS_1(cvdg, TCG_CALL_NO_RWG_SE, i128, s64) DEF_HELPER_FLAGS_4(pack, TCG_CALL_NO_WG, void, env, i32, i64, i64) DEF_HELPER_FLAGS_4(pka, TCG_CALL_NO_WG, void, env, i64, i64, i32) DEF_HELPER_FLAGS_4(pku, TCG_CALL_NO_WG, void, env, i64, i64, i32) diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 888d6c1a1c..4ce809c5d4 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -474,8 +474,7 @@ static int can_sync_regs(CPUState *cs, int regs) int kvm_arch_put_registers(CPUState *cs, int level) { - S390CPU *cpu = S390_CPU(cs); - CPUS390XState *env = &cpu->env; + CPUS390XState *env = cpu_env(cs); struct kvm_fpu fpu = {}; int r; int i; @@ -601,8 +600,7 @@ int kvm_arch_put_registers(CPUState *cs, int level) int kvm_arch_get_registers(CPUState *cs) { - S390CPU *cpu = S390_CPU(cs); - CPUS390XState *env = &cpu->env; + CPUS390XState *env = cpu_env(cs); struct kvm_fpu fpu; int i, r; diff --git a/target/s390x/tcg/excp_helper.c b/target/s390x/tcg/excp_helper.c index b875bf14e5..f1c33f7967 100644 --- a/target/s390x/tcg/excp_helper.c +++ b/target/s390x/tcg/excp_helper.c @@ -90,10 +90,7 @@ void HELPER(data_exception)(CPUS390XState *env, uint32_t dxc) static G_NORETURN void do_unaligned_access(CPUState *cs, uintptr_t retaddr) { - S390CPU *cpu = S390_CPU(cs); - CPUS390XState *env = &cpu->env; - - tcg_s390_program_interrupt(env, PGM_SPECIFICATION, retaddr); + tcg_s390_program_interrupt(cpu_env(cs), PGM_SPECIFICATION, retaddr); } #if defined(CONFIG_USER_ONLY) @@ -146,8 +143,7 @@ bool s390_cpu_tlb_fill(CPUState *cs, vaddr address, int size, MMUAccessType access_type, int mmu_idx, bool probe, uintptr_t retaddr) { - S390CPU *cpu = S390_CPU(cs); - CPUS390XState *env = &cpu->env; + CPUS390XState *env = cpu_env(cs); target_ulong vaddr, raddr; uint64_t asc, tec; int prot, excp; @@ -600,8 +596,7 @@ bool s390_cpu_exec_interrupt(CPUState *cs, int interrupt_request) void s390x_cpu_debug_excp_handler(CPUState *cs) { - S390CPU *cpu = S390_CPU(cs); - CPUS390XState *env = &cpu->env; + CPUS390XState *env = cpu_env(cs); CPUWatchpoint *wp_hit = 
cs->watchpoint_hit; if (wp_hit && wp_hit->flags & BP_CPU) { diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc index 2f07f39d9c..e7d61cdec2 100644 --- a/target/s390x/tcg/insn-data.h.inc +++ b/target/s390x/tcg/insn-data.h.inc @@ -293,9 +293,14 @@ D(0xec73, CLFIT, RIE_a, GIE, r1_32u, i2_16u, 0, 0, ct, 0, 1) D(0xec71, CLGIT, RIE_a, GIE, r1_o, i2_16u, 0, 0, ct, 0, 1) +/* CONVERT TO BINARY */ + C(0x4f00, CVB, RX_a, Z, la2, 0, 0, 0, cvb, 0) + C(0xe306, CVBY, RXY_a, LD, la2, 0, 0, 0, cvb, 0) + C(0xe30e, CVBG, RXY_a, Z, la2, 0, r1, 0, cvbg, 0) /* CONVERT TO DECIMAL */ C(0x4e00, CVD, RX_a, Z, r1_o, a2, 0, 0, cvd, 0) C(0xe326, CVDY, RXY_a, LD, r1_o, a2, 0, 0, cvd, 0) + C(0xe32e, CVDG, RXY_a, Z, r1_o, a2, 0, 0, cvdg, 0) /* CONVERT TO FIXED */ F(0xb398, CFEBR, RRF_e, Z, 0, e2, new, r1_32, cfeb, 0, IF_BFP) F(0xb399, CFDBR, RRF_e, Z, 0, f2, new, r1_32, cfdb, 0, IF_BFP) diff --git a/target/s390x/tcg/int_helper.c b/target/s390x/tcg/int_helper.c index eb8e6dd1b5..2af970f2c8 100644 --- a/target/s390x/tcg/int_helper.c +++ b/target/s390x/tcg/int_helper.c @@ -25,6 +25,7 @@ #include "exec/exec-all.h" #include "qemu/host-utils.h" #include "exec/helper-proto.h" +#include "exec/cpu_ldst.h" /* #define DEBUG_HELPER */ #ifdef DEBUG_HELPER @@ -98,6 +99,81 @@ Int128 HELPER(divu64)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t b) tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); } +void HELPER(cvb)(CPUS390XState *env, uint32_t r1, uint64_t dec) +{ + int64_t pow10 = 1, bin = 0; + int digit, sign; + + sign = dec & 0xf; + if (sign < 0xa) { + tcg_s390_data_exception(env, 0, GETPC()); + } + dec >>= 4; + + while (dec) { + digit = dec & 0xf; + if (digit > 0x9) { + tcg_s390_data_exception(env, 0, GETPC()); + } + dec >>= 4; + bin += digit * pow10; + pow10 *= 10; + } + + if (sign == 0xb || sign == 0xd) { + bin = -bin; + } + + /* R1 is updated even on fixed-point-divide exception. */ + env->regs[r1] = (env->regs[r1] & 0xffffffff00000000ULL) | (uint32_t)bin; + if (bin != (int32_t)bin) { + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + } +} + +uint64_t HELPER(cvbg)(CPUS390XState *env, Int128 dec) +{ + uint64_t dec64[] = {int128_getlo(dec), int128_gethi(dec)}; + int64_t bin = 0, pow10, tmp; + int digit, i, sign; + + sign = dec64[0] & 0xf; + if (sign < 0xa) { + tcg_s390_data_exception(env, 0, GETPC()); + } + dec64[0] >>= 4; + pow10 = (sign == 0xb || sign == 0xd) ? -1 : 1; + + for (i = 1; i < 20; i++) { + digit = dec64[i >> 4] & 0xf; + if (digit > 0x9) { + tcg_s390_data_exception(env, 0, GETPC()); + } + dec64[i >> 4] >>= 4; + /* + * Prepend the next digit and check for overflow. The multiplication + * cannot overflow, since, conveniently, the int64_t limits are + * approximately +-9.2E+18. If bin is zero, the addition cannot + * overflow. Otherwise bin is known to have the same sign as the rhs + * addend, in which case overflow happens if and only if the result + * has a different sign. 
+ */ + tmp = bin + pow10 * digit; + if (bin && ((tmp ^ bin) < 0)) { + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + } + bin = tmp; + pow10 *= 10; + } + + g_assert(!dec64[0]); + if (dec64[1]) { + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + } + + return bin; +} + uint64_t HELPER(cvd)(int32_t reg) { /* positive 0 */ @@ -118,6 +194,27 @@ uint64_t HELPER(cvd)(int32_t reg) return dec; } +Int128 HELPER(cvdg)(int64_t reg) +{ + /* positive 0 */ + Int128 dec = int128_make64(0x0c); + Int128 bin = int128_makes64(reg); + Int128 base = int128_make64(10); + int shift; + + if (!int128_nonneg(bin)) { + bin = int128_neg(bin); + dec = int128_make64(0x0d); + } + + for (shift = 4; (shift < 128) && int128_nz(bin); shift += 4) { + dec = int128_or(dec, int128_lshift(int128_remu(bin, base), shift)); + bin = int128_divu(bin, base); + } + + return dec; +} + uint64_t HELPER(popcnt)(uint64_t val) { /* Note that we don't fold past bytes. */ diff --git a/target/s390x/tcg/misc_helper.c b/target/s390x/tcg/misc_helper.c index 89b5268fd4..8764846ce8 100644 --- a/target/s390x/tcg/misc_helper.c +++ b/target/s390x/tcg/misc_helper.c @@ -214,9 +214,7 @@ void HELPER(sckc)(CPUS390XState *env, uint64_t ckc) void tcg_s390_tod_updated(CPUState *cs, run_on_cpu_data opaque) { - S390CPU *cpu = S390_CPU(cs); - - update_ckc_timer(&cpu->env); + update_ckc_timer(cpu_env(cs)); } /* Set Clock */ diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index a5fd9cccaa..0d0c672c95 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -754,10 +754,10 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask) case CC_OP_TM_64: switch (mask) { case 8: - cond = TCG_COND_EQ; + cond = TCG_COND_TSTEQ; break; case 4 | 2 | 1: - cond = TCG_COND_NE; + cond = TCG_COND_TSTNE; break; default: goto do_dynamic; @@ -768,11 +768,11 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask) case CC_OP_ICM: switch (mask) { case 8: - cond = TCG_COND_EQ; + cond = TCG_COND_TSTEQ; break; case 4 | 2 | 1: case 4 | 2: - cond = TCG_COND_NE; + cond = TCG_COND_TSTNE; break; default: goto do_dynamic; @@ -854,18 +854,14 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask) c->u.s64.a = cc_dst; c->u.s64.b = tcg_constant_i64(0); break; + case CC_OP_LTGT_64: case CC_OP_LTUGTU_64: - c->u.s64.a = cc_src; - c->u.s64.b = cc_dst; - break; - case CC_OP_TM_32: case CC_OP_TM_64: case CC_OP_ICM: - c->u.s64.a = tcg_temp_new_i64(); - c->u.s64.b = tcg_constant_i64(0); - tcg_gen_and_i64(c->u.s64.a, cc_src, cc_dst); + c->u.s64.a = cc_src; + c->u.s64.b = cc_dst; break; case CC_OP_ADDU: @@ -889,67 +885,45 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask) case CC_OP_STATIC: c->is_64 = false; c->u.s32.a = cc_op; - switch (mask) { - case 0x8 | 0x4 | 0x2: /* cc != 3 */ - cond = TCG_COND_NE; + + /* Fold half of the cases using bit 3 to invert. */ + switch (mask & 8 ? 
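/*
 * Aside (editor's sketch): HELPER(cvdg) above builds the packed-decimal
 * image by repeated divmod-by-10, one nibble per digit above the sign
 * nibble (0x0c positive, 0x0d negative).  The same loop for a 64-bit
 * magnitude, ignoring the 128-bit width of the real helper:
 */
static uint64_t pack_decimal64(uint64_t bin, bool negative)
{
    uint64_t dec = negative ? 0x0d : 0x0c;  /* sign nibble */
    int shift;

    for (shift = 4; shift < 64 && bin; shift += 4) {
        dec |= (bin % 10) << shift;         /* next decimal digit */
        bin /= 10;
    }
    return dec;
}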
mask ^ 0xf : mask) { + case 0x1: /* cc == 3 */ + cond = TCG_COND_EQ; c->u.s32.b = tcg_constant_i32(3); break; - case 0x8 | 0x4 | 0x1: /* cc != 2 */ - cond = TCG_COND_NE; - c->u.s32.b = tcg_constant_i32(2); - break; - case 0x8 | 0x2 | 0x1: /* cc != 1 */ - cond = TCG_COND_NE; - c->u.s32.b = tcg_constant_i32(1); - break; - case 0x8 | 0x2: /* cc == 0 || cc == 2 => (cc & 1) == 0 */ + case 0x2: /* cc == 2 */ cond = TCG_COND_EQ; - c->u.s32.a = tcg_temp_new_i32(); - c->u.s32.b = tcg_constant_i32(0); - tcg_gen_andi_i32(c->u.s32.a, cc_op, 1); - break; - case 0x8 | 0x4: /* cc < 2 */ - cond = TCG_COND_LTU; c->u.s32.b = tcg_constant_i32(2); break; - case 0x8: /* cc == 0 */ - cond = TCG_COND_EQ; - c->u.s32.b = tcg_constant_i32(0); - break; - case 0x4 | 0x2 | 0x1: /* cc != 0 */ - cond = TCG_COND_NE; - c->u.s32.b = tcg_constant_i32(0); - break; - case 0x4 | 0x1: /* cc == 1 || cc == 3 => (cc & 1) != 0 */ - cond = TCG_COND_NE; - c->u.s32.a = tcg_temp_new_i32(); - c->u.s32.b = tcg_constant_i32(0); - tcg_gen_andi_i32(c->u.s32.a, cc_op, 1); - break; case 0x4: /* cc == 1 */ cond = TCG_COND_EQ; c->u.s32.b = tcg_constant_i32(1); break; - case 0x2 | 0x1: /* cc > 1 */ + case 0x2 | 0x1: /* cc == 2 || cc == 3 => cc > 1 */ cond = TCG_COND_GTU; c->u.s32.b = tcg_constant_i32(1); break; - case 0x2: /* cc == 2 */ - cond = TCG_COND_EQ; - c->u.s32.b = tcg_constant_i32(2); + case 0x4 | 0x1: /* cc == 1 || cc == 3 => (cc & 1) != 0 */ + cond = TCG_COND_TSTNE; + c->u.s32.b = tcg_constant_i32(1); break; - case 0x1: /* cc == 3 */ - cond = TCG_COND_EQ; - c->u.s32.b = tcg_constant_i32(3); + case 0x4 | 0x2: /* cc == 1 || cc == 2 => (cc - 1) <= 1 */ + cond = TCG_COND_LEU; + c->u.s32.a = tcg_temp_new_i32(); + c->u.s32.b = tcg_constant_i32(1); + tcg_gen_addi_i32(c->u.s32.a, cc_op, -1); break; - default: - /* CC is masked by something else: (8 >> cc) & mask. */ + case 0x4 | 0x2 | 0x1: /* cc != 0 */ cond = TCG_COND_NE; - c->u.s32.a = tcg_temp_new_i32(); c->u.s32.b = tcg_constant_i32(0); - tcg_gen_shr_i32(c->u.s32.a, tcg_constant_i32(8), cc_op); - tcg_gen_andi_i32(c->u.s32.a, c->u.s32.a, mask); break; + default: + /* case 0: never, handled above. 
*/ + g_assert_not_reached(); + } + if (mask & 8) { + cond = tcg_invert_cond(cond); } break; @@ -2223,6 +2197,22 @@ static DisasJumpType op_csp(DisasContext *s, DisasOps *o) } #endif +static DisasJumpType op_cvb(DisasContext *s, DisasOps *o) +{ + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TEUQ); + gen_helper_cvb(tcg_env, tcg_constant_i32(get_field(s, r1)), t); + return DISAS_NEXT; +} + +static DisasJumpType op_cvbg(DisasContext *s, DisasOps *o) +{ + TCGv_i128 t = tcg_temp_new_i128(); + tcg_gen_qemu_ld_i128(t, o->addr1, get_mem_index(s), MO_TE | MO_128); + gen_helper_cvbg(o->out, tcg_env, t); + return DISAS_NEXT; +} + static DisasJumpType op_cvd(DisasContext *s, DisasOps *o) { TCGv_i64 t1 = tcg_temp_new_i64(); @@ -2233,6 +2223,14 @@ static DisasJumpType op_cvd(DisasContext *s, DisasOps *o) return DISAS_NEXT; } +static DisasJumpType op_cvdg(DisasContext *s, DisasOps *o) +{ + TCGv_i128 t = tcg_temp_new_i128(); + gen_helper_cvdg(t, o->in1); + tcg_gen_qemu_st_i128(t, o->in2, get_mem_index(s), MO_TE | MO_128); + return DISAS_NEXT; +} + static DisasJumpType op_ct(DisasContext *s, DisasOps *o) { int m3 = get_field(s, m3); @@ -6558,8 +6556,7 @@ void s390x_restore_state_to_opc(CPUState *cs, const TranslationBlock *tb, const uint64_t *data) { - S390CPU *cpu = S390_CPU(cs); - CPUS390XState *env = &cpu->env; + CPUS390XState *env = cpu_env(cs); int cc_op = data[1]; env->psw.addr = data[0]; diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 7df6f83b13..d9304a5bc4 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -488,6 +488,7 @@ static void gen_op_subccc(TCGv dst, TCGv src1, TCGv src2) static void gen_op_mulscc(TCGv dst, TCGv src1, TCGv src2) { TCGv zero = tcg_constant_tl(0); + TCGv one = tcg_constant_tl(1); TCGv t_src1 = tcg_temp_new(); TCGv t_src2 = tcg_temp_new(); TCGv t0 = tcg_temp_new(); @@ -499,8 +500,7 @@ static void gen_op_mulscc(TCGv dst, TCGv src1, TCGv src2) * if (!(env->y & 1)) * src2 = 0; */ - tcg_gen_andi_tl(t0, cpu_y, 0x1); - tcg_gen_movcond_tl(TCG_COND_EQ, t_src2, t0, zero, zero, t_src2); + tcg_gen_movcond_tl(TCG_COND_TSTEQ, t_src2, cpu_y, one, zero, t_src2); /* * b2 = src1 & 1; diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h index 3fdee26a3d..44fcc1206e 100644 --- a/tcg/aarch64/tcg-target-con-set.h +++ b/tcg/aarch64/tcg-target-con-set.h @@ -10,7 +10,7 @@ * tcg-target-con-str.h; the constraint combination is inclusive or. 
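/*
 * Aside (editor's sketch): an s390x branch mask selects on the
 * condition code; the branch is taken iff bit (3 - cc) of the 4-bit
 * mask is set.  Folding on bit 3 as in the disas_jcc hunk above halves
 * the cases: lower the complemented mask, then invert the resulting
 * TCG condition.
 */
static bool s390_cc_mask_taken(unsigned mask, unsigned cc)
{
    return (mask >> (3 - cc)) & 1;  /* mask in 0..15, cc in 0..3 */
}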
*/ C_O0_I1(r) -C_O0_I2(r, rA) +C_O0_I2(r, rC) C_O0_I2(rZ, r) C_O0_I2(w, r) C_O0_I3(rZ, rZ, r) @@ -22,6 +22,7 @@ C_O1_I2(r, 0, rZ) C_O1_I2(r, r, r) C_O1_I2(r, r, rA) C_O1_I2(r, r, rAL) +C_O1_I2(r, r, rC) C_O1_I2(r, r, ri) C_O1_I2(r, r, rL) C_O1_I2(r, rZ, rZ) @@ -31,6 +32,6 @@ C_O1_I2(w, w, wN) C_O1_I2(w, w, wO) C_O1_I2(w, w, wZ) C_O1_I3(w, w, w, w) -C_O1_I4(r, r, rA, rZ, rZ) +C_O1_I4(r, r, rC, rZ, rZ) C_O2_I1(r, r, r) C_O2_I4(r, r, rZ, rZ, rA, rMZ) diff --git a/tcg/aarch64/tcg-target-con-str.h b/tcg/aarch64/tcg-target-con-str.h index fb1a845b4f..48e1722c68 100644 --- a/tcg/aarch64/tcg-target-con-str.h +++ b/tcg/aarch64/tcg-target-con-str.h @@ -16,6 +16,7 @@ REGS('w', ALL_VECTOR_REGS) * CONST(letter, TCG_CT_CONST_* bit set) */ CONST('A', TCG_CT_CONST_AIMM) +CONST('C', TCG_CT_CONST_CMP) CONST('L', TCG_CT_CONST_LIMM) CONST('M', TCG_CT_CONST_MONE) CONST('O', TCG_CT_CONST_ORRI) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index a3efa1e67a..dec8ecc1b6 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -105,6 +105,18 @@ static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target) return false; } +static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target) +{ + const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); + ptrdiff_t offset = target - src_rx; + + if (offset == sextract64(offset, 0, 14)) { + *src_rw = deposit32(*src_rw, 5, 14, offset); + return true; + } + return false; +} + static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { @@ -115,6 +127,8 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, return reloc_pc26(code_ptr, (const tcg_insn_unit *)value); case R_AARCH64_CONDBR19: return reloc_pc19(code_ptr, (const tcg_insn_unit *)value); + case R_AARCH64_TSTBR14: + return reloc_pc14(code_ptr, (const tcg_insn_unit *)value); default: g_assert_not_reached(); } @@ -126,6 +140,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, #define TCG_CT_CONST_MONE 0x800 #define TCG_CT_CONST_ORRI 0x1000 #define TCG_CT_CONST_ANDI 0x2000 +#define TCG_CT_CONST_CMP 0x4000 #define ALL_GENERAL_REGS 0xffffffffu #define ALL_VECTOR_REGS 0xffffffff00000000ull @@ -270,7 +285,8 @@ static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8) } } -static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece) +static bool tcg_target_const_match(int64_t val, int ct, + TCGType type, TCGCond cond, int vece) { if (ct & TCG_CT_CONST) { return 1; @@ -278,6 +294,15 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece) if (type == TCG_TYPE_I32) { val = (int32_t)val; } + + if (ct & TCG_CT_CONST_CMP) { + if (is_tst_cond(cond)) { + ct |= TCG_CT_CONST_LIMM; + } else { + ct |= TCG_CT_CONST_AIMM; + } + } + if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { return 1; } @@ -344,6 +369,9 @@ static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { [TCG_COND_GTU] = COND_HI, [TCG_COND_GEU] = COND_HS, [TCG_COND_LEU] = COND_LS, + /* bit test */ + [TCG_COND_TSTEQ] = COND_EQ, + [TCG_COND_TSTNE] = COND_NE, }; typedef enum { @@ -366,6 +394,10 @@ typedef enum { /* Conditional branch (immediate). */ I3202_B_C = 0x54000000, + /* Test and branch (immediate). */ + I3205_TBZ = 0x36000000, + I3205_TBNZ = 0x37000000, + /* Unconditional branch (immediate). 
*/ I3206_B = 0x14000000, I3206_BL = 0x94000000, @@ -646,6 +678,14 @@ static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); } +static void tcg_out_insn_3205(TCGContext *s, AArch64Insn insn, + TCGReg rt, int imm6, int imm14) +{ + insn |= (imm6 & 0x20) << (31 - 5); + insn |= (imm6 & 0x1f) << 19; + tcg_out32(s, insn | (imm14 & 0x3fff) << 5 | rt); +} + static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) { tcg_out32(s, insn | (imm26 & 0x03ffffff)); @@ -1341,19 +1381,26 @@ static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, tcg_out_bfm(s, ext, rd, rn, a, b); } -static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, +static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGCond cond, TCGReg a, tcg_target_long b, bool const_b) { - if (const_b) { - /* Using CMP or CMN aliases. */ - if (b >= 0) { + if (is_tst_cond(cond)) { + if (!const_b) { + tcg_out_insn(s, 3510, ANDS, ext, TCG_REG_XZR, a, b); + } else { + tcg_debug_assert(is_limm(b)); + tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, a, b); + } + } else { + if (!const_b) { + tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); + } else if (b >= 0) { + tcg_debug_assert(is_aimm(b)); tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); } else { + tcg_debug_assert(is_aimm(-b)); tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); } - } else { - /* Using CMP alias SUBS wzr, Wn, Wm */ - tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); } } @@ -1394,30 +1441,75 @@ static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, TCGArg b, bool b_const, TCGLabel *l) { - intptr_t offset; - bool need_cmp; + int tbit = -1; + bool need_cmp = true; - if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { - need_cmp = false; - } else { - need_cmp = true; - tcg_out_cmp(s, ext, a, b, b_const); + switch (c) { + case TCG_COND_EQ: + case TCG_COND_NE: + /* cmp xN,0; b.ne L -> cbnz xN,L */ + if (b_const && b == 0) { + need_cmp = false; + } + break; + case TCG_COND_LT: + case TCG_COND_GE: + /* cmp xN,0; b.mi L -> tbnz xN,63,L */ + if (b_const && b == 0) { + c = (c == TCG_COND_LT ? TCG_COND_TSTNE : TCG_COND_TSTEQ); + tbit = ext ? 
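/*
 * Aside (editor's sketch): the LT/GE-vs-zero rewrite in tcg_out_brcond
 * below is valid because a signed comparison against zero depends only
 * on the sign bit, which TBNZ/TBZ can branch on directly (bit 63 or 31):
 */
static bool lt_zero_is_sign_bit(int64_t x)
{
    return ((uint64_t)x >> 63) != 0;    /* identical to (x < 0) */
}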
63 : 31; + need_cmp = false; + } + break; + case TCG_COND_TSTEQ: + case TCG_COND_TSTNE: + /* tst xN,0xffffffff; b.ne L -> cbnz wN,L */ + if (b_const && b == UINT32_MAX) { + ext = TCG_TYPE_I32; + need_cmp = false; + break; + } + /* tst xN,1<<B; b.ne L -> tbnz xN,B,L */ + if (b_const && is_power_of_2(b)) { + tbit = ctz64(b); + need_cmp = false; + } + break; + default: + break; } - if (!l->has_value) { + if (need_cmp) { + tcg_out_cmp(s, ext, c, a, b, b_const); tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); - offset = tcg_in32(s) >> 5; - } else { - offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2; - tcg_debug_assert(offset == sextract64(offset, 0, 19)); + tcg_out_insn(s, 3202, B_C, c, 0); + return; } - if (need_cmp) { - tcg_out_insn(s, 3202, B_C, c, offset); - } else if (c == TCG_COND_EQ) { - tcg_out_insn(s, 3201, CBZ, ext, a, offset); + if (tbit >= 0) { + tcg_out_reloc(s, s->code_ptr, R_AARCH64_TSTBR14, l, 0); + switch (c) { + case TCG_COND_TSTEQ: + tcg_out_insn(s, 3205, TBZ, a, tbit, 0); + break; + case TCG_COND_TSTNE: + tcg_out_insn(s, 3205, TBNZ, a, tbit, 0); + break; + default: + g_assert_not_reached(); + } } else { - tcg_out_insn(s, 3201, CBNZ, ext, a, offset); + tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); + switch (c) { + case TCG_COND_EQ: + tcg_out_insn(s, 3201, CBZ, ext, a, 0); + break; + case TCG_COND_NE: + tcg_out_insn(s, 3201, CBNZ, ext, a, 0); + break; + default: + g_assert_not_reached(); + } } } @@ -1574,7 +1666,7 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, } else { AArch64Insn sel = I3506_CSEL; - tcg_out_cmp(s, ext, a0, 0, 1); + tcg_out_cmp(s, ext, TCG_COND_NE, a0, 0, 1); tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1); if (const_b) { @@ -1719,7 +1811,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, addr_adj, compare_mask); /* Perform the address comparison. */ - tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0); + tcg_out_cmp(s, addr_type, TCG_COND_NE, TCG_REG_TMP0, TCG_REG_TMP2, 0); /* If not equal, we jump to the slow path. */ ldst->label_ptr[0] = s->code_ptr; @@ -2275,7 +2367,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, a2 = (int32_t)a2; /* FALLTHRU */ case INDEX_op_setcond_i64: - tcg_out_cmp(s, ext, a1, a2, c2); + tcg_out_cmp(s, ext, args[3], a1, a2, c2); /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */ tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, TCG_REG_XZR, tcg_invert_cond(args[3])); @@ -2285,7 +2377,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, a2 = (int32_t)a2; /* FALLTHRU */ case INDEX_op_negsetcond_i64: - tcg_out_cmp(s, ext, a1, a2, c2); + tcg_out_cmp(s, ext, args[3], a1, a2, c2); /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond). 
*/ tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR, TCG_REG_XZR, tcg_invert_cond(args[3])); @@ -2295,7 +2387,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, a2 = (int32_t)a2; /* FALLTHRU */ case INDEX_op_movcond_i64: - tcg_out_cmp(s, ext, a1, a2, c2); + tcg_out_cmp(s, ext, args[5], a1, a2, c2); tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); break; @@ -2895,11 +2987,13 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_add_i64: case INDEX_op_sub_i32: case INDEX_op_sub_i64: + return C_O1_I2(r, r, rA); + case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: case INDEX_op_negsetcond_i32: case INDEX_op_negsetcond_i64: - return C_O1_I2(r, r, rA); + return C_O1_I2(r, r, rC); case INDEX_op_mul_i32: case INDEX_op_mul_i64: @@ -2949,11 +3043,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: - return C_O0_I2(r, rA); + return C_O0_I2(r, rC); case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: - return C_O1_I4(r, r, rA, rZ, rZ); + return C_O1_I4(r, r, rC, rZ, rZ); case INDEX_op_qemu_ld_a32_i32: case INDEX_op_qemu_ld_a64_i32: diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 33f15a564a..ef5ebe91bd 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -138,6 +138,8 @@ typedef enum { #define TCG_TARGET_HAS_qemu_ldst_i128 1 #endif +#define TCG_TARGET_HAS_tst 1 + #define TCG_TARGET_HAS_v64 1 #define TCG_TARGET_HAS_v128 1 #define TCG_TARGET_HAS_v256 0 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index a9aa8aa91c..ffd23ef789 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -501,7 +501,8 @@ static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8) * mov operand2: values represented with x << (2 * y), x < 0x100 * add, sub, eor...: ditto */ -static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece) +static bool tcg_target_const_match(int64_t val, int ct, + TCGType type, TCGCond cond, int vece) { if (ct & TCG_CT_CONST) { return 1; @@ -1190,6 +1191,33 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) } } +static TCGCond tcg_out_cmp(TCGContext *s, TCGCond cond, TCGReg a, + TCGArg b, int b_const) +{ + if (!is_tst_cond(cond)) { + tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, a, b, b_const); + return cond; + } + + cond = tcg_tst_eqne_cond(cond); + if (b_const) { + int imm12 = encode_imm(b); + + /* + * The compare constraints allow rIN, but TST does not support N. + * Be prepared to load the constant into a scratch register. + */ + if (imm12 >= 0) { + tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, a, imm12); + return cond; + } + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, b); + b = TCG_REG_TMP; + } + tcg_out_dat_reg(s, COND_AL, ARITH_TST, 0, a, b, SHIFT_IMM_LSL(0)); + return cond; +} + static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args, const int *const_args) { @@ -1217,6 +1245,13 @@ static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args, tcg_out_dat_rI(s, COND_EQ, ARITH_CMP, 0, al, bl, const_bl); return cond; + case TCG_COND_TSTEQ: + case TCG_COND_TSTNE: + /* Similar, but with TST instead of CMP. */ + tcg_out_dat_rI(s, COND_AL, ARITH_TST, 0, ah, bh, const_bh); + tcg_out_dat_rI(s, COND_EQ, ARITH_TST, 0, al, bl, const_bl); + return tcg_tst_eqne_cond(cond); + case TCG_COND_LT: case TCG_COND_GE: /* We perform a double-word subtraction and examine the result. 
@@ -1808,9 +1843,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, /* Constraints mean that v2 is always in the same register as dest, * so we only need to do "if condition passed, move v1 to dest". */ - tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, - args[1], args[2], const_args[2]); - tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV, + c = tcg_out_cmp(s, args[5], args[1], args[2], const_args[2]); + tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[c], ARITH_MOV, ARITH_MVN, args[0], 0, args[3], const_args[3]); break; case INDEX_op_add_i32: @@ -1960,25 +1994,21 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_brcond_i32: - tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, - args[0], args[1], const_args[1]); - tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], - arg_label(args[3])); + c = tcg_out_cmp(s, args[2], args[0], args[1], const_args[1]); + tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[3])); break; case INDEX_op_setcond_i32: - tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, - args[1], args[2], const_args[2]); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]], + c = tcg_out_cmp(s, args[3], args[1], args[2], const_args[2]); + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, args[0], 0, 1); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])], + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)], ARITH_MOV, args[0], 0, 0); break; case INDEX_op_negsetcond_i32: - tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, - args[1], args[2], const_args[2]); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]], + c = tcg_out_cmp(s, args[3], args[1], args[2], const_args[2]); + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MVN, args[0], 0, 0); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])], + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)], ARITH_MOV, args[0], 0, 0); break; diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index a712cc80ad..a43875cb09 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -125,6 +125,8 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_qemu_ldst_i128 0 +#define TCG_TARGET_HAS_tst 1 + #define TCG_TARGET_HAS_v64 use_neon_instructions #define TCG_TARGET_HAS_v128 use_neon_instructions #define TCG_TARGET_HAS_v256 0 diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h index 7d00a7dde8..e24241cfa2 100644 --- a/tcg/i386/tcg-target-con-set.h +++ b/tcg/i386/tcg-target-con-set.h @@ -20,7 +20,7 @@ C_O0_I2(L, L) C_O0_I2(qi, r) C_O0_I2(re, r) C_O0_I2(ri, r) -C_O0_I2(r, re) +C_O0_I2(r, reT) C_O0_I2(s, L) C_O0_I2(x, r) C_O0_I3(L, L, L) @@ -34,7 +34,7 @@ C_O1_I1(r, r) C_O1_I1(x, r) C_O1_I1(x, x) C_O1_I2(q, 0, qi) -C_O1_I2(q, r, re) +C_O1_I2(q, r, reT) C_O1_I2(r, 0, ci) C_O1_I2(r, 0, r) C_O1_I2(r, 0, re) @@ -50,7 +50,7 @@ C_N1_I2(r, r, r) C_N1_I2(r, r, rW) C_O1_I3(x, 0, x, x) C_O1_I3(x, x, x, x) -C_O1_I4(r, r, re, r, 0) +C_O1_I4(r, r, reT, r, 0) C_O1_I4(r, r, r, ri, ri) C_O2_I1(r, r, L) C_O2_I2(a, d, a, r) diff --git a/tcg/i386/tcg-target-con-str.h b/tcg/i386/tcg-target-con-str.h index 95a30e58cd..cc22db227b 100644 --- a/tcg/i386/tcg-target-con-str.h +++ b/tcg/i386/tcg-target-con-str.h @@ -28,5 +28,6 @@ REGS('s', ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_st8_i32 data */ */ CONST('e', TCG_CT_CONST_S32) CONST('I', TCG_CT_CONST_I32) +CONST('T', TCG_CT_CONST_TST) CONST('W', TCG_CT_CONST_WSZ) CONST('Z', TCG_CT_CONST_U32) diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 
d268199fc1..c6ba498623 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -132,6 +132,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define TCG_CT_CONST_U32 0x200 #define TCG_CT_CONST_I32 0x400 #define TCG_CT_CONST_WSZ 0x800 +#define TCG_CT_CONST_TST 0x1000 /* Registers used with L constraint, which are the first argument registers on x86_64, and two random call clobbered registers on @@ -195,13 +196,15 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, } /* test if a constant matches the constraint */ -static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece) +static bool tcg_target_const_match(int64_t val, int ct, + TCGType type, TCGCond cond, int vece) { if (ct & TCG_CT_CONST) { return 1; } if (type == TCG_TYPE_I32) { - if (ct & (TCG_CT_CONST_S32 | TCG_CT_CONST_U32 | TCG_CT_CONST_I32)) { + if (ct & (TCG_CT_CONST_S32 | TCG_CT_CONST_U32 | + TCG_CT_CONST_I32 | TCG_CT_CONST_TST)) { return 1; } } else { @@ -214,6 +217,17 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece) if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) { return 1; } + /* + * This will be used in combination with TCG_CT_CONST_S32, + * so "normal" TESTQ is already matched. Also accept: + * TESTQ -> TESTL (uint32_t) + * TESTQ -> BT (is_power_of_2) + */ + if ((ct & TCG_CT_CONST_TST) + && is_tst_cond(cond) + && (val == (uint32_t)val || is_power_of_2(val))) { + return 1; + } } if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) { return 1; @@ -395,6 +409,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece) #define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16) #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) #define OPC_SHRD_Ib (0xac | P_EXT) +#define OPC_TESTB (0x84) #define OPC_TESTL (0x85) #define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3) #define OPC_UD2 (0x0b | P_EXT) @@ -441,6 +456,12 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece) #define OPC_GRP3_Ev (0xf7) #define OPC_GRP5 (0xff) #define OPC_GRP14 (0x73 | P_EXT | P_DATA16) +#define OPC_GRPBT (0xba | P_EXT) + +#define OPC_GRPBT_BT 4 +#define OPC_GRPBT_BTS 5 +#define OPC_GRPBT_BTR 6 +#define OPC_GRPBT_BTC 7 /* Group 1 opcode extensions for 0x80-0x83. These are also used as modifiers for OPC_ARITH. */ @@ -505,6 +526,8 @@ static const uint8_t tcg_cond_to_jcc[] = { [TCG_COND_GEU] = JCC_JAE, [TCG_COND_LEU] = JCC_JBE, [TCG_COND_GTU] = JCC_JA, + [TCG_COND_TSTEQ] = JCC_JE, + [TCG_COND_TSTNE] = JCC_JNE, }; #if TCG_TARGET_REG_BITS == 64 @@ -1448,27 +1471,101 @@ static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, bool small) } } -static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2, - int const_arg2, int rexw) +static int tcg_out_cmp(TCGContext *s, TCGCond cond, TCGArg arg1, + TCGArg arg2, int const_arg2, int rexw) { - if (const_arg2) { - if (arg2 == 0) { - /* test r, r */ + int jz, js; + + if (!is_tst_cond(cond)) { + if (!const_arg2) { + tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2); + } else if (arg2 == 0) { tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1); } else { + tcg_debug_assert(!rexw || arg2 == (int32_t)arg2); tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0); } - } else { - tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2); + return tcg_cond_to_jcc[cond]; + } + + jz = tcg_cond_to_jcc[cond]; + js = (cond == TCG_COND_TSTNE ? 
JCC_JS : JCC_JNS); + + if (!const_arg2) { + tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg2); + return jz; + } + + if (arg2 <= 0xff && (TCG_TARGET_REG_BITS == 64 || arg1 < 4)) { + if (arg2 == 0x80) { + tcg_out_modrm(s, OPC_TESTB | P_REXB_R, arg1, arg1); + return js; + } + if (arg2 == 0xff) { + tcg_out_modrm(s, OPC_TESTB | P_REXB_R, arg1, arg1); + return jz; + } + tcg_out_modrm(s, OPC_GRP3_Eb | P_REXB_RM, EXT3_TESTi, arg1); + tcg_out8(s, arg2); + return jz; + } + + if ((arg2 & ~0xff00) == 0 && arg1 < 4) { + if (arg2 == 0x8000) { + tcg_out_modrm(s, OPC_TESTB, arg1 + 4, arg1 + 4); + return js; + } + if (arg2 == 0xff00) { + tcg_out_modrm(s, OPC_TESTB, arg1 + 4, arg1 + 4); + return jz; + } + tcg_out_modrm(s, OPC_GRP3_Eb, EXT3_TESTi, arg1 + 4); + tcg_out8(s, arg2 >> 8); + return jz; + } + + if (arg2 == 0xffff) { + tcg_out_modrm(s, OPC_TESTL | P_DATA16, arg1, arg1); + return jz; } + if (arg2 == 0xffffffffu) { + tcg_out_modrm(s, OPC_TESTL, arg1, arg1); + return jz; + } + + if (is_power_of_2(rexw ? arg2 : (uint32_t)arg2)) { + int jc = (cond == TCG_COND_TSTNE ? JCC_JB : JCC_JAE); + int sh = ctz64(arg2); + + rexw = (sh & 32 ? P_REXW : 0); + if ((sh & 31) == 31) { + tcg_out_modrm(s, OPC_TESTL | rexw, arg1, arg1); + return js; + } else { + tcg_out_modrm(s, OPC_GRPBT | rexw, OPC_GRPBT_BT, arg1); + tcg_out8(s, sh); + return jc; + } + } + + if (rexw) { + if (arg2 == (uint32_t)arg2) { + rexw = 0; + } else { + tcg_debug_assert(arg2 == (int32_t)arg2); + } + } + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_TESTi, arg1); + tcg_out32(s, arg2); + return jz; } static void tcg_out_brcond(TCGContext *s, int rexw, TCGCond cond, TCGArg arg1, TCGArg arg2, int const_arg2, TCGLabel *label, bool small) { - tcg_out_cmp(s, arg1, arg2, const_arg2, rexw); - tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small); + int jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, rexw); + tcg_out_jxx(s, jcc, label, small); } #if TCG_TARGET_REG_BITS == 32 @@ -1477,18 +1574,21 @@ static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, { TCGLabel *label_next = gen_new_label(); TCGLabel *label_this = arg_label(args[5]); + TCGCond cond = args[4]; - switch(args[4]) { + switch (cond) { case TCG_COND_EQ: - tcg_out_brcond(s, 0, TCG_COND_NE, args[0], args[2], const_args[2], - label_next, 1); - tcg_out_brcond(s, 0, TCG_COND_EQ, args[1], args[3], const_args[3], + case TCG_COND_TSTEQ: + tcg_out_brcond(s, 0, tcg_invert_cond(cond), + args[0], args[2], const_args[2], label_next, 1); + tcg_out_brcond(s, 0, cond, args[1], args[3], const_args[3], label_this, small); break; case TCG_COND_NE: - tcg_out_brcond(s, 0, TCG_COND_NE, args[0], args[2], const_args[2], + case TCG_COND_TSTNE: + tcg_out_brcond(s, 0, cond, args[0], args[2], const_args[2], label_this, small); - tcg_out_brcond(s, 0, TCG_COND_NE, args[1], args[3], const_args[3], + tcg_out_brcond(s, 0, cond, args[1], args[3], const_args[3], label_this, small); break; case TCG_COND_LT: @@ -1560,6 +1660,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond, { bool inv = false; bool cleared; + int jcc; switch (cond) { case TCG_COND_NE: @@ -1596,7 +1697,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond, * We can then use NEG or INC to produce the desired result. * This is always smaller than the SETCC expansion. */ - tcg_out_cmp(s, arg1, arg2, const_arg2, rexw); + tcg_out_cmp(s, TCG_COND_LTU, arg1, arg2, const_arg2, rexw); /* X - X - C = -C = (C ? -1 : 0) */ tgen_arithr(s, ARITH_SBB + (neg ? 
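/*
 * Aside (editor's sketch): a summary of the immediate forms chosen by
 * the x86 tcg_out_cmp above for TSTEQ/TSTNE, in selection order.
 * Hypothetical helper, for illustration only:
 */
static const char *tst_imm_form(uint64_t m)
{
    if (m <= 0xff) {
        return "testb $imm8, %reg";     /* 0x80/0xff: testb %r,%r + S/Z */
    }
    if ((m & ~0xff00ull) == 0) {
        return "testb $imm8, %regh";    /* high-byte regs need reg < 4 */
    }
    if (m == 0xffff) {
        return "testw %reg, %reg";
    }
    if (m == 0xffffffffull) {
        return "testl %reg, %reg";
    }
    if ((m & (m - 1)) == 0) {
        return "bt $ctz(m), %reg";      /* single bit lands in CF */
    }
    return "testl/testq $imm32, %reg";
}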
rexw : 0), dest, dest); @@ -1643,8 +1744,8 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond, cleared = true; } - tcg_out_cmp(s, arg1, arg2, const_arg2, rexw); - tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); + jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, rexw); + tcg_out_modrm(s, OPC_SETCC | jcc, 0, dest); if (!cleared) { tcg_out_ext8u(s, dest, dest); @@ -1698,14 +1799,14 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, } #endif -static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw, +static void tcg_out_cmov(TCGContext *s, int jcc, int rexw, TCGReg dest, TCGReg v1) { if (have_cmov) { - tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | rexw, dest, v1); + tcg_out_modrm(s, OPC_CMOVCC | jcc | rexw, dest, v1); } else { TCGLabel *over = gen_new_label(); - tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1); + tcg_out_jxx(s, jcc ^ 1, over, 1); tcg_out_mov(s, TCG_TYPE_I32, dest, v1); tcg_out_label(s, over); } @@ -1715,8 +1816,8 @@ static void tcg_out_movcond(TCGContext *s, int rexw, TCGCond cond, TCGReg dest, TCGReg c1, TCGArg c2, int const_c2, TCGReg v1) { - tcg_out_cmp(s, c1, c2, const_c2, rexw); - tcg_out_cmov(s, cond, rexw, dest, v1); + int jcc = tcg_out_cmp(s, cond, c1, c2, const_c2, rexw); + tcg_out_cmov(s, jcc, rexw, dest, v1); } static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, @@ -1728,12 +1829,12 @@ static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, tcg_debug_assert(arg2 == (rexw ? 64 : 32)); } else { tcg_debug_assert(dest != arg2); - tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2); + tcg_out_cmov(s, JCC_JB, rexw, dest, arg2); } } else { tcg_debug_assert(dest != arg2); tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1); - tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2); + tcg_out_cmov(s, JCC_JE, rexw, dest, arg2); } } @@ -1746,7 +1847,7 @@ static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, tcg_debug_assert(arg2 == (rexw ? 64 : 32)); } else { tcg_debug_assert(dest != arg2); - tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2); + tcg_out_cmov(s, JCC_JB, rexw, dest, arg2); } } else { tcg_debug_assert(!const_a2); @@ -1758,8 +1859,8 @@ static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0); /* Since we have destroyed the flags from BSR, we have to re-test. */ - tcg_out_cmp(s, arg1, 0, 1, rexw); - tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2); + int jcc = tcg_out_cmp(s, TCG_COND_EQ, arg1, 0, 1, rexw); + tcg_out_cmov(s, jcc, rexw, dest, arg2); } } @@ -1824,23 +1925,6 @@ static void tcg_out_nopn(TCGContext *s, int n) tcg_out8(s, 0x90); } -/* Test register R vs immediate bits I, setting Z flag for EQ/NE. */ -static void __attribute__((unused)) -tcg_out_testi(TCGContext *s, TCGReg r, uint32_t i) -{ - /* - * This is used for testing alignment, so we can usually use testb. - * For i686, we have to use testl for %esi/%edi. 
- */ - if (i <= 0xff && (TCG_TARGET_REG_BITS == 64 || r < 4)) { - tcg_out_modrm(s, OPC_GRP3_Eb | P_REXB_RM, EXT3_TESTi, r); - tcg_out8(s, i); - } else { - tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_TESTi, r); - tcg_out32(s, i); - } -} - typedef struct { TCGReg base; int index; @@ -2101,16 +2185,17 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0, offsetof(CPUTLBEntry, addend)); } else if (a_mask) { - ldst = new_ldst_label(s); + int jcc; + ldst = new_ldst_label(s); ldst->is_ld = is_ld; ldst->oi = oi; ldst->addrlo_reg = addrlo; ldst->addrhi_reg = addrhi; - tcg_out_testi(s, addrlo, a_mask); /* jne slow_path */ - tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); + jcc = tcg_out_cmp(s, TCG_COND_TSTNE, addrlo, a_mask, true, false); + tcg_out_opc(s, OPC_JCC_long + jcc, 0, 0, 0); ldst->label_ptr[0] = s->code_ptr; s->code_ptr += 4; } @@ -2256,9 +2341,10 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, } else { TCGLabel *l1 = gen_new_label(); TCGLabel *l2 = gen_new_label(); + int jcc; - tcg_out_testi(s, h.base, 15); - tcg_out_jxx(s, JCC_JNE, l1, true); + jcc = tcg_out_cmp(s, TCG_COND_TSTNE, h.base, 15, true, false); + tcg_out_jxx(s, jcc, l1, true); tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg, TCG_TMP_VEC, 0, @@ -2384,9 +2470,10 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, } else { TCGLabel *l1 = gen_new_label(); TCGLabel *l2 = gen_new_label(); + int jcc; - tcg_out_testi(s, h.base, 15); - tcg_out_jxx(s, JCC_JNE, l1, true); + jcc = tcg_out_cmp(s, TCG_COND_TSTNE, h.base, 15, true, false); + tcg_out_jxx(s, jcc, l1, true); tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg, TCG_TMP_VEC, 0, @@ -3373,7 +3460,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: - return C_O0_I2(r, re); + return C_O0_I2(r, reT); case INDEX_op_bswap16_i32: case INDEX_op_bswap16_i64: @@ -3421,11 +3508,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_setcond_i64: case INDEX_op_negsetcond_i32: case INDEX_op_negsetcond_i64: - return C_O1_I2(q, r, re); + return C_O1_I2(q, r, reT); case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: - return C_O1_I4(r, r, re, r, 0); + return C_O1_I4(r, r, reT, r, 0); case INDEX_op_div2_i32: case INDEX_op_div2_i64: diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index fa34deec47..a10d4e1fce 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -198,6 +198,8 @@ typedef enum { #define TCG_TARGET_HAS_qemu_ldst_i128 \ (TCG_TARGET_REG_BITS == 64 && (cpuinfo & CPUINFO_ATOMIC_VMOVDQA)) +#define TCG_TARGET_HAS_tst 1 + /* We do not support older SSE systems, only beginning with AVX1. 
*/ #define TCG_TARGET_HAS_v64 have_avx1 #define TCG_TARGET_HAS_v128 have_avx1 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index dcf0205458..69c5b8ac4f 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -183,7 +183,8 @@ static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len) } /* test if a constant matches the constraint */ -static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece) +static bool tcg_target_const_match(int64_t val, int ct, + TCGType type, TCGCond cond, int vece) { if (ct & TCG_CT_CONST) { return true; diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index 9c70ebfefc..fede627bf7 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -169,6 +169,8 @@ typedef enum { #define TCG_TARGET_HAS_qemu_ldst_i128 (cpuinfo & CPUINFO_LSX) +#define TCG_TARGET_HAS_tst 0 + #define TCG_TARGET_HAS_v64 0 #define TCG_TARGET_HAS_v128 (cpuinfo & CPUINFO_LSX) #define TCG_TARGET_HAS_v256 0 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 8328dbdecc..3b5b5c6d5b 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -188,7 +188,8 @@ static bool is_p2m1(tcg_target_long val) } /* test if a constant matches the constraint */ -static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece) +static bool tcg_target_const_match(int64_t val, int ct, + TCGType type, TCGCond cond, int vece) { if (ct & TCG_CT_CONST) { return 1; diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index b98ffae1d0..a996aa171d 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -194,6 +194,8 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_qemu_ldst_i128 0 +#define TCG_TARGET_HAS_tst 0 + #define TCG_TARGET_DEFAULT_MO 0 #define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/optimize.c b/tcg/optimize.c index f2d01654c5..79e701652b 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -124,11 +124,22 @@ static inline bool ts_is_const(TCGTemp *ts) return ts_info(ts)->is_const; } +static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val) +{ + TempOptInfo *ti = ts_info(ts); + return ti->is_const && ti->val == val; +} + static inline bool arg_is_const(TCGArg arg) { return ts_is_const(arg_temp(arg)); } +static inline bool arg_is_const_val(TCGArg arg, uint64_t val) +{ + return ts_is_const_val(arg_temp(arg), val); +} + static inline bool ts_is_copy(TCGTemp *ts) { return ts_info(ts)->next_copy != ts; @@ -353,6 +364,13 @@ static TCGArg arg_new_constant(OptContext *ctx, uint64_t val) return temp_arg(ts); } +static TCGArg arg_new_temp(OptContext *ctx) +{ + TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB); + init_ts_info(ctx, ts); + return temp_arg(ts); +} + static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) { TCGTemp *dst_ts = arg_temp(dst); @@ -614,9 +632,15 @@ static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) return x <= y; case TCG_COND_GTU: return x > y; - default: - g_assert_not_reached(); + case TCG_COND_TSTEQ: + return (x & y) == 0; + case TCG_COND_TSTNE: + return (x & y) != 0; + case TCG_COND_ALWAYS: + case TCG_COND_NEVER: + break; } + g_assert_not_reached(); } static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) @@ -642,12 +666,18 @@ static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) return x <= y; case TCG_COND_GTU: return x > y; - 
default: - g_assert_not_reached(); + case TCG_COND_TSTEQ: + return (x & y) == 0; + case TCG_COND_TSTNE: + return (x & y) != 0; + case TCG_COND_ALWAYS: + case TCG_COND_NEVER: + break; } + g_assert_not_reached(); } -static bool do_constant_folding_cond_eq(TCGCond c) +static int do_constant_folding_cond_eq(TCGCond c) { switch (c) { case TCG_COND_GT: @@ -662,9 +692,14 @@ static bool do_constant_folding_cond_eq(TCGCond c) case TCG_COND_LEU: case TCG_COND_EQ: return 1; - default: - g_assert_not_reached(); + case TCG_COND_TSTEQ: + case TCG_COND_TSTNE: + return -1; + case TCG_COND_ALWAYS: + case TCG_COND_NEVER: + break; } + g_assert_not_reached(); } /* @@ -689,11 +724,13 @@ static int do_constant_folding_cond(TCGType type, TCGArg x, } } else if (args_are_copies(x, y)) { return do_constant_folding_cond_eq(c); - } else if (arg_is_const(y) && arg_info(y)->val == 0) { + } else if (arg_is_const_val(y, 0)) { switch (c) { case TCG_COND_LTU: + case TCG_COND_TSTNE: return 0; case TCG_COND_GEU: + case TCG_COND_TSTEQ: return 1; default: return -1; @@ -702,43 +739,6 @@ static int do_constant_folding_cond(TCGType type, TCGArg x, return -1; } -/* - * Return -1 if the condition can't be simplified, - * and the result of the condition (0 or 1) if it can. - */ -static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) -{ - TCGArg al = p1[0], ah = p1[1]; - TCGArg bl = p2[0], bh = p2[1]; - - if (arg_is_const(bl) && arg_is_const(bh)) { - tcg_target_ulong blv = arg_info(bl)->val; - tcg_target_ulong bhv = arg_info(bh)->val; - uint64_t b = deposit64(blv, 32, 32, bhv); - - if (arg_is_const(al) && arg_is_const(ah)) { - tcg_target_ulong alv = arg_info(al)->val; - tcg_target_ulong ahv = arg_info(ah)->val; - uint64_t a = deposit64(alv, 32, 32, ahv); - return do_constant_folding_cond_64(a, b, c); - } - if (b == 0) { - switch (c) { - case TCG_COND_LTU: - return 0; - case TCG_COND_GEU: - return 1; - default: - break; - } - } - } - if (args_are_copies(al, bl) && args_are_copies(ah, bh)) { - return do_constant_folding_cond_eq(c); - } - return -1; -} - /** * swap_commutative: * @dest: TCGArg of the destination argument, or NO_DEST. @@ -785,6 +785,166 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2) return false; } +/* + * Return -1 if the condition can't be simplified, + * and the result of the condition (0 or 1) if it can. + */ +static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, + TCGArg *p1, TCGArg *p2, TCGArg *pcond) +{ + TCGCond cond; + bool swap; + int r; + + swap = swap_commutative(dest, p1, p2); + cond = *pcond; + if (swap) { + *pcond = cond = tcg_swap_cond(cond); + } + + r = do_constant_folding_cond(ctx->type, *p1, *p2, cond); + if (r >= 0) { + return r; + } + if (!is_tst_cond(cond)) { + return -1; + } + + /* + * TSTNE x,x -> NE x,0 + * TSTNE x,-1 -> NE x,0 + */ + if (args_are_copies(*p1, *p2) || arg_is_const_val(*p2, -1)) { + *p2 = arg_new_constant(ctx, 0); + *pcond = tcg_tst_eqne_cond(cond); + return -1; + } + + /* TSTNE x,sign -> LT x,0 */ + if (arg_is_const_val(*p2, (ctx->type == TCG_TYPE_I32 + ? INT32_MIN : INT64_MIN))) { + *p2 = arg_new_constant(ctx, 0); + *pcond = tcg_tst_ltge_cond(cond); + return -1; + } + + /* Expand to AND with a temporary if no backend support. */ + if (!TCG_TARGET_HAS_tst) { + TCGOpcode and_opc = (ctx->type == TCG_TYPE_I32 + ? 
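/*
 * Aside (editor's sketch): the canonicalizations performed above are
 * identities for all x, and the same reasoning lets the double-word
 * variant split a 64-bit TST into two 32-bit ANDs on 32-bit hosts:
 *   (x & x)    != 0  <=>  x != 0          TSTNE x,x    -> NE x,0
 *   (x & -1)   != 0  <=>  x != 0          TSTNE x,-1   -> NE x,0
 *   (x & sign) != 0  <=>  x < 0           TSTNE x,sign -> LT x,0
 */
static bool tstne_sign_is_lt_zero(int64_t x)    /* needs <stdint.h> */
{
    return ((x & INT64_MIN) != 0) == (x < 0);   /* holds for every x */
}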
INDEX_op_and_i32 : INDEX_op_and_i64); + TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, and_opc, 3); + TCGArg tmp = arg_new_temp(ctx); + + op2->args[0] = tmp; + op2->args[1] = *p1; + op2->args[2] = *p2; + + *p1 = tmp; + *p2 = arg_new_constant(ctx, 0); + *pcond = tcg_tst_eqne_cond(cond); + } + return -1; +} + +static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args) +{ + TCGArg al, ah, bl, bh; + TCGCond c; + bool swap; + int r; + + swap = swap_commutative2(args, args + 2); + c = args[4]; + if (swap) { + args[4] = c = tcg_swap_cond(c); + } + + al = args[0]; + ah = args[1]; + bl = args[2]; + bh = args[3]; + + if (arg_is_const(bl) && arg_is_const(bh)) { + tcg_target_ulong blv = arg_info(bl)->val; + tcg_target_ulong bhv = arg_info(bh)->val; + uint64_t b = deposit64(blv, 32, 32, bhv); + + if (arg_is_const(al) && arg_is_const(ah)) { + tcg_target_ulong alv = arg_info(al)->val; + tcg_target_ulong ahv = arg_info(ah)->val; + uint64_t a = deposit64(alv, 32, 32, ahv); + + r = do_constant_folding_cond_64(a, b, c); + if (r >= 0) { + return r; + } + } + + if (b == 0) { + switch (c) { + case TCG_COND_LTU: + case TCG_COND_TSTNE: + return 0; + case TCG_COND_GEU: + case TCG_COND_TSTEQ: + return 1; + default: + break; + } + } + + /* TSTNE x,-1 -> NE x,0 */ + if (b == -1 && is_tst_cond(c)) { + args[3] = args[2] = arg_new_constant(ctx, 0); + args[4] = tcg_tst_eqne_cond(c); + return -1; + } + + /* TSTNE x,sign -> LT x,0 */ + if (b == INT64_MIN && is_tst_cond(c)) { + /* bl must be 0, so copy that to bh */ + args[3] = bl; + args[4] = tcg_tst_ltge_cond(c); + return -1; + } + } + + if (args_are_copies(al, bl) && args_are_copies(ah, bh)) { + r = do_constant_folding_cond_eq(c); + if (r >= 0) { + return r; + } + + /* TSTNE x,x -> NE x,0 */ + if (is_tst_cond(c)) { + args[3] = args[2] = arg_new_constant(ctx, 0); + args[4] = tcg_tst_eqne_cond(c); + return -1; + } + } + + /* Expand to AND with a temporary if no backend support. */ + if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) { + TCGOp *op1 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3); + TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3); + TCGArg t1 = arg_new_temp(ctx); + TCGArg t2 = arg_new_temp(ctx); + + op1->args[0] = t1; + op1->args[1] = al; + op1->args[2] = bl; + op2->args[0] = t2; + op2->args[1] = ah; + op2->args[2] = bh; + + args[0] = t1; + args[1] = t2; + args[3] = args[2] = arg_new_constant(ctx, 0); + args[4] = tcg_tst_eqne_cond(c); + } + return -1; +} + static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args) { for (int i = 0; i < nb_args; i++) { @@ -954,7 +1114,7 @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx) /* If the binary operation has first argument @i, fold to @i. */ static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i) { - if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) { + if (arg_is_const_val(op->args[1], i)) { return tcg_opt_gen_movi(ctx, op, op->args[0], i); } return false; @@ -963,7 +1123,7 @@ static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i) /* If the binary operation has first argument @i, fold to NOT. */ static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i) { - if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) { + if (arg_is_const_val(op->args[1], i)) { return fold_to_not(ctx, op, 2); } return false; @@ -972,7 +1132,7 @@ static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i) /* If the binary operation has second argument @i, fold to @i. 
*/ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i) { - if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) { + if (arg_is_const_val(op->args[2], i)) { return tcg_opt_gen_movi(ctx, op, op->args[0], i); } return false; @@ -981,7 +1141,7 @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i) /* If the binary operation has second argument @i, fold to identity. */ static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i) { - if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) { + if (arg_is_const_val(op->args[2], i)) { return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); } return false; @@ -990,7 +1150,7 @@ static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i) /* If the binary operation has second argument @i, fold to NOT. */ static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i) { - if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) { + if (arg_is_const_val(op->args[2], i)) { return fold_to_not(ctx, op, 1); } return false; @@ -1182,14 +1342,8 @@ static bool fold_andc(OptContext *ctx, TCGOp *op) static bool fold_brcond(OptContext *ctx, TCGOp *op) { - TCGCond cond = op->args[2]; - int i; - - if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) { - op->args[2] = cond = tcg_swap_cond(cond); - } - - i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond); + int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0], + &op->args[1], &op->args[2]); if (i == 0) { tcg_op_remove(ctx->tcg, op); return true; @@ -1203,15 +1357,13 @@ static bool fold_brcond(OptContext *ctx, TCGOp *op) static bool fold_brcond2(OptContext *ctx, TCGOp *op) { - TCGCond cond = op->args[4]; - TCGArg label = op->args[5]; + TCGCond cond; + TCGArg label; int i, inv = 0; - if (swap_commutative2(&op->args[0], &op->args[2])) { - op->args[4] = cond = tcg_swap_cond(cond); - } - - i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond); + i = do_constant_folding_cond2(ctx, op, &op->args[0]); + cond = op->args[4]; + label = op->args[5]; if (i >= 0) { goto do_brcond_const; } @@ -1223,8 +1375,8 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op) * Simplify LT/GE comparisons vs zero to a single compare * vs the high word of the input. */ - if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 && - arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) { + if (arg_is_const_val(op->args[2], 0) && + arg_is_const_val(op->args[3], 0)) { goto do_brcond_high; } break; @@ -1252,24 +1404,37 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op) case 0: goto do_brcond_const; case 1: - op->opc = INDEX_op_brcond_i32; - op->args[1] = op->args[2]; - op->args[2] = cond; - op->args[3] = label; - break; + goto do_brcond_low; + } + break; + + case TCG_COND_TSTEQ: + case TCG_COND_TSTNE: + if (arg_is_const_val(op->args[2], 0)) { + goto do_brcond_high; + } + if (arg_is_const_val(op->args[3], 0)) { + goto do_brcond_low; } break; default: break; + do_brcond_low: + op->opc = INDEX_op_brcond_i32; + op->args[1] = op->args[2]; + op->args[2] = cond; + op->args[3] = label; + return fold_brcond(ctx, op); + do_brcond_high: op->opc = INDEX_op_brcond_i32; op->args[0] = op->args[1]; op->args[1] = op->args[3]; op->args[2] = cond; op->args[3] = label; - break; + return fold_brcond(ctx, op); do_brcond_const: if (i == 0) { @@ -1448,9 +1613,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op) } /* Inserting a value into zero at offset 0. 
*/ - if (arg_is_const(op->args[1]) - && arg_info(op->args[1])->val == 0 - && op->args[3] == 0) { + if (arg_is_const_val(op->args[1], 0) && op->args[3] == 0) { uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]); op->opc = and_opc; @@ -1461,8 +1624,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op) } /* Inserting zero into a value. */ - if (arg_is_const(op->args[2]) - && arg_info(op->args[2])->val == 0) { + if (arg_is_const_val(op->args[2], 0)) { uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0); op->opc = and_opc; @@ -1687,21 +1849,18 @@ static bool fold_mov(OptContext *ctx, TCGOp *op) static bool fold_movcond(OptContext *ctx, TCGOp *op) { - TCGCond cond = op->args[5]; int i; - if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { - op->args[5] = cond = tcg_swap_cond(cond); - } /* * Canonicalize the "false" input reg to match the destination reg so * that the tcg backend can implement a "move if true" operation. */ if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) { - op->args[5] = cond = tcg_invert_cond(cond); + op->args[5] = tcg_invert_cond(op->args[5]); } - i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond); + i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1], + &op->args[2], &op->args[5]); if (i >= 0) { return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]); } @@ -1715,6 +1874,7 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op) uint64_t tv = arg_info(op->args[3])->val; uint64_t fv = arg_info(op->args[4])->val; TCGOpcode opc, negopc = 0; + TCGCond cond = op->args[5]; switch (ctx->type) { case TCG_TYPE_I32: @@ -1940,19 +2100,107 @@ static bool fold_remainder(OptContext *ctx, TCGOp *op) return false; } -static bool fold_setcond(OptContext *ctx, TCGOp *op) +static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) { + TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc; + TCGOpcode uext_opc = 0, sext_opc = 0; TCGCond cond = op->args[3]; - int i; + TCGArg ret, src1, src2; + TCGOp *op2; + uint64_t val; + int sh; + bool inv; + + if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) { + return; + } + + src2 = op->args[2]; + val = arg_info(src2)->val; + if (!is_power_of_2(val)) { + return; + } + sh = ctz64(val); - if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) { - op->args[3] = cond = tcg_swap_cond(cond); + switch (ctx->type) { + case TCG_TYPE_I32: + and_opc = INDEX_op_and_i32; + sub_opc = INDEX_op_sub_i32; + xor_opc = INDEX_op_xor_i32; + shr_opc = INDEX_op_shr_i32; + neg_opc = INDEX_op_neg_i32; + if (TCG_TARGET_extract_i32_valid(sh, 1)) { + uext_opc = TCG_TARGET_HAS_extract_i32 ? INDEX_op_extract_i32 : 0; + sext_opc = TCG_TARGET_HAS_sextract_i32 ? INDEX_op_sextract_i32 : 0; + } + break; + case TCG_TYPE_I64: + and_opc = INDEX_op_and_i64; + sub_opc = INDEX_op_sub_i64; + xor_opc = INDEX_op_xor_i64; + shr_opc = INDEX_op_shr_i64; + neg_opc = INDEX_op_neg_i64; + if (TCG_TARGET_extract_i64_valid(sh, 1)) { + uext_opc = TCG_TARGET_HAS_extract_i64 ? INDEX_op_extract_i64 : 0; + sext_opc = TCG_TARGET_HAS_sextract_i64 ?
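/*
 * Aside (editor's sketch): the special case fold_setcond_tst_pow2
 * implements.  For val == 1 << sh, "setcond ret, x, val, TSTNE" is
 * simply bit sh of x, i.e. extract(x, sh, 1), with shr+and (plus
 * neg/xor for the negated and inverted forms) as the fallback:
 */
static uint64_t setcond_tstne_pow2(uint64_t x, unsigned sh)
{
    return (x >> sh) & 1;
}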
INDEX_op_sextract_i64 : 0; + } + break; + default: + g_assert_not_reached(); + } + + ret = op->args[0]; + src1 = op->args[1]; + inv = cond == TCG_COND_TSTEQ; + + if (sh && sext_opc && neg && !inv) { + op->opc = sext_opc; + op->args[1] = src1; + op->args[2] = sh; + op->args[3] = 1; + return; + } else if (sh && uext_opc) { + op->opc = uext_opc; + op->args[1] = src1; + op->args[2] = sh; + op->args[3] = 1; + } else { + if (sh) { + op2 = tcg_op_insert_before(ctx->tcg, op, shr_opc, 3); + op2->args[0] = ret; + op2->args[1] = src1; + op2->args[2] = arg_new_constant(ctx, sh); + src1 = ret; + } + op->opc = and_opc; + op->args[1] = src1; + op->args[2] = arg_new_constant(ctx, 1); } - i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond); + if (neg && inv) { + op2 = tcg_op_insert_after(ctx->tcg, op, sub_opc, 3); + op2->args[0] = ret; + op2->args[1] = ret; + op2->args[2] = arg_new_constant(ctx, 1); + } else if (inv) { + op2 = tcg_op_insert_after(ctx->tcg, op, xor_opc, 3); + op2->args[0] = ret; + op2->args[1] = ret; + op2->args[2] = arg_new_constant(ctx, 1); + } else if (neg) { + op2 = tcg_op_insert_after(ctx->tcg, op, neg_opc, 2); + op2->args[0] = ret; + op2->args[1] = ret; + } +} + +static bool fold_setcond(OptContext *ctx, TCGOp *op) +{ + int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1], + &op->args[2], &op->args[3]); if (i >= 0) { return tcg_opt_gen_movi(ctx, op, op->args[0], i); } + fold_setcond_tst_pow2(ctx, op, false); ctx->z_mask = 1; ctx->s_mask = smask_from_zmask(1); @@ -1961,34 +2209,25 @@ static bool fold_setcond(OptContext *ctx, TCGOp *op) static bool fold_negsetcond(OptContext *ctx, TCGOp *op) { - TCGCond cond = op->args[3]; - int i; - - if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) { - op->args[3] = cond = tcg_swap_cond(cond); - } - - i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond); + int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1], + &op->args[2], &op->args[3]); if (i >= 0) { return tcg_opt_gen_movi(ctx, op, op->args[0], -i); } + fold_setcond_tst_pow2(ctx, op, true); /* Value is {0,-1} so all bits are repetitions of the sign. */ ctx->s_mask = -1; return false; } - static bool fold_setcond2(OptContext *ctx, TCGOp *op) { - TCGCond cond = op->args[5]; + TCGCond cond; int i, inv = 0; - if (swap_commutative2(&op->args[1], &op->args[3])) { - op->args[5] = cond = tcg_swap_cond(cond); - } - - i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond); + i = do_constant_folding_cond2(ctx, op, &op->args[1]); + cond = op->args[5]; if (i >= 0) { goto do_setcond_const; } @@ -2000,8 +2239,8 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op) * Simplify LT/GE comparisons vs zero to a single compare * vs the high word of the input. 
*/ - if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 && - arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) { + if (arg_is_const_val(op->args[3], 0) && + arg_is_const_val(op->args[4], 0)) { goto do_setcond_high; } break; @@ -2029,22 +2268,35 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op) case 0: goto do_setcond_const; case 1: - op->args[2] = op->args[3]; - op->args[3] = cond; - op->opc = INDEX_op_setcond_i32; - break; + goto do_setcond_low; + } + break; + + case TCG_COND_TSTEQ: + case TCG_COND_TSTNE: + if (arg_is_const_val(op->args[3], 0)) { + goto do_setcond_high; + } + if (arg_is_const_val(op->args[4], 0)) { + goto do_setcond_low; + } + break; default: break; + do_setcond_low: + op->args[2] = op->args[3]; + op->args[3] = cond; + op->opc = INDEX_op_setcond_i32; + return fold_setcond(ctx, op); + do_setcond_high: op->args[1] = op->args[2]; op->args[2] = op->args[4]; op->args[3] = cond; op->opc = INDEX_op_setcond_i32; - break; + return fold_setcond(ctx, op); } ctx->z_mask = 1; diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h index cb47b29452..9f99bde505 100644 --- a/tcg/ppc/tcg-target-con-set.h +++ b/tcg/ppc/tcg-target-con-set.h @@ -11,7 +11,7 @@ */ C_O0_I1(r) C_O0_I2(r, r) -C_O0_I2(r, ri) +C_O0_I2(r, rC) C_O0_I2(v, r) C_O0_I3(r, r, r) C_O0_I3(o, m, r) @@ -26,13 +26,14 @@ C_O1_I2(r, rI, ri) C_O1_I2(r, rI, rT) C_O1_I2(r, r, r) C_O1_I2(r, r, ri) +C_O1_I2(r, r, rC) C_O1_I2(r, r, rI) C_O1_I2(r, r, rT) C_O1_I2(r, r, rU) C_O1_I2(r, r, rZW) C_O1_I2(v, v, v) C_O1_I3(v, v, v, v) -C_O1_I4(r, r, ri, rZ, rZ) +C_O1_I4(r, r, rC, rZ, rZ) C_O1_I4(r, r, r, ri, ri) C_O2_I1(r, r, r) C_N1O1_I1(o, m, r) diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h index 20846901de..16b687216e 100644 --- a/tcg/ppc/tcg-target-con-str.h +++ b/tcg/ppc/tcg-target-con-str.h @@ -16,6 +16,7 @@ REGS('v', ALL_VECTOR_REGS) * Define constraint letters for constants: * CONST(letter, TCG_CT_CONST_* bit set) */ +CONST('C', TCG_CT_CONST_CMP) CONST('I', TCG_CT_CONST_S16) CONST('M', TCG_CT_CONST_MONE) CONST('T', TCG_CT_CONST_S32) diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 54816967bc..7f3829beeb 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -92,11 +92,13 @@ #define SZR (TCG_TARGET_REG_BITS / 8) #define TCG_CT_CONST_S16 0x100 +#define TCG_CT_CONST_U16 0x200 #define TCG_CT_CONST_S32 0x400 #define TCG_CT_CONST_U32 0x800 #define TCG_CT_CONST_ZERO 0x1000 #define TCG_CT_CONST_MONE 0x2000 #define TCG_CT_CONST_WSZ 0x4000 +#define TCG_CT_CONST_CMP 0x8000 #define ALL_GENERAL_REGS 0xffffffffu #define ALL_VECTOR_REGS 0xffffffff00000000ull @@ -281,31 +283,78 @@ static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target) return false; } +static bool mask_operand(uint32_t c, int *mb, int *me); +static bool mask64_operand(uint64_t c, int *mb, int *me); + /* test if a constant matches the constraint */ -static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece) +static bool tcg_target_const_match(int64_t sval, int ct, + TCGType type, TCGCond cond, int vece) { + uint64_t uval = sval; + int mb, me; + if (ct & TCG_CT_CONST) { return 1; } - /* The only 32-bit constraint we use aside from - TCG_CT_CONST is TCG_CT_CONST_S16.
*/ if (type == TCG_TYPE_I32) { - val = (int32_t)val; + uval = (uint32_t)sval; + sval = (int32_t)sval; + } + + if (ct & TCG_CT_CONST_CMP) { + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_NE: + ct |= TCG_CT_CONST_S16 | TCG_CT_CONST_U16; + break; + case TCG_COND_LT: + case TCG_COND_GE: + case TCG_COND_LE: + case TCG_COND_GT: + ct |= TCG_CT_CONST_S16; + break; + case TCG_COND_LTU: + case TCG_COND_GEU: + case TCG_COND_LEU: + case TCG_COND_GTU: + ct |= TCG_CT_CONST_U16; + break; + case TCG_COND_TSTEQ: + case TCG_COND_TSTNE: + if ((uval & ~0xffff) == 0 || (uval & ~0xffff0000ull) == 0) { + return 1; + } + if (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32 + ? mask_operand(uval, &mb, &me) + : mask64_operand(uval << clz64(uval), &mb, &me)) { + return 1; + } + return 0; + default: + g_assert_not_reached(); + } } - if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { + if ((ct & TCG_CT_CONST_S16) && sval == (int16_t)sval) { + return 1; + } + if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) { return 1; - } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { + } + if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) { return 1; - } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { + } + if ((ct & TCG_CT_CONST_U32) && uval == (uint32_t)uval) { return 1; - } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + } + if ((ct & TCG_CT_CONST_ZERO) && sval == 0) { return 1; - } else if ((ct & TCG_CT_CONST_MONE) && val == -1) { + } + if ((ct & TCG_CT_CONST_MONE) && sval == -1) { return 1; - } else if ((ct & TCG_CT_CONST_WSZ) - && val == (type == TCG_TYPE_I32 ? 32 : 64)) { + } + if ((ct & TCG_CT_CONST_WSZ) && sval == (type == TCG_TYPE_I32 ? 32 : 64)) { return 1; } return 0; @@ -669,31 +718,35 @@ enum { CR_SO }; -static const uint32_t tcg_to_bc[] = { - [TCG_COND_EQ] = BC | BI(7, CR_EQ) | BO_COND_TRUE, - [TCG_COND_NE] = BC | BI(7, CR_EQ) | BO_COND_FALSE, - [TCG_COND_LT] = BC | BI(7, CR_LT) | BO_COND_TRUE, - [TCG_COND_GE] = BC | BI(7, CR_LT) | BO_COND_FALSE, - [TCG_COND_LE] = BC | BI(7, CR_GT) | BO_COND_FALSE, - [TCG_COND_GT] = BC | BI(7, CR_GT) | BO_COND_TRUE, - [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE, - [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE, - [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE, - [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE, +static const uint32_t tcg_to_bc[16] = { + [TCG_COND_EQ] = BC | BI(0, CR_EQ) | BO_COND_TRUE, + [TCG_COND_NE] = BC | BI(0, CR_EQ) | BO_COND_FALSE, + [TCG_COND_TSTEQ] = BC | BI(0, CR_EQ) | BO_COND_TRUE, + [TCG_COND_TSTNE] = BC | BI(0, CR_EQ) | BO_COND_FALSE, + [TCG_COND_LT] = BC | BI(0, CR_LT) | BO_COND_TRUE, + [TCG_COND_GE] = BC | BI(0, CR_LT) | BO_COND_FALSE, + [TCG_COND_LE] = BC | BI(0, CR_GT) | BO_COND_FALSE, + [TCG_COND_GT] = BC | BI(0, CR_GT) | BO_COND_TRUE, + [TCG_COND_LTU] = BC | BI(0, CR_LT) | BO_COND_TRUE, + [TCG_COND_GEU] = BC | BI(0, CR_LT) | BO_COND_FALSE, + [TCG_COND_LEU] = BC | BI(0, CR_GT) | BO_COND_FALSE, + [TCG_COND_GTU] = BC | BI(0, CR_GT) | BO_COND_TRUE, }; /* The low bit here is set if the RA and RB fields must be inverted. 
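For example, NE is tested via the CR_EQ bit with the sense reversed,
so its entry below has the low bit set and isel swaps RA and RB.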
*/ -static const uint32_t tcg_to_isel[] = { - [TCG_COND_EQ] = ISEL | BC_(7, CR_EQ), - [TCG_COND_NE] = ISEL | BC_(7, CR_EQ) | 1, - [TCG_COND_LT] = ISEL | BC_(7, CR_LT), - [TCG_COND_GE] = ISEL | BC_(7, CR_LT) | 1, - [TCG_COND_LE] = ISEL | BC_(7, CR_GT) | 1, - [TCG_COND_GT] = ISEL | BC_(7, CR_GT), - [TCG_COND_LTU] = ISEL | BC_(7, CR_LT), - [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1, - [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1, - [TCG_COND_GTU] = ISEL | BC_(7, CR_GT), +static const uint32_t tcg_to_isel[16] = { + [TCG_COND_EQ] = ISEL | BC_(0, CR_EQ), + [TCG_COND_NE] = ISEL | BC_(0, CR_EQ) | 1, + [TCG_COND_TSTEQ] = ISEL | BC_(0, CR_EQ), + [TCG_COND_TSTNE] = ISEL | BC_(0, CR_EQ) | 1, + [TCG_COND_LT] = ISEL | BC_(0, CR_LT), + [TCG_COND_GE] = ISEL | BC_(0, CR_LT) | 1, + [TCG_COND_LE] = ISEL | BC_(0, CR_GT) | 1, + [TCG_COND_GT] = ISEL | BC_(0, CR_GT), + [TCG_COND_LTU] = ISEL | BC_(0, CR_LT), + [TCG_COND_GEU] = ISEL | BC_(0, CR_LT) | 1, + [TCG_COND_LEU] = ISEL | BC_(0, CR_GT) | 1, + [TCG_COND_GTU] = ISEL | BC_(0, CR_GT), }; static bool patch_reloc(tcg_insn_unit *code_ptr, int type, @@ -838,19 +891,31 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) return true; } -static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, - int sh, int mb) +static void tcg_out_rld_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs, + int sh, int mb, bool rc) { tcg_debug_assert(TCG_TARGET_REG_BITS == 64); sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1); mb = MB64((mb >> 5) | ((mb << 1) & 0x3f)); - tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb); + tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb | rc); +} + +static void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, + int sh, int mb) +{ + tcg_out_rld_rc(s, op, ra, rs, sh, mb, false); +} + +static void tcg_out_rlw_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs, + int sh, int mb, int me, bool rc) +{ + tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me) | rc); } -static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs, - int sh, int mb, int me) +static void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs, + int sh, int mb, int me) { - tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me)); + tcg_out_rlw_rc(s, op, ra, rs, sh, mb, me, false); } static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) @@ -1668,6 +1733,50 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, return false; } +/* + * Set dest non-zero if and only if (arg1 & arg2) is non-zero. + * If RC, then also set RC0. + */ +static void tcg_out_test(TCGContext *s, TCGReg dest, TCGReg arg1, TCGArg arg2, + bool const_arg2, TCGType type, bool rc) +{ + int mb, me; + + if (!const_arg2) { + tcg_out32(s, AND | SAB(arg1, dest, arg2) | rc); + return; + } + + if (type == TCG_TYPE_I32) { + arg2 = (uint32_t)arg2; + } else if (arg2 == (uint32_t)arg2) { + type = TCG_TYPE_I32; + } + + if ((arg2 & ~0xffff) == 0) { + tcg_out32(s, ANDI | SAI(arg1, dest, arg2)); + return; + } + if ((arg2 & ~0xffff0000ull) == 0) { + tcg_out32(s, ANDIS | SAI(arg1, dest, arg2 >> 16)); + return; + } + if (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32) { + if (mask_operand(arg2, &mb, &me)) { + tcg_out_rlw_rc(s, RLWINM, dest, arg1, 0, mb, me, rc); + return; + } + } else { + int sh = clz64(arg2); + if (mask64_operand(arg2 << sh, &mb, &me)) { + tcg_out_rld_rc(s, RLDICR, dest, arg1, sh, me, rc); + return; + } + } + /* Constraints should satisfy this. 
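+ * E.g. a 16-bit mask such as 0x00f0 is handled by ANDI above,
+ * 0x00f00000 by ANDIS, and a wider contiguous mask such as
+ * 0x0ffff000 by the RLWINM/RLDICR paths, so only a constant
+ * rejected by TCG_CT_CONST_CMP could reach this point.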
*/ + g_assert_not_reached(); +} + static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, int const_arg2, int cr, TCGType type) { @@ -1676,7 +1785,10 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); - /* Simplify the comparisons below wrt CMPI. */ + /* + * Simplify the comparisons below wrt CMPI. + * All of the tests are 16-bit, so a 32-bit sign extend always works. + */ if (type == TCG_TYPE_I32) { arg2 = (int32_t)arg2; } @@ -1699,6 +1811,12 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, imm = 0; break; + case TCG_COND_TSTEQ: + case TCG_COND_TSTNE: + tcg_debug_assert(cr == 0); + tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, true); + return; + case TCG_COND_LT: case TCG_COND_GE: case TCG_COND_LE: @@ -1826,7 +1944,7 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, if (have_isa_3_10) { tcg_insn_unit bi, opc; - tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type); /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */ bi = tcg_to_bc[cond] & (0x1f << 16); @@ -1879,7 +1997,7 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, if (have_isel) { int isel, tab; - tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type); isel = tcg_to_isel[cond]; @@ -1909,6 +2027,16 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, tcg_out_setcond_ne0(s, type, arg0, arg1, neg); break; + case TCG_COND_TSTEQ: + tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false); + tcg_out_setcond_eq0(s, type, arg0, TCG_REG_R0, neg); + break; + + case TCG_COND_TSTNE: + tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false); + tcg_out_setcond_ne0(s, type, arg0, TCG_REG_R0, neg); + break; + case TCG_COND_LE: case TCG_COND_LEU: inv = true; @@ -1945,22 +2073,28 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, } } -static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l) +static void tcg_out_bc(TCGContext *s, TCGCond cond, int bd) +{ + tcg_out32(s, tcg_to_bc[cond] | bd); +} + +static void tcg_out_bc_lab(TCGContext *s, TCGCond cond, TCGLabel *l) { + int bd = 0; if (l->has_value) { - bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr); + bd = reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr); } else { tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0); } - tcg_out32(s, bc); + tcg_out_bc(s, cond, bd); } static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1, TCGArg arg2, int const_arg2, TCGLabel *l, TCGType type) { - tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); - tcg_out_bc(s, tcg_to_bc[cond], l); + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type); + tcg_out_bc_lab(s, cond, l); } static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, @@ -1973,7 +2107,7 @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, return; } - tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type); + tcg_out_cmp(s, cond, c1, c2, const_c2, 0, type); if (have_isel) { int isel = tcg_to_isel[cond]; @@ -2002,7 +2136,7 @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, } } /* Branch forward over one insn */ - tcg_out32(s, tcg_to_bc[cond] | 8); + tcg_out_bc(s, cond, 8); if (v2 == 0) { tcg_out_movi(s, type, dest, 0); } else { @@ -2017,17 +2151,17 @@ static void tcg_out_cntxz(TCGContext 
*s, TCGType type, uint32_t opc, if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) { tcg_out32(s, opc | RA(a0) | RS(a1)); } else { - tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type); + tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 0, type); /* Note that the only other valid constant for a2 is 0. */ if (have_isel) { tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1)); tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0)); } else if (!const_a2 && a0 == a2) { - tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8); + tcg_out_bc(s, TCG_COND_EQ, 8); tcg_out32(s, opc | RA(a0) | RS(a1)); } else { tcg_out32(s, opc | RA(a0) | RS(a1)); - tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8); + tcg_out_bc(s, TCG_COND_NE, 8); if (const_a2) { tcg_out_movi(s, type, a0, 0); } else { @@ -2072,7 +2206,22 @@ static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, do_equality: tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32); tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32); - tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); + tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); + break; + + case TCG_COND_TSTEQ: + case TCG_COND_TSTNE: + if (blconst) { + tcg_out_andi32(s, TCG_REG_R0, al, bl); + } else { + tcg_out32(s, AND | SAB(al, TCG_REG_R0, bl)); + } + if (bhconst) { + tcg_out_andi32(s, TCG_REG_TMP1, ah, bh); + } else { + tcg_out32(s, AND | SAB(ah, TCG_REG_TMP1, bh)); + } + tcg_out32(s, OR | SAB(TCG_REG_R0, TCG_REG_R0, TCG_REG_TMP1) | 1); break; case TCG_COND_LT: @@ -2090,8 +2239,8 @@ static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32); tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32); - tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2)); - tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ)); + tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2)); + tcg_out32(s, CROR | BT(0, CR_EQ) | BA(6, bit1) | BB(0, CR_EQ)); break; default: @@ -2103,15 +2252,15 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, const int *const_args) { tcg_out_cmp2(s, args + 1, const_args + 1); - tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); - tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31); + tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(0)); + tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31); } -static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args, - const int *const_args) +static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, + const int *const_args) { tcg_out_cmp2(s, args, const_args); - tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5])); + tcg_out_bc_lab(s, TCG_COND_EQ, arg_label(args[5])); } static void tcg_out_mb(TCGContext *s, TCGArg a0) @@ -2435,17 +2584,17 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 0, 6, TCG_TYPE_I32); - /* Combine comparisons into cr7. */ - tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); + /* Combine comparisons into cr0. */ + tcg_out32(s, CRAND | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); } else { - /* Full comparison into cr7. */ + /* Full comparison into cr0. */ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, - 0, 7, addr_type); + 0, 0, addr_type); } /* Load a pointer into the current opcode w/conditional branch-link. 
*/ ldst->label_ptr[0] = s->code_ptr; - tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); + tcg_out_bc(s, TCG_COND_NE, LK); h->base = TCG_REG_TMP1; } else { @@ -3979,8 +4128,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_sar_i32: case INDEX_op_rotl_i32: case INDEX_op_rotr_i32: - case INDEX_op_setcond_i32: - case INDEX_op_negsetcond_i32: case INDEX_op_and_i64: case INDEX_op_andc_i64: case INDEX_op_shl_i64: @@ -3988,8 +4135,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_sar_i64: case INDEX_op_rotl_i64: case INDEX_op_rotr_i64: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, ri); case INDEX_op_mul_i32: @@ -4033,11 +4178,16 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: - return C_O0_I2(r, ri); - + return C_O0_I2(r, rC); + case INDEX_op_setcond_i32: + case INDEX_op_setcond_i64: + case INDEX_op_negsetcond_i32: + case INDEX_op_negsetcond_i64: + return C_O1_I2(r, r, rC); case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: - return C_O1_I4(r, r, ri, rZ, rZ); + return C_O1_I4(r, r, rC, rZ, rZ); + case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: return C_O1_I2(r, 0, rZ); diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 5295e4f9ab..04a7aba4d3 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -143,6 +143,8 @@ typedef enum { #define TCG_TARGET_HAS_qemu_ldst_i128 \ (TCG_TARGET_REG_BITS == 64 && have_isa_2_07) +#define TCG_TARGET_HAS_tst 1 + /* * While technically Altivec could support V64, it has no 64-bit store * instruction and substituting two 32-bit stores makes the generated diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 34e10e77d9..639363039b 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -145,7 +145,8 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define sextreg sextract64 /* test if a constant matches the constraint */ -static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece) +static bool tcg_target_const_match(int64_t val, int ct, + TCGType type, TCGCond cond, int vece) { if (ct & TCG_CT_CONST) { return 1; diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index a4edc3dc74..2c1b680b93 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -158,6 +158,8 @@ extern bool have_zbb; #define TCG_TARGET_HAS_qemu_ldst_i128 0 +#define TCG_TARGET_HAS_tst 0 + #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_NEED_LDST_LABELS diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h index 9a42037499..f75955eaa8 100644 --- a/tcg/s390x/tcg-target-con-set.h +++ b/tcg/s390x/tcg-target-con-set.h @@ -15,7 +15,7 @@ C_O0_I1(r) C_O0_I2(r, r) C_O0_I2(r, ri) -C_O0_I2(r, rA) +C_O0_I2(r, rC) C_O0_I2(v, r) C_O0_I3(o, m, r) C_O1_I1(r, r) @@ -27,7 +27,7 @@ C_O1_I2(r, 0, rI) C_O1_I2(r, 0, rJ) C_O1_I2(r, r, r) C_O1_I2(r, r, ri) -C_O1_I2(r, r, rA) +C_O1_I2(r, r, rC) C_O1_I2(r, r, rI) C_O1_I2(r, r, rJ) C_O1_I2(r, r, rK) @@ -39,10 +39,10 @@ C_O1_I2(v, v, r) C_O1_I2(v, v, v) C_O1_I3(v, v, v, v) C_O1_I4(r, r, ri, rI, r) -C_O1_I4(r, r, rA, rI, r) +C_O1_I4(r, r, rC, rI, r) C_O2_I1(o, m, r) C_O2_I2(o, m, 0, r) C_O2_I2(o, m, r, r) C_O2_I3(o, m, 0, 1, r) C_N1_O1_I4(r, r, 0, 1, ri, r) -C_N1_O1_I4(r, r, 0, 1, rA, r) +C_N1_O1_I4(r, r, 0, 1, rJU, r) diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h index 25675b449e..745f6c0df5 100644 --- 
a/tcg/s390x/tcg-target-con-str.h +++ b/tcg/s390x/tcg-target-con-str.h @@ -16,10 +16,11 @@ REGS('o', 0xaaaa) /* odd numbered general regs */ * Define constraint letters for constants: * CONST(letter, TCG_CT_CONST_* bit set) */ -CONST('A', TCG_CT_CONST_S33) +CONST('C', TCG_CT_CONST_CMP) CONST('I', TCG_CT_CONST_S16) CONST('J', TCG_CT_CONST_S32) CONST('K', TCG_CT_CONST_P32) CONST('N', TCG_CT_CONST_INV) CONST('R', TCG_CT_CONST_INVRISBG) +CONST('U', TCG_CT_CONST_U32) CONST('Z', TCG_CT_CONST_ZERO) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 7f6b84aa2c..ad587325fc 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -30,11 +30,12 @@ #define TCG_CT_CONST_S16 (1 << 8) #define TCG_CT_CONST_S32 (1 << 9) -#define TCG_CT_CONST_S33 (1 << 10) +#define TCG_CT_CONST_U32 (1 << 10) #define TCG_CT_CONST_ZERO (1 << 11) #define TCG_CT_CONST_P32 (1 << 12) #define TCG_CT_CONST_INV (1 << 13) #define TCG_CT_CONST_INVRISBG (1 << 14) +#define TCG_CT_CONST_CMP (1 << 15) #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16) #define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32) @@ -111,6 +112,9 @@ typedef enum S390Opcode { RI_OILH = 0xa50a, RI_OILL = 0xa50b, RI_TMLL = 0xa701, + RI_TMLH = 0xa700, + RI_TMHL = 0xa703, + RI_TMHH = 0xa702, RIEb_CGRJ = 0xec64, RIEb_CLGRJ = 0xec65, @@ -403,10 +407,15 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define S390_CC_NEVER 0 #define S390_CC_ALWAYS 15 +#define S390_TM_EQ 8 /* CC == 0 */ +#define S390_TM_NE 7 /* CC in {1,2,3} */ + /* Condition codes that result from a COMPARE and COMPARE LOGICAL. */ -static const uint8_t tcg_cond_to_s390_cond[] = { +static const uint8_t tcg_cond_to_s390_cond[16] = { [TCG_COND_EQ] = S390_CC_EQ, [TCG_COND_NE] = S390_CC_NE, + [TCG_COND_TSTEQ] = S390_CC_EQ, + [TCG_COND_TSTNE] = S390_CC_NE, [TCG_COND_LT] = S390_CC_LT, [TCG_COND_LE] = S390_CC_LE, [TCG_COND_GT] = S390_CC_GT, @@ -420,9 +429,11 @@ static const uint8_t tcg_cond_to_s390_cond[] = { /* Condition codes that result from a LOAD AND TEST. Here, we have no unsigned instruction variation, however since the test is vs zero we can re-map the outcomes appropriately. */ -static const uint8_t tcg_cond_to_ltr_cond[] = { +static const uint8_t tcg_cond_to_ltr_cond[16] = { [TCG_COND_EQ] = S390_CC_EQ, [TCG_COND_NE] = S390_CC_NE, + [TCG_COND_TSTEQ] = S390_CC_ALWAYS, + [TCG_COND_TSTNE] = S390_CC_NEVER, [TCG_COND_LT] = S390_CC_LT, [TCG_COND_LE] = S390_CC_LE, [TCG_COND_GT] = S390_CC_GT, @@ -538,42 +549,74 @@ static bool risbg_mask(uint64_t c) } /* Test if a constant matches the constraint. */ -static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece) +static bool tcg_target_const_match(int64_t val, int ct, + TCGType type, TCGCond cond, int vece) { + uint64_t uval = val; + if (ct & TCG_CT_CONST) { - return 1; + return true; } - if (type == TCG_TYPE_I32) { + uval = (uint32_t)val; val = (int32_t)val; } - /* The following are mutually exclusive. 
*/ - if (ct & TCG_CT_CONST_S16) { - return val == (int16_t)val; - } else if (ct & TCG_CT_CONST_S32) { - return val == (int32_t)val; - } else if (ct & TCG_CT_CONST_S33) { - return val >= -0xffffffffll && val <= 0xffffffffll; - } else if (ct & TCG_CT_CONST_ZERO) { - return val == 0; + if (ct & TCG_CT_CONST_CMP) { + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_NE: + ct |= TCG_CT_CONST_S32 | TCG_CT_CONST_U32; /* CGFI or CLGFI */ + break; + case TCG_COND_LT: + case TCG_COND_GE: + case TCG_COND_LE: + case TCG_COND_GT: + ct |= TCG_CT_CONST_S32; /* CGFI */ + break; + case TCG_COND_LTU: + case TCG_COND_GEU: + case TCG_COND_LEU: + case TCG_COND_GTU: + ct |= TCG_CT_CONST_U32; /* CLGFI */ + break; + case TCG_COND_TSTNE: + case TCG_COND_TSTEQ: + if (is_const_p16(uval) >= 0) { + return true; /* TMxx */ + } + if (risbg_mask(uval)) { + return true; /* RISBG */ + } + break; + default: + g_assert_not_reached(); + } + } + + if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { + return true; + } + if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { + return true; + } + if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { + return true; + } + if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return true; } if (ct & TCG_CT_CONST_INV) { val = ~val; } - /* - * Note that is_const_p16 is a subset of is_const_p32, - * so we don't need both constraints. - */ if ((ct & TCG_CT_CONST_P32) && is_const_p32(val) >= 0) { return true; } if ((ct & TCG_CT_CONST_INVRISBG) && risbg_mask(~val)) { return true; } - - return 0; + return false; } /* Emit instructions according to the given instruction format. */ @@ -843,6 +886,9 @@ static const S390Opcode oi_insns[4] = { static const S390Opcode lif_insns[2] = { RIL_LLILF, RIL_LLIHF, }; +static const S390Opcode tm_insns[4] = { + RI_TMLL, RI_TMLH, RI_TMHL, RI_TMHH +}; /* load a register with an immediate value */ static void tcg_out_movi(TCGContext *s, TCGType type, @@ -1203,6 +1249,36 @@ static int tgen_cmp2(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, TCGCond inv_c = tcg_invert_cond(c); S390Opcode op; + if (is_tst_cond(c)) { + tcg_debug_assert(!need_carry); + + if (!c2const) { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, NRK, TCG_REG_R0, r1, c2); + } else { + tcg_out_insn(s, RRFa, NGRK, TCG_REG_R0, r1, c2); + } + goto exit; + } + + if (type == TCG_TYPE_I32) { + c2 = (uint32_t)c2; + } + + int i = is_const_p16(c2); + if (i >= 0) { + tcg_out_insn_RI(s, tm_insns[i], r1, c2 >> (i * 16)); + *inv_cc = c == TCG_COND_TSTEQ ? S390_TM_NE : S390_TM_EQ; + return *inv_cc ^ 15; + } + + if (risbg_mask(c2)) { + tgen_andi_risbg(s, TCG_REG_R0, r1, c2); + goto exit; + } + g_assert_not_reached(); + } + if (c2const) { if (c2 == 0) { if (!(is_unsigned && need_carry)) { @@ -1228,22 +1304,34 @@ static int tgen_cmp2(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, goto exit; } - /* - * Constraints are for a signed 33-bit operand, which is a - * convenient superset of this signed/unsigned test. - */ - if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 : (TCGArg)(int32_t)c2)) { - op = (is_unsigned ? RIL_CLGFI : RIL_CGFI); - tcg_out_insn_RIL(s, op, r1, c2); - goto exit; + /* Should match TCG_CT_CONST_CMP. 
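+ * E.g. an EQ comparison against 0x180000000 fits neither CGFI nor
+ * CLGFI, so the constraint forces such constants into a register.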
*/ + switch (c) { + case TCG_COND_LT: + case TCG_COND_GE: + case TCG_COND_LE: + case TCG_COND_GT: + tcg_debug_assert(c2 == (int32_t)c2); + op = RIL_CGFI; + break; + case TCG_COND_EQ: + case TCG_COND_NE: + if (c2 == (int32_t)c2) { + op = RIL_CGFI; + break; + } + /* fall through */ + case TCG_COND_LTU: + case TCG_COND_GEU: + case TCG_COND_LEU: + case TCG_COND_GTU: + tcg_debug_assert(c2 == (uint32_t)c2); + op = RIL_CLGFI; + break; + default: + g_assert_not_reached(); } - - /* Load everything else into a register. */ - tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, c2); - c2 = TCG_TMP0; - } - - if (type == TCG_TYPE_I32) { + tcg_out_insn_RIL(s, op, r1, c2); + } else if (type == TCG_TYPE_I32) { op = (is_unsigned ? RR_CLR : RR_CR); tcg_out_insn_RR(s, op, r1, c2); } else { @@ -1516,46 +1604,49 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, TCGArg c2, int c2const, TCGLabel *l) { int cc; - bool is_unsigned = is_unsigned_cond(c); - bool in_range; - S390Opcode opc; - cc = tcg_cond_to_s390_cond[c]; + if (!is_tst_cond(c)) { + bool is_unsigned = is_unsigned_cond(c); + bool in_range; + S390Opcode opc; - if (!c2const) { - opc = (type == TCG_TYPE_I32 - ? (is_unsigned ? RIEb_CLRJ : RIEb_CRJ) - : (is_unsigned ? RIEb_CLGRJ : RIEb_CGRJ)); - tgen_compare_branch(s, opc, cc, r1, c2, l); - return; - } + cc = tcg_cond_to_s390_cond[c]; - /* - * COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field. - * If the immediate we've been given does not fit that range, we'll - * fall back to separate compare and branch instructions using the - * larger comparison range afforded by COMPARE IMMEDIATE. - */ - if (type == TCG_TYPE_I32) { - if (is_unsigned) { - opc = RIEc_CLIJ; - in_range = (uint32_t)c2 == (uint8_t)c2; - } else { - opc = RIEc_CIJ; - in_range = (int32_t)c2 == (int8_t)c2; + if (!c2const) { + opc = (type == TCG_TYPE_I32 + ? (is_unsigned ? RIEb_CLRJ : RIEb_CRJ) + : (is_unsigned ? RIEb_CLGRJ : RIEb_CGRJ)); + tgen_compare_branch(s, opc, cc, r1, c2, l); + return; } - } else { - if (is_unsigned) { - opc = RIEc_CLGIJ; - in_range = (uint64_t)c2 == (uint8_t)c2; + + /* + * COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field. + * If the immediate we've been given does not fit that range, we'll + * fall back to separate compare and branch instructions using the + * larger comparison range afforded by COMPARE IMMEDIATE. + */ + if (type == TCG_TYPE_I32) { + if (is_unsigned) { + opc = RIEc_CLIJ; + in_range = (uint32_t)c2 == (uint8_t)c2; + } else { + opc = RIEc_CIJ; + in_range = (int32_t)c2 == (int8_t)c2; + } } else { - opc = RIEc_CGIJ; - in_range = (int64_t)c2 == (int8_t)c2; + if (is_unsigned) { + opc = RIEc_CLGIJ; + in_range = (uint64_t)c2 == (uint8_t)c2; + } else { + opc = RIEc_CGIJ; + in_range = (int64_t)c2 == (int8_t)c2; + } + } + if (in_range) { + tgen_compare_imm_branch(s, opc, cc, r1, c2, l); + return; } - } - if (in_range) { - tgen_compare_imm_branch(s, opc, cc, r1, c2, l); - return; } cc = tgen_cmp(s, type, c, r1, c2, c2const, false); @@ -1834,11 +1925,10 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, ldst->oi = oi; ldst->addrlo_reg = addr_reg; - /* We are expecting a_bits to max out at 7, much lower than TMLL. 
*/ tcg_debug_assert(a_mask <= 0xffff); tcg_out_insn(s, RI, TMLL, addr_reg, a_mask); - tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */ + tcg_out16(s, RI_BRC | (S390_TM_NE << 4)); ldst->label_ptr[0] = s->code_ptr++; } @@ -1919,7 +2009,7 @@ static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, l2 = gen_new_label(); tcg_out_insn(s, RI, TMLL, addr_reg, 15); - tgen_branch(s, 7, l1); /* CC in {1,2,3} */ + tgen_branch(s, S390_TM_NE, l1); } tcg_debug_assert(!need_bswap); @@ -3136,7 +3226,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) return C_O1_I2(r, r, ri); case INDEX_op_setcond_i64: case INDEX_op_negsetcond_i64: - return C_O1_I2(r, r, rA); + return C_O1_I2(r, r, rC); case INDEX_op_clz_i64: return C_O1_I2(r, r, rI); @@ -3186,7 +3276,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_brcond_i32: return C_O0_I2(r, ri); case INDEX_op_brcond_i64: - return C_O0_I2(r, rA); + return C_O0_I2(r, rC); case INDEX_op_bswap16_i32: case INDEX_op_bswap16_i64: @@ -3239,7 +3329,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_movcond_i32: return C_O1_I4(r, r, ri, rI, r); case INDEX_op_movcond_i64: - return C_O1_I4(r, r, rA, rI, r); + return C_O1_I4(r, r, rC, rI, r); case INDEX_op_div2_i32: case INDEX_op_div2_i64: @@ -3258,7 +3348,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_add2_i64: case INDEX_op_sub2_i64: - return C_N1_O1_I4(r, r, 0, 1, rA, r); + return C_N1_O1_I4(r, r, 0, 1, rJU, r); case INDEX_op_st_vec: return C_O0_I2(v, r); diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h index e69b0d2ddd..ae448c3a3a 100644 --- a/tcg/s390x/tcg-target.h +++ b/tcg/s390x/tcg-target.h @@ -138,6 +138,8 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_qemu_ldst_i128 1 +#define TCG_TARGET_HAS_tst 1 + #define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR) #define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR) #define TCG_TARGET_HAS_v256 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index a91defd0ac..176c98740b 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -322,7 +322,8 @@ static bool patch_reloc(tcg_insn_unit *src_rw, int type, } /* test if a constant matches the constraint */ -static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece) +static bool tcg_target_const_match(int64_t val, int ct, + TCGType type, TCGCond cond, int vece) { if (ct & TCG_CT_CONST) { return 1; @@ -606,9 +607,11 @@ static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1, uns ? 
ARITH_UDIV : ARITH_SDIV); } -static const uint8_t tcg_cond_to_bcond[] = { +static const uint8_t tcg_cond_to_bcond[16] = { [TCG_COND_EQ] = COND_E, [TCG_COND_NE] = COND_NE, + [TCG_COND_TSTEQ] = COND_E, + [TCG_COND_TSTNE] = COND_NE, [TCG_COND_LT] = COND_L, [TCG_COND_GE] = COND_GE, [TCG_COND_LE] = COND_LE, @@ -619,7 +622,7 @@ static const uint8_t tcg_cond_to_bcond[] = { [TCG_COND_GTU] = COND_GU, }; -static const uint8_t tcg_cond_to_rcond[] = { +static const uint8_t tcg_cond_to_rcond[16] = { [TCG_COND_EQ] = RCOND_Z, [TCG_COND_NE] = RCOND_NZ, [TCG_COND_LT] = RCOND_LZ, @@ -645,15 +648,17 @@ static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l) tcg_out_bpcc0(s, scond, flags, off19); } -static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const) +static void tcg_out_cmp(TCGContext *s, TCGCond cond, + TCGReg c1, int32_t c2, int c2const) { - tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC); + tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, + is_tst_cond(cond) ? ARITH_ANDCC : ARITH_SUBCC); } static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1, int32_t arg2, int const_arg2, TCGLabel *l) { - tcg_out_cmp(s, arg1, arg2, const_arg2); + tcg_out_cmp(s, cond, arg1, arg2, const_arg2); tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, l); tcg_out_nop(s); } @@ -670,7 +675,7 @@ static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1, int32_t c2, int c2const, int32_t v1, int v1const) { - tcg_out_cmp(s, c1, c2, c2const); + tcg_out_cmp(s, cond, c1, c2, c2const); tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const); } @@ -678,7 +683,8 @@ static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1, int32_t arg2, int const_arg2, TCGLabel *l) { /* For 64-bit signed comparisons vs zero, we can avoid the compare. */ - if (arg2 == 0 && !is_unsigned_cond(cond)) { + int rcond = tcg_cond_to_rcond[cond]; + if (arg2 == 0 && rcond) { int off16 = 0; if (l->has_value) { @@ -687,19 +693,18 @@ static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1, tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0); } tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1) - | INSN_COND(tcg_cond_to_rcond[cond]) | off16); + | INSN_COND(rcond) | off16); } else { - tcg_out_cmp(s, arg1, arg2, const_arg2); + tcg_out_cmp(s, cond, arg1, arg2, const_arg2); tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, l); } tcg_out_nop(s); } -static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1, +static void tcg_out_movr(TCGContext *s, int rcond, TCGReg ret, TCGReg c1, int32_t v1, int v1const) { - tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1) - | (tcg_cond_to_rcond[cond] << 10) + tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1) | (rcond << 10) | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1))); } @@ -710,11 +715,11 @@ static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, /* For 64-bit signed comparisons vs zero, we can avoid the compare. Note that the immediate range is one bit smaller, so we must check for that as well. 
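(MOVR takes a 10-bit immediate where MOVCC takes 11 bits, hence
the check_fit_i32(v1, 10) test below.)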
*/ - if (c2 == 0 && !is_unsigned_cond(cond) - && (!v1const || check_fit_i32(v1, 10))) { - tcg_out_movr(s, cond, ret, c1, v1, v1const); + int rcond = tcg_cond_to_rcond[cond]; + if (c2 == 0 && rcond && (!v1const || check_fit_i32(v1, 10))) { + tcg_out_movr(s, rcond, ret, c1, v1, v1const); } else { - tcg_out_cmp(s, c1, c2, c2const); + tcg_out_cmp(s, cond, c1, c2, c2const); tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const); } } @@ -742,6 +747,15 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU); break; + case TCG_COND_TSTEQ: + case TCG_COND_TSTNE: + /* Transform to inequality vs zero. */ + tcg_out_arithc(s, TCG_REG_T1, c1, c2, c2const, ARITH_AND); + c1 = TCG_REG_G0; + c2 = TCG_REG_T1, c2const = 0; + cond = (cond == TCG_COND_TSTEQ ? TCG_COND_GEU : TCG_COND_LTU); + break; + case TCG_COND_GTU: case TCG_COND_LEU: /* If we don't need to load a constant into a register, we can @@ -758,13 +772,13 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, /* FALLTHRU */ default: - tcg_out_cmp(s, c1, c2, c2const); + tcg_out_cmp(s, cond, c1, c2, c2const); tcg_out_movi_s13(s, ret, 0); tcg_out_movcc(s, cond, MOVCC_ICC, ret, neg ? -1 : 1, 1); return; } - tcg_out_cmp(s, c1, c2, c2const); + tcg_out_cmp(s, cond, c1, c2, c2const); if (cond == TCG_COND_LTU) { if (neg) { /* 0 - 0 - C = -C = (C ? -1 : 0) */ @@ -787,6 +801,8 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1, int32_t c2, int c2const, bool neg) { + int rcond; + if (use_vis3_instructions && !neg) { switch (cond) { case TCG_COND_NE: @@ -796,7 +812,7 @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, c2 = c1, c2const = 0, c1 = TCG_REG_G0; /* FALLTHRU */ case TCG_COND_LTU: - tcg_out_cmp(s, c1, c2, c2const); + tcg_out_cmp(s, cond, c1, c2, c2const); tcg_out_arith(s, ret, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC); return; default: @@ -806,11 +822,12 @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, /* For 64-bit signed comparisons vs zero, we can avoid the compare if the input does not overlap the output. */ - if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) { + rcond = tcg_cond_to_rcond[cond]; + if (c2 == 0 && rcond && c1 != ret) { tcg_out_movi_s13(s, ret, 0); - tcg_out_movr(s, cond, ret, c1, neg ? -1 : 1, 1); + tcg_out_movr(s, rcond, ret, c1, neg ? -1 : 1, 1); } else { - tcg_out_cmp(s, c1, c2, c2const); + tcg_out_cmp(s, cond, c1, c2, c2const); tcg_out_movi_s13(s, ret, 0); tcg_out_movcc(s, cond, MOVCC_XCC, ret, neg ? 
-1 : 1, 1); } @@ -1098,7 +1115,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_movi_s32(s, TCG_REG_T3, compare_mask); tcg_out_arith(s, TCG_REG_T3, addr_reg, TCG_REG_T3, ARITH_AND); } - tcg_out_cmp(s, TCG_REG_T2, TCG_REG_T3, 0); + tcg_out_cmp(s, TCG_COND_NE, TCG_REG_T2, TCG_REG_T3, 0); ldst = new_ldst_label(s); ldst->is_ld = is_ld; diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h index f8cf145266..a18906a14e 100644 --- a/tcg/sparc64/tcg-target.h +++ b/tcg/sparc64/tcg-target.h @@ -149,6 +149,8 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_qemu_ldst_i128 0 +#define TCG_TARGET_HAS_tst 1 + #define TCG_AREG0 TCG_REG_I0 #define TCG_TARGET_DEFAULT_MO (0) diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h index 6c9d9e48db..9b0d982f65 100644 --- a/tcg/tcg-internal.h +++ b/tcg/tcg-internal.h @@ -83,6 +83,8 @@ static inline TCGv_i64 TCGV128_HIGH(TCGv_i128 t) bool tcg_target_has_memory_bswap(MemOp memop); +TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind); + /* * Locate or create a read-only temporary that is a constant. * This kind of temporary need not be freed, but for convenience @@ -173,7 +173,8 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, const TCGHelperInfo *info); static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot); -static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece); +static bool tcg_target_const_match(int64_t val, int ct, + TCGType type, TCGCond cond, int vece); #ifdef TCG_TARGET_NEED_LDST_LABELS static int tcg_out_ldst_finalize(TCGContext *s); #endif @@ -1655,7 +1656,7 @@ TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name) return temp_tcgv_ptr(ts); } -static TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind) +TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind) { TCGContext *s = tcg_ctx; TCGTemp *ts; @@ -2482,7 +2483,9 @@ static const char * const cond_name[] = [TCG_COND_LTU] = "ltu", [TCG_COND_GEU] = "geu", [TCG_COND_LEU] = "leu", - [TCG_COND_GTU] = "gtu" + [TCG_COND_GTU] = "gtu", + [TCG_COND_TSTEQ] = "tsteq", + [TCG_COND_TSTNE] = "tstne", }; static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] = @@ -4784,6 +4787,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) TCGTemp *ts; TCGArg new_args[TCG_MAX_OP_ARGS]; int const_args[TCG_MAX_OP_ARGS]; + TCGCond op_cond; nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; @@ -4796,6 +4800,33 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) i_allocated_regs = s->reserved_regs; o_allocated_regs = s->reserved_regs; + switch (op->opc) { + case INDEX_op_brcond_i32: + case INDEX_op_brcond_i64: + op_cond = op->args[2]; + break; + case INDEX_op_setcond_i32: + case INDEX_op_setcond_i64: + case INDEX_op_negsetcond_i32: + case INDEX_op_negsetcond_i64: + case INDEX_op_cmp_vec: + op_cond = op->args[3]; + break; + case INDEX_op_brcond2_i32: + op_cond = op->args[4]; + break; + case INDEX_op_movcond_i32: + case INDEX_op_movcond_i64: + case INDEX_op_setcond2_i32: + case INDEX_op_cmpsel_vec: + op_cond = op->args[5]; + break; + default: + /* No condition within opcode. 
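+ * TCG_COND_ALWAYS acts as a "don't care" value here: backends only
+ * inspect the condition for constraints such as TCG_CT_CONST_CMP.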
*/ + op_cond = TCG_COND_ALWAYS; + break; + } + /* satisfy input constraints */ for (k = 0; k < nb_iargs; k++) { TCGRegSet i_preferred_regs, i_required_regs; @@ -4809,7 +4840,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) ts = arg_temp(arg); if (ts->val_type == TEMP_VAL_CONST - && tcg_target_const_match(ts->val, ts->type, arg_ct->ct, TCGOP_VECE(op))) { + && tcg_target_const_match(ts->val, arg_ct->ct, ts->type, + op_cond, TCGOP_VECE(op))) { /* constant is OK for instruction */ const_args[i] = 1; new_args[i] = ts->val; @@ -228,6 +228,12 @@ static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition) case TCG_COND_GTU: result = (u0 > u1); break; + case TCG_COND_TSTEQ: + result = (u0 & u1) == 0; + break; + case TCG_COND_TSTNE: + result = (u0 & u1) != 0; + break; default: g_assert_not_reached(); } @@ -270,6 +276,12 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) case TCG_COND_GTU: result = (u0 > u1); break; + case TCG_COND_TSTEQ: + result = (u0 & u1) == 0; + break; + case TCG_COND_TSTNE: + result = (u0 & u1) != 0; + break; default: g_assert_not_reached(); } @@ -1041,6 +1053,8 @@ static const char *str_c(TCGCond c) [TCG_COND_GEU] = "geu", [TCG_COND_LEU] = "leu", [TCG_COND_GTU] = "gtu", + [TCG_COND_TSTEQ] = "tsteq", + [TCG_COND_TSTNE] = "tstne", }; assert((unsigned)c < ARRAY_SIZE(cond)); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 461f4b47ff..c740864b96 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -913,7 +913,8 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, } /* Test if a constant matches the constraint. */ -static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece) +static bool tcg_target_const_match(int64_t val, int ct, + TCGType type, TCGCond cond, int vece) { return ct & TCG_CT_CONST; } diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 2a13816c8e..a076f401d2 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -117,6 +117,8 @@ #define TCG_TARGET_HAS_qemu_ldst_i128 0 +#define TCG_TARGET_HAS_tst 1 + /* Number of registers available. 
*/ #define TCG_TARGET_NB_REGS 16 diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index f2e9d27dcf..56d88ca423 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -184,7 +184,8 @@ if __name__ == '__main__': sys.exit(str(e)) if args.dry_run: - print('\n'.join([os.path.basename(t) for t in tests])) + with env: + print('\n'.join([os.path.basename(t) for t in tests])) else: with TestRunner(env, tap=args.tap, color=args.color) as tr: diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py index 3ff38f2661..588f30a4f1 100644 --- a/tests/qemu-iotests/testenv.py +++ b/tests/qemu-iotests/testenv.py @@ -126,7 +126,7 @@ class TestEnv(ContextManager['TestEnv']): self.tmp_sock_dir = False Path(self.sock_dir).mkdir(parents=True, exist_ok=True) except KeyError: - self.sock_dir = tempfile.mkdtemp() + self.sock_dir = tempfile.mkdtemp(prefix="qemu-iotests-") self.tmp_sock_dir = True self.sample_img_dir = os.getenv('SAMPLE_IMG_DIR', diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target index 30994dcf9c..e2aba2ec27 100644 --- a/tests/tcg/s390x/Makefile.target +++ b/tests/tcg/s390x/Makefile.target @@ -45,6 +45,8 @@ TESTS+=clc TESTS+=laalg TESTS+=add-logical-with-carry TESTS+=lae +TESTS+=cvd +TESTS+=cvb cdsg: CFLAGS+=-pthread cdsg: LDFLAGS+=-pthread diff --git a/tests/tcg/s390x/cvb.c b/tests/tcg/s390x/cvb.c new file mode 100644 index 0000000000..e1735f6b81 --- /dev/null +++ b/tests/tcg/s390x/cvb.c @@ -0,0 +1,102 @@ +/* + * Test the CONVERT TO BINARY instruction. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include <assert.h> +#include <signal.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +static int signum; + +static void signal_handler(int n) +{ + signum = n; +} + +#define FAIL 0x1234567887654321 +#define OK32(x) (0x1234567800000000 | (uint32_t)(x)) + +static int64_t cvb(uint64_t x) +{ + int64_t ret = FAIL; + + signum = -1; + asm("cvb %[ret],%[x]" : [ret] "+r" (ret) : [x] "R" (x)); + + return ret; +} + +static int64_t cvby(uint64_t x) +{ + int64_t ret = FAIL; + + signum = -1; + asm("cvby %[ret],%[x]" : [ret] "+r" (ret) : [x] "T" (x)); + + return ret; +} + +static int64_t cvbg(__uint128_t x) +{ + int64_t ret = FAIL; + + signum = -1; + asm("cvbg %[ret],%[x]" : [ret] "+r" (ret) : [x] "T" (x)); + + return ret; +} + +int main(void) +{ + __uint128_t m = (((__uint128_t)0x9223372036854775) << 16) | 0x8070; + struct sigaction act; + int err; + + memset(&act, 0, sizeof(act)); + act.sa_handler = signal_handler; + err = sigaction(SIGFPE, &act, NULL); + assert(err == 0); + err = sigaction(SIGILL, &act, NULL); + assert(err == 0); + + assert(cvb(0xc) == OK32(0) && signum == -1); + assert(cvb(0x1c) == OK32(1) && signum == -1); + assert(cvb(0x25594c) == OK32(25594) && signum == -1); + assert(cvb(0x1d) == OK32(-1) && signum == -1); + assert(cvb(0x2147483647c) == OK32(0x7fffffff) && signum == -1); + assert(cvb(0x2147483648d) == OK32(-0x80000000) && signum == -1); + assert(cvb(0x7) == FAIL && signum == SIGILL); + assert(cvb(0x2147483648c) == OK32(0x80000000) && signum == SIGFPE); + assert(cvb(0x3000000000c) == OK32(0xb2d05e00) && signum == SIGFPE); + assert(cvb(0x2147483649d) == OK32(0x7fffffff) && signum == SIGFPE); + assert(cvb(0x3000000000d) == OK32(0x4d2fa200) && signum == SIGFPE); + + assert(cvby(0xc) == OK32(0)); + assert(cvby(0x1c) == OK32(1)); + assert(cvby(0x25594c) == OK32(25594)); + assert(cvby(0x1d) == OK32(-1)); + assert(cvby(0x2147483647c) == OK32(0x7fffffff)); + assert(cvby(0x2147483648d) == 
OK32(-0x80000000)); + assert(cvby(0x7) == FAIL && signum == SIGILL); + assert(cvby(0x2147483648c) == OK32(0x80000000) && signum == SIGFPE); + assert(cvby(0x3000000000c) == OK32(0xb2d05e00) && signum == SIGFPE); + assert(cvby(0x2147483649d) == OK32(0x7fffffff) && signum == SIGFPE); + assert(cvby(0x3000000000d) == OK32(0x4d2fa200) && signum == SIGFPE); + + assert(cvbg(0xc) == 0); + assert(cvbg(0x1c) == 1); + assert(cvbg(0x25594c) == 25594); + assert(cvbg(0x1d) == -1); + assert(cvbg(m + 0xc) == 0x7fffffffffffffff); + assert(cvbg(m + 0x1d) == -0x8000000000000000); + assert(cvbg(0x7) == FAIL && signum == SIGILL); + assert(cvbg(m + 0x1c) == FAIL && signum == SIGFPE); + assert(cvbg(m + 0x2d) == FAIL && signum == SIGFPE); + assert(cvbg(((__uint128_t)1 << 80) + 0xc) == FAIL && signum == SIGFPE); + assert(cvbg(((__uint128_t)1 << 80) + 0xd) == FAIL && signum == SIGFPE); + + return EXIT_SUCCESS; +} diff --git a/tests/tcg/s390x/cvd.c b/tests/tcg/s390x/cvd.c new file mode 100644 index 0000000000..d776688985 --- /dev/null +++ b/tests/tcg/s390x/cvd.c @@ -0,0 +1,63 @@ +/* + * Test the CONVERT TO DECIMAL instruction. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> + +static uint64_t cvd(int32_t x) +{ + uint64_t ret; + + asm("cvd %[x],%[ret]" : [ret] "=R" (ret) : [x] "r" (x)); + + return ret; +} + +static uint64_t cvdy(int32_t x) +{ + uint64_t ret; + + asm("cvdy %[x],%[ret]" : [ret] "=T" (ret) : [x] "r" (x)); + + return ret; +} + +static __uint128_t cvdg(int64_t x) +{ + __uint128_t ret; + + asm("cvdg %[x],%[ret]" : [ret] "=T" (ret) : [x] "r" (x)); + + return ret; +} + +int main(void) +{ + __uint128_t m = (((__uint128_t)0x9223372036854775) << 16) | 0x8070; + + assert(cvd(0) == 0xc); + assert(cvd(1) == 0x1c); + assert(cvd(25594) == 0x25594c); + assert(cvd(-1) == 0x1d); + assert(cvd(0x7fffffff) == 0x2147483647c); + assert(cvd(-0x80000000) == 0x2147483648d); + + assert(cvdy(0) == 0xc); + assert(cvdy(1) == 0x1c); + assert(cvdy(25594) == 0x25594c); + assert(cvdy(-1) == 0x1d); + assert(cvdy(0x7fffffff) == 0x2147483647c); + assert(cvdy(-0x80000000) == 0x2147483648d); + + assert(cvdg(0) == 0xc); + assert(cvdg(1) == 0x1c); + assert(cvdg(25594) == 0x25594c); + assert(cvdg(-1) == 0x1d); + assert(cvdg(0x7fffffffffffffff) == (m + 0xc)); + assert(cvdg(-0x8000000000000000) == (m + 0x1d)); + + return EXIT_SUCCESS; +} diff --git a/tests/unit/test-util-filemonitor.c b/tests/unit/test-util-filemonitor.c index a22de27595..02e67fc96a 100644 --- a/tests/unit/test-util-filemonitor.c +++ b/tests/unit/test-util-filemonitor.c @@ -360,6 +360,14 @@ test_file_monitor_events(void) { .type = QFILE_MONITOR_TEST_OP_EVENT, .filesrc = "one.txt", .watchid = &watch4, .eventid = QFILE_MONITOR_EVENT_DELETED }, +#ifdef __FreeBSD__ + { .type = QFILE_MONITOR_TEST_OP_EVENT, + .filesrc = "two.txt", .watchid = &watch0, + .eventid = QFILE_MONITOR_EVENT_DELETED }, + { .type = QFILE_MONITOR_TEST_OP_EVENT, + .filesrc = "two.txt", .watchid = &watch2, + .eventid = QFILE_MONITOR_EVENT_DELETED }, +#endif { .type = QFILE_MONITOR_TEST_OP_EVENT, .filesrc = "two.txt", .watchid = &watch0, .eventid = QFILE_MONITOR_EVENT_CREATED }, diff --git a/tests/vm/basevm.py b/tests/vm/basevm.py index 61725b8325..c0d62c0803 100644 --- a/tests/vm/basevm.py +++ b/tests/vm/basevm.py @@ -423,6 +423,8 @@ class BaseVM(object): def console_sshd_config(self, prompt): self.console_wait(prompt) self.console_send("echo 'PermitRootLogin yes' >> /etc/ssh/sshd_config\n") + self.console_wait(prompt) + self.console_send("echo 
'UseDNS no' >> /etc/ssh/sshd_config\n") for var in self.envvars: self.console_wait(prompt) self.console_send("echo 'AcceptEnv %s' >> /etc/ssh/sshd_config\n" % var) diff --git a/tests/vm/freebsd b/tests/vm/freebsd index b581bd17fb..1247f40a38 100755 --- a/tests/vm/freebsd +++ b/tests/vm/freebsd @@ -108,6 +108,7 @@ class FreeBSDVM(basevm.BaseVM): prompt = "root@freebsd:~ #" self.console_ssh_init(prompt, "root", self._config["root_pass"]) self.console_sshd_config(prompt) + self.console_wait_send(prompt, "service sshd reload\n") # setup virtio-blk #1 (tarfile) self.console_wait(prompt) diff --git a/util/meson.build b/util/meson.build index af3bf5692d..0ef9886be0 100644 --- a/util/meson.build +++ b/util/meson.build @@ -104,7 +104,11 @@ if have_block util_ss.add(files('throttle.c')) util_ss.add(files('timed-average.c')) if config_host_data.get('CONFIG_INOTIFY1') - util_ss.add(files('filemonitor-inotify.c')) + freebsd_dep = [] + if host_os == 'freebsd' + freebsd_dep = inotify + endif + util_ss.add(files('filemonitor-inotify.c'), freebsd_dep) else util_ss.add(files('filemonitor-stub.c')) endif diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 7c297003b9..3c379f96c2 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -42,6 +42,7 @@ #include "qemu/cutils.h" #include "qemu/units.h" #include "qemu/thread-context.h" +#include "qemu/main-loop.h" #ifdef CONFIG_LINUX #include <sys/syscall.h> @@ -63,11 +64,15 @@ struct MemsetThread; +static QLIST_HEAD(, MemsetContext) memset_contexts = + QLIST_HEAD_INITIALIZER(memset_contexts); + typedef struct MemsetContext { bool all_threads_created; bool any_thread_failed; struct MemsetThread *threads; int num_threads; + QLIST_ENTRY(MemsetContext) next; } MemsetContext; struct MemsetThread { @@ -412,19 +417,44 @@ static inline int get_memset_num_threads(size_t hpagesize, size_t numpages, return ret; } +static int wait_and_free_mem_prealloc_context(MemsetContext *context) +{ + int i, ret = 0, tmp; + + for (i = 0; i < context->num_threads; i++) { + tmp = (uintptr_t)qemu_thread_join(&context->threads[i].pgthread); + + if (tmp) { + ret = tmp; + } + } + g_free(context->threads); + g_free(context); + return ret; +} + static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, - int max_threads, ThreadContext *tc, + int max_threads, ThreadContext *tc, bool async, bool use_madv_populate_write) { static gsize initialized = 0; - MemsetContext context = { - .num_threads = get_memset_num_threads(hpagesize, numpages, max_threads), - }; + MemsetContext *context = g_malloc0(sizeof(MemsetContext)); size_t numpages_per_thread, leftover; void *(*touch_fn)(void *); - int ret = 0, i = 0; + int ret, i = 0; char *addr = area; + /* + * Asynchronous preallocation is only allowed when using MADV_POPULATE_WRITE + * and prealloc context for thread placement. + */ + if (!use_madv_populate_write || !tc) { + async = false; + } + + context->num_threads = + get_memset_num_threads(hpagesize, numpages, max_threads); + if (g_once_init_enter(&initialized)) { qemu_mutex_init(&page_mutex); qemu_cond_init(&page_cond); @@ -432,8 +462,11 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, } if (use_madv_populate_write) { - /* Avoid creating a single thread for MADV_POPULATE_WRITE */ - if (context.num_threads == 1) { + /* + * Avoid creating a single thread for MADV_POPULATE_WRITE when + * preallocating synchronously. 
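+ * A single synchronous request can issue the madvise call directly
+ * on the caller's thread, skipping thread creation entirely.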
+ */ + if (context->num_threads == 1 && !async) { if (qemu_madvise(area, hpagesize * numpages, QEMU_MADV_POPULATE_WRITE)) { return -errno; @@ -445,50 +478,86 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, touch_fn = do_touch_pages; } - context.threads = g_new0(MemsetThread, context.num_threads); - numpages_per_thread = numpages / context.num_threads; - leftover = numpages % context.num_threads; - for (i = 0; i < context.num_threads; i++) { - context.threads[i].addr = addr; - context.threads[i].numpages = numpages_per_thread + (i < leftover); - context.threads[i].hpagesize = hpagesize; - context.threads[i].context = &context; + context->threads = g_new0(MemsetThread, context->num_threads); + numpages_per_thread = numpages / context->num_threads; + leftover = numpages % context->num_threads; + for (i = 0; i < context->num_threads; i++) { + context->threads[i].addr = addr; + context->threads[i].numpages = numpages_per_thread + (i < leftover); + context->threads[i].hpagesize = hpagesize; + context->threads[i].context = context; if (tc) { - thread_context_create_thread(tc, &context.threads[i].pgthread, + thread_context_create_thread(tc, &context->threads[i].pgthread, "touch_pages", - touch_fn, &context.threads[i], + touch_fn, &context->threads[i], QEMU_THREAD_JOINABLE); } else { - qemu_thread_create(&context.threads[i].pgthread, "touch_pages", - touch_fn, &context.threads[i], + qemu_thread_create(&context->threads[i].pgthread, "touch_pages", + touch_fn, &context->threads[i], QEMU_THREAD_JOINABLE); } - addr += context.threads[i].numpages * hpagesize; + addr += context->threads[i].numpages * hpagesize; + } + + if (async) { + /* + * async requests currently require the BQL. Add it to the list and kick + * preallocation off during qemu_finish_async_prealloc_mem(). + */ + assert(bql_locked()); + QLIST_INSERT_HEAD(&memset_contexts, context, next); + return 0; } if (!use_madv_populate_write) { - sigbus_memset_context = &context; + sigbus_memset_context = context; } qemu_mutex_lock(&page_mutex); - context.all_threads_created = true; + context->all_threads_created = true; qemu_cond_broadcast(&page_cond); qemu_mutex_unlock(&page_mutex); - for (i = 0; i < context.num_threads; i++) { - int tmp = (uintptr_t)qemu_thread_join(&context.threads[i].pgthread); + ret = wait_and_free_mem_prealloc_context(context); + if (!use_madv_populate_write) { + sigbus_memset_context = NULL; + } + return ret; +} + +bool qemu_finish_async_prealloc_mem(Error **errp) +{ + int ret = 0, tmp; + MemsetContext *context, *next_context; + + /* Waiting for preallocation requires the BQL. 
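+ * The contexts drained below were queued under the BQL by
+ * touch_all_pages(), so the list itself needs no further locking.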
*/ + assert(bql_locked()); + if (QLIST_EMPTY(&memset_contexts)) { + return true; + } + + qemu_mutex_lock(&page_mutex); + QLIST_FOREACH(context, &memset_contexts, next) { + context->all_threads_created = true; + } + qemu_cond_broadcast(&page_cond); + qemu_mutex_unlock(&page_mutex); + + QLIST_FOREACH_SAFE(context, &memset_contexts, next, next_context) { + QLIST_REMOVE(context, next); + tmp = wait_and_free_mem_prealloc_context(context); if (tmp) { ret = tmp; } } - if (!use_madv_populate_write) { - sigbus_memset_context = NULL; + if (ret) { + error_setg_errno(errp, -ret, + "qemu_prealloc_mem: preallocating memory failed"); + return false; } - g_free(context.threads); - - return ret; + return true; } static bool madv_populate_write_possible(char *area, size_t pagesize) @@ -498,7 +567,7 @@ } bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, - ThreadContext *tc, Error **errp) + ThreadContext *tc, bool async, Error **errp) { static gsize initialized; int ret; @@ -540,7 +609,7 @@ } /* touch pages simultaneously */ - ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc, + ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc, async, use_madv_populate_write); if (ret) { error_setg_errno(errp, -ret, diff --git a/util/oslib-win32.c b/util/oslib-win32.c index c4a5f05a49..b623830d62 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -265,7 +265,7 @@ int getpagesize(void) } bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, - ThreadContext *tc, Error **errp) + ThreadContext *tc, bool async, Error **errp) { int i; size_t pagesize = qemu_real_host_page_size(); @@ -278,6 +278,12 @@ bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, return true; } +bool qemu_finish_async_prealloc_mem(Error **errp) +{ + /* async prealloc not supported, there is nothing to finish */ + return true; +} + char *qemu_get_pid_name(pid_t pid) { /* XXX Implement me */