aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Peter Maydell <peter.maydell@linaro.org> 2018-06-15 15:27:48 +0100
committer: Peter Maydell <peter.maydell@linaro.org> 2018-06-15 15:27:48 +0100
commit: 81d386479640879d87ab9661c8fb44d586c965ec (patch)
tree: d406d297e04eb0b86fd16ac974aaf34673110110
parent: 2702c2d3eb74e3908c0c5dbf3a71c8987595a86e (diff)
parent: 14120108f87b3f9e1beacdf0a6096e464e62bb65 (diff)
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20180615' into staging
target-arm and miscellaneous queue:
 * fix KVM state save/restore for GICv3 priority registers for high IRQ numbers
 * hw/arm/mps2-tz: Put ethernet controller behind PPC
 * hw/sh/sh7750: Convert away from old_mmio
 * hw/m68k/mcf5206: Convert away from old_mmio
 * hw/block/pflash_cfi02: Convert away from old_mmio
 * hw/watchdog/wdt_i6300esb: Convert away from old_mmio
 * hw/input/pckbd: Convert away from old_mmio
 * hw/char/parallel: Convert away from old_mmio
 * armv7m: refactor to get rid of armv7m_init() function
 * arm: Don't crash if user tries to use a Cortex-M CPU without an NVIC
 * hw/core/or-irq: Support more than 16 inputs to an OR gate
 * cpu-defs.h: Document CPUIOTLBEntry 'addr' field
 * cputlb: Pass cpu_transaction_failed() the correct physaddr
 * CODING_STYLE: Define our preferred form for multiline comments
 * Add and use new stn_*_p() and ldn_*_p() memory access functions
 * target/arm: More parts of the upcoming SVE support
 * aspeed_scu: Implement RNG register
 * m25p80: add support for two bytes WRSR for Macronix chips
 * exec.c: Handle IOMMUs being in the path of TCG CPU memory accesses
 * target/arm: Allow ARMv6-M Thumb2 instructions

# gpg: Signature made Fri 15 Jun 2018 15:24:03 BST
# gpg: using RSA key 3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg: aka "Peter Maydell <pmaydell@gmail.com>"
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20180615: (43 commits)
  target/arm: Allow ARMv6-M Thumb2 instructions
  exec.c: Handle IOMMUs in address_space_translate_for_iotlb()
  iommu: Add IOMMU index argument to translate method
  iommu: Add IOMMU index argument to notifier APIs
  iommu: Add IOMMU index concept to IOMMU API
  m25p80: add support for two bytes WRSR for Macronix chips
  aspeed_scu: Implement RNG register
  target/arm: Implement SVE Floating Point Arithmetic - Unpredicated Group
  target/arm: Implement SVE Integer Wide Immediate - Unpredicated Group
  target/arm: Implement FDUP/DUP
  target/arm: Implement SVE Integer Compare - Scalars Group
  target/arm: Implement SVE Predicate Count Group
  target/arm: Implement SVE Partition Break Group
  target/arm: Implement SVE Integer Compare - Immediate Group
  target/arm: Implement SVE Integer Compare - Vectors Group
  target/arm: Implement SVE Select Vectors Group
  target/arm: Implement SVE vector splice (predicated)
  target/arm: Implement SVE reverse within elements
  target/arm: Implement SVE copy to vector (predicated)
  target/arm: Implement SVE conditionally broadcast/extract element
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- CODING_STYLE 17
-rw-r--r-- accel/tcg/cputlb.c 59
-rw-r--r-- docs/devel/loads-stores.rst 15
-rw-r--r-- exec.c 263
-rw-r--r-- hw/alpha/typhoon.c 3
-rw-r--r-- hw/arm/armv7m.c 28
-rw-r--r-- hw/arm/mps2-tz.c 32
-rw-r--r-- hw/arm/smmuv3.c 2
-rw-r--r-- hw/arm/stellaris.c 12
-rw-r--r-- hw/block/m25p80.c 1
-rw-r--r-- hw/block/pflash_cfi02.c 97
-rw-r--r-- hw/char/parallel.c 50
-rw-r--r-- hw/core/or-irq.c 39
-rw-r--r-- hw/dma/rc4030.c 2
-rw-r--r-- hw/i386/amd_iommu.c 2
-rw-r--r-- hw/i386/intel_iommu.c 8
-rw-r--r-- hw/input/pckbd.c 14
-rw-r--r-- hw/intc/arm_gicv3_kvm.c 18
-rw-r--r-- hw/intc/armv7m_nvic.c 6
-rw-r--r-- hw/m68k/mcf5206.c 48
-rw-r--r-- hw/misc/aspeed_scu.c 20
-rw-r--r-- hw/ppc/spapr_iommu.c 5
-rw-r--r-- hw/s390x/s390-pci-bus.c 2
-rw-r--r-- hw/s390x/s390-pci-inst.c 4
-rw-r--r-- hw/sh4/sh7750.c 44
-rw-r--r-- hw/sparc/sun4m_iommu.c 3
-rw-r--r-- hw/sparc64/sun4u_iommu.c 2
-rw-r--r-- hw/vfio/common.c 6
-rw-r--r-- hw/virtio/vhost.c 7
-rw-r--r-- hw/watchdog/wdt_i6300esb.c 48
-rw-r--r-- include/exec/cpu-all.h 4
-rw-r--r-- include/exec/cpu-defs.h 9
-rw-r--r-- include/exec/exec-all.h 16
-rw-r--r-- include/exec/memory.h 65
-rw-r--r-- include/hw/arm/arm.h 8
-rw-r--r-- include/hw/or-irq.h 5
-rw-r--r-- include/qemu/bswap.h 52
-rw-r--r-- include/qom/cpu.h 3
-rw-r--r-- memory.c 33
-rw-r--r-- target/arm/cpu.c 18
-rw-r--r-- target/arm/helper-sve.h 294
-rw-r--r-- target/arm/helper.h 19
-rw-r--r-- target/arm/sve.decode 248
-rw-r--r-- target/arm/sve_helper.c 1250
-rw-r--r-- target/arm/translate-a64.h 26
-rw-r--r-- target/arm/translate-sve.c 1458
-rw-r--r-- target/arm/translate.c 43
-rw-r--r-- target/arm/vec_helper.c 69
48 files changed, 4114 insertions, 363 deletions
diff --git a/CODING_STYLE b/CODING_STYLE
index 12ba58ee29..ec075dedc4 100644
--- a/CODING_STYLE
+++ b/CODING_STYLE
@@ -124,6 +124,23 @@ We use traditional C-style /* */ comments and avoid // comments.
Rationale: The // form is valid in C99, so this is purely a matter of
consistency of style. The checkpatch script will warn you about this.
+Multiline comment blocks should have a row of stars on the left,
+and the initial /* and terminating */ both on their own lines:
+ /*
+ * like
+ * this
+ */
+This is the same format required by the Linux kernel coding style.
+
+(Some of the existing comments in the codebase use the GNU Coding
+Standards form which does not have stars on the left, or other
+variations; avoid these when writing new comments, but don't worry
+about converting to the preferred form unless you're editing that
+comment anyway.)
+
+Rationale: Consistency, and ease of visually picking out a multiline
+comment from the surrounding code.
+
8. trace-events style
8.1 0x prefix
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 05439039e9..0a721bb9c4 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -632,7 +632,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
}
sz = size;
- section = address_space_translate_for_iotlb(cpu, asidx, paddr, &xlat, &sz);
+ section = address_space_translate_for_iotlb(cpu, asidx, paddr, &xlat, &sz,
+ attrs, &prot);
assert(sz >= TARGET_PAGE_SIZE);
tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
@@ -664,6 +665,18 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
/* refill the tlb */
+ /*
+ * At this point iotlb contains a physical section number in the lower
+ * TARGET_PAGE_BITS, and either
+ * + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM)
+ * + the offset within section->mr of the page base (otherwise)
+ * We subtract the vaddr (which is page aligned and thus won't
+ * disturb the low bits) to give an offset which can be added to the
+ * (non-page-aligned) vaddr of the eventual memory access to get
+ * the MemoryRegion offset for the access. Note that the vaddr we
+ * subtract here is that of the page base, and not the same as the
+ * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
+ */
env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
env->iotlb[mmu_idx][index].attrs = attrs;
@@ -765,13 +778,16 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
target_ulong addr, uintptr_t retaddr, int size)
{
CPUState *cpu = ENV_GET_CPU(env);
- hwaddr physaddr = iotlbentry->addr;
- MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
+ hwaddr mr_offset;
+ MemoryRegionSection *section;
+ MemoryRegion *mr;
uint64_t val;
bool locked = false;
MemTxResult r;
- physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
+ section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+ mr = section->mr;
+ mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
cpu->mem_io_pc = retaddr;
if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
cpu_io_recompile(cpu, retaddr);
@@ -783,9 +799,13 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
qemu_mutex_lock_iothread();
locked = true;
}
- r = memory_region_dispatch_read(mr, physaddr,
+ r = memory_region_dispatch_read(mr, mr_offset,
&val, size, iotlbentry->attrs);
if (r != MEMTX_OK) {
+ hwaddr physaddr = mr_offset +
+ section->offset_within_address_space -
+ section->offset_within_region;
+
cpu_transaction_failed(cpu, physaddr, addr, size, MMU_DATA_LOAD,
mmu_idx, iotlbentry->attrs, r, retaddr);
}
@@ -802,12 +822,15 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
uintptr_t retaddr, int size)
{
CPUState *cpu = ENV_GET_CPU(env);
- hwaddr physaddr = iotlbentry->addr;
- MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
+ hwaddr mr_offset;
+ MemoryRegionSection *section;
+ MemoryRegion *mr;
bool locked = false;
MemTxResult r;
- physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
+ section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+ mr = section->mr;
+ mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
cpu_io_recompile(cpu, retaddr);
}
@@ -818,9 +841,13 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
qemu_mutex_lock_iothread();
locked = true;
}
- r = memory_region_dispatch_write(mr, physaddr,
+ r = memory_region_dispatch_write(mr, mr_offset,
val, size, iotlbentry->attrs);
if (r != MEMTX_OK) {
+ hwaddr physaddr = mr_offset +
+ section->offset_within_address_space -
+ section->offset_within_region;
+
cpu_transaction_failed(cpu, physaddr, addr, size, MMU_DATA_STORE,
mmu_idx, iotlbentry->attrs, r, retaddr);
}
@@ -868,12 +895,13 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
*/
tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
{
- int mmu_idx, index, pd;
+ int mmu_idx, index;
void *p;
MemoryRegion *mr;
+ MemoryRegionSection *section;
CPUState *cpu = ENV_GET_CPU(env);
CPUIOTLBEntry *iotlbentry;
- hwaddr physaddr;
+ hwaddr physaddr, mr_offset;
index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
mmu_idx = cpu_mmu_index(env, true);
@@ -884,8 +912,8 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
}
}
iotlbentry = &env->iotlb[mmu_idx][index];
- pd = iotlbentry->addr & ~TARGET_PAGE_MASK;
- mr = iotlb_to_region(cpu, pd, iotlbentry->attrs);
+ section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+ mr = section->mr;
if (memory_region_is_unassigned(mr)) {
qemu_mutex_lock_iothread();
if (memory_region_request_mmio_ptr(mr, addr)) {
@@ -906,7 +934,10 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
* and use the MemTXResult it produced). However it is the
* simplest place we have currently available for the check.
*/
- physaddr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+ mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+ physaddr = mr_offset +
+ section->offset_within_address_space -
+ section->offset_within_region;
cpu_transaction_failed(cpu, physaddr, addr, 0, MMU_INST_FETCH, mmu_idx,
iotlbentry->attrs, MEMTX_DECODE_ERROR, 0);
diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst
index 6a990cc243..57d8c524bf 100644
--- a/docs/devel/loads-stores.rst
+++ b/docs/devel/loads-stores.rst
@@ -53,9 +53,24 @@ The ``_{endian}`` infix is omitted for target-endian accesses.
The target endian accessors are only available to source
files which are built per-target.
+There are also functions which take the size as an argument:
+
+load: ``ldn_{endian}_p(ptr, sz)``
+
+which performs an unsigned load of ``sz`` bytes from ``ptr``
+as an ``{endian}`` order value and returns it in a uint64_t.
+
+store: ``stn_{endian}_p(ptr, sz, val)``
+
+which stores ``val`` to ``ptr`` as an ``{endian}`` order value
+of size ``sz`` bytes.
+
+
Regexes for git grep
- ``\<ldf\?[us]\?[bwlq]\(_[hbl]e\)\?_p\>``
- ``\<stf\?[bwlq]\(_[hbl]e\)\?_p\>``
+ - ``\<ldn\(_[hbl]e\)\?_p\>``
+ - ``\<stn\(_[hbl]e\)\?_p\>``
``cpu_{ld,st}_*``
~~~~~~~~~~~~~~~~~
diff --git a/exec.c b/exec.c
index f6645ede0c..ebadc0e302 100644
--- a/exec.c
+++ b/exec.c
@@ -501,8 +501,15 @@ static MemoryRegionSection address_space_translate_iommu(IOMMUMemoryRegion *iomm
do {
hwaddr addr = *xlat;
IOMMUMemoryRegionClass *imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
- IOMMUTLBEntry iotlb = imrc->translate(iommu_mr, addr, is_write ?
- IOMMU_WO : IOMMU_RO);
+ int iommu_idx = 0;
+ IOMMUTLBEntry iotlb;
+
+ if (imrc->attrs_to_index) {
+ iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);
+ }
+
+ iotlb = imrc->translate(iommu_mr, addr, is_write ?
+ IOMMU_WO : IOMMU_RO, iommu_idx);
if (!(iotlb.perm & (1 << is_write))) {
goto unassigned;
@@ -646,18 +653,144 @@ MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,
return mr;
}
+typedef struct TCGIOMMUNotifier {
+ IOMMUNotifier n;
+ MemoryRegion *mr;
+ CPUState *cpu;
+ int iommu_idx;
+ bool active;
+} TCGIOMMUNotifier;
+
+static void tcg_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+{
+ TCGIOMMUNotifier *notifier = container_of(n, TCGIOMMUNotifier, n);
+
+ if (!notifier->active) {
+ return;
+ }
+ tlb_flush(notifier->cpu);
+ notifier->active = false;
+ /* We leave the notifier struct on the list to avoid reallocating it later.
+ * Generally the number of IOMMUs a CPU deals with will be small.
+ * In any case we can't unregister the iommu notifier from a notify
+ * callback.
+ */
+}
+
+static void tcg_register_iommu_notifier(CPUState *cpu,
+ IOMMUMemoryRegion *iommu_mr,
+ int iommu_idx)
+{
+ /* Make sure this CPU has an IOMMU notifier registered for this
+ * IOMMU/IOMMU index combination, so that we can flush its TLB
+ * when the IOMMU tells us the mappings we've cached have changed.
+ */
+ MemoryRegion *mr = MEMORY_REGION(iommu_mr);
+ TCGIOMMUNotifier *notifier;
+ int i;
+
+ for (i = 0; i < cpu->iommu_notifiers->len; i++) {
+ notifier = &g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier, i);
+ if (notifier->mr == mr && notifier->iommu_idx == iommu_idx) {
+ break;
+ }
+ }
+ if (i == cpu->iommu_notifiers->len) {
+ /* Not found, add a new entry at the end of the array */
+ cpu->iommu_notifiers = g_array_set_size(cpu->iommu_notifiers, i + 1);
+ notifier = &g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier, i);
+
+ notifier->mr = mr;
+ notifier->iommu_idx = iommu_idx;
+ notifier->cpu = cpu;
+ /* Rather than trying to register interest in the specific part
+ * of the iommu's address space that we've accessed and then
+ * expand it later as subsequent accesses touch more of it, we
+ * just register interest in the whole thing, on the assumption
+ * that iommu reconfiguration will be rare.
+ */
+ iommu_notifier_init(&notifier->n,
+ tcg_iommu_unmap_notify,
+ IOMMU_NOTIFIER_UNMAP,
+ 0,
+ HWADDR_MAX,
+ iommu_idx);
+ memory_region_register_iommu_notifier(notifier->mr, &notifier->n);
+ }
+
+ if (!notifier->active) {
+ notifier->active = true;
+ }
+}
+
+static void tcg_iommu_free_notifier_list(CPUState *cpu)
+{
+ /* Destroy the CPU's notifier list */
+ int i;
+ TCGIOMMUNotifier *notifier;
+
+ for (i = 0; i < cpu->iommu_notifiers->len; i++) {
+ notifier = &g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier, i);
+ memory_region_unregister_iommu_notifier(notifier->mr, &notifier->n);
+ }
+ g_array_free(cpu->iommu_notifiers, true);
+}
+
/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
- hwaddr *xlat, hwaddr *plen)
+ hwaddr *xlat, hwaddr *plen,
+ MemTxAttrs attrs, int *prot)
{
MemoryRegionSection *section;
+ IOMMUMemoryRegion *iommu_mr;
+ IOMMUMemoryRegionClass *imrc;
+ IOMMUTLBEntry iotlb;
+ int iommu_idx;
AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
- section = address_space_translate_internal(d, addr, xlat, plen, false);
+ for (;;) {
+ section = address_space_translate_internal(d, addr, &addr, plen, false);
+
+ iommu_mr = memory_region_get_iommu(section->mr);
+ if (!iommu_mr) {
+ break;
+ }
+
+ imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
+
+ iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);
+ tcg_register_iommu_notifier(cpu, iommu_mr, iommu_idx);
+ /* We need all the permissions, so pass IOMMU_NONE so the IOMMU
+ * doesn't short-cut its translation table walk.
+ */
+ iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, iommu_idx);
+ addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
+ | (addr & iotlb.addr_mask));
+ /* Update the caller's prot bits to remove permissions the IOMMU
+ * is giving us a failure response for. If we get down to no
+ * permissions left at all we can give up now.
+ */
+ if (!(iotlb.perm & IOMMU_RO)) {
+ *prot &= ~(PAGE_READ | PAGE_EXEC);
+ }
+ if (!(iotlb.perm & IOMMU_WO)) {
+ *prot &= ~PAGE_WRITE;
+ }
+
+ if (!*prot) {
+ goto translate_fail;
+ }
+
+ d = flatview_to_dispatch(address_space_to_flatview(iotlb.target_as));
+ }
assert(!memory_region_is_iommu(section->mr));
+ *xlat = addr;
return section;
+
+translate_fail:
+ return &d->map.sections[PHYS_SECTION_UNASSIGNED];
}
#endif
@@ -816,6 +949,9 @@ void cpu_exec_unrealizefn(CPUState *cpu)
if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
}
+#ifndef CONFIG_USER_ONLY
+ tcg_iommu_free_notifier_list(cpu);
+#endif
}
Property cpu_common_props[] = {
@@ -863,6 +999,8 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
if (cc->vmsd != NULL) {
vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
}
+
+ cpu->iommu_notifiers = g_array_new(false, true, sizeof(TCGIOMMUNotifier));
#endif
}
@@ -2544,22 +2682,7 @@ static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
memory_notdirty_write_prepare(&ndi, current_cpu, current_cpu->mem_io_vaddr,
ram_addr, size);
- switch (size) {
- case 1:
- stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
- break;
- case 2:
- stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
- break;
- case 4:
- stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
- break;
- case 8:
- stq_p(qemu_map_ram_ptr(NULL, ram_addr), val);
- break;
- default:
- abort();
- }
+ stn_p(qemu_map_ram_ptr(NULL, ram_addr), size, val);
memory_notdirty_write_complete(&ndi);
}
@@ -2739,22 +2862,8 @@ static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
if (res) {
return res;
}
- switch (len) {
- case 1:
- *data = ldub_p(buf);
- return MEMTX_OK;
- case 2:
- *data = lduw_p(buf);
- return MEMTX_OK;
- case 4:
- *data = ldl_p(buf);
- return MEMTX_OK;
- case 8:
- *data = ldq_p(buf);
- return MEMTX_OK;
- default:
- abort();
- }
+ *data = ldn_p(buf, len);
+ return MEMTX_OK;
}
static MemTxResult subpage_write(void *opaque, hwaddr addr,
@@ -2768,22 +2877,7 @@ static MemTxResult subpage_write(void *opaque, hwaddr addr,
" value %"PRIx64"\n",
__func__, subpage, len, addr, value);
#endif
- switch (len) {
- case 1:
- stb_p(buf, value);
- break;
- case 2:
- stw_p(buf, value);
- break;
- case 4:
- stl_p(buf, value);
- break;
- case 8:
- stq_p(buf, value);
- break;
- default:
- abort();
- }
+ stn_p(buf, len, value);
return flatview_write(subpage->fv, addr + subpage->base, attrs, buf, len);
}
@@ -2897,14 +2991,15 @@ static const MemoryRegionOps readonly_mem_ops = {
},
};
-MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
+MemoryRegionSection *iotlb_to_section(CPUState *cpu,
+ hwaddr index, MemTxAttrs attrs)
{
int asidx = cpu_asidx_from_attrs(cpu, attrs);
CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
MemoryRegionSection *sections = d->map.sections;
- return sections[index & ~TARGET_PAGE_MASK].mr;
+ return &sections[index & ~TARGET_PAGE_MASK];
}
static void io_mem_init(void)
@@ -3128,34 +3223,8 @@ static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr,
l = memory_access_size(mr, l, addr1);
/* XXX: could force current_cpu to NULL to avoid
potential bugs */
- switch (l) {
- case 8:
- /* 64 bit write access */
- val = ldq_p(buf);
- result |= memory_region_dispatch_write(mr, addr1, val, 8,
- attrs);
- break;
- case 4:
- /* 32 bit write access */
- val = (uint32_t)ldl_p(buf);
- result |= memory_region_dispatch_write(mr, addr1, val, 4,
- attrs);
- break;
- case 2:
- /* 16 bit write access */
- val = lduw_p(buf);
- result |= memory_region_dispatch_write(mr, addr1, val, 2,
- attrs);
- break;
- case 1:
- /* 8 bit write access */
- val = ldub_p(buf);
- result |= memory_region_dispatch_write(mr, addr1, val, 1,
- attrs);
- break;
- default:
- abort();
- }
+ val = ldn_p(buf, l);
+ result |= memory_region_dispatch_write(mr, addr1, val, l, attrs);
} else {
/* RAM case */
ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
@@ -3216,34 +3285,8 @@ MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr,
/* I/O case */
release_lock |= prepare_mmio_access(mr);
l = memory_access_size(mr, l, addr1);
- switch (l) {
- case 8:
- /* 64 bit read access */
- result |= memory_region_dispatch_read(mr, addr1, &val, 8,
- attrs);
- stq_p(buf, val);
- break;
- case 4:
- /* 32 bit read access */
- result |= memory_region_dispatch_read(mr, addr1, &val, 4,
- attrs);
- stl_p(buf, val);
- break;
- case 2:
- /* 16 bit read access */
- result |= memory_region_dispatch_read(mr, addr1, &val, 2,
- attrs);
- stw_p(buf, val);
- break;
- case 1:
- /* 8 bit read access */
- result |= memory_region_dispatch_read(mr, addr1, &val, 1,
- attrs);
- stb_p(buf, val);
- break;
- default:
- abort();
- }
+ result |= memory_region_dispatch_read(mr, addr1, &val, l, attrs);
+ stn_p(buf, l, val);
} else {
/* RAM case */
ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c
index 6a40869488..d3ed7cdbe8 100644
--- a/hw/alpha/typhoon.c
+++ b/hw/alpha/typhoon.c
@@ -666,7 +666,8 @@ static bool window_translate(TyphoonWindow *win, hwaddr addr,
Pchip and generate a machine check interrupt. */
static IOMMUTLBEntry typhoon_translate_iommu(IOMMUMemoryRegion *iommu,
hwaddr addr,
- IOMMUAccessFlags flag)
+ IOMMUAccessFlags flag,
+ int iommu_idx)
{
TyphoonPchip *pchip = container_of(iommu, TyphoonPchip, iommu);
IOMMUTLBEntry ret;
diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
index f123cc7d3d..9e00d4037c 100644
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@@ -178,6 +178,12 @@ static void armv7m_realize(DeviceState *dev, Error **errp)
return;
}
}
+
+ /* Tell the CPU where the NVIC is; it will fail realize if it doesn't
+ * have one.
+ */
+ s->cpu->env.nvic = &s->nvic;
+
object_property_set_bool(OBJECT(s->cpu), true, "realized", &err);
if (err != NULL) {
error_propagate(errp, err);
@@ -202,7 +208,6 @@ static void armv7m_realize(DeviceState *dev, Error **errp)
sbd = SYS_BUS_DEVICE(&s->nvic);
sysbus_connect_irq(sbd, 0,
qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_IRQ));
- s->cpu->env.nvic = &s->nvic;
memory_region_add_subregion(&s->container, 0xe000e000,
sysbus_mmio_get_region(sbd, 0));
@@ -261,27 +266,6 @@ static void armv7m_reset(void *opaque)
cpu_reset(CPU(cpu));
}
-/* Init CPU and memory for a v7-M based board.
- mem_size is in bytes.
- Returns the ARMv7M device. */
-
-DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
- const char *kernel_filename, const char *cpu_type)
-{
- DeviceState *armv7m;
-
- armv7m = qdev_create(NULL, TYPE_ARMV7M);
- qdev_prop_set_uint32(armv7m, "num-irq", num_irq);
- qdev_prop_set_string(armv7m, "cpu-type", cpu_type);
- object_property_set_link(OBJECT(armv7m), OBJECT(get_system_memory()),
- "memory", &error_abort);
- /* This will exit with an error if the user passed us a bad cpu_type */
- qdev_init_nofail(armv7m);
-
- armv7m_load_kernel(ARM_CPU(first_cpu), kernel_filename, mem_size);
- return armv7m;
-}
-
void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size)
{
int image_size;
diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c
index 8dc8bfd4ab..c5ef95e4cc 100644
--- a/hw/arm/mps2-tz.c
+++ b/hw/arm/mps2-tz.c
@@ -74,12 +74,13 @@ typedef struct {
UnimplementedDeviceState spi[5];
UnimplementedDeviceState i2c[4];
UnimplementedDeviceState i2s_audio;
- UnimplementedDeviceState gpio[5];
+ UnimplementedDeviceState gpio[4];
UnimplementedDeviceState dma[4];
UnimplementedDeviceState gfx;
CMSDKAPBUART uart[5];
SplitIRQ sec_resp_splitter;
qemu_or_irq uart_irq_orgate;
+ DeviceState *lan9118;
} MPS2TZMachineState;
#define TYPE_MPS2TZ_MACHINE "mps2tz"
@@ -224,6 +225,26 @@ static MemoryRegion *make_fpgaio(MPS2TZMachineState *mms, void *opaque,
return sysbus_mmio_get_region(SYS_BUS_DEVICE(fpgaio), 0);
}
+static MemoryRegion *make_eth_dev(MPS2TZMachineState *mms, void *opaque,
+ const char *name, hwaddr size)
+{
+ SysBusDevice *s;
+ DeviceState *iotkitdev = DEVICE(&mms->iotkit);
+ NICInfo *nd = &nd_table[0];
+
+ /* In hardware this is a LAN9220; the LAN9118 is software compatible
+ * except that it doesn't support the checksum-offload feature.
+ */
+ qemu_check_nic_model(nd, "lan9118");
+ mms->lan9118 = qdev_create(NULL, "lan9118");
+ qdev_set_nic_properties(mms->lan9118, nd);
+ qdev_init_nofail(mms->lan9118);
+
+ s = SYS_BUS_DEVICE(mms->lan9118);
+ sysbus_connect_irq(s, 0, qdev_get_gpio_in_named(iotkitdev, "EXP_IRQ", 16));
+ return sysbus_mmio_get_region(s, 0);
+}
+
static void mps2tz_common_init(MachineState *machine)
{
MPS2TZMachineState *mms = MPS2TZ_MACHINE(machine);
@@ -363,7 +384,7 @@ static void mps2tz_common_init(MachineState *machine)
{ "gpio1", make_unimp_dev, &mms->gpio[1], 0x40101000, 0x1000 },
{ "gpio2", make_unimp_dev, &mms->gpio[2], 0x40102000, 0x1000 },
{ "gpio3", make_unimp_dev, &mms->gpio[3], 0x40103000, 0x1000 },
- { "gpio4", make_unimp_dev, &mms->gpio[4], 0x40104000, 0x1000 },
+ { "eth", make_eth_dev, NULL, 0x42000000, 0x100000 },
},
}, {
.name = "ahb_ppcexp1",
@@ -447,13 +468,6 @@ static void mps2tz_common_init(MachineState *machine)
"cfg_sec_resp", 0));
}
- /* In hardware this is a LAN9220; the LAN9118 is software compatible
- * except that it doesn't support the checksum-offload feature.
- * The ethernet controller is not behind a PPC.
- */
- lan9118_init(&nd_table[0], 0x42000000,
- qdev_get_gpio_in_named(iotkitdev, "EXP_IRQ", 16));
-
create_unimplemented_device("FPGA NS PC", 0x48007000, 0x1000);
armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename, 0x400000);
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 42dc521c13..978330900d 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -538,7 +538,7 @@ static int smmuv3_decode_config(IOMMUMemoryRegion *mr, SMMUTransCfg *cfg,
}
static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
- IOMMUAccessFlags flag)
+ IOMMUAccessFlags flag, int iommu_idx)
{
SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu);
SMMUv3State *s = sdev->smmu;
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
index 502a20842c..a8f1f6a912 100644
--- a/hw/arm/stellaris.c
+++ b/hw/arm/stellaris.c
@@ -20,6 +20,7 @@
#include "qemu/log.h"
#include "exec/address-spaces.h"
#include "sysemu/sysemu.h"
+#include "hw/arm/armv7m.h"
#include "hw/char/pl011.h"
#include "hw/misc/unimp.h"
#include "cpu.h"
@@ -1298,8 +1299,13 @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
&error_fatal);
memory_region_add_subregion(system_memory, 0x20000000, sram);
- nvic = armv7m_init(system_memory, flash_size, NUM_IRQ_LINES,
- ms->kernel_filename, ms->cpu_type);
+ nvic = qdev_create(NULL, TYPE_ARMV7M);
+ qdev_prop_set_uint32(nvic, "num-irq", NUM_IRQ_LINES);
+ qdev_prop_set_string(nvic, "cpu-type", ms->cpu_type);
+ object_property_set_link(OBJECT(nvic), OBJECT(get_system_memory()),
+ "memory", &error_abort);
+ /* This will exit with an error if the user passed us a bad cpu_type */
+ qdev_init_nofail(nvic);
qdev_connect_gpio_out_named(nvic, "SYSRESETREQ", 0,
qemu_allocate_irq(&do_sys_reset, NULL, 0));
@@ -1431,6 +1437,8 @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
create_unimplemented_device("analogue-comparator", 0x4003c000, 0x1000);
create_unimplemented_device("hibernation", 0x400fc000, 0x1000);
create_unimplemented_device("flash-control", 0x400fd000, 0x1000);
+
+ armv7m_load_kernel(ARM_CPU(first_cpu), ms->kernel_filename, flash_size);
}
/* FIXME: Figure out how to generate these from stellaris_boards. */
diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index a5ccffb4aa..b0ed8fa418 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -698,6 +698,7 @@ static void complete_collecting_data(Flash *s)
case MAN_MACRONIX:
s->quad_enable = extract32(s->data[0], 6, 1);
if (s->len > 1) {
+ s->volatile_cfg = s->data[1];
s->four_bytes_address_mode = extract32(s->data[1], 5, 1);
}
break;
diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c
index a8b3f7f978..6c18e5e578 100644
--- a/hw/block/pflash_cfi02.c
+++ b/hw/block/pflash_cfi02.c
@@ -493,102 +493,41 @@ static void pflash_write (pflash_t *pfl, hwaddr offset,
pfl->cmd = 0;
}
-
-static uint32_t pflash_readb_be(void *opaque, hwaddr addr)
-{
- return pflash_read(opaque, addr, 1, 1);
-}
-
-static uint32_t pflash_readb_le(void *opaque, hwaddr addr)
+static uint64_t pflash_be_readfn(void *opaque, hwaddr addr, unsigned size)
{
- return pflash_read(opaque, addr, 1, 0);
+ return pflash_read(opaque, addr, size, 1);
}
-static uint32_t pflash_readw_be(void *opaque, hwaddr addr)
+static void pflash_be_writefn(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size)
{
- pflash_t *pfl = opaque;
-
- return pflash_read(pfl, addr, 2, 1);
-}
-
-static uint32_t pflash_readw_le(void *opaque, hwaddr addr)
-{
- pflash_t *pfl = opaque;
-
- return pflash_read(pfl, addr, 2, 0);
-}
-
-static uint32_t pflash_readl_be(void *opaque, hwaddr addr)
-{
- pflash_t *pfl = opaque;
-
- return pflash_read(pfl, addr, 4, 1);
-}
-
-static uint32_t pflash_readl_le(void *opaque, hwaddr addr)
-{
- pflash_t *pfl = opaque;
-
- return pflash_read(pfl, addr, 4, 0);
+ pflash_write(opaque, addr, value, size, 1);
}
-static void pflash_writeb_be(void *opaque, hwaddr addr,
- uint32_t value)
+static uint64_t pflash_le_readfn(void *opaque, hwaddr addr, unsigned size)
{
- pflash_write(opaque, addr, value, 1, 1);
+ return pflash_read(opaque, addr, size, 0);
}
-static void pflash_writeb_le(void *opaque, hwaddr addr,
- uint32_t value)
+static void pflash_le_writefn(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size)
{
- pflash_write(opaque, addr, value, 1, 0);
-}
-
-static void pflash_writew_be(void *opaque, hwaddr addr,
- uint32_t value)
-{
- pflash_t *pfl = opaque;
-
- pflash_write(pfl, addr, value, 2, 1);
-}
-
-static void pflash_writew_le(void *opaque, hwaddr addr,
- uint32_t value)
-{
- pflash_t *pfl = opaque;
-
- pflash_write(pfl, addr, value, 2, 0);
-}
-
-static void pflash_writel_be(void *opaque, hwaddr addr,
- uint32_t value)
-{
- pflash_t *pfl = opaque;
-
- pflash_write(pfl, addr, value, 4, 1);
-}
-
-static void pflash_writel_le(void *opaque, hwaddr addr,
- uint32_t value)
-{
- pflash_t *pfl = opaque;
-
- pflash_write(pfl, addr, value, 4, 0);
+ pflash_write(opaque, addr, value, size, 0);
}
static const MemoryRegionOps pflash_cfi02_ops_be = {
- .old_mmio = {
- .read = { pflash_readb_be, pflash_readw_be, pflash_readl_be, },
- .write = { pflash_writeb_be, pflash_writew_be, pflash_writel_be, },
- },
+ .read = pflash_be_readfn,
+ .write = pflash_be_writefn,
+ .valid.min_access_size = 1,
+ .valid.max_access_size = 4,
.endianness = DEVICE_NATIVE_ENDIAN,
};
static const MemoryRegionOps pflash_cfi02_ops_le = {
- .old_mmio = {
- .read = { pflash_readb_le, pflash_readw_le, pflash_readl_le, },
- .write = { pflash_writeb_le, pflash_writew_le, pflash_writel_le, },
- },
+ .read = pflash_le_readfn,
+ .write = pflash_le_writefn,
+ .valid.min_access_size = 1,
+ .valid.max_access_size = 4,
.endianness = DEVICE_NATIVE_ENDIAN,
};
diff --git a/hw/char/parallel.c b/hw/char/parallel.c
index 1542d62201..35748e6c1b 100644
--- a/hw/char/parallel.c
+++ b/hw/char/parallel.c
@@ -554,56 +554,28 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp)
}
/* Memory mapped interface */
-static uint32_t parallel_mm_readb (void *opaque, hwaddr addr)
+static uint64_t parallel_mm_readfn(void *opaque, hwaddr addr, unsigned size)
{
ParallelState *s = opaque;
- return parallel_ioport_read_sw(s, addr >> s->it_shift) & 0xFF;
+ return parallel_ioport_read_sw(s, addr >> s->it_shift) &
+ MAKE_64BIT_MASK(0, size * 8);
}
-static void parallel_mm_writeb (void *opaque,
- hwaddr addr, uint32_t value)
+static void parallel_mm_writefn(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size)
{
ParallelState *s = opaque;
- parallel_ioport_write_sw(s, addr >> s->it_shift, value & 0xFF);
-}
-
-static uint32_t parallel_mm_readw (void *opaque, hwaddr addr)
-{
- ParallelState *s = opaque;
-
- return parallel_ioport_read_sw(s, addr >> s->it_shift) & 0xFFFF;
-}
-
-static void parallel_mm_writew (void *opaque,
- hwaddr addr, uint32_t value)
-{
- ParallelState *s = opaque;
-
- parallel_ioport_write_sw(s, addr >> s->it_shift, value & 0xFFFF);
-}
-
-static uint32_t parallel_mm_readl (void *opaque, hwaddr addr)
-{
- ParallelState *s = opaque;
-
- return parallel_ioport_read_sw(s, addr >> s->it_shift);
-}
-
-static void parallel_mm_writel (void *opaque,
- hwaddr addr, uint32_t value)
-{
- ParallelState *s = opaque;
-
- parallel_ioport_write_sw(s, addr >> s->it_shift, value);
+ parallel_ioport_write_sw(s, addr >> s->it_shift,
+ value & MAKE_64BIT_MASK(0, size * 8));
}
static const MemoryRegionOps parallel_mm_ops = {
- .old_mmio = {
- .read = { parallel_mm_readb, parallel_mm_readw, parallel_mm_readl },
- .write = { parallel_mm_writeb, parallel_mm_writew, parallel_mm_writel },
- },
+ .read = parallel_mm_readfn,
+ .write = parallel_mm_writefn,
+ .valid.min_access_size = 1,
+ .valid.max_access_size = 4,
.endianness = DEVICE_NATIVE_ENDIAN,
};
diff --git a/hw/core/or-irq.c b/hw/core/or-irq.c
index f9d76c4641..a86901b673 100644
--- a/hw/core/or-irq.c
+++ b/hw/core/or-irq.c
@@ -66,14 +66,49 @@ static void or_irq_init(Object *obj)
qdev_init_gpio_out(DEVICE(obj), &s->out_irq, 1);
}
+/* The original version of this device had a fixed 16 entries in its
+ * VMState array; devices with more inputs than this need to
+ * migrate the extra lines via a subsection.
+ * The subsection migrates as much of the levels[] array as is needed
+ * (including repeating the first 16 elements), to avoid the awkwardness
+ * of splitting it in two to meet the requirements of VMSTATE_VARRAY_UINT16.
+ */
+#define OLD_MAX_OR_LINES 16
+#if MAX_OR_LINES < OLD_MAX_OR_LINES
+#error MAX_OR_LINES must be at least 16 for migration compatibility
+#endif
+
+static bool vmstate_extras_needed(void *opaque)
+{
+ qemu_or_irq *s = OR_IRQ(opaque);
+
+ return s->num_lines >= OLD_MAX_OR_LINES;
+}
+
+static const VMStateDescription vmstate_or_irq_extras = {
+ .name = "or-irq-extras",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = vmstate_extras_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_VARRAY_UINT16_UNSAFE(levels, qemu_or_irq, num_lines, 0,
+ vmstate_info_bool, bool),
+ VMSTATE_END_OF_LIST(),
+ },
+};
+
static const VMStateDescription vmstate_or_irq = {
.name = TYPE_OR_IRQ,
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
- VMSTATE_BOOL_ARRAY(levels, qemu_or_irq, MAX_OR_LINES),
+ VMSTATE_BOOL_SUB_ARRAY(levels, qemu_or_irq, 0, OLD_MAX_OR_LINES),
VMSTATE_END_OF_LIST(),
- }
+ },
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_or_irq_extras,
+ NULL
+ },
};
static Property or_irq_properties[] = {
diff --git a/hw/dma/rc4030.c b/hw/dma/rc4030.c
index 5d4833eeca..ccd8612888 100644
--- a/hw/dma/rc4030.c
+++ b/hw/dma/rc4030.c
@@ -491,7 +491,7 @@ static const MemoryRegionOps jazzio_ops = {
};
static IOMMUTLBEntry rc4030_dma_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
- IOMMUAccessFlags flag)
+ IOMMUAccessFlags flag, int iommu_idx)
{
rc4030State *s = container_of(iommu, rc4030State, dma_mr);
IOMMUTLBEntry ret = {
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 63d46ff6ee..1fd669fef8 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -991,7 +991,7 @@ static inline bool amdvi_is_interrupt_addr(hwaddr addr)
}
static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
- IOMMUAccessFlags flag)
+ IOMMUAccessFlags flag, int iommu_idx)
{
AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
AMDVIState *s = as->iommu_state;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index b5a09b7908..0a8cd4e9cc 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1023,7 +1023,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
static int vtd_sync_shadow_page_hook(IOMMUTLBEntry *entry,
void *private)
{
- memory_region_notify_iommu((IOMMUMemoryRegion *)private, *entry);
+ memory_region_notify_iommu((IOMMUMemoryRegion *)private, 0, *entry);
return 0;
}
@@ -1581,7 +1581,7 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
.addr_mask = size - 1,
.perm = IOMMU_NONE,
};
- memory_region_notify_iommu(&vtd_as->iommu, entry);
+ memory_region_notify_iommu(&vtd_as->iommu, 0, entry);
}
}
}
@@ -2015,7 +2015,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
entry.iova = addr;
entry.perm = IOMMU_NONE;
entry.translated_addr = 0;
- memory_region_notify_iommu(&vtd_dev_as->iommu, entry);
+ memory_region_notify_iommu(&vtd_dev_as->iommu, 0, entry);
done:
return true;
@@ -2471,7 +2471,7 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
}
static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
- IOMMUAccessFlags flag)
+ IOMMUAccessFlags flag, int iommu_idx)
{
VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
IntelIOMMUState *s = vtd_as->iommu_state;
diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c
index f17f18e51b..f33e3fc63d 100644
--- a/hw/input/pckbd.c
+++ b/hw/input/pckbd.c
@@ -434,7 +434,7 @@ static const VMStateDescription vmstate_kbd = {
};
/* Memory mapped interface */
-static uint32_t kbd_mm_readb (void *opaque, hwaddr addr)
+static uint64_t kbd_mm_readfn(void *opaque, hwaddr addr, unsigned size)
{
KBDState *s = opaque;
@@ -444,7 +444,8 @@ static uint32_t kbd_mm_readb (void *opaque, hwaddr addr)
return kbd_read_data(s, 0, 1) & 0xff;
}
-static void kbd_mm_writeb (void *opaque, hwaddr addr, uint32_t value)
+static void kbd_mm_writefn(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size)
{
KBDState *s = opaque;
@@ -454,12 +455,13 @@ static void kbd_mm_writeb (void *opaque, hwaddr addr, uint32_t value)
kbd_write_data(s, 0, value & 0xff, 1);
}
+
static const MemoryRegionOps i8042_mmio_ops = {
+ .read = kbd_mm_readfn,
+ .write = kbd_mm_writefn,
+ .valid.min_access_size = 1,
+ .valid.max_access_size = 4,
.endianness = DEVICE_NATIVE_ENDIAN,
- .old_mmio = {
- .read = { kbd_mm_readb, kbd_mm_readb, kbd_mm_readb },
- .write = { kbd_mm_writeb, kbd_mm_writeb, kbd_mm_writeb },
- },
};
void i8042_mm_init(qemu_irq kbd_irq, qemu_irq mouse_irq,
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 5649cac46e..d8d3b25403 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -135,7 +135,14 @@ static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
uint32_t reg, *field;
int irq;
- field = (uint32_t *)bmp;
+ /* For the KVM GICv3, affinity routing is always enabled, and the first 8
+ * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
+ * functionality is provided by GICR_IPRIORITYR<n>, so there is no need
+ * to sync them here: skip the entries for the GIC_INTERNAL irqs in bmp
+ * and advance offset past them.
+ */
+ field = (uint32_t *)(bmp + GIC_INTERNAL);
+ offset += (GIC_INTERNAL * 8) / 8;
for_each_dist_irq_reg(irq, s->num_irq, 8) {
kvm_gicd_access(s, offset, &reg, false);
*field = reg;
@@ -149,7 +156,14 @@ static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
uint32_t reg, *field;
int irq;
- field = (uint32_t *)bmp;
+ /* For the KVM GICv3, affinity routing is always enabled, and the first 8
+ * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
+ * functionality is provided by GICR_IPRIORITYR<n>, so there is no need
+ * to sync them here: skip the entries for the GIC_INTERNAL irqs in bmp
+ * and advance offset past them.
+ */
+ field = (uint32_t *)(bmp + GIC_INTERNAL);
+ offset += (GIC_INTERNAL * 8) / 8;
for_each_dist_irq_reg(irq, s->num_irq, 8) {
reg = *field;
kvm_gicd_access(s, offset, &reg, true);
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index c51151fa8a..661be8878a 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -2183,7 +2183,11 @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp)
int regionlen;
s->cpu = ARM_CPU(qemu_get_cpu(0));
- assert(s->cpu);
+
+ if (!s->cpu || !arm_feature(&s->cpu->env, ARM_FEATURE_M)) {
+ error_setg(errp, "The NVIC can only be used with a Cortex-M CPU");
+ return;
+ }
if (s->num_irq > NVIC_MAX_IRQ) {
error_setg(errp, "num-irq %d exceeds NVIC maximum", s->num_irq);
diff --git a/hw/m68k/mcf5206.c b/hw/m68k/mcf5206.c
index 7abd84ac47..d7f26d6810 100644
--- a/hw/m68k/mcf5206.c
+++ b/hw/m68k/mcf5206.c
@@ -512,19 +512,43 @@ static void m5206_mbar_writel(void *opaque, hwaddr offset,
m5206_mbar_write(s, offset, value, 4);
}
+static uint64_t m5206_mbar_readfn(void *opaque, hwaddr addr, unsigned size)
+{
+ switch (size) {
+ case 1:
+ return m5206_mbar_readb(opaque, addr);
+ case 2:
+ return m5206_mbar_readw(opaque, addr);
+ case 4:
+ return m5206_mbar_readl(opaque, addr);
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void m5206_mbar_writefn(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size)
+{
+ switch (size) {
+ case 1:
+ m5206_mbar_writeb(opaque, addr, value);
+ break;
+ case 2:
+ m5206_mbar_writew(opaque, addr, value);
+ break;
+ case 4:
+ m5206_mbar_writel(opaque, addr, value);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
static const MemoryRegionOps m5206_mbar_ops = {
- .old_mmio = {
- .read = {
- m5206_mbar_readb,
- m5206_mbar_readw,
- m5206_mbar_readl,
- },
- .write = {
- m5206_mbar_writeb,
- m5206_mbar_writew,
- m5206_mbar_writel,
- },
- },
+ .read = m5206_mbar_readfn,
+ .write = m5206_mbar_writefn,
+ .valid.min_access_size = 1,
+ .valid.max_access_size = 4,
.endianness = DEVICE_NATIVE_ENDIAN,
};
diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c
index 5e6d5744ee..59315010db 100644
--- a/hw/misc/aspeed_scu.c
+++ b/hw/misc/aspeed_scu.c
@@ -16,6 +16,7 @@
#include "qapi/visitor.h"
#include "qemu/bitops.h"
#include "qemu/log.h"
+#include "crypto/random.h"
#include "trace.h"
#define TO_REG(offset) ((offset) >> 2)
@@ -154,6 +155,19 @@ static const uint32_t ast2500_a1_resets[ASPEED_SCU_NR_REGS] = {
[BMC_DEV_ID] = 0x00002402U
};
+static uint32_t aspeed_scu_get_random(void)
+{
+ Error *err = NULL;
+ uint32_t num;
+
+ if (qcrypto_random_bytes((uint8_t *)&num, sizeof(num), &err)) {
+ error_report_err(err);
+ exit(1);
+ }
+
+ return num;
+}
+
static uint64_t aspeed_scu_read(void *opaque, hwaddr offset, unsigned size)
{
AspeedSCUState *s = ASPEED_SCU(opaque);
@@ -167,6 +181,12 @@ static uint64_t aspeed_scu_read(void *opaque, hwaddr offset, unsigned size)
}
switch (reg) {
+ case RNG_DATA:
+ /* On hardware, RNG_DATA works regardless of
+ * the state of the enable bit in RNG_CTRL
+ */
+ s->regs[RNG_DATA] = aspeed_scu_get_random();
+ break;
case WAKEUP_EN:
qemu_log_mask(LOG_GUEST_ERROR,
"%s: Read of write-only offset 0x%" HWADDR_PRIx "\n",
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index aaa6010d5c..1b0880ac9e 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -112,7 +112,8 @@ static void spapr_tce_free_table(uint64_t *table, int fd, uint32_t nb_table)
/* Called from RCU critical section */
static IOMMUTLBEntry spapr_tce_translate_iommu(IOMMUMemoryRegion *iommu,
hwaddr addr,
- IOMMUAccessFlags flag)
+ IOMMUAccessFlags flag,
+ int iommu_idx)
{
sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
uint64_t tce;
@@ -428,7 +429,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
entry.translated_addr = tce & page_mask;
entry.addr_mask = ~page_mask;
entry.perm = spapr_tce_iommu_access_flags(tce);
- memory_region_notify_iommu(&tcet->iommu, entry);
+ memory_region_notify_iommu(&tcet->iommu, 0, entry);
return H_SUCCESS;
}
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 10da87458e..e3e0ebb7f6 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -484,7 +484,7 @@ uint16_t s390_guest_io_table_walk(uint64_t g_iota, hwaddr addr,
}
static IOMMUTLBEntry s390_translate_iommu(IOMMUMemoryRegion *mr, hwaddr addr,
- IOMMUAccessFlags flag)
+ IOMMUAccessFlags flag, int iommu_idx)
{
S390PCIIOMMU *iommu = container_of(mr, S390PCIIOMMU, iommu_mr);
S390IOTLBEntry *entry;
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index d1a5f79678..7b61367ee3 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -589,7 +589,7 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
}
notify.perm = IOMMU_NONE;
- memory_region_notify_iommu(&iommu->iommu_mr, notify);
+ memory_region_notify_iommu(&iommu->iommu_mr, 0, notify);
notify.perm = entry->perm;
}
@@ -601,7 +601,7 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
g_hash_table_replace(iommu->iotlb, &cache->iova, cache);
}
- memory_region_notify_iommu(&iommu->iommu_mr, notify);
+ memory_region_notify_iommu(&iommu->iommu_mr, 0, notify);
}
int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
diff --git a/hw/sh4/sh7750.c b/hw/sh4/sh7750.c
index 2dc07a904b..2fb6e618d9 100644
--- a/hw/sh4/sh7750.c
+++ b/hw/sh4/sh7750.c
@@ -450,15 +450,43 @@ static void sh7750_mem_writel(void *opaque, hwaddr addr,
}
}
+static uint64_t sh7750_mem_readfn(void *opaque, hwaddr addr, unsigned size)
+{
+ switch (size) {
+ case 1:
+ return sh7750_mem_readb(opaque, addr);
+ case 2:
+ return sh7750_mem_readw(opaque, addr);
+ case 4:
+ return sh7750_mem_readl(opaque, addr);
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void sh7750_mem_writefn(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size)
+{
+ switch (size) {
+ case 1:
+ sh7750_mem_writeb(opaque, addr, value);
+ break;
+ case 2:
+ sh7750_mem_writew(opaque, addr, value);
+ break;
+ case 4:
+ sh7750_mem_writel(opaque, addr, value);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
static const MemoryRegionOps sh7750_mem_ops = {
- .old_mmio = {
- .read = {sh7750_mem_readb,
- sh7750_mem_readw,
- sh7750_mem_readl },
- .write = {sh7750_mem_writeb,
- sh7750_mem_writew,
- sh7750_mem_writel },
- },
+ .read = sh7750_mem_readfn,
+ .write = sh7750_mem_writefn,
+ .valid.min_access_size = 1,
+ .valid.max_access_size = 4,
.endianness = DEVICE_NATIVE_ENDIAN,
};
diff --git a/hw/sparc/sun4m_iommu.c b/hw/sparc/sun4m_iommu.c
index b677601fc6..7ca1e3fce4 100644
--- a/hw/sparc/sun4m_iommu.c
+++ b/hw/sparc/sun4m_iommu.c
@@ -282,7 +282,8 @@ static void iommu_bad_addr(IOMMUState *s, hwaddr addr,
/* Called from RCU critical section */
static IOMMUTLBEntry sun4m_translate_iommu(IOMMUMemoryRegion *iommu,
hwaddr addr,
- IOMMUAccessFlags flags)
+ IOMMUAccessFlags flags,
+ int iommu_idx)
{
IOMMUState *is = container_of(iommu, IOMMUState, iommu);
hwaddr page, pa;
diff --git a/hw/sparc64/sun4u_iommu.c b/hw/sparc64/sun4u_iommu.c
index eb3aaa87e6..1ef7645ba5 100644
--- a/hw/sparc64/sun4u_iommu.c
+++ b/hw/sparc64/sun4u_iommu.c
@@ -73,7 +73,7 @@
/* Called from RCU critical section */
static IOMMUTLBEntry sun4u_translate_iommu(IOMMUMemoryRegion *iommu,
hwaddr addr,
- IOMMUAccessFlags flag)
+ IOMMUAccessFlags flag, int iommu_idx)
{
IOMMUState *is = container_of(iommu, IOMMUState, iommu);
hwaddr baseaddr, offset;
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 8e57265edf..fb396cf00a 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -507,6 +507,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
if (memory_region_is_iommu(section->mr)) {
VFIOGuestIOMMU *giommu;
IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
+ int iommu_idx;
trace_vfio_listener_region_add_iommu(iova, end);
/*
@@ -523,10 +524,13 @@ static void vfio_listener_region_add(MemoryListener *listener,
llend = int128_add(int128_make64(section->offset_within_region),
section->size);
llend = int128_sub(llend, int128_one());
+ iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
+ MEMTXATTRS_UNSPECIFIED);
iommu_notifier_init(&giommu->n, vfio_iommu_map_notify,
IOMMU_NOTIFIER_ALL,
section->offset_within_region,
- int128_get64(llend));
+ int128_get64(llend),
+ iommu_idx);
QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
memory_region_register_iommu_notifier(section->mr, &giommu->n);
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 96175b214d..b129cb9ddd 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -662,6 +662,8 @@ static void vhost_iommu_region_add(MemoryListener *listener,
iommu_listener);
struct vhost_iommu *iommu;
Int128 end;
+ int iommu_idx;
+ IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
if (!memory_region_is_iommu(section->mr)) {
return;
@@ -671,10 +673,13 @@ static void vhost_iommu_region_add(MemoryListener *listener,
end = int128_add(int128_make64(section->offset_within_region),
section->size);
end = int128_sub(end, int128_one());
+ iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
+ MEMTXATTRS_UNSPECIFIED);
iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify,
IOMMU_NOTIFIER_UNMAP,
section->offset_within_region,
- int128_get64(end));
+ int128_get64(end),
+ iommu_idx);
iommu->mr = section->mr;
iommu->iommu_offset = section->offset_within_address_space -
section->offset_within_region;
diff --git a/hw/watchdog/wdt_i6300esb.c b/hw/watchdog/wdt_i6300esb.c
index e596b0804d..7b59469888 100644
--- a/hw/watchdog/wdt_i6300esb.c
+++ b/hw/watchdog/wdt_i6300esb.c
@@ -361,19 +361,43 @@ static void i6300esb_mem_writel(void *vp, hwaddr addr, uint32_t val)
}
}
+static uint64_t i6300esb_mem_readfn(void *opaque, hwaddr addr, unsigned size)
+{
+ switch (size) {
+ case 1:
+ return i6300esb_mem_readb(opaque, addr);
+ case 2:
+ return i6300esb_mem_readw(opaque, addr);
+ case 4:
+ return i6300esb_mem_readl(opaque, addr);
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void i6300esb_mem_writefn(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size)
+{
+ switch (size) {
+ case 1:
+ i6300esb_mem_writeb(opaque, addr, value);
+ break;
+ case 2:
+ i6300esb_mem_writew(opaque, addr, value);
+ break;
+ case 4:
+ i6300esb_mem_writel(opaque, addr, value);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
static const MemoryRegionOps i6300esb_ops = {
- .old_mmio = {
- .read = {
- i6300esb_mem_readb,
- i6300esb_mem_readw,
- i6300esb_mem_readl,
- },
- .write = {
- i6300esb_mem_writeb,
- i6300esb_mem_writew,
- i6300esb_mem_writel,
- },
- },
+ .read = i6300esb_mem_readfn,
+ .write = i6300esb_mem_writefn,
+ .valid.min_access_size = 1,
+ .valid.max_access_size = 4,
.endianness = DEVICE_LITTLE_ENDIAN,
};
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index a635f532f9..7fa726b8e3 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -133,6 +133,8 @@ static inline void tswap64s(uint64_t *s)
#define stq_p(p, v) stq_be_p(p, v)
#define stfl_p(p, v) stfl_be_p(p, v)
#define stfq_p(p, v) stfq_be_p(p, v)
+#define ldn_p(p, sz) ldn_be_p(p, sz)
+#define stn_p(p, sz, v) stn_be_p(p, sz, v)
#else
#define lduw_p(p) lduw_le_p(p)
#define ldsw_p(p) ldsw_le_p(p)
@@ -145,6 +147,8 @@ static inline void tswap64s(uint64_t *s)
#define stq_p(p, v) stq_le_p(p, v)
#define stfl_p(p, v) stfl_le_p(p, v)
#define stfq_p(p, v) stfq_le_p(p, v)
+#define ldn_p(p, sz) ldn_le_p(p, sz)
+#define stn_p(p, sz, v) stn_le_p(p, sz, v)
#endif
/* MMU memory access macros */
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index e43ff8346b..a171ffc1a4 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -127,6 +127,15 @@ QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));
* structs into one.)
*/
typedef struct CPUIOTLBEntry {
+ /*
+ * @addr contains:
+ * - in the lower TARGET_PAGE_BITS, a physical section number
+ * - with the lower TARGET_PAGE_BITS masked off, an offset which
+ * must be added to the virtual address to obtain:
+ * + the ram_addr_t of the target RAM (if the physical section
+ * number is PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM)
+ * + the offset within the target MemoryRegion (otherwise)
+ */
hwaddr addr;
MemTxAttrs attrs;
} CPUIOTLBEntry;
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 4d09eaba72..8bbea787a9 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -437,8 +437,17 @@ void tb_lock_reset(void);
#if !defined(CONFIG_USER_ONLY)
-struct MemoryRegion *iotlb_to_region(CPUState *cpu,
- hwaddr index, MemTxAttrs attrs);
+/**
+ * iotlb_to_section:
+ * @cpu: CPU performing the access
+ * @index: TCG CPU IOTLB entry
+ *
+ * Given a TCG CPU IOTLB entry, return the MemoryRegionSection that
+ * it refers to. @index will have been initially created and returned
+ * by memory_region_section_get_iotlb().
+ */
+struct MemoryRegionSection *iotlb_to_section(CPUState *cpu,
+ hwaddr index, MemTxAttrs attrs);
void tlb_fill(CPUState *cpu, target_ulong addr, int size,
MMUAccessType access_type, int mmu_idx, uintptr_t retaddr);
@@ -469,7 +478,8 @@ void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr);
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
- hwaddr *xlat, hwaddr *plen);
+ hwaddr *xlat, hwaddr *plen,
+ MemTxAttrs attrs, int *prot);
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
MemoryRegionSection *section,
target_ulong vaddr,
diff --git a/include/exec/memory.h b/include/exec/memory.h
index eb2ba06519..050323f532 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -98,18 +98,21 @@ struct IOMMUNotifier {
/* Notify for address space range start <= addr <= end */
hwaddr start;
hwaddr end;
+ int iommu_idx;
QLIST_ENTRY(IOMMUNotifier) node;
};
typedef struct IOMMUNotifier IOMMUNotifier;
static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
IOMMUNotifierFlag flags,
- hwaddr start, hwaddr end)
+ hwaddr start, hwaddr end,
+ int iommu_idx)
{
n->notify = fn;
n->notifier_flags = flags;
n->start = start;
n->end = end;
+ n->iommu_idx = iommu_idx;
}
/*
@@ -206,6 +209,20 @@ enum IOMMUMemoryRegionAttr {
* to report whenever mappings are changed, by calling
* memory_region_notify_iommu() (or, if necessary, by calling
* memory_region_notify_one() for each registered notifier).
+ *
+ * Conceptually an IOMMU provides a mapping from input address
+ * to an output TLB entry. If the IOMMU is aware of memory transaction
+ * attributes and the output TLB entry depends on the transaction
+ * attributes, we represent this using IOMMU indexes. Each index
+ * selects a particular translation table that the IOMMU has:
+ * @attrs_to_index returns the IOMMU index for a set of transaction attributes
+ * @translate takes an input address and an IOMMU index
+ * and the mapping returned can only depend on the input address and the
+ * IOMMU index.
+ *
+ * Most IOMMUs don't care about the transaction attributes and support
+ * only a single IOMMU index. A more complex IOMMU might have one index
+ * for secure transactions and one for non-secure transactions.
*/
typedef struct IOMMUMemoryRegionClass {
/* private */
@@ -234,9 +251,10 @@ typedef struct IOMMUMemoryRegionClass {
* @iommu: the IOMMUMemoryRegion
* @hwaddr: address to be translated within the memory region
* @flag: requested access permissions
+ * @iommu_idx: IOMMU index for the translation
*/
IOMMUTLBEntry (*translate)(IOMMUMemoryRegion *iommu, hwaddr addr,
- IOMMUAccessFlags flag);
+ IOMMUAccessFlags flag, int iommu_idx);
/* Returns minimum supported page size in bytes.
* If this method is not provided then the minimum is assumed to
* be TARGET_PAGE_SIZE.
@@ -290,6 +308,29 @@ typedef struct IOMMUMemoryRegionClass {
*/
int (*get_attr)(IOMMUMemoryRegion *iommu, enum IOMMUMemoryRegionAttr attr,
void *data);
+
+ /* Return the IOMMU index to use for a given set of transaction attributes.
+ *
+ * Optional method: if an IOMMU only supports a single IOMMU index then
+ * the default implementation of memory_region_iommu_attrs_to_index()
+ * will return 0.
+ *
+ * The indexes supported by an IOMMU must be contiguous, starting at 0.
+ *
+ * @iommu: the IOMMUMemoryRegion
+ * @attrs: memory transaction attributes
+ */
+ int (*attrs_to_index)(IOMMUMemoryRegion *iommu, MemTxAttrs attrs);
+
+ /* Return the number of IOMMU indexes this IOMMU supports.
+ *
+ * Optional method: if this method is not provided, then
+ * memory_region_iommu_num_indexes() will return 1, indicating that
+ * only a single IOMMU index is supported.
+ *
+ * @iommu: the IOMMUMemoryRegion
+ */
+ int (*num_indexes)(IOMMUMemoryRegion *iommu);
} IOMMUMemoryRegionClass;
typedef struct CoalescedMemoryRange CoalescedMemoryRange;
@@ -971,11 +1012,13 @@ uint64_t memory_region_iommu_get_min_page_size(IOMMUMemoryRegion *iommu_mr);
* should be notified with an UNMAP followed by a MAP.
*
* @iommu_mr: the memory region that was changed
+ * @iommu_idx: the IOMMU index for the translation table which has changed
* @entry: the new entry in the IOMMU translation table. The entry
* replaces all old entries for the same virtual I/O address range.
* Deleted entries have .@perm == 0.
*/
void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr,
+ int iommu_idx,
IOMMUTLBEntry entry);
/**
@@ -1055,6 +1098,24 @@ int memory_region_iommu_get_attr(IOMMUMemoryRegion *iommu_mr,
void *data);
/**
+ * memory_region_iommu_attrs_to_index: return the IOMMU index to
+ * use for translations with the given memory transaction attributes.
+ *
+ * @iommu_mr: the memory region
+ * @attrs: the memory transaction attributes
+ */
+int memory_region_iommu_attrs_to_index(IOMMUMemoryRegion *iommu_mr,
+ MemTxAttrs attrs);
+
+/**
+ * memory_region_iommu_num_indexes: return the total number of IOMMU
+ * indexes that this IOMMU supports.
+ *
+ * @iommu_mr: the memory region
+ */
+int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr);
+
+/**
* memory_region_name: get a memory region's name
*
* Returns the string that was used to initialize the memory region.
diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h
index 70fa2287e2..ffed39252d 100644
--- a/include/hw/arm/arm.h
+++ b/include/hw/arm/arm.h
@@ -23,9 +23,6 @@ typedef enum {
ARM_ENDIANNESS_BE32,
} arm_endianness;
-/* armv7m.c */
-DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
- const char *kernel_filename, const char *cpu_type);
/**
* armv7m_load_kernel:
* @cpu: CPU
@@ -33,9 +30,8 @@ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
* @mem_size: mem_size: maximum image size to load
*
* Load the guest image for an ARMv7M system. This must be called by
- * any ARMv7M board, either directly or via armv7m_init(). (This is
- * necessary to ensure that the CPU resets correctly on system reset,
- * as well as for kernel loading.)
+ * any ARMv7M board. (This is necessary to ensure that the CPU resets
+ * correctly on system reset, as well as for kernel loading.)
*/
void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size);
diff --git a/include/hw/or-irq.h b/include/hw/or-irq.h
index 3f6fc1b58a..5a31e5a188 100644
--- a/include/hw/or-irq.h
+++ b/include/hw/or-irq.h
@@ -31,7 +31,10 @@
#define TYPE_OR_IRQ "or-irq"
-#define MAX_OR_LINES 16
+/* This can safely be increased if necessary without breaking
+ * migration compatibility (as long as it remains greater than 15).
+ */
+#define MAX_OR_LINES 32
typedef struct OrIRQState qemu_or_irq;
diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h
index 3f28f661b1..a684c1a7a2 100644
--- a/include/qemu/bswap.h
+++ b/include/qemu/bswap.h
@@ -290,6 +290,15 @@ typedef union {
* For accessors that take a guest address rather than a
* host address, see the cpu_{ld,st}_* accessors defined in
* cpu_ldst.h.
+ *
+ * For cases where the size to be used is not fixed at compile time,
+ * there are
+ * stn_{endian}_p(ptr, sz, val)
+ * which stores @val to @ptr as an @endian-order number @sz bytes in size
+ * and
+ * ldn_{endian}_p(ptr, sz)
+ * which loads @sz bytes from @ptr as an unsigned @endian-order number
+ * and returns it in a uint64_t. @sz must be 1, 2, 4 or 8.
*/
static inline int ldub_p(const void *ptr)
@@ -495,6 +504,49 @@ static inline unsigned long leul_to_cpu(unsigned long v)
#endif
}
+/* Define stn_END_p()/ldn_END_p(): access a sz byte value in END byte order */
+#define DO_STN_LDN_P(END) \
+ static inline void stn_## END ## _p(void *ptr, int sz, uint64_t v) \
+ { \
+ switch (sz) { \
+ case 1: \
+ stb_p(ptr, v); \
+ break; \
+ case 2: \
+ stw_ ## END ## _p(ptr, v); \
+ break; \
+ case 4: \
+ stl_ ## END ## _p(ptr, v); \
+ break; \
+ case 8: \
+ stq_ ## END ## _p(ptr, v); \
+ break; \
+ default: \
+ g_assert_not_reached(); \
+ } \
+ } \
+ static inline uint64_t ldn_## END ## _p(const void *ptr, int sz) \
+ { \
+ switch (sz) { \
+ case 1: \
+ return ldub_p(ptr); \
+ case 2: \
+ return lduw_ ## END ## _p(ptr); \
+ case 4: \
+ return (uint32_t)ldl_ ## END ## _p(ptr); \
+ case 8: \
+ return ldq_ ## END ## _p(ptr); \
+ default: \
+ g_assert_not_reached(); \
+ } \
+ }
+
+DO_STN_LDN_P(he)
+DO_STN_LDN_P(le)
+DO_STN_LDN_P(be)
+
+#undef DO_STN_LDN_P
+
#undef le_bswap
#undef be_bswap
#undef le_bswaps
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 9d3afc6c75..cce2fd6acc 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -429,6 +429,9 @@ struct CPUState {
uint16_t pending_tlb_flush;
int hvf_fd;
+
+ /* track IOMMUs whose translations we've cached in the TCG TLB */
+ GArray *iommu_notifiers;
};
QTAILQ_HEAD(CPUTailQ, CPUState);
diff --git a/memory.c b/memory.c
index 3212acc7f4..21aa57d24c 100644
--- a/memory.c
+++ b/memory.c
@@ -1799,6 +1799,9 @@ void memory_region_register_iommu_notifier(MemoryRegion *mr,
iommu_mr = IOMMU_MEMORY_REGION(mr);
assert(n->notifier_flags != IOMMU_NOTIFIER_NONE);
assert(n->start <= n->end);
+ assert(n->iommu_idx >= 0 &&
+ n->iommu_idx < memory_region_iommu_num_indexes(iommu_mr));
+
QLIST_INSERT_HEAD(&iommu_mr->iommu_notify, n, node);
memory_region_update_iommu_notify_flags(iommu_mr);
}
@@ -1829,7 +1832,7 @@ void memory_region_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
granularity = memory_region_iommu_get_min_page_size(iommu_mr);
for (addr = 0; addr < memory_region_size(mr); addr += granularity) {
- iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE);
+ iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, n->iommu_idx);
if (iotlb.perm != IOMMU_NONE) {
n->notify(n, &iotlb);
}
@@ -1891,6 +1894,7 @@ void memory_region_notify_one(IOMMUNotifier *notifier,
}
void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr,
+ int iommu_idx,
IOMMUTLBEntry entry)
{
IOMMUNotifier *iommu_notifier;
@@ -1898,7 +1902,9 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr,
assert(memory_region_is_iommu(MEMORY_REGION(iommu_mr)));
IOMMU_NOTIFIER_FOREACH(iommu_notifier, iommu_mr) {
- memory_region_notify_one(iommu_notifier, &entry);
+ if (iommu_notifier->iommu_idx == iommu_idx) {
+ memory_region_notify_one(iommu_notifier, &entry);
+ }
}
}
@@ -1915,6 +1921,29 @@ int memory_region_iommu_get_attr(IOMMUMemoryRegion *iommu_mr,
return imrc->get_attr(iommu_mr, attr, data);
}
+int memory_region_iommu_attrs_to_index(IOMMUMemoryRegion *iommu_mr,
+ MemTxAttrs attrs)
+{
+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
+
+ if (!imrc->attrs_to_index) {
+ return 0;
+ }
+
+ return imrc->attrs_to_index(iommu_mr, attrs);
+}
+
+int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr)
+{
+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
+
+ if (!imrc->num_indexes) {
+ return 1;
+ }
+
+ return imrc->num_indexes(iommu_mr);
+}
+
void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
{
uint8_t mask = 1 << client;
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index ab047b9402..e1de45e904 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -767,6 +767,24 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
return;
}
+#ifndef CONFIG_USER_ONLY
+ /* The NVIC and M-profile CPU are two halves of a single piece of
+ * hardware; trying to use one without the other is a command line
+ * error and will result in segfaults if not caught here.
+ */
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ if (!env->nvic) {
+ error_setg(errp, "This board cannot be used with Cortex-M CPUs");
+ return;
+ }
+ } else {
+ if (env->nvic) {
+ error_setg(errp, "This board can only be used with Cortex-M CPUs");
+ return;
+ }
+ }
+#endif
+
cpu_exec_realizefn(cs, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 94f4356ce9..2e76084992 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -195,6 +195,15 @@ DEF_HELPER_FLAGS_5(sve_lsl_zpzz_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(sve_lsl_zpzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_5(sve_asr_zpzw_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve_asr_zpzw_h, TCG_CALL_NO_RWG,
@@ -416,6 +425,230 @@ DEF_HELPER_FLAGS_4(sve_cpy_z_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(sve_ext, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_insr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_insr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_insr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_insr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_3(sve_rev_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_rev_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_rev_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_rev_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_tbl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_tbl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_tbl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_tbl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_sunpk_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_sunpk_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_sunpk_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_uunpk_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_uunpk_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_uunpk_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_zip_p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_uzp_p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_trn_p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_rev_p, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_punpk_p, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_zip_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_zip_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_zip_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_zip_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_uzp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_uzp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_uzp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_uzp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_trn_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_trn_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_trn_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_trn_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_compact_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_compact_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_2(sve_last_active_element, TCG_CALL_NO_RWG, s32, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_revb_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_revb_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_revb_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_revh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_revh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_revw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_rbit_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_rbit_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_rbit_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_splice, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_cmpeq_ppzz_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpne_ppzz_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpge_ppzz_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpgt_ppzz_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphi_ppzz_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphs_ppzz_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_cmpeq_ppzz_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpne_ppzz_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpge_ppzz_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpgt_ppzz_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphi_ppzz_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphs_ppzz_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_cmpeq_ppzz_s, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpne_ppzz_s, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpge_ppzz_s, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpgt_ppzz_s, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphi_ppzz_s, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphs_ppzz_s, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_cmpeq_ppzz_d, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpne_ppzz_d, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpge_ppzz_d, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpgt_ppzz_d, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphi_ppzz_d, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphs_ppzz_d, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_cmpeq_ppzw_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpne_ppzw_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpge_ppzw_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpgt_ppzw_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphi_ppzw_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphs_ppzw_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmple_ppzw_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmplt_ppzw_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmplo_ppzw_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpls_ppzw_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_cmpeq_ppzw_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpne_ppzw_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpge_ppzw_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpgt_ppzw_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphi_ppzw_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphs_ppzw_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmple_ppzw_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmplt_ppzw_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmplo_ppzw_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpls_ppzw_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_cmpeq_ppzw_s, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpne_ppzw_s, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpge_ppzw_s, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpgt_ppzw_s, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphi_ppzw_s, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphs_ppzw_s, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmple_ppzw_s, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmplt_ppzw_s, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmplo_ppzw_s, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpls_ppzw_s, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_cmpeq_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpne_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpgt_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpge_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmplt_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmple_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmphs_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmphi_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmplo_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpls_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_cmpeq_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpne_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpgt_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpge_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmplt_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmple_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmphs_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmphi_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmplo_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpls_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_cmpeq_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpne_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpgt_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpge_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmplt_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmple_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmphs_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmphi_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmplo_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpls_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_cmpeq_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpne_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpgt_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpge_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmplt_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmple_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmphs_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmphi_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmplo_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpls_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_5(sve_and_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve_bic_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve_eor_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
@@ -425,3 +658,64 @@ DEF_HELPER_FLAGS_5(sve_orn_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve_nor_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve_nand_pppp, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_brkpa, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_brkpb, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_brkpas, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_brkpbs, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_brka_z, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_brkb_z, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_brka_m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_brkb_m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_brkas_z, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_brkbs_z, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_brkas_m, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_brkbs_m, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_brkn, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_brkns, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_cntp, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_while, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
+
+DEF_HELPER_FLAGS_4(sve_subri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_subri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_subri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_subri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(sve_smaxi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_smaxi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_smaxi_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_smaxi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(sve_smini_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_smini_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_smini_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_smini_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(sve_umaxi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_umaxi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_umaxi_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_umaxi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(sve_umini_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_umini_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_umini_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_umini_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_5(gvec_recps_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_recps_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_recps_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_rsqrts_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 0c6a144458..879a7229e9 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -601,6 +601,25 @@ DEF_HELPER_FLAGS_5(gvec_fcmlas_idx, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(gvec_fcmlad, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_ftsmul_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#include "helper-sve.h"
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 4761d1921e..6f436f9096 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -24,6 +24,7 @@
%imm4_16_p1 16:4 !function=plus1
%imm6_22_5 22:1 5:5
+%imm7_22_16 22:2 16:5
%imm8_16_10 16:5 10:3
%imm9_16_10 16:s6 10:3
@@ -41,6 +42,8 @@
# Signed 8-bit immediate, optionally shifted left by 8.
%sh8_i8s 5:9 !function=expand_imm_sh8s
+# Unsigned 8-bit immediate, optionally shifted left by 8.
+%sh8_i8u 5:9 !function=expand_imm_sh8u
# Either a copy of rd (at bit 0), or a different source
# as propagated via the MOVPRFX instruction.
@@ -58,6 +61,7 @@
&rri_esz rd rn imm esz
&rrr_esz rd rn rm esz
&rpr_esz rd pg rn esz
+&rpr_s rd pg rn s
&rprr_s rd pg rn rm s
&rprr_esz rd pg rn rm esz
&rprrr_esz rd pg rn rm ra esz
@@ -65,6 +69,8 @@
&ptrue rd esz pat s
&incdec_cnt rd pat esz imm d u
&incdec2_cnt rd rn pat esz imm d u
+&incdec_pred rd pg esz d u
+&incdec2_pred rd rn pg esz d u
###########################################################################
# Named instruction formats. These are generally used to
@@ -77,6 +83,9 @@
@pd_pn ........ esz:2 .. .... ....... rn:4 . rd:4 &rr_esz
@rd_rn ........ esz:2 ...... ...... rn:5 rd:5 &rr_esz
+# Two operand with governing predicate, flags setting
+@pd_pg_pn_s ........ . s:1 ...... .. pg:4 . rn:4 . rd:4 &rpr_s
+
# Three operand with unused vector element size
@rd_rn_rm_e0 ........ ... rm:5 ... ... rn:5 rd:5 &rrr_esz esz=0
@@ -85,6 +94,15 @@
# Three operand, vector element size
@rd_rn_rm ........ esz:2 . rm:5 ... ... rn:5 rd:5 &rrr_esz
+@pd_pn_pm ........ esz:2 .. rm:4 ....... rn:4 . rd:4 &rrr_esz
+@rdn_rm ........ esz:2 ...... ...... rm:5 rd:5 \
+ &rrr_esz rn=%reg_movprfx
+@rdn_sh_i8u ........ esz:2 ...... ...... ..... rd:5 \
+ &rri_esz rn=%reg_movprfx imm=%sh8_i8u
+@rdn_i8u ........ esz:2 ...... ... imm:8 rd:5 \
+ &rri_esz rn=%reg_movprfx
+@rdn_i8s ........ esz:2 ...... ... imm:s8 rd:5 \
+ &rri_esz rn=%reg_movprfx
# Three operand with "memory" size, aka immediate left shift
@rd_rn_msz_rm ........ ... rm:5 .... imm:2 rn:5 rd:5 &rrri
@@ -94,6 +112,8 @@
&rprr_esz rn=%reg_movprfx
@rdm_pg_rn ........ esz:2 ... ... ... pg:3 rn:5 rd:5 \
&rprr_esz rm=%reg_movprfx
+@rd_pg4_rn_rm ........ esz:2 . rm:5 .. pg:4 rn:5 rd:5 &rprr_esz
+@pd_pg_rn_rm ........ esz:2 . rm:5 ... pg:3 rn:5 . rd:4 &rprr_esz
# Three register operand, with governing predicate, vector element size
@rda_pg_rn_rm ........ esz:2 . rm:5 ... pg:3 rn:5 rd:5 \
@@ -103,6 +123,7 @@
# One register operand, with governing predicate, vector element size
@rd_pg_rn ........ esz:2 ... ... ... pg:3 rn:5 rd:5 &rpr_esz
+@rd_pg4_pn ........ esz:2 ... ... .. pg:4 . rn:4 rd:5 &rpr_esz
# Two register operands with a 6-bit signed immediate.
@rd_rn_i6 ........ ... rn:5 ..... imm:s6 rd:5 &rri
@@ -125,6 +146,11 @@
@rdn_dbm ........ .. .... dbm:13 rd:5 \
&rr_dbm rn=%reg_movprfx
+# Predicate output, vector and immediate input,
+# controlling predicate, element size.
+@pd_pg_rn_i7 ........ esz:2 . imm:7 . pg:3 rn:5 . rd:4 &rpri_esz
+@pd_pg_rn_i5 ........ esz:2 . imm:s5 ... pg:3 rn:5 . rd:4 &rpri_esz
+
# Basic Load/Store with 9-bit immediate offset
@pd_rn_i9 ........ ........ ...... rn:5 . rd:4 \
&rri imm=%imm9_16_10
@@ -138,6 +164,12 @@
@incdec2_cnt ........ esz:2 .. .... ...... pat:5 rd:5 \
&incdec2_cnt imm=%imm4_16_p1 rn=%reg_movprfx
+# One register, predicate.
+# User must fill in U and D.
+@incdec_pred ........ esz:2 .... .. ..... .. pg:4 rd:5 &incdec_pred
+@incdec2_pred ........ esz:2 .... .. ..... .. pg:4 rd:5 \
+ &incdec2_pred rn=%reg_movprfx
+
###########################################################################
# Instruction patterns. Grouped according to the SVE encodingindex.xhtml.
@@ -369,6 +401,145 @@ CPY_z_i 00000101 .. 01 .... 00 . ........ ..... @rdn_pg4 imm=%sh8_i8s
EXT 00000101 001 ..... 000 ... rm:5 rd:5 \
&rrri rn=%reg_movprfx imm=%imm8_16_10
+### SVE Permute - Unpredicated Group
+
+# SVE broadcast general register
+DUP_s 00000101 .. 1 00000 001110 ..... ..... @rd_rn
+
+# SVE broadcast indexed element
+DUP_x 00000101 .. 1 ..... 001000 rn:5 rd:5 \
+ &rri imm=%imm7_22_16
+
+# SVE insert SIMD&FP scalar register
+INSR_f 00000101 .. 1 10100 001110 ..... ..... @rdn_rm
+
+# SVE insert general register
+INSR_r 00000101 .. 1 00100 001110 ..... ..... @rdn_rm
+
+# SVE reverse vector elements
+REV_v 00000101 .. 1 11000 001110 ..... ..... @rd_rn
+
+# SVE vector table lookup
+TBL 00000101 .. 1 ..... 001100 ..... ..... @rd_rn_rm
+
+# SVE unpack vector elements
+UNPK 00000101 esz:2 1100 u:1 h:1 001110 rn:5 rd:5
+
+### SVE Permute - Predicates Group
+
+# SVE permute predicate elements
+ZIP1_p 00000101 .. 10 .... 010 000 0 .... 0 .... @pd_pn_pm
+ZIP2_p 00000101 .. 10 .... 010 001 0 .... 0 .... @pd_pn_pm
+UZP1_p 00000101 .. 10 .... 010 010 0 .... 0 .... @pd_pn_pm
+UZP2_p 00000101 .. 10 .... 010 011 0 .... 0 .... @pd_pn_pm
+TRN1_p 00000101 .. 10 .... 010 100 0 .... 0 .... @pd_pn_pm
+TRN2_p 00000101 .. 10 .... 010 101 0 .... 0 .... @pd_pn_pm
+
+# SVE reverse predicate elements
+REV_p 00000101 .. 11 0100 010 000 0 .... 0 .... @pd_pn
+
+# SVE unpack predicate elements
+PUNPKLO 00000101 00 11 0000 010 000 0 .... 0 .... @pd_pn_e0
+PUNPKHI 00000101 00 11 0001 010 000 0 .... 0 .... @pd_pn_e0
+
+### SVE Permute - Interleaving Group
+
+# SVE permute vector elements
+ZIP1_z 00000101 .. 1 ..... 011 000 ..... ..... @rd_rn_rm
+ZIP2_z 00000101 .. 1 ..... 011 001 ..... ..... @rd_rn_rm
+UZP1_z 00000101 .. 1 ..... 011 010 ..... ..... @rd_rn_rm
+UZP2_z 00000101 .. 1 ..... 011 011 ..... ..... @rd_rn_rm
+TRN1_z 00000101 .. 1 ..... 011 100 ..... ..... @rd_rn_rm
+TRN2_z 00000101 .. 1 ..... 011 101 ..... ..... @rd_rn_rm
+
+### SVE Permute - Predicated Group
+
+# SVE compress active elements
+# Note esz >= 2
+COMPACT 00000101 .. 100001 100 ... ..... ..... @rd_pg_rn
+
+# SVE conditionally broadcast element to vector
+CLASTA_z 00000101 .. 10100 0 100 ... ..... ..... @rdn_pg_rm
+CLASTB_z 00000101 .. 10100 1 100 ... ..... ..... @rdn_pg_rm
+
+# SVE conditionally copy element to SIMD&FP scalar
+CLASTA_v 00000101 .. 10101 0 100 ... ..... ..... @rd_pg_rn
+CLASTB_v 00000101 .. 10101 1 100 ... ..... ..... @rd_pg_rn
+
+# SVE conditionally copy element to general register
+CLASTA_r 00000101 .. 11000 0 101 ... ..... ..... @rd_pg_rn
+CLASTB_r 00000101 .. 11000 1 101 ... ..... ..... @rd_pg_rn
+
+# SVE copy element to SIMD&FP scalar register
+LASTA_v 00000101 .. 10001 0 100 ... ..... ..... @rd_pg_rn
+LASTB_v 00000101 .. 10001 1 100 ... ..... ..... @rd_pg_rn
+
+# SVE copy element to general register
+LASTA_r 00000101 .. 10000 0 101 ... ..... ..... @rd_pg_rn
+LASTB_r 00000101 .. 10000 1 101 ... ..... ..... @rd_pg_rn
+
+# SVE copy element from SIMD&FP scalar register
+CPY_m_v 00000101 .. 100000 100 ... ..... ..... @rd_pg_rn
+
+# SVE copy element from general register to vector (predicated)
+CPY_m_r 00000101 .. 101000 101 ... ..... ..... @rd_pg_rn
+
+# SVE reverse within elements
+# Note esz >= operation size
+REVB 00000101 .. 1001 00 100 ... ..... ..... @rd_pg_rn
+REVH 00000101 .. 1001 01 100 ... ..... ..... @rd_pg_rn
+REVW 00000101 .. 1001 10 100 ... ..... ..... @rd_pg_rn
+RBIT 00000101 .. 1001 11 100 ... ..... ..... @rd_pg_rn
+
+# SVE vector splice (predicated)
+SPLICE 00000101 .. 101 100 100 ... ..... ..... @rdn_pg_rm
+
+### SVE Select Vectors Group
+
+# SVE select vector elements (predicated)
+SEL_zpzz 00000101 .. 1 ..... 11 .... ..... ..... @rd_pg4_rn_rm
+
+### SVE Integer Compare - Vectors Group
+
+# SVE integer compare_vectors
+CMPHS_ppzz 00100100 .. 0 ..... 000 ... ..... 0 .... @pd_pg_rn_rm
+CMPHI_ppzz 00100100 .. 0 ..... 000 ... ..... 1 .... @pd_pg_rn_rm
+CMPGE_ppzz 00100100 .. 0 ..... 100 ... ..... 0 .... @pd_pg_rn_rm
+CMPGT_ppzz 00100100 .. 0 ..... 100 ... ..... 1 .... @pd_pg_rn_rm
+CMPEQ_ppzz 00100100 .. 0 ..... 101 ... ..... 0 .... @pd_pg_rn_rm
+CMPNE_ppzz 00100100 .. 0 ..... 101 ... ..... 1 .... @pd_pg_rn_rm
+
+# SVE integer compare with wide elements
+# Note these require esz != 3.
+CMPEQ_ppzw 00100100 .. 0 ..... 001 ... ..... 0 .... @pd_pg_rn_rm
+CMPNE_ppzw 00100100 .. 0 ..... 001 ... ..... 1 .... @pd_pg_rn_rm
+CMPGE_ppzw 00100100 .. 0 ..... 010 ... ..... 0 .... @pd_pg_rn_rm
+CMPGT_ppzw 00100100 .. 0 ..... 010 ... ..... 1 .... @pd_pg_rn_rm
+CMPLT_ppzw 00100100 .. 0 ..... 011 ... ..... 0 .... @pd_pg_rn_rm
+CMPLE_ppzw 00100100 .. 0 ..... 011 ... ..... 1 .... @pd_pg_rn_rm
+CMPHS_ppzw 00100100 .. 0 ..... 110 ... ..... 0 .... @pd_pg_rn_rm
+CMPHI_ppzw 00100100 .. 0 ..... 110 ... ..... 1 .... @pd_pg_rn_rm
+CMPLO_ppzw 00100100 .. 0 ..... 111 ... ..... 0 .... @pd_pg_rn_rm
+CMPLS_ppzw 00100100 .. 0 ..... 111 ... ..... 1 .... @pd_pg_rn_rm
+
+### SVE Integer Compare - Unsigned Immediate Group
+
+# SVE integer compare with unsigned immediate
+CMPHS_ppzi 00100100 .. 1 ....... 0 ... ..... 0 .... @pd_pg_rn_i7
+CMPHI_ppzi 00100100 .. 1 ....... 0 ... ..... 1 .... @pd_pg_rn_i7
+CMPLO_ppzi 00100100 .. 1 ....... 1 ... ..... 0 .... @pd_pg_rn_i7
+CMPLS_ppzi 00100100 .. 1 ....... 1 ... ..... 1 .... @pd_pg_rn_i7
+
+### SVE Integer Compare - Signed Immediate Group
+
+# SVE integer compare with signed immediate
+CMPGE_ppzi 00100101 .. 0 ..... 000 ... ..... 0 .... @pd_pg_rn_i5
+CMPGT_ppzi 00100101 .. 0 ..... 000 ... ..... 1 .... @pd_pg_rn_i5
+CMPLT_ppzi 00100101 .. 0 ..... 001 ... ..... 0 .... @pd_pg_rn_i5
+CMPLE_ppzi 00100101 .. 0 ..... 001 ... ..... 1 .... @pd_pg_rn_i5
+CMPEQ_ppzi 00100101 .. 0 ..... 100 ... ..... 0 .... @pd_pg_rn_i5
+CMPNE_ppzi 00100101 .. 0 ..... 100 ... ..... 1 .... @pd_pg_rn_i5
+
### SVE Predicate Logical Operations Group
# SVE predicate logical operations
@@ -410,6 +581,83 @@ PFIRST 00100101 01 011 000 11000 00 .... 0 .... @pd_pn_e0
# SVE predicate next active
PNEXT 00100101 .. 011 001 11000 10 .... 0 .... @pd_pn
+### SVE Partition Break Group
+
+# SVE propagate break from previous partition
+BRKPA 00100101 0. 00 .... 11 .... 0 .... 0 .... @pd_pg_pn_pm_s
+BRKPB 00100101 0. 00 .... 11 .... 0 .... 1 .... @pd_pg_pn_pm_s
+
+# SVE partition break condition
+BRKA_z 00100101 0. 01000001 .... 0 .... 0 .... @pd_pg_pn_s
+BRKB_z 00100101 1. 01000001 .... 0 .... 0 .... @pd_pg_pn_s
+BRKA_m 00100101 0. 01000001 .... 0 .... 1 .... @pd_pg_pn_s
+BRKB_m 00100101 1. 01000001 .... 0 .... 1 .... @pd_pg_pn_s
+
+# SVE propagate break to next partition
+BRKN 00100101 0. 01100001 .... 0 .... 0 .... @pd_pg_pn_s
+
+### SVE Predicate Count Group
+
+# SVE predicate count
+CNTP 00100101 .. 100 000 10 .... 0 .... ..... @rd_pg4_pn
+
+# SVE inc/dec register by predicate count
+INCDECP_r 00100101 .. 10110 d:1 10001 00 .... ..... @incdec_pred u=1
+
+# SVE inc/dec vector by predicate count
+INCDECP_z 00100101 .. 10110 d:1 10000 00 .... ..... @incdec2_pred u=1
+
+# SVE saturating inc/dec register by predicate count
+SINCDECP_r_32 00100101 .. 1010 d:1 u:1 10001 00 .... ..... @incdec_pred
+SINCDECP_r_64 00100101 .. 1010 d:1 u:1 10001 10 .... ..... @incdec_pred
+
+# SVE saturating inc/dec vector by predicate count
+SINCDECP_z 00100101 .. 1010 d:1 u:1 10000 00 .... ..... @incdec2_pred
+
+### SVE Integer Compare - Scalars Group
+
+# SVE conditionally terminate scalars
+CTERM 00100101 1 sf:1 1 rm:5 001000 rn:5 ne:1 0000
+
+# SVE integer compare scalar count and limit
+WHILE 00100101 esz:2 1 rm:5 000 sf:1 u:1 1 rn:5 eq:1 rd:4
+
+### SVE Integer Wide Immediate - Unpredicated Group
+
+# SVE broadcast floating-point immediate (unpredicated)
+FDUP 00100101 esz:2 111 00 1110 imm:8 rd:5
+
+# SVE broadcast integer immediate (unpredicated)
+DUP_i 00100101 esz:2 111 00 011 . ........ rd:5 imm=%sh8_i8s
+
+# SVE integer add/subtract immediate (unpredicated)
+ADD_zzi 00100101 .. 100 000 11 . ........ ..... @rdn_sh_i8u
+SUB_zzi 00100101 .. 100 001 11 . ........ ..... @rdn_sh_i8u
+SUBR_zzi 00100101 .. 100 011 11 . ........ ..... @rdn_sh_i8u
+SQADD_zzi 00100101 .. 100 100 11 . ........ ..... @rdn_sh_i8u
+UQADD_zzi 00100101 .. 100 101 11 . ........ ..... @rdn_sh_i8u
+SQSUB_zzi 00100101 .. 100 110 11 . ........ ..... @rdn_sh_i8u
+UQSUB_zzi 00100101 .. 100 111 11 . ........ ..... @rdn_sh_i8u
+
+# SVE integer min/max immediate (unpredicated)
+SMAX_zzi 00100101 .. 101 000 110 ........ ..... @rdn_i8s
+UMAX_zzi 00100101 .. 101 001 110 ........ ..... @rdn_i8u
+SMIN_zzi 00100101 .. 101 010 110 ........ ..... @rdn_i8s
+UMIN_zzi 00100101 .. 101 011 110 ........ ..... @rdn_i8u
+
+# SVE integer multiply immediate (unpredicated)
+MUL_zzi 00100101 .. 110 000 110 ........ ..... @rdn_i8s
+
+### SVE Floating Point Arithmetic - Unpredicated Group
+
+# SVE floating-point arithmetic (unpredicated)
+FADD_zzz 01100101 .. 0 ..... 000 000 ..... ..... @rd_rn_rm
+FSUB_zzz 01100101 .. 0 ..... 000 001 ..... ..... @rd_rn_rm
+FMUL_zzz 01100101 .. 0 ..... 000 010 ..... ..... @rd_rn_rm
+FTSMUL 01100101 .. 0 ..... 000 011 ..... ..... @rd_rn_rm
+FRECPS 01100101 .. 0 ..... 000 110 ..... ..... @rd_rn_rm
+FRSQRTS 01100101 .. 0 ..... 000 111 ..... ..... @rd_rn_rm
+
### SVE Memory - 32-bit Gather and Unsized Contiguous Group
# SVE load predicate register
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index b825e44cb5..128bbf9b04 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -74,6 +74,28 @@ static uint32_t iter_predtest_fwd(uint64_t d, uint64_t g, uint32_t flags)
return flags;
}
+/* This is an iterative function, called for each Pd and Pg word
+ * moving backward.
+ *
+ * d and g are one 64-bit word of the result and governing predicates;
+ * flags is the value accumulated so far and the updated value is returned.
+ * A word whose g is zero leaves flags unchanged.  Bit 2 of flags is used
+ * internally to record that a non-zero g word has already been visited.
+ */
+static uint32_t iter_predtest_bwd(uint64_t d, uint64_t g, uint32_t flags)
+{
+ if (likely(g)) {
+ /* Compute C from first (i.e last) !(D & G).
+ Use bit 2 to signal first G bit seen. */
+ if (!(flags & 4)) {
+ flags += 4 - 1; /* add bit 2, subtract C from PREDTEST_INIT */
+ flags |= (d & pow2floor(g)) == 0;
+ }
+
+ /* Accumulate Z from each D & G. */
+ flags |= ((d & g) != 0) << 1;
+
+ /* Compute N from last (i.e first) D & G. Replace previous. */
+ flags = deposit32(flags, 31, 1, (d & (g & -g)) != 0);
+ }
+ return flags;
+}
+
/* The same for a single word predicate. */
uint32_t HELPER(sve_predtest1)(uint64_t d, uint64_t g)
{
@@ -238,6 +260,26 @@ static inline uint64_t expand_pred_s(uint8_t byte)
return word[byte & 0x11];
}
+/* Exchange the two 16-bit halves of a 32-bit value. */
+static inline uint32_t hswap32(uint32_t h)
+{
+    return (h << 16) | (h >> 16);
+}
+
+/* Reverse the order of the four 16-bit units of a 64-bit value. */
+static inline uint64_t hswap64(uint64_t h)
+{
+    const uint64_t even_halves = 0x0000ffff0000ffffull;
+    uint64_t swapped_words = (h << 32) | (h >> 32);
+    uint64_t moved_up = (swapped_words & even_halves) << 16;
+    uint64_t moved_down = (swapped_words >> 16) & even_halves;
+
+    return moved_up | moved_down;
+}
+
+/* Exchange the two 32-bit halves of a 64-bit value. */
+static inline uint64_t wswap64(uint64_t h)
+{
+    return (h >> 32) | (h << 32);
+}
+
#define LOGICAL_PPPP(NAME, FUNC) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
{ \
@@ -616,6 +658,20 @@ DO_ZPZ(sve_neg_h, uint16_t, H1_2, DO_NEG)
DO_ZPZ(sve_neg_s, uint32_t, H1_4, DO_NEG)
DO_ZPZ_D(sve_neg_d, uint64_t, DO_NEG)
+DO_ZPZ(sve_revb_h, uint16_t, H1_2, bswap16) /* revb: bswapN per element */
+DO_ZPZ(sve_revb_s, uint32_t, H1_4, bswap32)
+DO_ZPZ_D(sve_revb_d, uint64_t, bswap64)
+/* revh: reverse the 16-bit units inside each 32-bit or 64-bit element. */
+DO_ZPZ(sve_revh_s, uint32_t, H1_4, hswap32)
+DO_ZPZ_D(sve_revh_d, uint64_t, hswap64)
+/* revw: exchange the 32-bit halves of each 64-bit element. */
+DO_ZPZ_D(sve_revw_d, uint64_t, wswap64)
+/* rbit: apply revbitN to each element. */
+DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8)
+DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16)
+DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32)
+DO_ZPZ_D(sve_rbit_d, uint64_t, revbit64)
+
/* Three-operand expander, unpredicated, in which the third operand is "wide".
*/
#define DO_ZZW(NAME, TYPE, TYPEW, H, OP) \
@@ -748,6 +804,46 @@ DO_VPZ_D(sve_uminv_d, uint64_t, uint64_t, -1, DO_MIN)
#undef DO_VPZ
#undef DO_VPZ_D
+/* Two vector operand, one scalar operand, unpredicated.
+ *
+ * Expands to a helper NAME(vd, vn, s64, desc) that treats vd and vn as
+ * arrays of TYPE covering simd_oprsz(desc) bytes, converts s64 to TYPE,
+ * and stores OP(n[i], s) into d[i] for every element.
+ */
+#define DO_ZZI(NAME, TYPE, OP)                                       \
+void HELPER(NAME)(void *vd, void *vn, uint64_t s64, uint32_t desc)   \
+{                                                                    \
+    intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(TYPE);            \
+    TYPE s = s64, *d = vd, *n = vn;                                  \
+    for (i = 0; i < opr_sz; ++i) {                                   \
+        d[i] = OP(n[i], s);                                          \
+    }                                                                \
+}
+
+/* Reversed subtract: second argument minus first.  The arguments are
+ * parenthesized so the expansion is correct for any argument expression.
+ */
+#define DO_SUBR(X, Y) ((Y) - (X))
+
+DO_ZZI(sve_subri_b, uint8_t, DO_SUBR)
+DO_ZZI(sve_subri_h, uint16_t, DO_SUBR)
+DO_ZZI(sve_subri_s, uint32_t, DO_SUBR)
+DO_ZZI(sve_subri_d, uint64_t, DO_SUBR)
+
+DO_ZZI(sve_smaxi_b, int8_t, DO_MAX)
+DO_ZZI(sve_smaxi_h, int16_t, DO_MAX)
+DO_ZZI(sve_smaxi_s, int32_t, DO_MAX)
+DO_ZZI(sve_smaxi_d, int64_t, DO_MAX)
+
+DO_ZZI(sve_smini_b, int8_t, DO_MIN)
+DO_ZZI(sve_smini_h, int16_t, DO_MIN)
+DO_ZZI(sve_smini_s, int32_t, DO_MIN)
+DO_ZZI(sve_smini_d, int64_t, DO_MIN)
+
+DO_ZZI(sve_umaxi_b, uint8_t, DO_MAX)
+DO_ZZI(sve_umaxi_h, uint16_t, DO_MAX)
+DO_ZZI(sve_umaxi_s, uint32_t, DO_MAX)
+DO_ZZI(sve_umaxi_d, uint64_t, DO_MAX)
+
+DO_ZZI(sve_umini_b, uint8_t, DO_MIN)
+DO_ZZI(sve_umini_h, uint16_t, DO_MIN)
+DO_ZZI(sve_umini_s, uint32_t, DO_MIN)
+DO_ZZI(sve_umini_d, uint64_t, DO_MIN)
+
+#undef DO_ZZI
+
#undef DO_AND
#undef DO_ORR
#undef DO_EOR
@@ -762,6 +858,7 @@ DO_VPZ_D(sve_uminv_d, uint64_t, uint64_t, -1, DO_MIN)
#undef DO_ASR
#undef DO_LSR
#undef DO_LSL
+#undef DO_SUBR
/* Similar to the ARM LastActiveElement pseudocode function, except the
result is multiplied by the element size. This includes the not found
@@ -1560,3 +1657,1156 @@ void HELPER(sve_ext)(void *vd, void *vn, void *vm, uint32_t desc)
memcpy(vd + n_siz, &tmp, n_ofs);
}
}
+/* Expands to a helper that copies all but the last TYPE-sized element of
+ * vn into vd starting one element up (via swap_memmove), then stores val,
+ * converted to TYPE, at offset H(0) of vd.
+ */
+#define DO_INSR(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, uint64_t val, uint32_t desc) \
+{ \
+ intptr_t opr_sz = simd_oprsz(desc); \
+ swap_memmove(vd + sizeof(TYPE), vn, opr_sz - sizeof(TYPE)); \
+ *(TYPE *)(vd + H(0)) = val; \
+}
+
+DO_INSR(sve_insr_b, uint8_t, H1)
+DO_INSR(sve_insr_h, uint16_t, H1_2)
+DO_INSR(sve_insr_s, uint32_t, H1_4)
+DO_INSR(sve_insr_d, uint64_t, )
+
+#undef DO_INSR
+/* Reverse the vector in 64-bit chunks: i walks up from the start and j down
+ * from the end; each chunk is byte-swapped and stored at the mirrored
+ * offset.  Both chunks are loaded before either store.
+ */
+void HELPER(sve_rev_b)(void *vd, void *vn, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
+ uint64_t f = *(uint64_t *)(vn + i);
+ uint64_t b = *(uint64_t *)(vn + j);
+ *(uint64_t *)(vd + i) = bswap64(b);
+ *(uint64_t *)(vd + j) = bswap64(f);
+ }
+}
+/* As sve_rev_b, but each mirrored 64-bit chunk is transformed with
+ * hswap64() instead of bswap64().
+ */
+void HELPER(sve_rev_h)(void *vd, void *vn, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
+ uint64_t f = *(uint64_t *)(vn + i);
+ uint64_t b = *(uint64_t *)(vn + j);
+ *(uint64_t *)(vd + i) = hswap64(b);
+ *(uint64_t *)(vd + j) = hswap64(f);
+ }
+}
+/* As sve_rev_b, but each mirrored 64-bit chunk has its 32-bit halves
+ * exchanged (rotate by 32).
+ */
+void HELPER(sve_rev_s)(void *vd, void *vn, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
+ uint64_t f = *(uint64_t *)(vn + i);
+ uint64_t b = *(uint64_t *)(vn + j);
+ *(uint64_t *)(vd + i) = rol64(b, 32);
+ *(uint64_t *)(vd + j) = rol64(f, 32);
+ }
+}
+
+/* Reverse the order of the 64-bit chunks of vn into vd.  Each mirrored
+ * pair is loaded in full before either chunk is stored.
+ */
+void HELPER(sve_rev_d)(void *vd, void *vn, uint32_t desc)
+{
+    intptr_t total = simd_oprsz(desc);
+    intptr_t lo = 0;
+    intptr_t hi = total - 8;
+
+    while (lo < total / 2) {
+        uint64_t front = *(uint64_t *)(vn + lo);
+        uint64_t back = *(uint64_t *)(vn + hi);
+
+        *(uint64_t *)(vd + lo) = back;
+        *(uint64_t *)(vd + hi) = front;
+        lo += 8;
+        hi -= 8;
+    }
+}
+
+/* Table lookup.  Expands to a helper that, for each TYPE element i of the
+ * simd_oprsz(desc)-byte vectors, reads the index j = m[i] and stores n[j]
+ * into d[i], or 0 when j is not below the element count.  When vd and vn
+ * are the same register the table is first copied to a temporary so that
+ * lookups never read already-written results.
+ */
+#define DO_TBL(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+    intptr_t i, opr_sz = simd_oprsz(desc); \
+    uintptr_t elem = opr_sz / sizeof(TYPE); \
+    TYPE *d = vd, *n = vn, *m = vm; \
+    ARMVectorReg tmp; \
+    if (unlikely(vd == vn)) { \
+        n = memcpy(&tmp, vn, opr_sz); \
+    } \
+    for (i = 0; i < elem; i++) { \
+        TYPE j = m[H(i)]; \
+        d[H(i)] = j < elem ? n[H(j)] : 0; \
+    } \
+}
+
+DO_TBL(sve_tbl_b, uint8_t, H1)
+DO_TBL(sve_tbl_h, uint16_t, H2)
+DO_TBL(sve_tbl_s, uint32_t, H4)
+DO_TBL(sve_tbl_d, uint64_t, )
+
+/* Undefine the expander itself; "TBL" was never defined. */
+#undef DO_TBL
+/* Unpack: expands to a helper that assigns source element i (TYPES) to
+ * destination element i (TYPED) for every destination element, so widening
+ * follows the signedness of the types.  The first opr_sz / 2 bytes of the
+ * source are copied to a temporary first when "vn - vd < opr_sz".
+ */
+#define DO_UNPK(NAME, TYPED, TYPES, HD, HS) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ TYPED *d = vd; \
+ TYPES *n = vn; \
+ ARMVectorReg tmp; \
+ if (unlikely(vn - vd < opr_sz)) { \
+ n = memcpy(&tmp, n, opr_sz / 2); \
+ } \
+ for (i = 0; i < opr_sz / sizeof(TYPED); i++) { \
+ d[HD(i)] = n[HS(i)]; \
+ } \
+}
+
+DO_UNPK(sve_sunpk_h, int16_t, int8_t, H2, H1)
+DO_UNPK(sve_sunpk_s, int32_t, int16_t, H4, H2)
+DO_UNPK(sve_sunpk_d, int64_t, int32_t, , H4)
+
+DO_UNPK(sve_uunpk_h, uint16_t, uint8_t, H2, H1)
+DO_UNPK(sve_uunpk_s, uint32_t, uint16_t, H4, H2)
+DO_UNPK(sve_uunpk_d, uint64_t, uint32_t, , H4)
+
+#undef DO_UNPK
+
+/* Mask of bits included in the even numbered predicates of width esz.
+ * We also use this for expand_bits/compress_bits, and so extend the
+ * same pattern out to 16-bit units.
+ */
+static const uint64_t even_bit_esz_masks[5] = {
+    0x5555555555555555ull,
+    0x3333333333333333ull,
+    0x0f0f0f0f0f0f0f0full,
+    0x00ff00ff00ff00ffull,
+    0x0000ffff0000ffffull,
+};
+
+/* Zero-extend units of 2**N bits to units of 2**(N+1) bits.
+ * Only the low 32 bits of X are used.  Starting with 16-bit units and
+ * working down to 2**N-bit units, each step spreads the units apart by
+ * their own width (Hacker's Delight, section 7-2 Shuffling Bits; for
+ * N==0 this is what qemu/bitops.h calls half_shuffle64).
+ */
+static uint64_t expand_bits(uint64_t x, int n)
+{
+    int level = 4;
+
+    x = (uint32_t)x;
+    while (level >= n) {
+        x |= x << (1 << level);
+        x &= even_bit_esz_masks[level];
+        level--;
+    }
+    return x;
+}
+
+/* Compress units of 2**(N+1) bits to units of 2**N bits.
+ * The inverse of expand_bits: starting with 2**N-bit units and working up
+ * to 16-bit units, each step keeps the even units and packs neighbouring
+ * pairs together; the result is the low 32 bits (Hacker's Delight, section
+ * 7-2, inverse half shuffle; for N==0 this is half_unshuffle64).
+ */
+static uint64_t compress_bits(uint64_t x, int n)
+{
+    int level;
+
+    for (level = n; level < 5; level++) {
+        x &= even_bit_esz_masks[level];
+        x |= x >> (1 << level);
+    }
+    return (uint32_t)x;
+}
+/* Predicate ZIP helper.  pred_desc supplies the predicate size in bytes
+ * (stored minus 2), esz, and a flag selecting the high half of the sources.
+ * Each source unit from vn and vm is widened with expand_bits(); the vm
+ * result is shifted up by 1 << esz bits and combined with the vn result.
+ */
+void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
+{
+ intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+ intptr_t high = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1);
+ uint64_t *d = vd;
+ intptr_t i;
+
+ if (oprsz <= 8) {
+ uint64_t nn = *(uint64_t *)vn;
+ uint64_t mm = *(uint64_t *)vm;
+ int half = 4 * oprsz;
+
+ nn = extract64(nn, high * half, half);
+ mm = extract64(mm, high * half, half);
+ nn = expand_bits(nn, esz);
+ mm = expand_bits(mm, esz);
+ d[0] = nn + (mm << (1 << esz));
+ } else {
+ ARMPredicateReg tmp_n, tmp_m;
+
+ /* We produce output faster than we consume input.
+ Therefore we must be mindful of possible overlap. */
+ if ((vn - vd) < (uintptr_t)oprsz) {
+ vn = memcpy(&tmp_n, vn, oprsz);
+ }
+ if ((vm - vd) < (uintptr_t)oprsz) {
+ vm = memcpy(&tmp_m, vm, oprsz);
+ }
+ if (high) {
+ high = oprsz >> 1;
+ }
+
+ if ((high & 3) == 0) {
+ uint32_t *n = vn, *m = vm;
+ high >>= 2;
+
+ for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) {
+ uint64_t nn = n[H4(high + i)];
+ uint64_t mm = m[H4(high + i)];
+
+ nn = expand_bits(nn, esz);
+ mm = expand_bits(mm, esz);
+ d[i] = nn + (mm << (1 << esz));
+ }
+ } else {
+ uint8_t *n = vn, *m = vm;
+ uint16_t *d16 = vd;
+
+ for (i = 0; i < oprsz / 2; i++) {
+ uint16_t nn = n[H1(high + i)];
+ uint16_t mm = m[H1(high + i)];
+
+ nn = expand_bits(nn, esz);
+ mm = expand_bits(mm, esz);
+ d16[H2(i)] = nn + (mm << (1 << esz));
+ }
+ }
+ }
+}
+/* Predicate UZP helper.  Every source word is shifted right by "odd" and
+ * run through compress_bits(); results derived from vn fill the low half
+ * of vd and those derived from vm the high half.  pred_desc supplies the
+ * predicate size in bytes (stored minus 2), esz, and the odd flag, which
+ * becomes a shift of 1 << esz bits when set.
+ */
+void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
+{
+ intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+ int odd = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1) << esz;
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint64_t l, h;
+ intptr_t i;
+
+ if (oprsz <= 8) {
+ l = compress_bits(n[0] >> odd, esz);
+ h = compress_bits(m[0] >> odd, esz);
+ d[0] = extract64(l + (h << (4 * oprsz)), 0, 8 * oprsz);
+ } else {
+ ARMPredicateReg tmp_m;
+ intptr_t oprsz_16 = oprsz / 16;
+
+ if ((vm - vd) < (uintptr_t)oprsz) {
+ m = memcpy(&tmp_m, vm, oprsz);
+ }
+
+ for (i = 0; i < oprsz_16; i++) {
+ l = n[2 * i + 0];
+ h = n[2 * i + 1];
+ l = compress_bits(l >> odd, esz);
+ h = compress_bits(h >> odd, esz);
+ d[i] = l + (h << 32);
+ }
+
+ /* For VL which is not a power of 2, the results from M do not
+ align nicely with the uint64_t for D. Put the aligned results
+ from M into TMP_M and then copy it into place afterward. */
+ if (oprsz & 15) {
+ d[i] = compress_bits(n[2 * i] >> odd, esz);
+
+ for (i = 0; i < oprsz_16; i++) {
+ l = m[2 * i + 0];
+ h = m[2 * i + 1];
+ l = compress_bits(l >> odd, esz);
+ h = compress_bits(h >> odd, esz);
+ tmp_m.p[i] = l + (h << 32);
+ }
+ tmp_m.p[i] = compress_bits(m[2 * i] >> odd, esz);
+
+ swap_memmove(vd + oprsz / 2, &tmp_m, oprsz / 2);
+ } else {
+ for (i = 0; i < oprsz_16; i++) {
+ l = m[2 * i + 0];
+ h = m[2 * i + 1];
+ l = compress_bits(l >> odd, esz);
+ h = compress_bits(h >> odd, esz);
+ d[oprsz_16 + i] = l + (h << 32);
+ }
+ }
+ }
+}
+/* Predicate TRN helper.  For each 64-bit word, select the units of vn and
+ * vm given by even_bit_esz_masks[esz] (moved up by 1 << esz bits when the
+ * odd flag is set), place the vn units in the even positions and the vm
+ * units in the odd positions, and store the sum in vd.
+ */
+void HELPER(sve_trn_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
+{
+ intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ uintptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+ bool odd = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint64_t mask;
+ int shr, shl;
+ intptr_t i;
+
+ shl = 1 << esz;
+ shr = 0;
+ mask = even_bit_esz_masks[esz];
+ if (odd) {
+ mask <<= shl;
+ shr = shl;
+ shl = 0;
+ }
+
+ for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) {
+ uint64_t nn = (n[i] & mask) >> shr;
+ uint64_t mm = (m[i] & mask) << shl;
+ d[i] = nn + mm;
+ }
+}
+
+/* Reverse units of 2**N bits.
+ * bswap64 reverses the byte order; the loop then swaps adjacent units of
+ * 4, 2 and 1 bits in turn, stopping once the unit size 2**N is reached
+ * (so for N >= 3 only the byte swap is performed).
+ */
+static uint64_t reverse_bits_64(uint64_t x, int n)
+{
+ int i, sh;
+
+ x = bswap64(x);
+ for (i = 2, sh = 4; i >= n; i--, sh >>= 1) {
+ uint64_t mask = even_bit_esz_masks[i];
+ x = ((x & mask) << sh) | ((x >> sh) & mask);
+ }
+ return x;
+}
+/* Single-byte counterpart of reverse_bits_64; returns x unchanged when n > 2. */
+static uint8_t reverse_bits_8(uint8_t x, int n)
+{
+ static const uint8_t mask[3] = { 0x55, 0x33, 0x0f };
+ int i, sh;
+
+ for (i = 2, sh = 4; i >= n; i--, sh >>= 1) {
+ x = ((x & mask[i]) << sh) | ((x >> sh) & mask[i]);
+ }
+ return x;
+}
+/* Predicate REV helper: reverse the 2**esz-bit units of the oprsz-byte
+ * predicate vn into vd.  Predicates of at most 8 bytes are handled as one
+ * word, sizes that are a multiple of 16 bytes as mirrored pairs of 64-bit
+ * words, and all other sizes as mirrored pairs of bytes.
+ */
+void HELPER(sve_rev_p)(void *vd, void *vn, uint32_t pred_desc)
+{
+ intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+ intptr_t i, oprsz_2 = oprsz / 2;
+
+ if (oprsz <= 8) {
+ uint64_t l = *(uint64_t *)vn;
+ l = reverse_bits_64(l << (64 - 8 * oprsz), esz);
+ *(uint64_t *)vd = l;
+ } else if ((oprsz & 15) == 0) {
+ for (i = 0; i < oprsz_2; i += 8) {
+ intptr_t ih = oprsz - 8 - i;
+ uint64_t l = reverse_bits_64(*(uint64_t *)(vn + i), esz);
+ uint64_t h = reverse_bits_64(*(uint64_t *)(vn + ih), esz);
+ *(uint64_t *)(vd + i) = h;
+ *(uint64_t *)(vd + ih) = l;
+ }
+ } else {
+ for (i = 0; i < oprsz_2; i += 1) {
+ intptr_t il = H1(i);
+ intptr_t ih = H1(oprsz - 1 - i);
+ uint8_t l = reverse_bits_8(*(uint8_t *)(vn + il), esz);
+ uint8_t h = reverse_bits_8(*(uint8_t *)(vn + ih), esz);
+ *(uint8_t *)(vd + il) = h;
+ *(uint8_t *)(vd + ih) = l;
+ }
+ }
+}
+/* Predicate unpack helper: take the low or high half of vn (selected by
+ * the flag in pred_desc) and widen every bit to two bits with
+ * expand_bits(x, 0), writing the result to vd.
+ */
+void HELPER(sve_punpk_p)(void *vd, void *vn, uint32_t pred_desc)
+{
+ intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ intptr_t high = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1);
+ uint64_t *d = vd;
+ intptr_t i;
+
+ if (oprsz <= 8) {
+ uint64_t nn = *(uint64_t *)vn;
+ int half = 4 * oprsz;
+
+ nn = extract64(nn, high * half, half);
+ nn = expand_bits(nn, 0);
+ d[0] = nn;
+ } else {
+ ARMPredicateReg tmp_n;
+
+ /* We produce output faster than we consume input.
+ Therefore we must be mindful of possible overlap. */
+ if ((vn - vd) < (uintptr_t)oprsz) {
+ vn = memcpy(&tmp_n, vn, oprsz);
+ }
+ if (high) {
+ high = oprsz >> 1;
+ }
+
+ if ((high & 3) == 0) {
+ uint32_t *n = vn;
+ high >>= 2;
+
+ for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) {
+ uint64_t nn = n[H4(high + i)];
+ d[i] = expand_bits(nn, 0);
+ }
+ } else {
+ uint16_t *d16 = vd;
+ uint8_t *n = vn;
+
+ for (i = 0; i < oprsz / 2; i++) {
+ uint16_t nn = n[H1(high + i)];
+ d16[H2(i)] = expand_bits(nn, 0);
+ }
+ }
+ }
+}
+/* Vector ZIP: interleave the first oprsz / 2 bytes of vn and vm, one TYPE
+ * element at a time, with the vn element placed first in each pair.
+ */
+#define DO_ZIP(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t oprsz = simd_oprsz(desc); \
+ intptr_t i, oprsz_2 = oprsz / 2; \
+ ARMVectorReg tmp_n, tmp_m; \
+ /* We produce output faster than we consume input. \
+ Therefore we must be mindful of possible overlap. */ \
+ if (unlikely((vn - vd) < (uintptr_t)oprsz)) { \
+ vn = memcpy(&tmp_n, vn, oprsz_2); \
+ } \
+ if (unlikely((vm - vd) < (uintptr_t)oprsz)) { \
+ vm = memcpy(&tmp_m, vm, oprsz_2); \
+ } \
+ for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \
+ *(TYPE *)(vd + H(2 * i + 0)) = *(TYPE *)(vn + H(i)); \
+ *(TYPE *)(vd + H(2 * i + sizeof(TYPE))) = *(TYPE *)(vm + H(i)); \
+ } \
+}
+
+DO_ZIP(sve_zip_b, uint8_t, H1)
+DO_ZIP(sve_zip_h, uint16_t, H1_2)
+DO_ZIP(sve_zip_s, uint32_t, H1_4)
+DO_ZIP(sve_zip_d, uint64_t, )
+/* Vector UZP: gather every second TYPE element, starting at byte offset
+ * simd_data(desc), from vn into the low half of vd and from vm into the
+ * high half.  vm is copied to a temporary when it lies within oprsz bytes
+ * above vd, since the first loop would otherwise overwrite it.
+ */
+#define DO_UZP(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t oprsz = simd_oprsz(desc); \
+ intptr_t oprsz_2 = oprsz / 2; \
+ intptr_t odd_ofs = simd_data(desc); \
+ intptr_t i; \
+ ARMVectorReg tmp_m; \
+ if (unlikely((vm - vd) < (uintptr_t)oprsz)) { \
+ vm = memcpy(&tmp_m, vm, oprsz); \
+ } \
+ for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \
+ *(TYPE *)(vd + H(i)) = *(TYPE *)(vn + H(2 * i + odd_ofs)); \
+ } \
+ for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \
+ *(TYPE *)(vd + H(oprsz_2 + i)) = *(TYPE *)(vm + H(2 * i + odd_ofs)); \
+ } \
+}
+
+DO_UZP(sve_uzp_b, uint8_t, H1)
+DO_UZP(sve_uzp_h, uint16_t, H1_2)
+DO_UZP(sve_uzp_s, uint32_t, H1_4)
+DO_UZP(sve_uzp_d, uint64_t, )
+/* Vector TRN: for each pair of TYPE elements, read the element at byte
+ * offset simd_data(desc) within the pair from vn and from vm (both before
+ * storing), then write the vn value first and the vm value second.
+ */
+#define DO_TRN(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t oprsz = simd_oprsz(desc); \
+ intptr_t odd_ofs = simd_data(desc); \
+ intptr_t i; \
+ for (i = 0; i < oprsz; i += 2 * sizeof(TYPE)) { \
+ TYPE ae = *(TYPE *)(vn + H(i + odd_ofs)); \
+ TYPE be = *(TYPE *)(vm + H(i + odd_ofs)); \
+ *(TYPE *)(vd + H(i + 0)) = ae; \
+ *(TYPE *)(vd + H(i + sizeof(TYPE))) = be; \
+ } \
+}
+
+DO_TRN(sve_trn_b, uint8_t, H1)
+DO_TRN(sve_trn_h, uint16_t, H1_2)
+DO_TRN(sve_trn_s, uint32_t, H1_4)
+DO_TRN(sve_trn_d, uint64_t, )
+
+#undef DO_ZIP
+#undef DO_UZP
+#undef DO_TRN
+/* Pack the 32-bit elements of vn whose predicate bit is set into the
+ * lowest elements of vd, then zero the rest.  Predicate byte i / 2 holds
+ * the bit for element i: bit 0 for even i, bit 4 for odd i.
+ */
+void HELPER(sve_compact_s)(void *vd, void *vn, void *vg, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc) / 4;
+ uint32_t *d = vd, *n = vn;
+ uint8_t *pg = vg;
+
+ for (i = j = 0; i < opr_sz; i++) {
+ if (pg[H1(i / 2)] & (i & 1 ? 0x10 : 0x01)) {
+ d[H4(j)] = n[H4(i)];
+ j++;
+ }
+ }
+ for (; j < opr_sz; j++) {
+ d[H4(j)] = 0;
+ }
+}
+
+/* Pack the 64-bit elements of vn whose predicate byte has bit 0 set into
+ * the lowest elements of vd, then zero the remaining elements.
+ */
+void HELPER(sve_compact_d)(void *vd, void *vn, void *vg, uint32_t desc)
+{
+    intptr_t count = simd_oprsz(desc) / 8;
+    uint64_t *dst = vd, *src = vn;
+    uint8_t *pred = vg;
+    intptr_t in, out = 0;
+
+    for (in = 0; in < count; in++) {
+        if ((pred[H1(in)] & 1) != 0) {
+            dst[out++] = src[in];
+        }
+    }
+    while (out < count) {
+        dst[out++] = 0;
+    }
+}
+
+/* Similar to the ARM LastActiveElement pseudocode function, except the
+ * result is multiplied by the element size. This includes the not found
+ * indication; e.g. not found for esz=3 is -8.
+ *
+ * This helper unpacks the predicate size and esz from pred_desc and
+ * forwards to last_active_element() with the size in 64-bit words.
+ */
+int32_t HELPER(sve_last_active_element)(void *vg, uint32_t pred_desc)
+{
+ intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+
+ return last_active_element(vg, DIV_ROUND_UP(oprsz, 8), esz);
+}
+/* Splice: copy the bytes of vn spanning the first through the last active
+ * element (per vg masked with pred_esz_masks[esz]) to the start of vd, then
+ * fill the rest of vd from the start of vm.  With no active element the
+ * whole of vd comes from vm.
+ */
+void HELPER(sve_splice)(void *vd, void *vn, void *vm, void *vg, uint32_t desc)
+{
+ intptr_t opr_sz = simd_oprsz(desc) / 8;
+ int esz = simd_data(desc);
+ uint64_t pg, first_g, last_g, len, mask = pred_esz_masks[esz];
+ intptr_t i, first_i, last_i;
+ ARMVectorReg tmp;
+
+ first_i = last_i = 0;
+ first_g = last_g = 0;
+
+ /* Find the extent of the active elements within VG. */
+ for (i = QEMU_ALIGN_UP(opr_sz, 8) - 8; i >= 0; i -= 8) {
+ pg = *(uint64_t *)(vg + i) & mask;
+ if (pg) {
+ if (last_g == 0) {
+ last_g = pg;
+ last_i = i;
+ }
+ first_g = pg;
+ first_i = i;
+ }
+ }
+
+ len = 0;
+ if (first_g != 0) {
+ first_i = first_i * 8 + ctz64(first_g);
+ last_i = last_i * 8 + 63 - clz64(last_g);
+ len = last_i - first_i + (1 << esz);
+ if (vd == vm) {
+ vm = memcpy(&tmp, vm, opr_sz * 8);
+ }
+ swap_memmove(vd, vn + first_i, len);
+ }
+ swap_memmove(vd + len, vm, opr_sz * 8 - len);
+}
+/* Select per 64-bit word: bits set in expand_pred_b(predicate byte) come from vn, the rest from vm. */
+void HELPER(sve_sel_zpzz_b)(void *vd, void *vn, void *vm,
+ void *vg, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint8_t *pg = vg;
+
+ for (i = 0; i < opr_sz; i += 1) {
+ uint64_t nn = n[i], mm = m[i];
+ uint64_t pp = expand_pred_b(pg[H1(i)]);
+ d[i] = (nn & pp) | (mm & ~pp);
+ }
+}
+/* As sve_sel_zpzz_b, with the bit mask built by expand_pred_h(). */
+void HELPER(sve_sel_zpzz_h)(void *vd, void *vn, void *vm,
+ void *vg, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint8_t *pg = vg;
+
+ for (i = 0; i < opr_sz; i += 1) {
+ uint64_t nn = n[i], mm = m[i];
+ uint64_t pp = expand_pred_h(pg[H1(i)]);
+ d[i] = (nn & pp) | (mm & ~pp);
+ }
+}
+/* As sve_sel_zpzz_b, with the bit mask built by expand_pred_s(). */
+void HELPER(sve_sel_zpzz_s)(void *vd, void *vn, void *vm,
+ void *vg, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint8_t *pg = vg;
+
+ for (i = 0; i < opr_sz; i += 1) {
+ uint64_t nn = n[i], mm = m[i];
+ uint64_t pp = expand_pred_s(pg[H1(i)]);
+ d[i] = (nn & pp) | (mm & ~pp);
+ }
+}
+
+/* Select whole 64-bit elements: take vn where bit 0 of the predicate byte
+ * is set, otherwise vm.  Both sources are read before the store.
+ */
+void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm,
+                            void *vg, uint32_t desc)
+{
+    intptr_t words = simd_oprsz(desc) / 8;
+    uint64_t *dst = vd, *src_n = vn, *src_m = vm;
+    uint8_t *pred = vg;
+    intptr_t idx;
+
+    for (idx = 0; idx < words; idx++) {
+        uint64_t from_n = src_n[idx];
+        uint64_t from_m = src_m[idx];
+
+        if (pred[H1(idx)] & 1) {
+            dst[idx] = from_n;
+        } else {
+            dst[idx] = from_m;
+        }
+    }
+}
+
+/* Two operand comparison controlled by a predicate.
+ *
+ * The expansion walks the vectors from the last element to the first.
+ * For every 64 bytes of vector it shifts one result bit per element into
+ * OUT, masks OUT with the governing predicate word (itself masked with
+ * MASK), stores it to vd and folds it into the flags with
+ * iter_predtest_bwd().  The flags are the return value.
+ *
+ * ??? It is very tempting to want to be able to expand this inline
+ * with x86 instructions, e.g.
+ *
+ * vcmpeqw zm, zn, %ymm0
+ * vpmovmskb %ymm0, %eax
+ * and $0x5555, %eax
+ * and pg, %eax
+ *
+ * or even aarch64, e.g.
+ *
+ * // mask = 4000 1000 0400 0100 0040 0010 0004 0001
+ * cmeq v0.8h, zn, zm
+ * and v0.8h, v0.8h, mask
+ * addv h0, v0.8h
+ * and v0.8b, pg
+ *
+ * However, coming up with an abstraction that allows vector inputs and
+ * a scalar output, and also handles the byte-ordering of sub-uint64_t
+ * scalar outputs, is tricky.
+ */
+#define DO_CMP_PPZZ(NAME, TYPE, OP, H, MASK) \
+uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ intptr_t opr_sz = simd_oprsz(desc); \
+ uint32_t flags = PREDTEST_INIT; \
+ intptr_t i = opr_sz; \
+ do { \
+ uint64_t out = 0, pg; \
+ do { \
+ i -= sizeof(TYPE), out <<= sizeof(TYPE); \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ TYPE mm = *(TYPE *)(vm + H(i)); \
+ out |= nn OP mm; \
+ } while (i & 63); \
+ pg = *(uint64_t *)(vg + (i >> 3)) & MASK; \
+ out &= pg; \
+ *(uint64_t *)(vd + (i >> 3)) = out; \
+ flags = iter_predtest_bwd(out, pg, flags); \
+ } while (i > 0); \
+ return flags; \
+}
+
+#define DO_CMP_PPZZ_B(NAME, TYPE, OP) \
+ DO_CMP_PPZZ(NAME, TYPE, OP, H1, 0xffffffffffffffffull)
+#define DO_CMP_PPZZ_H(NAME, TYPE, OP) \
+ DO_CMP_PPZZ(NAME, TYPE, OP, H1_2, 0x5555555555555555ull)
+#define DO_CMP_PPZZ_S(NAME, TYPE, OP) \
+ DO_CMP_PPZZ(NAME, TYPE, OP, H1_4, 0x1111111111111111ull)
+#define DO_CMP_PPZZ_D(NAME, TYPE, OP) \
+ DO_CMP_PPZZ(NAME, TYPE, OP, , 0x0101010101010101ull)
+
+DO_CMP_PPZZ_B(sve_cmpeq_ppzz_b, uint8_t, ==)
+DO_CMP_PPZZ_H(sve_cmpeq_ppzz_h, uint16_t, ==)
+DO_CMP_PPZZ_S(sve_cmpeq_ppzz_s, uint32_t, ==)
+DO_CMP_PPZZ_D(sve_cmpeq_ppzz_d, uint64_t, ==)
+
+DO_CMP_PPZZ_B(sve_cmpne_ppzz_b, uint8_t, !=)
+DO_CMP_PPZZ_H(sve_cmpne_ppzz_h, uint16_t, !=)
+DO_CMP_PPZZ_S(sve_cmpne_ppzz_s, uint32_t, !=)
+DO_CMP_PPZZ_D(sve_cmpne_ppzz_d, uint64_t, !=)
+
+DO_CMP_PPZZ_B(sve_cmpgt_ppzz_b, int8_t, >)
+DO_CMP_PPZZ_H(sve_cmpgt_ppzz_h, int16_t, >)
+DO_CMP_PPZZ_S(sve_cmpgt_ppzz_s, int32_t, >)
+DO_CMP_PPZZ_D(sve_cmpgt_ppzz_d, int64_t, >)
+
+DO_CMP_PPZZ_B(sve_cmpge_ppzz_b, int8_t, >=)
+DO_CMP_PPZZ_H(sve_cmpge_ppzz_h, int16_t, >=)
+DO_CMP_PPZZ_S(sve_cmpge_ppzz_s, int32_t, >=)
+DO_CMP_PPZZ_D(sve_cmpge_ppzz_d, int64_t, >=)
+
+DO_CMP_PPZZ_B(sve_cmphi_ppzz_b, uint8_t, >)
+DO_CMP_PPZZ_H(sve_cmphi_ppzz_h, uint16_t, >)
+DO_CMP_PPZZ_S(sve_cmphi_ppzz_s, uint32_t, >)
+DO_CMP_PPZZ_D(sve_cmphi_ppzz_d, uint64_t, >)
+
+DO_CMP_PPZZ_B(sve_cmphs_ppzz_b, uint8_t, >=)
+DO_CMP_PPZZ_H(sve_cmphs_ppzz_h, uint16_t, >=)
+DO_CMP_PPZZ_S(sve_cmphs_ppzz_s, uint32_t, >=)
+DO_CMP_PPZZ_D(sve_cmphs_ppzz_d, uint64_t, >=)
+
+#undef DO_CMP_PPZZ_B
+#undef DO_CMP_PPZZ_H
+#undef DO_CMP_PPZZ_S
+#undef DO_CMP_PPZZ_D
+#undef DO_CMP_PPZZ
+
+/* Similar, but the second source is "wide": one TYPEW value is loaded from
+ * vm for each 8 bytes of vn and compared against every TYPE element within
+ * those 8 bytes.
+ */
+#define DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H, MASK) \
+uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ intptr_t opr_sz = simd_oprsz(desc); \
+ uint32_t flags = PREDTEST_INIT; \
+ intptr_t i = opr_sz; \
+ do { \
+ uint64_t out = 0, pg; \
+ do { \
+ TYPEW mm = *(TYPEW *)(vm + i - 8); \
+ do { \
+ i -= sizeof(TYPE), out <<= sizeof(TYPE); \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ out |= nn OP mm; \
+ } while (i & 7); \
+ } while (i & 63); \
+ pg = *(uint64_t *)(vg + (i >> 3)) & MASK; \
+ out &= pg; \
+ *(uint64_t *)(vd + (i >> 3)) = out; \
+ flags = iter_predtest_bwd(out, pg, flags); \
+ } while (i > 0); \
+ return flags; \
+}
+
+#define DO_CMP_PPZW_B(NAME, TYPE, TYPEW, OP) \
+ DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1, 0xffffffffffffffffull)
+#define DO_CMP_PPZW_H(NAME, TYPE, TYPEW, OP) \
+ DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_2, 0x5555555555555555ull)
+#define DO_CMP_PPZW_S(NAME, TYPE, TYPEW, OP) \
+ DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_4, 0x1111111111111111ull)
+
+DO_CMP_PPZW_B(sve_cmpeq_ppzw_b, uint8_t, uint64_t, ==)
+DO_CMP_PPZW_H(sve_cmpeq_ppzw_h, uint16_t, uint64_t, ==)
+DO_CMP_PPZW_S(sve_cmpeq_ppzw_s, uint32_t, uint64_t, ==)
+
+DO_CMP_PPZW_B(sve_cmpne_ppzw_b, uint8_t, uint64_t, !=)
+DO_CMP_PPZW_H(sve_cmpne_ppzw_h, uint16_t, uint64_t, !=)
+DO_CMP_PPZW_S(sve_cmpne_ppzw_s, uint32_t, uint64_t, !=)
+
+DO_CMP_PPZW_B(sve_cmpgt_ppzw_b, int8_t, int64_t, >)
+DO_CMP_PPZW_H(sve_cmpgt_ppzw_h, int16_t, int64_t, >)
+DO_CMP_PPZW_S(sve_cmpgt_ppzw_s, int32_t, int64_t, >)
+
+DO_CMP_PPZW_B(sve_cmpge_ppzw_b, int8_t, int64_t, >=)
+DO_CMP_PPZW_H(sve_cmpge_ppzw_h, int16_t, int64_t, >=)
+DO_CMP_PPZW_S(sve_cmpge_ppzw_s, int32_t, int64_t, >=)
+
+DO_CMP_PPZW_B(sve_cmphi_ppzw_b, uint8_t, uint64_t, >)
+DO_CMP_PPZW_H(sve_cmphi_ppzw_h, uint16_t, uint64_t, >)
+DO_CMP_PPZW_S(sve_cmphi_ppzw_s, uint32_t, uint64_t, >)
+
+DO_CMP_PPZW_B(sve_cmphs_ppzw_b, uint8_t, uint64_t, >=)
+DO_CMP_PPZW_H(sve_cmphs_ppzw_h, uint16_t, uint64_t, >=)
+DO_CMP_PPZW_S(sve_cmphs_ppzw_s, uint32_t, uint64_t, >=)
+
+DO_CMP_PPZW_B(sve_cmplt_ppzw_b, int8_t, int64_t, <)
+DO_CMP_PPZW_H(sve_cmplt_ppzw_h, int16_t, int64_t, <)
+DO_CMP_PPZW_S(sve_cmplt_ppzw_s, int32_t, int64_t, <)
+
+DO_CMP_PPZW_B(sve_cmple_ppzw_b, int8_t, int64_t, <=)
+DO_CMP_PPZW_H(sve_cmple_ppzw_h, int16_t, int64_t, <=)
+DO_CMP_PPZW_S(sve_cmple_ppzw_s, int32_t, int64_t, <=)
+
+DO_CMP_PPZW_B(sve_cmplo_ppzw_b, uint8_t, uint64_t, <)
+DO_CMP_PPZW_H(sve_cmplo_ppzw_h, uint16_t, uint64_t, <)
+DO_CMP_PPZW_S(sve_cmplo_ppzw_s, uint32_t, uint64_t, <)
+
+DO_CMP_PPZW_B(sve_cmpls_ppzw_b, uint8_t, uint64_t, <=)
+DO_CMP_PPZW_H(sve_cmpls_ppzw_h, uint16_t, uint64_t, <=)
+DO_CMP_PPZW_S(sve_cmpls_ppzw_s, uint32_t, uint64_t, <=)
+
+#undef DO_CMP_PPZW_B
+#undef DO_CMP_PPZW_H
+#undef DO_CMP_PPZW_S
+#undef DO_CMP_PPZW
+
+/* Similar, but the second source is immediate: it is taken from
+ * simd_data(desc), converted to TYPE once, and compared against every
+ * element of vn.
+ */
+#define DO_CMP_PPZI(NAME, TYPE, OP, H, MASK) \
+uint32_t HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \
+{ \
+ intptr_t opr_sz = simd_oprsz(desc); \
+ uint32_t flags = PREDTEST_INIT; \
+ TYPE mm = simd_data(desc); \
+ intptr_t i = opr_sz; \
+ do { \
+ uint64_t out = 0, pg; \
+ do { \
+ i -= sizeof(TYPE), out <<= sizeof(TYPE); \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ out |= nn OP mm; \
+ } while (i & 63); \
+ pg = *(uint64_t *)(vg + (i >> 3)) & MASK; \
+ out &= pg; \
+ *(uint64_t *)(vd + (i >> 3)) = out; \
+ flags = iter_predtest_bwd(out, pg, flags); \
+ } while (i > 0); \
+ return flags; \
+}
+
+#define DO_CMP_PPZI_B(NAME, TYPE, OP) \
+ DO_CMP_PPZI(NAME, TYPE, OP, H1, 0xffffffffffffffffull)
+#define DO_CMP_PPZI_H(NAME, TYPE, OP) \
+ DO_CMP_PPZI(NAME, TYPE, OP, H1_2, 0x5555555555555555ull)
+#define DO_CMP_PPZI_S(NAME, TYPE, OP) \
+ DO_CMP_PPZI(NAME, TYPE, OP, H1_4, 0x1111111111111111ull)
+#define DO_CMP_PPZI_D(NAME, TYPE, OP) \
+ DO_CMP_PPZI(NAME, TYPE, OP, , 0x0101010101010101ull)
+
+DO_CMP_PPZI_B(sve_cmpeq_ppzi_b, uint8_t, ==)
+DO_CMP_PPZI_H(sve_cmpeq_ppzi_h, uint16_t, ==)
+DO_CMP_PPZI_S(sve_cmpeq_ppzi_s, uint32_t, ==)
+DO_CMP_PPZI_D(sve_cmpeq_ppzi_d, uint64_t, ==)
+
+DO_CMP_PPZI_B(sve_cmpne_ppzi_b, uint8_t, !=)
+DO_CMP_PPZI_H(sve_cmpne_ppzi_h, uint16_t, !=)
+DO_CMP_PPZI_S(sve_cmpne_ppzi_s, uint32_t, !=)
+DO_CMP_PPZI_D(sve_cmpne_ppzi_d, uint64_t, !=)
+
+DO_CMP_PPZI_B(sve_cmpgt_ppzi_b, int8_t, >)
+DO_CMP_PPZI_H(sve_cmpgt_ppzi_h, int16_t, >)
+DO_CMP_PPZI_S(sve_cmpgt_ppzi_s, int32_t, >)
+DO_CMP_PPZI_D(sve_cmpgt_ppzi_d, int64_t, >)
+
+DO_CMP_PPZI_B(sve_cmpge_ppzi_b, int8_t, >=)
+DO_CMP_PPZI_H(sve_cmpge_ppzi_h, int16_t, >=)
+DO_CMP_PPZI_S(sve_cmpge_ppzi_s, int32_t, >=)
+DO_CMP_PPZI_D(sve_cmpge_ppzi_d, int64_t, >=)
+
+DO_CMP_PPZI_B(sve_cmphi_ppzi_b, uint8_t, >)
+DO_CMP_PPZI_H(sve_cmphi_ppzi_h, uint16_t, >)
+DO_CMP_PPZI_S(sve_cmphi_ppzi_s, uint32_t, >)
+DO_CMP_PPZI_D(sve_cmphi_ppzi_d, uint64_t, >)
+
+DO_CMP_PPZI_B(sve_cmphs_ppzi_b, uint8_t, >=)
+DO_CMP_PPZI_H(sve_cmphs_ppzi_h, uint16_t, >=)
+DO_CMP_PPZI_S(sve_cmphs_ppzi_s, uint32_t, >=)
+DO_CMP_PPZI_D(sve_cmphs_ppzi_d, uint64_t, >=)
+
+DO_CMP_PPZI_B(sve_cmplt_ppzi_b, int8_t, <)
+DO_CMP_PPZI_H(sve_cmplt_ppzi_h, int16_t, <)
+DO_CMP_PPZI_S(sve_cmplt_ppzi_s, int32_t, <)
+DO_CMP_PPZI_D(sve_cmplt_ppzi_d, int64_t, <)
+
+DO_CMP_PPZI_B(sve_cmple_ppzi_b, int8_t, <=)
+DO_CMP_PPZI_H(sve_cmple_ppzi_h, int16_t, <=)
+DO_CMP_PPZI_S(sve_cmple_ppzi_s, int32_t, <=)
+DO_CMP_PPZI_D(sve_cmple_ppzi_d, int64_t, <=)
+
+DO_CMP_PPZI_B(sve_cmplo_ppzi_b, uint8_t, <)
+DO_CMP_PPZI_H(sve_cmplo_ppzi_h, uint16_t, <)
+DO_CMP_PPZI_S(sve_cmplo_ppzi_s, uint32_t, <)
+DO_CMP_PPZI_D(sve_cmplo_ppzi_d, uint64_t, <)
+
+DO_CMP_PPZI_B(sve_cmpls_ppzi_b, uint8_t, <=)
+DO_CMP_PPZI_H(sve_cmpls_ppzi_h, uint16_t, <=)
+DO_CMP_PPZI_S(sve_cmpls_ppzi_s, uint32_t, <=)
+DO_CMP_PPZI_D(sve_cmpls_ppzi_d, uint64_t, <=)
+
+#undef DO_CMP_PPZI_B
+#undef DO_CMP_PPZI_H
+#undef DO_CMP_PPZI_S
+#undef DO_CMP_PPZI_D
+#undef DO_CMP_PPZI
+
+/* Similar to the ARM LastActive pseudocode function.
+ * Scan vg from its highest 64-bit word downward; at the first non-zero
+ * word, report whether vd has the bit that pow2floor() picks from it.
+ * Returns false when vg is entirely zero.
+ */
+static bool last_active_pred(void *vd, void *vg, intptr_t oprsz)
+{
+    intptr_t ofs = QEMU_ALIGN_UP(oprsz, 8);
+
+    while (ofs > 0) {
+        uint64_t guard;
+
+        ofs -= 8;
+        guard = *(uint64_t *)(vg + ofs);
+        if (guard != 0) {
+            uint64_t top = pow2floor(guard);
+
+            return (top & *(uint64_t *)(vd + ofs)) != 0;
+        }
+    }
+    return false;
+}
+
+/* Compute a mask into RETB that is true for all G, up to and including
+ * (if after) or excluding (if !after) the first G & N.
+ * BRK says a break was already found in an earlier word, in which case
+ * the mask is zero.  Return true if BRK found.
+ */
+static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g,
+                        bool brk, bool after)
+{
+    uint64_t hits = g & n;      /* guard true, pred true */
+    uint64_t lowest;
+
+    if (brk) {
+        /* Already past the break: nothing further is active. */
+        *retb = 0;
+        return true;
+    }
+    if (hits == 0) {
+        /* For all G, no N are set; break not found. */
+        *retb = g;
+        return false;
+    }
+
+    /* Break somewhere in N: isolate the first such bit. */
+    lowest = hits & -hits;
+    *retb = after ? (lowest | (lowest - 1)) : (lowest - 1);
+    return true;
+}
+
+/* Compute a zeroing BRK.
+ * Walks the DIV_ROUND_UP(oprsz, 8) predicate words in order, carrying the
+ * break state from word to word, and stores the mask from compute_brk()
+ * ANDed with the governing predicate.
+ */
+static void compute_brk_z(uint64_t *d, uint64_t *n, uint64_t *g,
+ intptr_t oprsz, bool after)
+{
+ bool brk = false;
+ intptr_t i;
+
+ for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
+ uint64_t this_b, this_g = g[i];
+
+ brk = compute_brk(&this_b, n[i], this_g, brk, after);
+ d[i] = this_b & this_g;
+ }
+}
+
+/* Likewise, but also compute flags.
+ * Each stored word is folded into the flags with iter_predtest_fwd(),
+ * starting from PREDTEST_INIT; the final flags are returned.
+ */
+static uint32_t compute_brks_z(uint64_t *d, uint64_t *n, uint64_t *g,
+ intptr_t oprsz, bool after)
+{
+ uint32_t flags = PREDTEST_INIT;
+ bool brk = false;
+ intptr_t i;
+
+ for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
+ uint64_t this_b, this_d, this_g = g[i];
+
+ brk = compute_brk(&this_b, n[i], this_g, brk, after);
+ d[i] = this_d = this_b & this_g;
+ flags = iter_predtest_fwd(this_d, this_g, flags);
+ }
+ return flags;
+}
+
+/* Compute a merging BRK.
+ * As compute_brk_z, except that bits of d outside the governing predicate
+ * keep their previous value instead of being cleared.
+ */
+static void compute_brk_m(uint64_t *d, uint64_t *n, uint64_t *g,
+ intptr_t oprsz, bool after)
+{
+ bool brk = false;
+ intptr_t i;
+
+ for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
+ uint64_t this_b, this_g = g[i];
+
+ brk = compute_brk(&this_b, n[i], this_g, brk, after);
+ d[i] = (this_b & this_g) | (d[i] & ~this_g);
+ }
+}
+
+/* Likewise, but also compute flags.
+ *
+ * Merging BRK over the DIV_ROUND_UP(oprsz, 8) predicate words: bits of d
+ * outside the governing predicate keep their previous value, and each
+ * resulting word is folded into the flags with iter_predtest_fwd(),
+ * starting from PREDTEST_INIT.  Returns the final flags.
+ *
+ * The word count is rounded up, as in the other compute_brk* routines, so
+ * that a predicate whose size is not a multiple of 8 bytes still has its
+ * trailing partial word processed.
+ */
+static uint32_t compute_brks_m(uint64_t *d, uint64_t *n, uint64_t *g,
+                               intptr_t oprsz, bool after)
+{
+    uint32_t flags = PREDTEST_INIT;
+    bool brk = false;
+    intptr_t i;
+
+    for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
+        uint64_t this_b, this_d = d[i], this_g = g[i];
+
+        brk = compute_brk(&this_b, n[i], this_g, brk, after);
+        d[i] = this_d = (this_b & this_g) | (this_d & ~this_g);
+        flags = iter_predtest_fwd(this_d, this_g, flags);
+    }
+    return flags;
+}
+/* Clear the whole predicate register *d and return PREDTEST_INIT; oprsz is not used. */
+static uint32_t do_zero(ARMPredicateReg *d, intptr_t oprsz)
+{
+ /* It is quicker to zero the whole predicate than loop on OPRSZ.
+ * The compiler should turn this into 4 64-bit integer stores.
+ */
+ memset(d, 0, sizeof(ARMPredicateReg));
+ return PREDTEST_INIT;
+}
+/* If last_active_pred(vn, vg) holds, zeroing break-after over vm; else zero vd. */
+void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg,
+ uint32_t pred_desc)
+{
+ intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ if (last_active_pred(vn, vg, oprsz)) {
+ compute_brk_z(vd, vm, vg, oprsz, true);
+ } else {
+ do_zero(vd, oprsz);
+ }
+}
+/* As sve_brkpa, but also returns the flags from compute_brks_z() or do_zero(). */
+uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg,
+ uint32_t pred_desc)
+{
+ intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ if (last_active_pred(vn, vg, oprsz)) {
+ return compute_brks_z(vd, vm, vg, oprsz, true);
+ } else {
+ return do_zero(vd, oprsz);
+ }
+}
+
+void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg,
+ uint32_t pred_desc)
+{
+ intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ if (last_active_pred(vn, vg, oprsz)) {
+ compute_brk_z(vd, vm, vg, oprsz, false);
+ } else {
+ do_zero(vd, oprsz);
+ }
+}
+
+uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg,
+ uint32_t pred_desc)
+{
+ intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ if (last_active_pred(vn, vg, oprsz)) {
+ return compute_brks_z(vd, vm, vg, oprsz, false);
+ } else {
+ return do_zero(vd, oprsz);
+ }
+}
+
+/*
+ * BRKA/BRKB, zeroing (_z) and merging (_m), with and without flags.
+ * Each helper decodes the predicate size (stored minus 2 in the
+ * descriptor) and forwards to the matching compute_brk* routine;
+ * the final argument is true for "break after" and false for
+ * "break before".
+ */
+void HELPER(sve_brka_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    const intptr_t nbytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+
+    compute_brk_z(vd, vn, vg, nbytes, true);
+}
+
+uint32_t HELPER(sve_brkas_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    const intptr_t nbytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+
+    return compute_brks_z(vd, vn, vg, nbytes, true);
+}
+
+void HELPER(sve_brkb_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    const intptr_t nbytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+
+    compute_brk_z(vd, vn, vg, nbytes, false);
+}
+
+uint32_t HELPER(sve_brkbs_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    const intptr_t nbytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+
+    return compute_brks_z(vd, vn, vg, nbytes, false);
+}
+
+void HELPER(sve_brka_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    const intptr_t nbytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+
+    compute_brk_m(vd, vn, vg, nbytes, true);
+}
+
+uint32_t HELPER(sve_brkas_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    const intptr_t nbytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+
+    return compute_brks_m(vd, vn, vg, nbytes, true);
+}
+
+void HELPER(sve_brkb_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    const intptr_t nbytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+
+    compute_brk_m(vd, vn, vg, nbytes, false);
+}
+
+uint32_t HELPER(sve_brkbs_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    const intptr_t nbytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+
+    return compute_brks_m(vd, vn, vg, nbytes, false);
+}
+
+void HELPER(sve_brkn)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+
+    /* When the last-active test holds, VD is left exactly as it was. */
+    if (last_active_pred(vn, vg, oprsz)) {
+        return;
+    }
+    do_zero(vd, oprsz);
+}
+
+/* Equivalent to PredTest(Ones(PL), D, esz). */
+static uint32_t predtest_ones(ARMPredicateReg *d, intptr_t oprsz,
+                              uint64_t esz_mask)
+{
+    const intptr_t full_words = oprsz / 8;
+    const intptr_t tail_bytes = oprsz & 7;
+    uint32_t flags = PREDTEST_INIT;
+    intptr_t w = 0;
+
+    /* Complete 64-bit words are tested under the element mask alone. */
+    while (w < full_words) {
+        flags = iter_predtest_fwd(d->p[w], esz_mask, flags);
+        w++;
+    }
+    if (tail_bytes != 0) {
+        /* Only the low TAIL_BYTES bytes of the final word take part. */
+        uint64_t valid = ~(-1ULL << (8 * tail_bytes));
+        flags = iter_predtest_fwd(d->p[w], esz_mask & valid, flags);
+    }
+    return flags;
+}
+
+uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+
+    if (!last_active_pred(vn, vg, oprsz)) {
+        return do_zero(vd, oprsz);
+    }
+    /* VD is unchanged; test it with every bit treated as governing. */
+    return predtest_ones(vd, oprsz, -1);
+}
+
+uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+    uint64_t *pn = vn;
+    uint64_t *pg = vg;
+    /* Keeps one predicate bit per element of the requested size. */
+    uint64_t elt_mask = pred_esz_masks[esz];
+    uint64_t total = 0;
+    intptr_t w;
+
+    for (w = 0; w < DIV_ROUND_UP(oprsz, 8); ++w) {
+        /* Count bits set in both VN and VG at element positions. */
+        total += ctpop64(pn[w] & pg[w] & elt_mask);
+    }
+    return total;
+}
+
+uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc)
+{
+    uintptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+    uint64_t esz_mask = pred_esz_masks[esz];
+    ARMPredicateReg *d = vd;
+    uint32_t empty_flags;
+    intptr_t w;
+
+    /* The result starts out all-false; nothing more to do for zero. */
+    empty_flags = do_zero(d, oprsz);
+    if (count == 0) {
+        return empty_flags;
+    }
+
+    /* Convert the element count to predicate bits... */
+    count <<= esz;
+    /* ...and clamp it to the number of bits in the predicate. */
+    count = MIN(count, oprsz * 8);
+
+    /* Whole words first, then any partial word of low-order bits. */
+    for (w = 0; w < count / 64; ++w) {
+        d->p[w] = esz_mask;
+    }
+    if ((count & 63) != 0) {
+        d->p[w] = MAKE_64BIT_MASK(0, count & 63) & esz_mask;
+    }
+
+    return predtest_ones(d, oprsz, esz_mask);
+}
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
index dd9c09f89b..63d958cf50 100644
--- a/target/arm/translate-a64.h
+++ b/target/arm/translate-a64.h
@@ -67,18 +67,26 @@ static inline void assert_fp_access_checked(DisasContext *s)
static inline int vec_reg_offset(DisasContext *s, int regno,
int element, TCGMemOp size)
{
- int offs = 0;
+ int element_size = 1 << size;
+ int offs = element * element_size;
#ifdef HOST_WORDS_BIGENDIAN
/* This is complicated slightly because vfp.zregs[n].d[0] is
- * still the low half and vfp.zregs[n].d[1] the high half
- * of the 128 bit vector, even on big endian systems.
- * Calculate the offset assuming a fully bigendian 128 bits,
- * then XOR to account for the order of the two 64 bit halves.
+ * still the lowest and vfp.zregs[n].d[15] the highest of the
+ * 256 byte vector, even on big endian systems.
+ *
+ * Calculate the offset assuming fully little-endian,
+ * then XOR to account for the order of the 8-byte units.
+ *
+ * For 16 byte elements, the two 8 byte halves will not form a
+ * host int128 if the host is bigendian, since they're in the
+ * wrong order. However the only 16 byte operation we have is
+ * a move, so we can ignore this for the moment. More complicated
+ * operations will have to special case loading and storing from
+ * the zregs array.
*/
- offs += (16 - ((element + 1) * (1 << size)));
- offs ^= 8;
-#else
- offs += element * (1 << size);
+ if (element_size < 8) {
+ offs ^= 8 - element_size;
+ }
#endif
offs += offsetof(CPUARMState, vfp.zregs[regno]);
assert_fp_access_checked(s);
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index c48d4b530a..226c97579c 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -33,6 +33,15 @@
#include "trace-tcg.h"
#include "translate-a64.h"
+
+/*
+ * Signature of a vector expander taking one TCGv_i64 scalar operand.
+ * NOTE(review): the integer arguments are presumably (vece, dofs, aofs)
+ * before the scalar and (oprsz, maxsz) after it -- confirm at use sites.
+ */
+typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
+ TCGv_i64, uint32_t, uint32_t);
+
+/*
+ * Helper generators that produce a TCGv_i32 result (first argument)
+ * from three or four pointer operands plus a descriptor.  Callers in
+ * this file pass the same temporary as result and descriptor and then
+ * hand the result to do_pred_flags().
+ */
+typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
+ TCGv_ptr, TCGv_i32);
+typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
+ TCGv_ptr, TCGv_ptr, TCGv_i32);
+
/*
* Helpers for extracting complex instruction fields.
*/
@@ -68,6 +77,11 @@ static inline int expand_imm_sh8s(int x)
return (int8_t)x << (x & 0x100 ? 8 : 0);
}
+static inline int expand_imm_sh8u(int x)
+{
+    /* Bit 8 asks for the zero-extended low byte to be shifted left by 8. */
+    int shift = (x & 0x100) ? 8 : 0;
+
+    return (uint8_t)x << shift;
+}
+
/*
* Include the generated decoder.
*/
@@ -373,6 +387,8 @@ static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
return do_zpzz_ool(s, a, fns[a->esz]);
}
+DO_ZPZZ(SEL, sel)
+
#undef DO_ZPZZ
/*
@@ -1957,6 +1973,1448 @@ static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
}
/*
+ *** SVE Permute - Unpredicated Group
+ */
+
+static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
+{
+    unsigned vsz;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    /* Replicate the scalar (or SP) register across every element of Zd. */
+    vsz = vec_full_reg_size(s);
+    tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
+                         vsz, vsz, cpu_reg_sp(s, a->rn));
+    return true;
+}
+
+static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
+{
+    unsigned vsz, dofs, esz, index;
+
+    /* None of the low five bits set: no element size is encoded. */
+    if ((a->imm & 0x1f) == 0) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    dofs = vec_full_reg_offset(s, a->rd);
+
+    /* Lowest set bit selects the element size; the index sits above it. */
+    esz = ctz32(a->imm);
+    index = a->imm >> (esz + 1);
+
+    if ((index << esz) >= vsz) {
+        /* The indexed element lies beyond the vector: write zeros. */
+        tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
+    } else {
+        unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
+        tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
+    }
+    return true;
+}
+
+static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
+{
+    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
+    /* Out-of-line helpers, selected by a->esz (b, h, s, d). */
+    static gen_insr * const insr_fns[4] = {
+        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
+        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
+    };
+    unsigned vsz = vec_full_reg_size(s);
+    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
+    TCGv_ptr dst = tcg_temp_new_ptr();
+    TCGv_ptr src = tcg_temp_new_ptr();
+
+    /* Point the temporaries at Zd and Zn inside the CPU state. */
+    tcg_gen_addi_ptr(dst, cpu_env, vec_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(src, cpu_env, vec_full_reg_offset(s, a->rn));
+
+    insr_fns[a->esz](dst, src, val, t_desc);
+
+    tcg_temp_free_ptr(dst);
+    tcg_temp_free_ptr(src);
+    tcg_temp_free_i32(t_desc);
+}
+
+/* INSR with the inserted value taken from element 0 of vector Rm. */
+static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    TCGv_i64 scalar;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    /* Always load 64 bits; do_insr_i64 picks the helper by a->esz. */
+    scalar = tcg_temp_new_i64();
+    tcg_gen_ld_i64(scalar, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
+    do_insr_i64(s, a, scalar);
+    tcg_temp_free_i64(scalar);
+    return true;
+}
+
+/* INSR with the inserted value taken from general register Rm. */
+static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    if (!sve_access_check(s)) {
+        return true;
+    }
+    do_insr_i64(s, a, cpu_reg(s, a->rm));
+    return true;
+}
+
+static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
+{
+    /* One out-of-line helper per element size, indexed by a->esz. */
+    static gen_helper_gvec_2 * const rev_fns[4] = {
+        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
+        gen_helper_sve_rev_s, gen_helper_sve_rev_d
+    };
+    unsigned vsz;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
+                       vec_full_reg_offset(s, a->rn),
+                       vsz, vsz, 0, rev_fns[a->esz]);
+    return true;
+}
+
+static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    /* One out-of-line helper per element size, indexed by a->esz. */
+    static gen_helper_gvec_3 * const tbl_fns[4] = {
+        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
+        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
+    };
+    unsigned vsz;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
+                       vec_full_reg_offset(s, a->rn),
+                       vec_full_reg_offset(s, a->rm),
+                       vsz, vsz, 0, tbl_fns[a->esz]);
+    return true;
+}
+
+static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
+{
+    /* Indexed [esz][u]: column 0 is the sunpk helper, column 1 uunpk. */
+    static gen_helper_gvec_2 * const unpk_fns[4][2] = {
+        { NULL, NULL },
+        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
+        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
+        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
+    };
+    unsigned vsz, src_ofs;
+
+    /* There is no helper for esz 0; reject the encoding. */
+    if (a->esz == 0) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    /* a->h picks the upper half of the source vector as the input. */
+    src_ofs = vec_full_reg_offset(s, a->rn) + (a->h ? vsz / 2 : 0);
+    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), src_ofs,
+                       vsz, vsz, 0, unpk_fns[a->esz][a->u]);
+    return true;
+}
+
+/*
+ *** SVE Permute - Predicates Group
+ */
+
+static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
+                          gen_helper_gvec_3 *fn)
+{
+    unsigned psz;
+    TCGv_ptr pd, pn, pm;
+    TCGv_i32 t_desc;
+    int desc;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    psz = pred_full_reg_size(s);
+
+    /*
+     * Predicate sizes may be too small for simd_desc, and rounding up
+     * (as done elsewhere) is not an option because ZIP2 and REV need
+     * the exact size.  The other helpers use the same layout purely
+     * for consistency: size minus 2 in the low bits, then esz and
+     * high_odd in the data field.
+     */
+    pd = tcg_temp_new_ptr();
+    pn = tcg_temp_new_ptr();
+    pm = tcg_temp_new_ptr();
+
+    desc = psz - 2;
+    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
+    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
+
+    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(pn, cpu_env, pred_full_reg_offset(s, a->rn));
+    tcg_gen_addi_ptr(pm, cpu_env, pred_full_reg_offset(s, a->rm));
+    t_desc = tcg_const_i32(desc);
+
+    fn(pd, pn, pm, t_desc);
+
+    tcg_temp_free_ptr(pd);
+    tcg_temp_free_ptr(pn);
+    tcg_temp_free_ptr(pm);
+    tcg_temp_free_i32(t_desc);
+    return true;
+}
+
+static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
+                          gen_helper_gvec_2 *fn)
+{
+    unsigned psz;
+    TCGv_ptr pd, pn;
+    TCGv_i32 t_desc;
+    int desc;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    psz = pred_full_reg_size(s);
+    pd = tcg_temp_new_ptr();
+    pn = tcg_temp_new_ptr();
+
+    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(pn, cpu_env, pred_full_reg_offset(s, a->rn));
+
+    /*
+     * Predicate sizes may be too small for simd_desc, and rounding up
+     * (as done elsewhere) is not an option because ZIP2 and REV need
+     * the exact size.  The descriptor holds the size minus 2 in the
+     * low bits, then esz and high_odd in the data field.
+     */
+    desc = psz - 2;
+    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
+    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
+    t_desc = tcg_const_i32(desc);
+
+    fn(pd, pn, t_desc);
+
+    tcg_temp_free_i32(t_desc);
+    tcg_temp_free_ptr(pd);
+    tcg_temp_free_ptr(pn);
+    return true;
+}
+
+/*
+ * Predicate permutes.  Each "1"/"2" (or LO/HI) pair shares a helper
+ * and differs only in the high_odd flag handed to do_perm_pred*.
+ */
+static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_perm_pred3(s, a, false, gen_helper_sve_zip_p);
+}
+
+static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_perm_pred3(s, a, true, gen_helper_sve_zip_p);
+}
+
+static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_perm_pred3(s, a, false, gen_helper_sve_uzp_p);
+}
+
+static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_perm_pred3(s, a, true, gen_helper_sve_uzp_p);
+}
+
+static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_perm_pred3(s, a, false, gen_helper_sve_trn_p);
+}
+
+static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_perm_pred3(s, a, true, gen_helper_sve_trn_p);
+}
+
+static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
+{
+    return do_perm_pred2(s, a, false, gen_helper_sve_rev_p);
+}
+
+static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
+{
+    return do_perm_pred2(s, a, false, gen_helper_sve_punpk_p);
+}
+
+static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
+{
+    return do_perm_pred2(s, a, true, gen_helper_sve_punpk_p);
+}
+
+/*
+ *** SVE Permute - Interleaving Group
+ */
+
+static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
+{
+    /* One out-of-line helper per element size, indexed by a->esz. */
+    static gen_helper_gvec_3 * const zip_fns[4] = {
+        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
+        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
+    };
+    unsigned vsz, half_ofs;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    /* For the high form both sources start at their upper halves. */
+    half_ofs = high ? vsz / 2 : 0;
+    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
+                       vec_full_reg_offset(s, a->rn) + half_ofs,
+                       vec_full_reg_offset(s, a->rm) + half_ofs,
+                       vsz, vsz, 0, zip_fns[a->esz]);
+    return true;
+}
+
+static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
+                            gen_helper_gvec_3 *fn)
+{
+    unsigned vsz;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    /* Full-width Zd = fn(Zn, Zm), with DATA passed in the descriptor. */
+    vsz = vec_full_reg_size(s);
+    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
+                       vec_full_reg_offset(s, a->rn),
+                       vec_full_reg_offset(s, a->rm),
+                       vsz, vsz, data, fn);
+    return true;
+}
+
+static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    /* ZIP1 reads the sources from offset 0. */
+    const bool high = false;
+
+    return do_zip(s, a, high);
+}
+
+static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    /* ZIP2 reads the sources from their upper halves. */
+    const bool high = true;
+
+    return do_zip(s, a, high);
+}
+
+/* UZP helpers shared by UZP1 and UZP2, indexed by element size. */
+static gen_helper_gvec_3 * const uzp_fns[4] = {
+    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
+    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
+};
+
+static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    gen_helper_gvec_3 *fn = uzp_fns[a->esz];
+
+    return do_zzz_data_ool(s, a, 0, fn);
+}
+
+static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    gen_helper_gvec_3 *fn = uzp_fns[a->esz];
+    /* The helper data is the element size in bytes. */
+    int elt_bytes = 1 << a->esz;
+
+    return do_zzz_data_ool(s, a, elt_bytes, fn);
+}
+
+/* TRN helpers shared by TRN1 and TRN2, indexed by element size. */
+static gen_helper_gvec_3 * const trn_fns[4] = {
+    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
+    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
+};
+
+static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    gen_helper_gvec_3 *fn = trn_fns[a->esz];
+
+    return do_zzz_data_ool(s, a, 0, fn);
+}
+
+static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    gen_helper_gvec_3 *fn = trn_fns[a->esz];
+    /* The helper data is the element size in bytes. */
+    int elt_bytes = 1 << a->esz;
+
+    return do_zzz_data_ool(s, a, elt_bytes, fn);
+}
+
+/*
+ *** SVE Permute Vector - Predicated Group
+ */
+
+static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    /* Helpers exist only for esz 2 and 3; the other slots stay NULL. */
+    static gen_helper_gvec_3 * const compact_fns[4] = {
+        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
+    };
+    gen_helper_gvec_3 *fn = compact_fns[a->esz];
+
+    return do_zpz_ool(s, a, fn);
+}
+
+/*
+ * Emit a call to the helper implementing the ARM LastActiveElement
+ * pseudocode function.  The result placed in RET is scaled by the
+ * element size, and that includes the "not found" indication: for
+ * esz=3, for example, not found is -8.
+ */
+static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
+{
+    TCGv_ptr pred = tcg_temp_new_ptr();
+    TCGv_i32 t_desc;
+    unsigned psz = pred_full_reg_size(s);
+    unsigned desc;
+
+    /*
+     * Predicate sizes may be too small for simd_desc, and we cannot
+     * round up as done elsewhere because the exact size is required.
+     * Encode size minus 2 in the low bits and esz in the data field.
+     */
+    desc = psz - 2;
+    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
+
+    tcg_gen_addi_ptr(pred, cpu_env, pred_full_reg_offset(s, pg));
+    t_desc = tcg_const_i32(desc);
+
+    gen_helper_sve_last_active_element(ret, pred, t_desc);
+
+    tcg_temp_free_i32(t_desc);
+    tcg_temp_free_ptr(pred);
+}
+
+/*
+ * Advance LAST to the byte offset of the next element in the vector,
+ * wrapping back to 0 once it reaches the vector size.
+ */
+static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
+{
+    unsigned vsz = vec_full_reg_size(s);
+
+    tcg_gen_addi_i32(last, last, 1 << esz);
+
+    if (!is_power_of_2(vsz)) {
+        /* No cheap mask available: select 0 when LAST >= VSZ. */
+        TCGv_i32 limit = tcg_const_i32(vsz);
+        TCGv_i32 zero = tcg_const_i32(0);
+
+        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, limit, zero, last);
+        tcg_temp_free_i32(limit);
+        tcg_temp_free_i32(zero);
+        return;
+    }
+    /* Power-of-two size: wrap with a simple mask. */
+    tcg_gen_andi_i32(last, last, vsz - 1);
+}
+
+/*
+ * If LAST is negative (not found), replace it with the byte offset of
+ * the final element in the vector; otherwise leave it alone.
+ */
+static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
+{
+    unsigned vsz = vec_full_reg_size(s);
+
+    if (!is_power_of_2(vsz)) {
+        /* Explicit select between LAST and VSZ - element size. */
+        TCGv_i32 final_ofs = tcg_const_i32(vsz - (1 << esz));
+        TCGv_i32 zero = tcg_const_i32(0);
+
+        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, final_ofs, last);
+        tcg_temp_free_i32(final_ofs);
+        tcg_temp_free_i32(zero);
+        return;
+    }
+    /* Power-of-two size: masking maps -(1 << esz) to the last element. */
+    tcg_gen_andi_i32(last, last, vsz - 1);
+}
+
+/*
+ * Load a zero-extended element of size ESZ from BASE+OFS into a newly
+ * allocated 64-bit temporary, which the caller must free.
+ */
+static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
+{
+    TCGv_i64 val = tcg_temp_new_i64();
+
+    if (esz == 0) {
+        tcg_gen_ld8u_i64(val, base, ofs);
+    } else if (esz == 1) {
+        tcg_gen_ld16u_i64(val, base, ofs);
+    } else if (esz == 2) {
+        tcg_gen_ld32u_i64(val, base, ofs);
+    } else if (esz == 3) {
+        tcg_gen_ld_i64(val, base, ofs);
+    } else {
+        g_assert_not_reached();
+    }
+    return val;
+}
+
+/*
+ * Load a zero-extended element of size ESZ from byte offset LAST within
+ * vector register RM.  On big-endian hosts LAST is modified in place.
+ */
+static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
+                                 int rm, int esz)
+{
+    TCGv_ptr addr = tcg_temp_new_ptr();
+    TCGv_i64 val;
+
+#ifdef HOST_WORDS_BIGENDIAN
+    /* Adjust for element ordering within 8-byte units; see vec_reg_offset. */
+    if (esz < 3) {
+        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
+    }
+#endif
+    /*
+     * Turn the offset into the vector into a pointer relative to ENV.
+     * The register's own base offset is supplied as the constant
+     * displacement of the load below.
+     */
+    tcg_gen_ext_i32_ptr(addr, last);
+    tcg_gen_add_ptr(addr, addr, cpu_env);
+
+    val = load_esz(addr, vec_full_reg_offset(s, rm), esz);
+    tcg_temp_free_ptr(addr);
+
+    return val;
+}
+
+/*
+ * Compute CLAST for a Zreg.
+ *
+ * If the governing predicate a->pg has an active element, the selected
+ * element of Zm (the last active one when BEFORE, otherwise the one
+ * following it) is broadcast to every element of Zd.  If there is no
+ * active element, Zd receives a copy of Zn when the two differ and is
+ * otherwise left untouched.
+ */
+static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
+{
+ TCGv_i32 last;
+ TCGLabel *over;
+ TCGv_i64 ele;
+ unsigned vsz, esz = a->esz;
+
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ /* LAST is allocated as a local temp; it is read after the branch below. */
+ last = tcg_temp_local_new_i32();
+ over = gen_new_label();
+
+ find_last_active(s, last, esz, a->pg);
+
+ /* There is of course no movcond for a 2048-bit vector,
+ * so we must branch over the actual store.
+ */
+ tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
+
+ if (!before) {
+ incr_last_active(s, last, esz);
+ }
+
+ ele = load_last_active(s, last, a->rm, esz);
+ tcg_temp_free_i32(last);
+
+ /* Broadcast the selected element across the whole destination. */
+ vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
+ tcg_temp_free_i64(ele);
+
+ /* If this insn used MOVPRFX, we may need a second move. */
+ if (a->rd != a->rn) {
+ TCGLabel *done = gen_new_label();
+ tcg_gen_br(done);
+
+ /* Not-found path: Zd = Zn. */
+ gen_set_label(over);
+ do_mov_z(s, a->rd, a->rn);
+
+ gen_set_label(done);
+ } else {
+ gen_set_label(over);
+ }
+ return true;
+}
+
+static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
+{
+    /* CLASTA: take the element after the last active one. */
+    const bool before = false;
+
+    return do_clast_vector(s, a, before);
+}
+
+static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
+{
+    /* CLASTB: take the last active element itself. */
+    const bool before = true;
+
+    return do_clast_vector(s, a, before);
+}
+
+/*
+ * Compute CLAST for a scalar.
+ *
+ * REG_VAL is both input and output: it is replaced by the selected
+ * element of vector RM when predicate PG has an active element, and is
+ * left unchanged otherwise.  BEFORE selects the last active element
+ * itself rather than the one following it.
+ */
+static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
+ bool before, TCGv_i64 reg_val)
+{
+ TCGv_i32 last = tcg_temp_new_i32();
+ TCGv_i64 ele, cmp, zero;
+
+ find_last_active(s, last, esz, pg);
+
+ /* Extend the original value of last prior to incrementing. */
+ cmp = tcg_temp_new_i64();
+ tcg_gen_ext_i32_i64(cmp, last);
+
+ if (!before) {
+ incr_last_active(s, last, esz);
+ }
+
+ /* The conceit here is that while last < 0 indicates not found, after
+ * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
+ * from which we can load garbage. We then discard the garbage with
+ * a conditional move.
+ */
+ ele = load_last_active(s, last, rm, esz);
+ tcg_temp_free_i32(last);
+
+ /* Keep ELE only when the pre-increment LAST was non-negative (found). */
+ zero = tcg_const_i64(0);
+ tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
+
+ tcg_temp_free_i64(zero);
+ tcg_temp_free_i64(cmp);
+ tcg_temp_free_i64(ele);
+}
+
+/*
+ * Compute CLAST for a Vreg: the current low element of Vd is the
+ * fallback value, and the result is written back via write_fp_dreg.
+ */
+static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
+{
+    int esz, ofs;
+    TCGv_i64 val;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    esz = a->esz;
+    ofs = vec_reg_offset(s, a->rd, 0, esz);
+    val = load_esz(cpu_env, ofs, esz);
+
+    do_clast_scalar(s, esz, a->pg, a->rn, before, val);
+    write_fp_dreg(s, a->rd, val);
+    tcg_temp_free_i64(val);
+    return true;
+}
+
+static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    /* CLASTA: take the element after the last active one. */
+    const bool before = false;
+
+    return do_clast_fp(s, a, before);
+}
+
+static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    /* CLASTB: take the last active element itself. */
+    const bool before = true;
+
+    return do_clast_fp(s, a, before);
+}
+
+/*
+ * Compute CLAST for an Xreg.  The destination register is first
+ * zero-extended from the element size, so that value is what remains
+ * when no element is active, and is then conditionally replaced.
+ */
+static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
+{
+    TCGv_i64 xd;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    xd = cpu_reg(s, a->rd);
+    if (a->esz == 0) {
+        tcg_gen_ext8u_i64(xd, xd);
+    } else if (a->esz == 1) {
+        tcg_gen_ext16u_i64(xd, xd);
+    } else if (a->esz == 2) {
+        tcg_gen_ext32u_i64(xd, xd);
+    } else if (a->esz != 3) {
+        g_assert_not_reached();
+    }
+
+    do_clast_scalar(s, a->esz, a->pg, a->rn, before, xd);
+    return true;
+}
+
+static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    /* CLASTA: take the element after the last active one. */
+    const bool before = false;
+
+    return do_clast_general(s, a, before);
+}
+
+static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    /* CLASTB: take the last active element itself. */
+    const bool before = true;
+
+    return do_clast_general(s, a, before);
+}
+
+/*
+ * Compute LAST for a scalar.  Returns a new 64-bit temporary holding
+ * the selected element of vector RM; the caller frees it.
+ */
+static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
+                               int pg, int rm, bool before)
+{
+    TCGv_i32 ofs = tcg_temp_new_i32();
+    TCGv_i64 val;
+
+    find_last_active(s, ofs, esz, pg);
+    if (!before) {
+        /* LASTA: step to the following element, wrapping to 0. */
+        incr_last_active(s, ofs, esz);
+    } else {
+        /* LASTB: "not found" becomes the final element of the vector. */
+        wrap_last_active(s, ofs, esz);
+    }
+
+    val = load_last_active(s, ofs, rm, esz);
+    tcg_temp_free_i32(ofs);
+    return val;
+}
+
+/* Compute LAST for a Vreg; the element is stored via write_fp_dreg. */
+static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
+{
+    TCGv_i64 elt;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    elt = do_last_scalar(s, a->esz, a->pg, a->rn, before);
+    write_fp_dreg(s, a->rd, elt);
+    tcg_temp_free_i64(elt);
+    return true;
+}
+
+static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    /* LASTA: take the element after the last active one. */
+    const bool before = false;
+
+    return do_last_fp(s, a, before);
+}
+
+static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    /* LASTB: take the last active element itself. */
+    const bool before = true;
+
+    return do_last_fp(s, a, before);
+}
+
+/* Compute LAST for an Xreg; the zero-extended element is moved to Xd. */
+static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
+{
+    TCGv_i64 elt;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    elt = do_last_scalar(s, a->esz, a->pg, a->rn, before);
+    tcg_gen_mov_i64(cpu_reg(s, a->rd), elt);
+    tcg_temp_free_i64(elt);
+    return true;
+}
+
+static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    /* LASTA: take the element after the last active one. */
+    const bool before = false;
+
+    return do_last_general(s, a, before);
+}
+
+static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    /* LASTB: take the last active element itself. */
+    const bool before = true;
+
+    return do_last_general(s, a, before);
+}
+
+static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    if (!sve_access_check(s)) {
+        return true;
+    }
+    /* Zd is passed as both register operands; the scalar is Xn or SP. */
+    do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
+    return true;
+}
+
+static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    int src_ofs;
+    TCGv_i64 scalar;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    /* The scalar is element 0 of vector Rn, zero-extended to 64 bits. */
+    src_ofs = vec_reg_offset(s, a->rn, 0, a->esz);
+    scalar = load_esz(cpu_env, src_ofs, a->esz);
+    do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, scalar);
+    tcg_temp_free_i64(scalar);
+    return true;
+}
+
+/*
+ * Predicated reversal operations.  Each table is indexed by a->esz; a
+ * NULL slot means no helper exists for that element size, and the
+ * pointer is passed through to do_zpz_ool unchanged.
+ */
+static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    static gen_helper_gvec_3 * const revb_fns[4] = {
+        NULL,
+        gen_helper_sve_revb_h,
+        gen_helper_sve_revb_s,
+        gen_helper_sve_revb_d,
+    };
+    gen_helper_gvec_3 *fn = revb_fns[a->esz];
+
+    return do_zpz_ool(s, a, fn);
+}
+
+static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    static gen_helper_gvec_3 * const revh_fns[4] = {
+        NULL,
+        NULL,
+        gen_helper_sve_revh_s,
+        gen_helper_sve_revh_d,
+    };
+    gen_helper_gvec_3 *fn = revh_fns[a->esz];
+
+    return do_zpz_ool(s, a, fn);
+}
+
+static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    /* Only the 64-bit element size has a helper. */
+    gen_helper_gvec_3 *fn = NULL;
+
+    if (a->esz == 3) {
+        fn = gen_helper_sve_revw_d;
+    }
+    return do_zpz_ool(s, a, fn);
+}
+
+static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    static gen_helper_gvec_3 * const rbit_fns[4] = {
+        gen_helper_sve_rbit_b,
+        gen_helper_sve_rbit_h,
+        gen_helper_sve_rbit_s,
+        gen_helper_sve_rbit_d,
+    };
+    gen_helper_gvec_3 *fn = rbit_fns[a->esz];
+
+    return do_zpz_ool(s, a, fn);
+}
+
+static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
+{
+    unsigned vsz;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    /* One helper serves all sizes; esz travels in the descriptor data. */
+    vsz = vec_full_reg_size(s);
+    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
+                       vec_full_reg_offset(s, a->rn),
+                       vec_full_reg_offset(s, a->rm),
+                       pred_full_reg_offset(s, a->pg),
+                       vsz, vsz, a->esz, gen_helper_sve_splice);
+    return true;
+}
+
+/*
+ *** SVE Integer Compare - Vectors Group
+ */
+
+/*
+ * Emit a flag-setting compare of two vectors under a governing
+ * predicate.  Returns false (no helper for this encoding) when GEN_FN
+ * is NULL, true otherwise.
+ */
+static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
+                          gen_helper_gvec_flags_4 *gen_fn)
+{
+    TCGv_ptr p_dst, z_lhs, z_rhs, p_gov;
+    unsigned vsz;
+    TCGv_i32 t;
+
+    if (gen_fn == NULL) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    /* T carries the descriptor in and receives the helper result. */
+    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
+    p_dst = tcg_temp_new_ptr();
+    z_lhs = tcg_temp_new_ptr();
+    z_rhs = tcg_temp_new_ptr();
+    p_gov = tcg_temp_new_ptr();
+
+    tcg_gen_addi_ptr(p_dst, cpu_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(z_lhs, cpu_env, vec_full_reg_offset(s, a->rn));
+    tcg_gen_addi_ptr(z_rhs, cpu_env, vec_full_reg_offset(s, a->rm));
+    tcg_gen_addi_ptr(p_gov, cpu_env, pred_full_reg_offset(s, a->pg));
+
+    gen_fn(t, p_dst, z_lhs, z_rhs, p_gov, t);
+
+    tcg_temp_free_ptr(p_dst);
+    tcg_temp_free_ptr(z_lhs);
+    tcg_temp_free_ptr(z_rhs);
+    tcg_temp_free_ptr(p_gov);
+
+    do_pred_flags(t);
+
+    tcg_temp_free_i32(t);
+    return true;
+}
+
+/*
+ * Define trans_<NAME>_ppzz: a vector/vector compare that picks the
+ * gen_helper_sve_<name>_ppzz_{b,h,s,d} helper by a->esz and emits it
+ * through do_ppzz_flags.
+ */
+#define DO_PPZZ(NAME, name) \
+static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
+ uint32_t insn) \
+{ \
+ static gen_helper_gvec_flags_4 * const fns[4] = { \
+ gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
+ gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
+ }; \
+ return do_ppzz_flags(s, a, fns[a->esz]); \
+}
+
+DO_PPZZ(CMPEQ, cmpeq)
+DO_PPZZ(CMPNE, cmpne)
+DO_PPZZ(CMPGT, cmpgt)
+DO_PPZZ(CMPGE, cmpge)
+DO_PPZZ(CMPHI, cmphi)
+DO_PPZZ(CMPHS, cmphs)
+
+#undef DO_PPZZ
+
+/*
+ * Define trans_<NAME>_ppzw, selecting gen_helper_sve_<name>_ppzw_{b,h,s}
+ * by a->esz.  The slot for esz 3 is NULL, which makes do_ppzz_flags
+ * return false for that encoding.
+ */
+#define DO_PPZW(NAME, name) \
+static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
+ uint32_t insn) \
+{ \
+ static gen_helper_gvec_flags_4 * const fns[4] = { \
+ gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
+ gen_helper_sve_##name##_ppzw_s, NULL \
+ }; \
+ return do_ppzz_flags(s, a, fns[a->esz]); \
+}
+
+DO_PPZW(CMPEQ, cmpeq)
+DO_PPZW(CMPNE, cmpne)
+DO_PPZW(CMPGT, cmpgt)
+DO_PPZW(CMPGE, cmpge)
+DO_PPZW(CMPHI, cmphi)
+DO_PPZW(CMPHS, cmphs)
+DO_PPZW(CMPLT, cmplt)
+DO_PPZW(CMPLE, cmple)
+DO_PPZW(CMPLO, cmplo)
+DO_PPZW(CMPLS, cmpls)
+
+#undef DO_PPZW
+
+/*
+ *** SVE Integer Compare - Immediate Groups
+ */
+
+/*
+ * Emit a flag-setting compare of a vector against an immediate under a
+ * governing predicate.  The immediate travels in the descriptor's data
+ * field.  Returns false when GEN_FN is NULL, true otherwise.
+ */
+static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
+                          gen_helper_gvec_flags_3 *gen_fn)
+{
+    TCGv_ptr p_dst, z_src, p_gov;
+    unsigned vsz;
+    TCGv_i32 t;
+
+    if (gen_fn == NULL) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    /* T carries the descriptor in and receives the helper result. */
+    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
+    p_dst = tcg_temp_new_ptr();
+    z_src = tcg_temp_new_ptr();
+    p_gov = tcg_temp_new_ptr();
+
+    tcg_gen_addi_ptr(p_dst, cpu_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(z_src, cpu_env, vec_full_reg_offset(s, a->rn));
+    tcg_gen_addi_ptr(p_gov, cpu_env, pred_full_reg_offset(s, a->pg));
+
+    gen_fn(t, p_dst, z_src, p_gov, t);
+
+    tcg_temp_free_ptr(p_dst);
+    tcg_temp_free_ptr(z_src);
+    tcg_temp_free_ptr(p_gov);
+
+    do_pred_flags(t);
+
+    tcg_temp_free_i32(t);
+    return true;
+}
+
+/*
+ * Define trans_<NAME>_ppzi: a vector/immediate compare that picks the
+ * gen_helper_sve_<name>_ppzi_{b,h,s,d} helper by a->esz and emits it
+ * through do_ppzi_flags.
+ */
+#define DO_PPZI(NAME, name) \
+static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
+ uint32_t insn) \
+{ \
+ static gen_helper_gvec_flags_3 * const fns[4] = { \
+ gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
+ gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
+ }; \
+ return do_ppzi_flags(s, a, fns[a->esz]); \
+}
+
+DO_PPZI(CMPEQ, cmpeq)
+DO_PPZI(CMPNE, cmpne)
+DO_PPZI(CMPGT, cmpgt)
+DO_PPZI(CMPGE, cmpge)
+DO_PPZI(CMPHI, cmphi)
+DO_PPZI(CMPHS, cmphs)
+DO_PPZI(CMPLT, cmplt)
+DO_PPZI(CMPLE, cmple)
+DO_PPZI(CMPLO, cmplo)
+DO_PPZI(CMPLS, cmpls)
+
+#undef DO_PPZI
+
+/*
+ *** SVE Partition Break Group
+ */
+
+/*
+ * Emit a three-source partition-break operation on predicates.
+ * FN is used for the plain form and FN_S for the flag-setting form,
+ * chosen by a->s.
+ */
+static bool do_brk3(DisasContext *s, arg_rprr_s *a,
+                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
+{
+    unsigned psz;
+    TCGv_ptr pd, pn, pm, pg;
+    TCGv_i32 t;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    psz = pred_full_reg_size(s);
+
+    /* Predicate sizes may be too small for simd_desc: pass size - 2. */
+    pd = tcg_temp_new_ptr();
+    pn = tcg_temp_new_ptr();
+    pm = tcg_temp_new_ptr();
+    pg = tcg_temp_new_ptr();
+    t = tcg_const_i32(psz - 2);
+
+    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(pn, cpu_env, pred_full_reg_offset(s, a->rn));
+    tcg_gen_addi_ptr(pm, cpu_env, pred_full_reg_offset(s, a->rm));
+    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
+
+    if (!a->s) {
+        fn(pd, pn, pm, pg, t);
+    } else {
+        /* T is the descriptor on input and the helper result on output. */
+        fn_s(t, pd, pn, pm, pg, t);
+        do_pred_flags(t);
+    }
+
+    tcg_temp_free_ptr(pd);
+    tcg_temp_free_ptr(pn);
+    tcg_temp_free_ptr(pm);
+    tcg_temp_free_ptr(pg);
+    tcg_temp_free_i32(t);
+    return true;
+}
+
+/*
+ * Emit a two-source partition-break operation on predicates.
+ * FN is used for the plain form and FN_S for the flag-setting form,
+ * chosen by a->s.
+ */
+static bool do_brk2(DisasContext *s, arg_rpr_s *a,
+                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
+{
+    unsigned psz;
+    TCGv_ptr pd, pn, pg;
+    TCGv_i32 t;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    psz = pred_full_reg_size(s);
+
+    /* Predicate sizes may be too small for simd_desc: pass size - 2. */
+    pd = tcg_temp_new_ptr();
+    pn = tcg_temp_new_ptr();
+    pg = tcg_temp_new_ptr();
+    t = tcg_const_i32(psz - 2);
+
+    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(pn, cpu_env, pred_full_reg_offset(s, a->rn));
+    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
+
+    if (!a->s) {
+        fn(pd, pn, pg, t);
+    } else {
+        /* T is the descriptor on input and the helper result on output. */
+        fn_s(t, pd, pn, pg, t);
+        do_pred_flags(t);
+    }
+
+    tcg_temp_free_ptr(pd);
+    tcg_temp_free_ptr(pn);
+    tcg_temp_free_ptr(pg);
+    tcg_temp_free_i32(t);
+    return true;
+}
+
+static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
+{
+ return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
+}
+
+static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
+{
+ return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
+}
+
+static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
+{
+ return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
+}
+
+static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
+{
+ return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
+}
+
+static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
+{ /* BRKA, "_z" form: expand via do_brk2 with sve_brka_z / sve_brkas_z. */
+ return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
+}
+
+static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
+{ /* BRKB, "_z" form: expand via do_brk2 with sve_brkb_z / sve_brkbs_z. */
+ return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
+}
+
+static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
+{ /* BRKN: expand via do_brk2 with the sve_brkn / sve_brkns helpers. */
+ return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
+}
+
+/*
+ *** SVE Predicate Count Group
+ */
+
+/*
+ * Emit code that stores into VAL the number of bits set in
+ * (predicate PN & predicate PG), restricted to pred_esz_masks[ESZ].
+ * Predicates of at most 8 bytes are handled inline with a population
+ * count; larger ones are delegated to the sve_cntp helper.
+ */
+static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
+{
+    unsigned pred_bytes = pred_full_reg_size(s);
+    uint64_t live_mask;
+
+    if (pred_bytes > 8) {
+        /* Out of line: pass both predicate pointers and a descriptor. */
+        TCGv_ptr src = tcg_temp_new_ptr();
+        TCGv_ptr gov = tcg_temp_new_ptr();
+        unsigned desc = deposit32(pred_bytes - 2, SIMD_DATA_SHIFT, 2, esz);
+        TCGv_i32 desc_tmp;
+
+        tcg_gen_addi_ptr(src, cpu_env, pred_full_reg_offset(s, pn));
+        tcg_gen_addi_ptr(gov, cpu_env, pred_full_reg_offset(s, pg));
+        desc_tmp = tcg_const_i32(desc);
+
+        gen_helper_sve_cntp(val, src, gov, desc_tmp);
+        tcg_temp_free_ptr(src);
+        tcg_temp_free_ptr(gov);
+        tcg_temp_free_i32(desc_tmp);
+        return;
+    }
+
+    /* Inline: the whole predicate fits in one 64-bit load. */
+    tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
+    if (pn != pg) {
+        TCGv_i64 gov_bits = tcg_temp_new_i64();
+        tcg_gen_ld_i64(gov_bits, cpu_env, pred_full_reg_offset(s, pg));
+        tcg_gen_and_i64(val, val, gov_bits);
+        tcg_temp_free_i64(gov_bits);
+    }
+
+    /*
+     * Reduce the pred_esz_masks value simply to reduce the
+     * size of the code generated here.
+     */
+    live_mask = MAKE_64BIT_MASK(0, pred_bytes * 8);
+    tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & live_mask);
+
+    tcg_gen_ctpop_i64(val, val);
+}
+
+/* CNTP: write the predicate count of (a->rn & a->pg) to register a->rd. */
+static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
+{
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
+    return true;
+}
+
+/*
+ * Scalar increment/decrement by predicate count: register a->rd has the
+ * count of predicate a->pg added (a->d clear) or subtracted (a->d set).
+ */
+static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
+                            uint32_t insn)
+{
+    TCGv_i64 xd, count;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    xd = cpu_reg(s, a->rd);
+    count = tcg_temp_new_i64();
+    do_cntp(s, count, a->esz, a->pg, a->pg);
+
+    if (!a->d) {
+        tcg_gen_add_i64(xd, xd, count);
+    } else {
+        tcg_gen_sub_i64(xd, xd, count);
+    }
+    tcg_temp_free_i64(count);
+    return true;
+}
+
+/*
+ * Vector increment/decrement by predicate count: the count of predicate
+ * a->pg is added to (a->d clear) or subtracted from (a->d set) every
+ * element of vector a->rn, and the result is written to vector a->rd.
+ * Element size 0 is rejected as an invalid encoding.
+ */
+static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
+                            uint32_t insn)
+{
+    if (a->esz == 0) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        TCGv_i64 val = tcg_temp_new_i64();
+        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
+
+        do_cntp(s, val, a->esz, a->pg, a->pg);
+        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
+                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
+        /* The scalar operand is ours to release once the expansion is done. */
+        tcg_temp_free_i64(val);
+    }
+    return true;
+}
+
+/*
+ * Saturating 32-bit scalar increment/decrement by predicate count:
+ * the count of predicate a->pg is combined with register a->rd by
+ * do_sat_addsub_32(), with a->u and a->d passed through unchanged.
+ */
+static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
+                                uint32_t insn)
+{
+    if (sve_access_check(s)) {
+        TCGv_i64 reg = cpu_reg(s, a->rd);
+        TCGv_i64 val = tcg_temp_new_i64();
+
+        do_cntp(s, val, a->esz, a->pg, a->pg);
+        do_sat_addsub_32(reg, val, a->u, a->d);
+        /* Release the count temporary; it was allocated in this function. */
+        tcg_temp_free_i64(val);
+    }
+    return true;
+}
+
+/*
+ * Saturating 64-bit scalar increment/decrement by predicate count:
+ * the count of predicate a->pg is combined with register a->rd by
+ * do_sat_addsub_64(), with a->u and a->d passed through unchanged.
+ */
+static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
+                                uint32_t insn)
+{
+    if (sve_access_check(s)) {
+        TCGv_i64 reg = cpu_reg(s, a->rd);
+        TCGv_i64 val = tcg_temp_new_i64();
+
+        do_cntp(s, val, a->esz, a->pg, a->pg);
+        do_sat_addsub_64(reg, val, a->u, a->d);
+        /* Release the count temporary; it was allocated in this function. */
+        tcg_temp_free_i64(val);
+    }
+    return true;
+}
+
+/*
+ * Saturating vector increment/decrement by predicate count: the count
+ * of predicate a->pg is applied to vector a->rn by do_sat_addsub_vec()
+ * and the result is written to vector a->rd.  Element size 0 is
+ * rejected as an invalid encoding.
+ */
+static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
+                             uint32_t insn)
+{
+    if (a->esz == 0) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        TCGv_i64 val = tcg_temp_new_i64();
+        do_cntp(s, val, a->esz, a->pg, a->pg);
+        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
+        /* As in do_zzi_sat, the caller owns and frees the scalar operand. */
+        tcg_temp_free_i64(val);
+    }
+    return true;
+}
+
+/*
+ *** SVE Integer Compare Scalars Group
+ */
+
+/*
+ * CTERM: compare registers a->rn and a->rm (width chosen by a->sf) for
+ * equality or, when a->ne is set, inequality.  NF receives the outcome
+ * and VF is set only when both the outcome and CF are clear.
+ */
+static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
+{
+    TCGCond test;
+    TCGv_i64 lhs, rhs, outcome;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    if (a->ne) {
+        test = TCG_COND_NE;
+    } else {
+        test = TCG_COND_EQ;
+    }
+    lhs = read_cpu_reg(s, a->rn, a->sf);
+    rhs = read_cpu_reg(s, a->rm, a->sf);
+    outcome = tcg_temp_new_i64();
+
+    /* NF holds the comparison result as 0 or 1 for the moment. */
+    tcg_gen_setcond_i64(test, outcome, lhs, rhs);
+    tcg_gen_extrl_i64_i32(cpu_NF, outcome);
+    tcg_temp_free_i64(outcome);
+
+    /* VF = !NF & !CF. */
+    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
+    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
+
+    /* Both NF and VF actually look at bit 31, so turn 1 into -1. */
+    tcg_gen_neg_i32(cpu_NF, cpu_NF);
+    tcg_gen_neg_i32(cpu_VF, cpu_VF);
+    return true;
+}
+
+static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
+{ /*
+ * Expand an SVE WHILE comparison.  The number of iterations for which
+ * register a->rn compares true against register a->rm is computed
+ * inline; the sve_while helper then receives that count together with
+ * a pointer to predicate a->rd, and its 32-bit result is passed to
+ * do_pred_flags().  Returns true whether or not the SVE access check
+ * succeeds.
+ *
+ * NOTE(review): with a->eq set the count is MIN(op1 - op0, vsz) + 1.
+ * When op1 is the largest representable value, an increment-and-compare
+ * loop would presumably never see the condition fail -- confirm that
+ * this case produces the intended predicate.
+ */
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
+ TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i32 t2, t3;
+ TCGv_ptr ptr;
+ unsigned desc, vsz = vec_full_reg_size(s);
+ TCGCond cond;
+
+ if (!a->sf) { /* 32-bit operands: widen to 64 bits per a->u */
+ if (a->u) {
+ tcg_gen_ext32u_i64(op0, op0);
+ tcg_gen_ext32u_i64(op1, op1);
+ } else {
+ tcg_gen_ext32s_i64(op0, op0);
+ tcg_gen_ext32s_i64(op1, op1);
+ }
+ }
+
+ /* For the helper, compress the different conditions into a computation
+ * of how many iterations for which the condition is true.
+ *
+ * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
+ * 2**64 iterations, overflowing to 0. Of course, predicate registers
+ * aren't that large, so any value >= predicate size is sufficient.
+ */
+ tcg_gen_sub_i64(t0, op1, op0);
+
+ /* t0 = MIN(op1 - op0, vsz). */
+ tcg_gen_movi_i64(t1, vsz);
+ tcg_gen_umin_i64(t0, t0, t1);
+ if (a->eq) {
+ /* Equality means one more iteration. */
+ tcg_gen_addi_i64(t0, t0, 1);
+ }
+
+ /* t0 = (condition true ? t0 : 0). */
+ cond = (a->u
+ ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
+ : (a->eq ? TCG_COND_LE : TCG_COND_LT));
+ tcg_gen_movi_i64(t1, 0);
+ tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
+
+ t2 = tcg_temp_new_i32(); /* the count is passed to the helper as 32 bits */
+ tcg_gen_extrl_i64_i32(t2, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+
+ desc = (vsz / 8) - 2; /* presumably the predicate size in bytes, less 2 (cf. do_brk2) -- confirm */
+ desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz); /* element size goes in the data field */
+ t3 = tcg_const_i32(desc);
+
+ ptr = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
+
+ gen_helper_sve_while(t2, ptr, t2, t3); /* t2: count in, flags value out */
+ do_pred_flags(t2);
+
+ tcg_temp_free_ptr(ptr);
+ tcg_temp_free_i32(t2);
+ tcg_temp_free_i32(t3);
+ return true;
+}
+
+/*
+ *** SVE Integer Wide Immediate - Unpredicated Group
+ */
+
+/*
+ * FDUP: expand the encoded floating-point immediate a->imm for element
+ * size a->esz and replicate it across vector a->rd.  Element size 0 is
+ * rejected as an invalid encoding.
+ */
+static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
+{
+    unsigned vsz;
+    int dofs;
+    uint64_t fp_bits;
+
+    if (a->esz == 0) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    dofs = vec_full_reg_offset(s, a->rd);
+
+    /* Decode the VFP immediate, then repeat it to fill 64 bits. */
+    fp_bits = vfp_expand_imm(a->esz, a->imm);
+    fp_bits = dup_const(a->esz, fp_bits);
+
+    tcg_gen_gvec_dup64i(dofs, vsz, vsz, fp_bits);
+    return true;
+}
+
+/*
+ * DUP (immediate): replicate a->imm, at element size a->esz, across
+ * vector a->rd.  The encoding with element size 0 and insn bit 13 set
+ * is rejected.
+ */
+static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
+{
+    unsigned vsz;
+    int dofs;
+
+    if (a->esz == 0 && extract32(insn, 13, 1)) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    dofs = vec_full_reg_offset(s, a->rd);
+    tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
+    return true;
+}
+
+/*
+ * ADD (immediate): add a->imm to every element of vector a->rn and
+ * write the result to vector a->rd.  The encoding with element size 0
+ * and insn bit 13 set is rejected.
+ */
+static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{
+    unsigned vsz;
+
+    if (a->esz == 0 && extract32(insn, 13, 1)) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    tcg_gen_gvec_addi(a->esz,
+                      vec_full_reg_offset(s, a->rd),
+                      vec_full_reg_offset(s, a->rn),
+                      a->imm, vsz, vsz);
+    return true;
+}
+
+static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{ /* SUB (immediate) is expanded as ADD (immediate) of the negated value. */
+ a->imm = -a->imm; /* note: rewrites the decoded argument in place */
+ return trans_ADD_zzi(s, a, insn);
+}
+
+/*
+ * SUBR (immediate): expand a vector/scalar subtraction with the
+ * immediate a->imm as the scalar operand.  All four descriptors set
+ * scalar_first.  Source is vector a->rn, destination vector a->rd.
+ * The encoding with element size 0 and insn bit 13 set is rejected.
+ */
+static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{
+    /* One expansion descriptor per element size, indexed by a->esz. */
+    static const GVecGen2s subr_ops[4] = {
+        { .vece = MO_8,
+          .scalar_first = true,
+          .opc = INDEX_op_sub_vec,
+          .fni8 = tcg_gen_vec_sub8_i64,
+          .fniv = tcg_gen_sub_vec,
+          .fno = gen_helper_sve_subri_b },
+        { .vece = MO_16,
+          .scalar_first = true,
+          .opc = INDEX_op_sub_vec,
+          .fni8 = tcg_gen_vec_sub16_i64,
+          .fniv = tcg_gen_sub_vec,
+          .fno = gen_helper_sve_subri_h },
+        { .vece = MO_32,
+          .scalar_first = true,
+          .opc = INDEX_op_sub_vec,
+          .fni4 = tcg_gen_sub_i32,
+          .fniv = tcg_gen_sub_vec,
+          .fno = gen_helper_sve_subri_s },
+        { .vece = MO_64,
+          .scalar_first = true,
+          .opc = INDEX_op_sub_vec,
+          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+          .fni8 = tcg_gen_sub_i64,
+          .fniv = tcg_gen_sub_vec,
+          .fno = gen_helper_sve_subri_d }
+    };
+    unsigned vsz;
+    TCGv_i64 scalar;
+
+    if (a->esz == 0 && extract32(insn, 13, 1)) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    scalar = tcg_const_i64(a->imm);
+    tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
+                    vec_full_reg_offset(s, a->rn),
+                    vsz, vsz, scalar, &subr_ops[a->esz]);
+    tcg_temp_free_i64(scalar);
+    return true;
+}
+
+/*
+ * MUL (immediate): multiply every element of vector a->rn by a->imm
+ * and write the result to vector a->rd.
+ */
+static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{
+    unsigned vsz;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    tcg_gen_gvec_muli(a->esz,
+                      vec_full_reg_offset(s, a->rd),
+                      vec_full_reg_offset(s, a->rn),
+                      a->imm, vsz, vsz);
+    return true;
+}
+
+/*
+ * Shared expansion for the saturating add/subtract immediate forms:
+ * a->imm is materialised as a 64-bit constant and handed, with the
+ * U and D flags, to do_sat_addsub_vec().  The encoding with element
+ * size 0 and insn bit 13 set is rejected.
+ */
+static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
+                       bool u, bool d)
+{
+    TCGv_i64 imm_tmp;
+
+    if (a->esz == 0 && extract32(insn, 13, 1)) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    imm_tmp = tcg_const_i64(a->imm);
+    do_sat_addsub_vec(s, a->esz, a->rd, a->rn, imm_tmp, u, d);
+    tcg_temp_free_i64(imm_tmp);
+    return true;
+}
+
+static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{ /* SQADD (immediate): do_zzi_sat with u = false, d = false. */
+ return do_zzi_sat(s, a, insn, false, false);
+}
+
+static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{ /* UQADD (immediate): do_zzi_sat with u = true, d = false. */
+ return do_zzi_sat(s, a, insn, true, false);
+}
+
+static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{ /* SQSUB (immediate): do_zzi_sat with u = false, d = true. */
+ return do_zzi_sat(s, a, insn, false, true);
+}
+
+static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{ /* UQSUB (immediate): do_zzi_sat with u = true, d = true. */
+ return do_zzi_sat(s, a, insn, true, true);
+}
+
+/*
+ * Expand a vector-with-immediate operation through the out-of-line
+ * helper FN: source vector a->rn, destination vector a->rd, and a->imm
+ * passed as a 64-bit constant.
+ */
+static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
+{
+    unsigned vsz;
+    TCGv_i64 imm_tmp;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    imm_tmp = tcg_const_i64(a->imm);
+
+    tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
+                        vec_full_reg_offset(s, a->rn),
+                        imm_tmp, vsz, vsz, 0, fn);
+    tcg_temp_free_i64(imm_tmp);
+    return true;
+}
+
+/*
+ * Define trans_<NAME>_zzi: select gen_helper_sve_<name>i_{b,h,s,d} by
+ * a->esz and expand it with do_zzi_ool().
+ */
+#define DO_ZZI(NAME, name) \
+static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a,         \
+                               uint32_t insn)                           \
+{                                                                       \
+    static gen_helper_gvec_2i * const by_esz[4] = {                     \
+        gen_helper_sve_##name##i_b,                                     \
+        gen_helper_sve_##name##i_h,                                     \
+        gen_helper_sve_##name##i_s,                                     \
+        gen_helper_sve_##name##i_d,                                     \
+    };                                                                  \
+    gen_helper_gvec_2i *helper = by_esz[a->esz];                        \
+    return do_zzi_ool(s, a, helper);                                    \
+}
+
+DO_ZZI(SMAX, smax)
+DO_ZZI(UMAX, umax)
+DO_ZZI(SMIN, smin)
+DO_ZZI(UMIN, umin)
+
+#undef DO_ZZI
+
+/*
+ *** SVE Floating Point Arithmetic - Unpredicated Group
+ */
+
+/*
+ * Expand an unpredicated three-vector floating-point operation through
+ * helper FN, which also receives a pointer to the float status chosen
+ * by whether the element size is MO_16.  A NULL FN marks an unsupported
+ * element size and makes the function return false.
+ */
+static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
+                      gen_helper_gvec_3_ptr *fn)
+{
+    unsigned vsz;
+    TCGv_ptr fpst;
+
+    if (!fn) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    fpst = get_fpstatus_ptr(a->esz == MO_16);
+    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
+                       vec_full_reg_offset(s, a->rn),
+                       vec_full_reg_offset(s, a->rm),
+                       fpst, vsz, vsz, 0, fn);
+    tcg_temp_free_ptr(fpst);
+    return true;
+}
+
+
+/*
+ * Define trans_<NAME>: select gen_helper_gvec_<name>_{h,s,d} by a->esz
+ * and expand it with do_zzz_fp().  Element size 0 has no helper, so
+ * do_zzz_fp() rejects it.
+ */
+#define DO_FP3(NAME, name) \
+static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
+{                                                                        \
+    static gen_helper_gvec_3_ptr * const by_esz[4] = {                   \
+        NULL,                                                            \
+        gen_helper_gvec_##name##_h,                                      \
+        gen_helper_gvec_##name##_s,                                      \
+        gen_helper_gvec_##name##_d                                       \
+    };                                                                   \
+    gen_helper_gvec_3_ptr *helper = by_esz[a->esz];                      \
+    return do_zzz_fp(s, a, helper);                                      \
+}
+
+DO_FP3(FADD_zzz, fadd)
+DO_FP3(FSUB_zzz, fsub)
+DO_FP3(FMUL_zzz, fmul)
+DO_FP3(FTSMUL, ftsmul)
+DO_FP3(FRECPS, recps)
+DO_FP3(FRSQRTS, rsqrts)
+
+#undef DO_FP3
+
+/*
*** SVE Memory - 32-bit Gather and Unsized Contiguous Group
*/
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 0ff5edf2ce..f405c82fb2 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -9965,7 +9965,8 @@ static bool thumb_insn_is_16bit(DisasContext *s, uint32_t insn)
* end up actually treating this as two 16-bit insns, though,
* if it's half of a bl/blx pair that might span a page boundary.
*/
- if (arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
+ if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
+ arm_dc_feature(s, ARM_FEATURE_M)) {
/* Thumb2 cores (including all M profile ones) always treat
* 32-bit insns as 32-bit.
*/
@@ -10085,10 +10086,38 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
int conds;
int logic_cc;
- /* The only 32 bit insn that's allowed for Thumb1 is the combined
- * BL/BLX prefix and suffix.
+ /*
+ * ARMv6-M supports a limited subset of Thumb2 instructions.
+ * Other Thumb1 architectures allow only 32-bit
+ * combined BL/BLX prefix and suffix.
*/
- if ((insn & 0xf800e800) != 0xf000e800) {
+ if (arm_dc_feature(s, ARM_FEATURE_M) &&
+ !arm_dc_feature(s, ARM_FEATURE_V7)) {
+ int i;
+ bool found = false;
+ const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
+ 0xf3b08040 /* dsb */,
+ 0xf3b08050 /* dmb */,
+ 0xf3b08060 /* isb */,
+ 0xf3e08000 /* mrs */,
+ 0xf000d000 /* bl */};
+ const uint32_t armv6m_mask[] = {0xffe0d000,
+ 0xfff0d0f0,
+ 0xfff0d0f0,
+ 0xfff0d0f0,
+ 0xffe0d000,
+ 0xf800d000};
+
+ for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
+ if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ goto illegal_op;
+ }
+ } else if ((insn & 0xf800e800) != 0xf000e800) {
ARCH(6T2);
}
@@ -11009,7 +11038,11 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
}
break;
case 3: /* Special control operations. */
- ARCH(7);
+ if (!arm_dc_feature(s, ARM_FEATURE_V7) &&
+ !(arm_dc_feature(s, ARM_FEATURE_V6) &&
+ arm_dc_feature(s, ARM_FEATURE_M))) {
+ goto illegal_op;
+ }
op = (insn >> 4) & 0xf;
switch (op) {
case 2: /* clrex */
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 25e209da31..f504dd53c8 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -426,3 +426,72 @@ void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm,
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
+
+/*
+ * Floating-point trigonometric starting value.
+ * See the ARM ARM pseudocode function FPTrigSMul.
+ *
+ * Half precision: square op1, then, unless the square is a NaN, take
+ * the sign of the result from bit 0 of op2.
+ */
+static float16 float16_ftsmul(float16 op1, uint16_t op2, float_status *stat)
+{
+    float16 square = float16_mul(op1, op1, stat);
+
+    if (float16_is_any_nan(square)) {
+        return square;
+    }
+    return float16_set_sign(square, op2 & 1);
+}
+
+/*
+ * Single precision: square op1, then, unless the square is a NaN, take
+ * the sign of the result from bit 0 of op2.
+ */
+static float32 float32_ftsmul(float32 op1, uint32_t op2, float_status *stat)
+{
+    float32 square = float32_mul(op1, op1, stat);
+
+    if (float32_is_any_nan(square)) {
+        return square;
+    }
+    return float32_set_sign(square, op2 & 1);
+}
+
+/*
+ * Double precision: square op1, then, unless the square is a NaN, take
+ * the sign of the result from bit 0 of op2.
+ */
+static float64 float64_ftsmul(float64 op1, uint64_t op2, float_status *stat)
+{
+    float64 square = float64_mul(op1, op1, stat);
+
+    if (float64_is_any_nan(square)) {
+        return square;
+    }
+    return float64_set_sign(square, op2 & 1);
+}
+
+/*
+ * Define the out-of-line gvec helper NAME.  It applies FUNC, with the
+ * float status pointer STAT, to each pair of TYPE-sized elements of vn
+ * and vm and stores the results to vd.  The operation size comes from
+ * simd_oprsz(desc); the bytes of vd from there up to simd_maxsz(desc)
+ * are cleared, as the preceding helper in this file also does via
+ * clear_tail().
+ */
+#define DO_3OP(NAME, FUNC, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+{                                                                          \
+    intptr_t i, oprsz = simd_oprsz(desc);                                  \
+    TYPE *d = vd, *n = vn, *m = vm;                                        \
+    for (i = 0; i < oprsz / sizeof(TYPE); i++) {                           \
+        d[i] = FUNC(n[i], m[i], stat);                                     \
+    }                                                                      \
+    clear_tail(d, oprsz, simd_maxsz(desc));                                \
+}
+
+DO_3OP(gvec_fadd_h, float16_add, float16) /* element-wise add: half, single, double */
+DO_3OP(gvec_fadd_s, float32_add, float32)
+DO_3OP(gvec_fadd_d, float64_add, float64)
+
+DO_3OP(gvec_fsub_h, float16_sub, float16) /* element-wise subtract */
+DO_3OP(gvec_fsub_s, float32_sub, float32)
+DO_3OP(gvec_fsub_d, float64_sub, float64)
+
+DO_3OP(gvec_fmul_h, float16_mul, float16) /* element-wise multiply */
+DO_3OP(gvec_fmul_s, float32_mul, float32)
+DO_3OP(gvec_fmul_d, float64_mul, float64)
+
+DO_3OP(gvec_ftsmul_h, float16_ftsmul, float16) /* uses the *_ftsmul functions defined above */
+DO_3OP(gvec_ftsmul_s, float32_ftsmul, float32)
+DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64)
+
+#ifdef TARGET_AARCH64 /* presumably helper_recpsf_* / helper_rsqrtsf_* exist only in AArch64 builds -- confirm */
+
+DO_3OP(gvec_recps_h, helper_recpsf_f16, float16)
+DO_3OP(gvec_recps_s, helper_recpsf_f32, float32)
+DO_3OP(gvec_recps_d, helper_recpsf_f64, float64)
+
+DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16)
+DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32)
+DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
+
+#endif
+#undef DO_3OP /* the generator macro is local to this group of helpers */