aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2018-10-08 15:01:30 +0100
committerPeter Maydell <peter.maydell@linaro.org>2018-10-08 15:01:30 +0100
commit7c69b7c849641a39ba3defa40d384a2ba24cd7a2 (patch)
tree2a3929c352b68589d0ee5d3b36573d93d9ae707e
parent079911cb6e26898e16f5bb56ef4f9d33cf92d32d (diff)
parent74e2e59b8d0a68be0956310fc349179c89fd7be0 (diff)
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20181008' into staging
target-arm queue: * target/arm: fix error in a code comment * virt: Suppress external aborts on virt-2.10 and earlier * target/arm: Correct condition for v8M callee stack push * target/arm: Don't read r4 from v8M exception stackframe twice * target/arm: Support SVE in system emulation mode * target/arm: Implement v8M hardware stack limit checking * hw/display/bcm2835_fb: Silence Coverity warning about multiply overflow # gpg: Signature made Mon 08 Oct 2018 14:58:53 BST # gpg: using RSA key 3C2525ED14360CDE # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" # gpg: aka "Peter Maydell <pmaydell@gmail.com>" # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" # Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE * remotes/pmaydell/tags/pull-target-arm-20181008: (33 commits) hw/display/bcm2835_fb: Silence Coverity warning about multiply overflow target/arm: Add v8M stack checks for MSR to SP_NS target/arm: Add v8M stack checks for VLDM/VSTM target/arm: Add v8M stack checks for Thumb push/pop target/arm: Add v8M stack checks for T32 load/store single target/arm: Add v8M stack checks for Thumb2 LDM/STM target/arm: Add v8M stack checks for LDRD/STRD (imm) target/arm: Add v8M stack limit checks on NS function calls target/arm: Add v8M stack checks on exception entry target/arm: Add some comments in Thumb decode target/arm: Add v8M stack checks on ADD/SUB/MOV of SP target/arm: Move v7m_using_psp() to internals.h target/arm: Define new EXCP type for v8M stack overflows target/arm: Define new TBFLAG for v8M stack checking target/arm: Pass TCGMemOpIdx to sve memory helpers target/arm: Rewrite vector gather first-fault loads target/arm: Rewrite vector gather stores target/arm: Rewrite vector gather loads target/arm: Split contiguous stores for endianness target/arm: Split contiguous loads for endianness ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--hw/arm/virt.c2
-rw-r--r--hw/display/bcm2835_fb.c2
-rw-r--r--target/arm/cpu.h17
-rw-r--r--target/arm/cpu64.c42
-rw-r--r--target/arm/helper-sve.h389
-rw-r--r--target/arm/helper.c337
-rw-r--r--target/arm/helper.h2
-rw-r--r--target/arm/internals.h44
-rw-r--r--target/arm/kvm.c2
-rw-r--r--target/arm/kvm_arm.h4
-rw-r--r--target/arm/op_helper.c24
-rw-r--r--target/arm/sve_helper.c1969
-rw-r--r--target/arm/translate-a64.c8
-rw-r--r--target/arm/translate-sve.c690
-rw-r--r--target/arm/translate.c198
-rw-r--r--target/arm/translate.h1
16 files changed, 2623 insertions, 1108 deletions
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 0b57f87abc..a472566074 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1926,6 +1926,8 @@ static void virt_machine_2_10_options(MachineClass *mc)
{
virt_machine_2_11_options(mc);
SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_10);
+ /* before 2.11 we never faulted accesses to bad addresses */
+ mc->ignore_memory_transaction_failures = true;
}
DEFINE_VIRT_MACHINE(2, 10)
diff --git a/hw/display/bcm2835_fb.c b/hw/display/bcm2835_fb.c
index d534d00a65..599863e4e1 100644
--- a/hw/display/bcm2835_fb.c
+++ b/hw/display/bcm2835_fb.c
@@ -190,7 +190,7 @@ static void fb_update_display(void *opaque)
}
if (s->invalidate) {
- hwaddr base = s->config.base + xoff + yoff * src_width;
+ hwaddr base = s->config.base + xoff + (hwaddr)yoff * src_width;
framebuffer_update_memory_section(&s->fbsection, s->dma_mr,
base,
s->config.yres, src_width);
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 65c0fa0a65..3a2aff1192 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -56,6 +56,7 @@
#define EXCP_SEMIHOST 16 /* semihosting call */
#define EXCP_NOCP 17 /* v7M NOCP UsageFault */
#define EXCP_INVSTATE 18 /* v7M INVSTATE UsageFault */
+#define EXCP_STKOF 19 /* v8M STKOF UsageFault */
/* NB: add new EXCP_ defines to the array in arm_log_exception() too */
#define ARMV7M_EXCP_RESET 1
@@ -910,12 +911,20 @@ int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
int aarch64_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq);
+void aarch64_sve_change_el(CPUARMState *env, int old_el, int new_el);
+#else
+static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { }
+static inline void aarch64_sve_change_el(CPUARMState *env, int o, int n) { }
#endif
target_ulong do_arm_semihosting(CPUARMState *env);
void aarch64_sync_32_to_64(CPUARMState *env);
void aarch64_sync_64_to_32(CPUARMState *env);
+int fp_exception_el(CPUARMState *env, int cur_el);
+int sve_exception_el(CPUARMState *env, int cur_el);
+uint32_t sve_zcr_len_for_el(CPUARMState *env, int el);
+
static inline bool is_a64(CPUARMState *env)
{
return env->aarch64;
@@ -1336,8 +1345,10 @@ FIELD(V7M_CCR, UNALIGN_TRP, 3, 1)
FIELD(V7M_CCR, DIV_0_TRP, 4, 1)
FIELD(V7M_CCR, BFHFNMIGN, 8, 1)
FIELD(V7M_CCR, STKALIGN, 9, 1)
+FIELD(V7M_CCR, STKOFHFNMIGN, 10, 1)
FIELD(V7M_CCR, DC, 16, 1)
FIELD(V7M_CCR, IC, 17, 1)
+FIELD(V7M_CCR, BP, 18, 1)
/* V7M SCR bits */
FIELD(V7M_SCR, SLEEPONEXIT, 1, 1)
@@ -1378,6 +1389,7 @@ FIELD(V7M_CFSR, UNDEFINSTR, 16 + 0, 1)
FIELD(V7M_CFSR, INVSTATE, 16 + 1, 1)
FIELD(V7M_CFSR, INVPC, 16 + 2, 1)
FIELD(V7M_CFSR, NOCP, 16 + 3, 1)
+FIELD(V7M_CFSR, STKOF, 16 + 4, 1)
FIELD(V7M_CFSR, UNALIGNED, 16 + 8, 1)
FIELD(V7M_CFSR, DIVBYZERO, 16 + 9, 1)
@@ -2842,6 +2854,9 @@ static inline bool arm_cpu_data_is_big_endian(CPUARMState *env)
/* For M profile only, Handler (ie not Thread) mode */
#define ARM_TBFLAG_HANDLER_SHIFT 21
#define ARM_TBFLAG_HANDLER_MASK (1 << ARM_TBFLAG_HANDLER_SHIFT)
+/* For M profile only, whether we should generate stack-limit checks */
+#define ARM_TBFLAG_STACKCHECK_SHIFT 22
+#define ARM_TBFLAG_STACKCHECK_MASK (1 << ARM_TBFLAG_STACKCHECK_SHIFT)
/* Bit usage when in AArch64 state */
#define ARM_TBFLAG_TBI0_SHIFT 0 /* TBI0 for EL0/1 or TBI for EL2/3 */
@@ -2884,6 +2899,8 @@ static inline bool arm_cpu_data_is_big_endian(CPUARMState *env)
(((F) & ARM_TBFLAG_BE_DATA_MASK) >> ARM_TBFLAG_BE_DATA_SHIFT)
#define ARM_TBFLAG_HANDLER(F) \
(((F) & ARM_TBFLAG_HANDLER_MASK) >> ARM_TBFLAG_HANDLER_SHIFT)
+#define ARM_TBFLAG_STACKCHECK(F) \
+ (((F) & ARM_TBFLAG_STACKCHECK_MASK) >> ARM_TBFLAG_STACKCHECK_SHIFT)
#define ARM_TBFLAG_TBI0(F) \
(((F) & ARM_TBFLAG_TBI0_MASK) >> ARM_TBFLAG_TBI0_SHIFT)
#define ARM_TBFLAG_TBI1(F) \
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 800bff780e..db71504cb5 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -410,45 +410,3 @@ static void aarch64_cpu_register_types(void)
}
type_init(aarch64_cpu_register_types)
-
-/* The manual says that when SVE is enabled and VQ is widened the
- * implementation is allowed to zero the previously inaccessible
- * portion of the registers. The corollary to that is that when
- * SVE is enabled and VQ is narrowed we are also allowed to zero
- * the now inaccessible portion of the registers.
- *
- * The intent of this is that no predicate bit beyond VQ is ever set.
- * Which means that some operations on predicate registers themselves
- * may operate on full uint64_t or even unrolled across the maximum
- * uint64_t[4]. Performing 4 bits of host arithmetic unconditionally
- * may well be cheaper than conditionals to restrict the operation
- * to the relevant portion of a uint16_t[16].
- *
- * TODO: Need to call this for changes to the real system registers
- * and EL state changes.
- */
-void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq)
-{
- int i, j;
- uint64_t pmask;
-
- assert(vq >= 1 && vq <= ARM_MAX_VQ);
- assert(vq <= arm_env_get_cpu(env)->sve_max_vq);
-
- /* Zap the high bits of the zregs. */
- for (i = 0; i < 32; i++) {
- memset(&env->vfp.zregs[i].d[2 * vq], 0, 16 * (ARM_MAX_VQ - vq));
- }
-
- /* Zap the high bits of the pregs and ffr. */
- pmask = 0;
- if (vq & 3) {
- pmask = ~(-1ULL << (16 * (vq & 3)));
- }
- for (j = vq / 4; j < ARM_MAX_VQ / 4; j++) {
- for (i = 0; i < 17; ++i) {
- env->vfp.pregs[i].p[j] &= pmask;
- }
- pmask = 0;
- }
-}
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 023952a9a4..9e79182ab4 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -1128,20 +1128,35 @@ DEF_HELPER_FLAGS_4(sve_ld2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld4bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld1hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld2hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld3hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld4hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ld1ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld2ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld3ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld4ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ld1dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld2dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld3dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld4dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1150,13 +1165,21 @@ DEF_HELPER_FLAGS_4(sve_ld1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld1hsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld1hdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld1hss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld1hds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld1sdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld1sds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1166,17 +1189,28 @@ DEF_HELPER_FLAGS_4(sve_ldff1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldff1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldff1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1hsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1hdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1hss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1hds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1sdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1sds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldnf1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1186,218 +1220,357 @@ DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldnf1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldnf1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1sdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1sds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st4bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1bh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1bs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1bd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1hs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1hd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hs_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hs_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1sd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhsu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhsu_be_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_le_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldssu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldss_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbss_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhss_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhss_le_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhss_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbsu_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhsu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhsu_le_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhsu_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldssu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldss_le_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbss_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhss_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhss_le_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhss_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbdu_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhdu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhdu_le_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldsdu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhdu_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldddu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldsdu_le_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_be_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_le_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbds_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhds_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhds_le_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldsds_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldsds_le_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbdu_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhdu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhdu_le_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_be_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_le_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldsdu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_lddd_le_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldddu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_lddd_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbds_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhds_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhds_le_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldsds_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhds_be_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_le_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbdu_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhdu_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhdu_le_zd, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_be_zd, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_le_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldsdu_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldsdu_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldddu_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_lddd_le_zd, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbds_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhds_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhds_le_zd, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldsds_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldsds_le_zd, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhsu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffssu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffss_le_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldffbss_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhss_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhss_le_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhss_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldffbsu_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhsu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffssu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffss_le_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldffbss_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhss_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhss_le_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhss_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldffbdu_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhdu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffsdu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffddu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_le_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldffbds_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhds_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhds_le_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffsds_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffsds_le_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldffbdu_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhdu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffsdu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffdd_le_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffddu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffdd_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldffbds_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhds_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhds_le_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffsds_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhds_be_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_le_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldffbdu_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhdu_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zd, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zd, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffsdu_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffddu_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffdd_le_zd, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldffbds_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhds_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhds_le_zd, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffsds_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_stbs_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_sths_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_sths_le_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sths_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_stss_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_stss_le_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_stbs_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_sths_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_sths_le_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sths_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_stss_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_stss_le_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_stbd_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_sthd_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_sthd_le_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_stsd_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_stsd_le_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_stdd_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_stsd_be_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_le_zsu, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_stbd_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_sthd_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_sthd_le_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_be_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_le_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_stsd_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_stsd_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_stdd_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_stdd_le_zss, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_stbd_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_sthd_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_sthd_le_zd, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_be_zd, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_le_zd, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_stsd_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_stdd_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 64b1564594..c83f7c1109 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -4400,78 +4400,105 @@ static const ARMCPRegInfo debug_lpae_cp_reginfo[] = {
REGINFO_SENTINEL
};
-/* Return the exception level to which SVE-disabled exceptions should
- * be taken, or 0 if SVE is enabled.
+/* Return the exception level to which exceptions should be taken
+ * via SVEAccessTrap. If an exception should be routed through
+ * AArch64.AdvSIMDFPAccessTrap, return 0; fp_exception_el should
+ * take care of raising that exception.
+ * C.f. the ARM pseudocode function CheckSVEEnabled.
*/
-static int sve_exception_el(CPUARMState *env)
+int sve_exception_el(CPUARMState *env, int el)
{
#ifndef CONFIG_USER_ONLY
- unsigned current_el = arm_current_el(env);
+ if (el <= 1) {
+ bool disabled = false;
- /* The CPACR.ZEN controls traps to EL1:
- * 0, 2 : trap EL0 and EL1 accesses
- * 1 : trap only EL0 accesses
- * 3 : trap no accesses
- */
- switch (extract32(env->cp15.cpacr_el1, 16, 2)) {
- default:
- if (current_el <= 1) {
- /* Trap to PL1, which might be EL1 or EL3 */
- if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) {
- return 3;
- }
- return 1;
+ /* The CPACR.ZEN controls traps to EL1:
+ * 0, 2 : trap EL0 and EL1 accesses
+ * 1 : trap only EL0 accesses
+ * 3 : trap no accesses
+ */
+ if (!extract32(env->cp15.cpacr_el1, 16, 1)) {
+ disabled = true;
+ } else if (!extract32(env->cp15.cpacr_el1, 17, 1)) {
+ disabled = el == 0;
}
- break;
- case 1:
- if (current_el == 0) {
- return 1;
+ if (disabled) {
+ /* route_to_el2 */
+ return (arm_feature(env, ARM_FEATURE_EL2)
+ && !arm_is_secure(env)
+ && (env->cp15.hcr_el2 & HCR_TGE) ? 2 : 1);
}
- break;
- case 3:
- break;
- }
- /* Similarly for CPACR.FPEN, after having checked ZEN. */
- switch (extract32(env->cp15.cpacr_el1, 20, 2)) {
- default:
- if (current_el <= 1) {
- if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) {
- return 3;
- }
- return 1;
+ /* Check CPACR.FPEN. */
+ if (!extract32(env->cp15.cpacr_el1, 20, 1)) {
+ disabled = true;
+ } else if (!extract32(env->cp15.cpacr_el1, 21, 1)) {
+ disabled = el == 0;
}
- break;
- case 1:
- if (current_el == 0) {
- return 1;
+ if (disabled) {
+ return 0;
}
- break;
- case 3:
- break;
}
- /* CPTR_EL2. Check both TZ and TFP. */
- if (current_el <= 2
- && (env->cp15.cptr_el[2] & (CPTR_TFP | CPTR_TZ))
- && !arm_is_secure_below_el3(env)) {
- return 2;
+ /* CPTR_EL2. Since TZ and TFP are positive,
+ * they will be zero when EL2 is not present.
+ */
+ if (el <= 2 && !arm_is_secure_below_el3(env)) {
+ if (env->cp15.cptr_el[2] & CPTR_TZ) {
+ return 2;
+ }
+ if (env->cp15.cptr_el[2] & CPTR_TFP) {
+ return 0;
+ }
}
- /* CPTR_EL3. Check both EZ and TFP. */
- if (!(env->cp15.cptr_el[3] & CPTR_EZ)
- || (env->cp15.cptr_el[3] & CPTR_TFP)) {
+ /* CPTR_EL3. Since EZ is negative we must check for EL3. */
+ if (arm_feature(env, ARM_FEATURE_EL3)
+ && !(env->cp15.cptr_el[3] & CPTR_EZ)) {
return 3;
}
#endif
return 0;
}
+/*
+ * Given that SVE is enabled, return the vector length for EL.
+ */
+uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
+{
+ ARMCPU *cpu = arm_env_get_cpu(env);
+ uint32_t zcr_len = cpu->sve_max_vq - 1;
+
+ if (el <= 1) {
+ zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
+ }
+ if (el < 2 && arm_feature(env, ARM_FEATURE_EL2)) {
+ zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]);
+ }
+ if (el < 3 && arm_feature(env, ARM_FEATURE_EL3)) {
+ zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
+ }
+ return zcr_len;
+}
+
static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
+ int cur_el = arm_current_el(env);
+ int old_len = sve_zcr_len_for_el(env, cur_el);
+ int new_len;
+
/* Bits other than [3:0] are RAZ/WI. */
raw_write(env, ri, value & 0xf);
+
+ /*
+ * Because we arrived here, we know both FP and SVE are enabled;
+ * otherwise we would have trapped access to the ZCR_ELn register.
+ */
+ new_len = sve_zcr_len_for_el(env, cur_el);
+ if (new_len < old_len) {
+ aarch64_sve_narrow_vq(env, new_len + 1);
+ }
}
static const ARMCPRegInfo zcr_el1_reginfo = {
@@ -5018,9 +5045,10 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 3,
.access = PL1_R, .type = ARM_CP_CONST,
.resetvalue = 0 },
- { .name = "ID_AA64PFR4_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
+ { .name = "ID_AA64ZFR0_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 4,
.access = PL1_R, .type = ARM_CP_CONST,
+ /* At present, only SVEver == 0 is defined anyway. */
.resetvalue = 0 },
{ .name = "ID_AA64PFR5_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 5,
@@ -6554,18 +6582,6 @@ pend_fault:
return false;
}
-/* Return true if we're using the process stack pointer (not the MSP) */
-static bool v7m_using_psp(CPUARMState *env)
-{
- /* Handler mode always uses the main stack; for thread mode
- * the CONTROL.SPSEL bit determines the answer.
- * Note that in v7M it is not possible to be in Handler mode with
- * CONTROL.SPSEL non-zero, but in v8M it is, so we must check both.
- */
- return !arm_v7m_is_handler_mode(env) &&
- env->v7m.control[env->v7m.secure] & R_V7M_CONTROL_SPSEL_MASK;
-}
-
/* Write to v7M CONTROL.SPSEL bit for the specified security bank.
* This may change the current stack pointer between Main and Process
* stack pointers if it is done for the CONTROL register for the current
@@ -6722,6 +6738,10 @@ void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest)
"BLXNS with misaligned SP is UNPREDICTABLE\n");
}
+ if (sp < v7m_sp_limit(env)) {
+ raise_exception(env, EXCP_STKOF, 0, 1);
+ }
+
saved_psr = env->v7m.exception;
if (env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK) {
saved_psr |= XPSR_SFPA;
@@ -6851,6 +6871,8 @@ static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain,
uint32_t frameptr;
ARMMMUIdx mmu_idx;
bool stacked_ok;
+ uint32_t limit;
+ bool want_psp;
if (dotailchain) {
bool mode = lr & R_V7M_EXCRET_MODE_MASK;
@@ -6860,12 +6882,34 @@ static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain,
mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, M_REG_S, priv);
frame_sp_p = get_v7m_sp_ptr(env, M_REG_S, mode,
lr & R_V7M_EXCRET_SPSEL_MASK);
+ want_psp = mode && (lr & R_V7M_EXCRET_SPSEL_MASK);
+ if (want_psp) {
+ limit = env->v7m.psplim[M_REG_S];
+ } else {
+ limit = env->v7m.msplim[M_REG_S];
+ }
} else {
mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false));
frame_sp_p = &env->regs[13];
+ limit = v7m_sp_limit(env);
}
frameptr = *frame_sp_p - 0x28;
+ if (frameptr < limit) {
+ /*
+ * Stack limit failure: set SP to the limit value, and generate
+ * STKOF UsageFault. Stack pushes below the limit must not be
+ * performed. It is IMPDEF whether pushes above the limit are
+ * performed; we choose not to.
+ */
+ qemu_log_mask(CPU_LOG_INT,
+ "...STKOF during callee-saves register stacking\n");
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
+ env->v7m.secure);
+ *frame_sp_p = limit;
+ return true;
+ }
/* Write as much of the stack frame as we can. A write failure may
* cause us to pend a derived exception.
@@ -6889,10 +6933,7 @@ static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain,
v7m_stack_write(cpu, frameptr + 0x24, env->regs[11], mmu_idx,
ignore_faults);
- /* Update SP regardless of whether any of the stack accesses failed.
- * When we implement v8M stack limit checking then this attempt to
- * update SP might also fail and result in a derived exception.
- */
+ /* Update SP regardless of whether any of the stack accesses failed. */
*frame_sp_p = frameptr;
return !stacked_ok;
@@ -6938,7 +6979,7 @@ static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain,
* not already saved.
*/
if (lr & R_V7M_EXCRET_DCRS_MASK &&
- !(dotailchain && (lr & R_V7M_EXCRET_ES_MASK))) {
+ !(dotailchain && !(lr & R_V7M_EXCRET_ES_MASK))) {
push_failed = v7m_push_callee_stack(cpu, lr, dotailchain,
ignore_stackfaults);
}
@@ -7040,6 +7081,26 @@ static bool v7m_push_stack(ARMCPU *cpu)
frameptr -= 0x20;
+ if (arm_feature(env, ARM_FEATURE_V8)) {
+ uint32_t limit = v7m_sp_limit(env);
+
+ if (frameptr < limit) {
+ /*
+ * Stack limit failure: set SP to the limit value, and generate
+ * STKOF UsageFault. Stack pushes below the limit must not be
+ * performed. It is IMPDEF whether pushes above the limit are
+ * performed; we choose not to.
+ */
+ qemu_log_mask(CPU_LOG_INT,
+ "...STKOF during stacking\n");
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
+ env->v7m.secure);
+ env->regs[13] = limit;
+ return true;
+ }
+ }
+
/* Write as much of the stack frame as we can. If we fail a stack
* write this will result in a derived exception being pended
* (which may be taken in preference to the one we started with
@@ -7055,10 +7116,7 @@ static bool v7m_push_stack(ARMCPU *cpu)
v7m_stack_write(cpu, frameptr + 24, env->regs[15], mmu_idx, false) &&
v7m_stack_write(cpu, frameptr + 28, xpsr, mmu_idx, false);
- /* Update SP regardless of whether any of the stack accesses failed.
- * When we implement v8M stack limit checking then this attempt to
- * update SP might also fail and result in a derived exception.
- */
+ /* Update SP regardless of whether any of the stack accesses failed. */
env->regs[13] = frameptr;
return !stacked_ok;
@@ -7304,7 +7362,6 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
pop_ok = pop_ok &&
v7m_stack_read(cpu, &env->regs[4], frameptr + 0x8, mmu_idx) &&
- v7m_stack_read(cpu, &env->regs[4], frameptr + 0x8, mmu_idx) &&
v7m_stack_read(cpu, &env->regs[5], frameptr + 0xc, mmu_idx) &&
v7m_stack_read(cpu, &env->regs[6], frameptr + 0x10, mmu_idx) &&
v7m_stack_read(cpu, &env->regs[7], frameptr + 0x14, mmu_idx) &&
@@ -7512,6 +7569,7 @@ static void arm_log_exception(int idx)
[EXCP_SEMIHOST] = "Semihosting call",
[EXCP_NOCP] = "v7M NOCP UsageFault",
[EXCP_INVSTATE] = "v7M INVSTATE UsageFault",
+ [EXCP_STKOF] = "v8M STKOF UsageFault",
};
if (idx >= 0 && idx < ARRAY_SIZE(excnames)) {
@@ -7667,6 +7725,10 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVSTATE_MASK;
break;
+ case EXCP_STKOF:
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK;
+ break;
case EXCP_SWI:
/* The PC already points to the next instruction. */
armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SVC, env->v7m.secure);
@@ -8310,8 +8372,11 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
unsigned int new_el = env->exception.target_el;
target_ulong addr = env->cp15.vbar_el[new_el];
unsigned int new_mode = aarch64_pstate_mode(new_el, true);
+ unsigned int cur_el = arm_current_el(env);
- if (arm_current_el(env) < new_el) {
+ aarch64_sve_change_el(env, cur_el, new_el);
+
+ if (cur_el < new_el) {
/* Entry vector offset depends on whether the implemented EL
* immediately lower than the target level is using AArch32 or AArch64
*/
@@ -10929,11 +10994,23 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val)
* currently in handler mode or not, using the NS CONTROL.SPSEL.
*/
bool spsel = env->v7m.control[M_REG_NS] & R_V7M_CONTROL_SPSEL_MASK;
+ bool is_psp = !arm_v7m_is_handler_mode(env) && spsel;
+ uint32_t limit;
if (!env->v7m.secure) {
return;
}
- if (!arm_v7m_is_handler_mode(env) && spsel) {
+
+ limit = is_psp ? env->v7m.psplim[false] : env->v7m.msplim[false];
+
+ if (val < limit) {
+ CPUState *cs = CPU(arm_env_get_cpu(env));
+
+ cpu_restore_state(cs, GETPC(), true);
+ raise_exception(env, EXCP_STKOF, 0, 1);
+ }
+
+ if (is_psp) {
env->v7m.other_ss_psp = val;
} else {
env->v7m.other_ss_msp = val;
@@ -12516,11 +12593,10 @@ uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
/* Return the exception level to which FP-disabled exceptions should
* be taken, or 0 if FP is enabled.
*/
-static inline int fp_exception_el(CPUARMState *env)
+int fp_exception_el(CPUARMState *env, int cur_el)
{
#ifndef CONFIG_USER_ONLY
int fpen;
- int cur_el = arm_current_el(env);
/* CPACR and the CPTR registers don't exist before v6, so FP is
* always accessible
@@ -12583,7 +12659,8 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
target_ulong *cs_base, uint32_t *pflags)
{
ARMMMUIdx mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false));
- int fp_el = fp_exception_el(env);
+ int current_el = arm_current_el(env);
+ int fp_el = fp_exception_el(env, current_el);
uint32_t flags;
if (is_a64(env)) {
@@ -12594,7 +12671,7 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
flags |= (arm_regime_tbi1(env, mmu_idx) << ARM_TBFLAG_TBI1_SHIFT);
if (arm_feature(env, ARM_FEATURE_SVE)) {
- int sve_el = sve_exception_el(env);
+ int sve_el = sve_exception_el(env, current_el);
uint32_t zcr_len;
/* If SVE is disabled, but FP is enabled,
@@ -12603,19 +12680,7 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
if (sve_el != 0 && fp_el == 0) {
zcr_len = 0;
} else {
- int current_el = arm_current_el(env);
- ARMCPU *cpu = arm_env_get_cpu(env);
-
- zcr_len = cpu->sve_max_vq - 1;
- if (current_el <= 1) {
- zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
- }
- if (current_el < 2 && arm_feature(env, ARM_FEATURE_EL2)) {
- zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]);
- }
- if (current_el < 3 && arm_feature(env, ARM_FEATURE_EL3)) {
- zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
- }
+ zcr_len = sve_zcr_len_for_el(env, current_el);
}
flags |= sve_el << ARM_TBFLAG_SVEEXC_EL_SHIFT;
flags |= zcr_len << ARM_TBFLAG_ZCR_LEN_SHIFT;
@@ -12668,6 +12733,98 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
flags |= ARM_TBFLAG_HANDLER_MASK;
}
+ /* v8M always applies stack limit checks unless CCR.STKOFHFNMIGN is
+ * suppressing them because the requested execution priority is less than 0.
+ */
+ if (arm_feature(env, ARM_FEATURE_V8) &&
+ arm_feature(env, ARM_FEATURE_M) &&
+ !((mmu_idx & ARM_MMU_IDX_M_NEGPRI) &&
+ (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKOFHFNMIGN_MASK))) {
+ flags |= ARM_TBFLAG_STACKCHECK_MASK;
+ }
+
*pflags = flags;
*cs_base = 0;
}
+
+#ifdef TARGET_AARCH64
+/*
+ * The manual says that when SVE is enabled and VQ is widened the
+ * implementation is allowed to zero the previously inaccessible
+ * portion of the registers. The corollary to that is that when
+ * SVE is enabled and VQ is narrowed we are also allowed to zero
+ * the now inaccessible portion of the registers.
+ *
+ * The intent of this is that no predicate bit beyond VQ is ever set.
+ * Which means that some operations on predicate registers themselves
+ * may operate on full uint64_t or even unrolled across the maximum
+ * uint64_t[4]. Performing 4 bits of host arithmetic unconditionally
+ * may well be cheaper than conditionals to restrict the operation
+ * to the relevant portion of a uint16_t[16].
+ */
+void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq)
+{
+ int i, j;
+ uint64_t pmask;
+
+ assert(vq >= 1 && vq <= ARM_MAX_VQ);
+ assert(vq <= arm_env_get_cpu(env)->sve_max_vq);
+
+ /* Zap the high bits of the zregs. */
+ for (i = 0; i < 32; i++) {
+ memset(&env->vfp.zregs[i].d[2 * vq], 0, 16 * (ARM_MAX_VQ - vq));
+ }
+
+ /* Zap the high bits of the pregs and ffr. */
+ pmask = 0;
+ if (vq & 3) {
+ pmask = ~(-1ULL << (16 * (vq & 3)));
+ }
+ for (j = vq / 4; j < ARM_MAX_VQ / 4; j++) {
+ for (i = 0; i < 17; ++i) {
+ env->vfp.pregs[i].p[j] &= pmask;
+ }
+ pmask = 0;
+ }
+}
+
+/*
+ * Notice a change in SVE vector size when changing EL.
+ */
+void aarch64_sve_change_el(CPUARMState *env, int old_el, int new_el)
+{
+ int old_len, new_len;
+
+ /* Nothing to do if no SVE. */
+ if (!arm_feature(env, ARM_FEATURE_SVE)) {
+ return;
+ }
+
+ /* Nothing to do if FP is disabled in either EL. */
+ if (fp_exception_el(env, old_el) || fp_exception_el(env, new_el)) {
+ return;
+ }
+
+ /*
+ * DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped
+ * at ELx, or not available because the EL is in AArch32 state, then
+ * for all purposes other than a direct read, the ZCR_ELx.LEN field
+ * has an effective value of 0".
+ *
+ * Consider EL2 (aa64, vq=4) -> EL0 (aa32) -> EL1 (aa64, vq=0).
+ * If we ignore aa32 state, we would fail to see the vq4->vq0 transition
+ * from EL2->EL1. Thus we go ahead and narrow when entering aa32 so that
+ * we already have the correct register contents when encountering the
+ * vq0->vq0 transition between EL0->EL1.
+ */
+ old_len = (arm_el_is_aa64(env, old_el) && !sve_exception_el(env, old_el)
+ ? sve_zcr_len_for_el(env, old_el) : 0);
+ new_len = (arm_el_is_aa64(env, new_el) && !sve_exception_el(env, new_el)
+ ? sve_zcr_len_for_el(env, new_el) : 0);
+
+ /* When changing vector length, clear inaccessible state. */
+ if (new_len < old_len) {
+ aarch64_sve_narrow_vq(env, new_len + 1);
+ }
+}
+#endif
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 59e8c3bd1b..8c9590091b 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -69,6 +69,8 @@ DEF_HELPER_2(v7m_blxns, void, env, i32)
DEF_HELPER_3(v7m_tt, i32, env, i32, i32)
+DEF_HELPER_2(v8m_stackcheck, void, env, i32)
+
DEF_HELPER_4(access_check_cp_reg, void, env, ptr, i32, i32)
DEF_HELPER_3(set_cp_reg, void, env, ptr, i32)
DEF_HELPER_2(get_cp_reg, i32, env, ptr)
diff --git a/target/arm/internals.h b/target/arm/internals.h
index dc9357766c..a4fc709bcc 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -94,6 +94,15 @@ FIELD(V7M_EXCRET, RES1, 7, 25) /* including the must-be-1 prefix */
#define M_FAKE_FSR_NSC_EXEC 0xf /* NS executing in S&NSC memory */
#define M_FAKE_FSR_SFAULT 0xe /* SecureFault INVTRAN, INVEP or AUVIOL */
+/**
+ * raise_exception: Raise the specified exception.
+ * Raise a guest exception with the specified value, syndrome register
+ * and target exception level. This should be called from helper functions,
+ * and never returns because we will longjump back up to the CPU main loop.
+ */
+void QEMU_NORETURN raise_exception(CPUARMState *env, uint32_t excp,
+ uint32_t syndrome, uint32_t target_el);
+
/*
* For AArch64, map a given EL to an index in the banked_spsr array.
* Note that this mapping and the AArch32 mapping defined in bank_number()
@@ -796,4 +805,39 @@ static inline uint32_t arm_debug_exception_fsr(CPUARMState *env)
}
}
+/* Note make_memop_idx reserves 4 bits for mmu_idx, and MO_BSWAP is bit 3.
+ * Thus a TCGMemOpIdx, without any MO_ALIGN bits, fits in 8 bits.
+ */
+#define MEMOPIDX_SHIFT 8
+
+/**
+ * v7m_using_psp: Return true if using process stack pointer
+ * Return true if the CPU is currently using the process stack
+ * pointer, or false if it is using the main stack pointer.
+ */
+static inline bool v7m_using_psp(CPUARMState *env)
+{
+ /* Handler mode always uses the main stack; for thread mode
+ * the CONTROL.SPSEL bit determines the answer.
+ * Note that in v7M it is not possible to be in Handler mode with
+ * CONTROL.SPSEL non-zero, but in v8M it is, so we must check both.
+ */
+ return !arm_v7m_is_handler_mode(env) &&
+ env->v7m.control[env->v7m.secure] & R_V7M_CONTROL_SPSEL_MASK;
+}
+
+/**
+ * v7m_sp_limit: Return SP limit for current CPU state
+ * Return the SP limit value for the current CPU security state
+ * and stack pointer.
+ */
+static inline uint32_t v7m_sp_limit(CPUARMState *env)
+{
+ if (v7m_using_psp(env)) {
+ return env->v7m.psplim[env->v7m.secure];
+ } else {
+ return env->v7m.msplim[env->v7m.secure];
+ }
+}
+
#endif
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 65f867d569..54ef5f711b 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -310,7 +310,7 @@ static int compare_u64(const void *a, const void *b)
return 0;
}
-/* Initialize the CPUState's cpreg list according to the kernel's
+/* Initialize the ARMCPU cpreg list according to the kernel's
* definition of what CPU registers it knows about (and throw away
* the previous TCG-created cpreg list).
*/
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index 863f205822..5948e8b560 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -50,9 +50,9 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
/**
* kvm_arm_init_cpreg_list:
- * @cs: CPUState
+ * @cpu: ARMCPU
*
- * Initialize the CPUState's cpreg list according to the kernel's
+ * Initialize the ARMCPU cpreg list according to the kernel's
* definition of what CPU registers it knows about (and throw away
* the previous TCG-created cpreg list).
*
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index 952b8d122b..fb15a13e6c 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -28,8 +28,8 @@
#define SIGNBIT (uint32_t)0x80000000
#define SIGNBIT64 ((uint64_t)1 << 63)
-static void raise_exception(CPUARMState *env, uint32_t excp,
- uint32_t syndrome, uint32_t target_el)
+void raise_exception(CPUARMState *env, uint32_t excp,
+ uint32_t syndrome, uint32_t target_el)
{
CPUState *cs = CPU(arm_env_get_cpu(env));
@@ -238,6 +238,25 @@ void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
#endif /* !defined(CONFIG_USER_ONLY) */
+void HELPER(v8m_stackcheck)(CPUARMState *env, uint32_t newvalue)
+{
+ /*
+ * Perform the v8M stack limit check for SP updates from translated code,
+ * raising an exception if the limit is breached.
+ */
+ if (newvalue < v7m_sp_limit(env)) {
+ CPUState *cs = CPU(arm_env_get_cpu(env));
+
+ /*
+ * Stack limit exceptions are a rare case, so rather than syncing
+ * PC/condbits before the call, we use cpu_restore_state() to
+ * get them right before raising the exception.
+ */
+ cpu_restore_state(cs, GETPC(), true);
+ raise_exception(env, EXCP_STKOF, 0, 1);
+ }
+}
+
uint32_t HELPER(add_setq)(CPUARMState *env, uint32_t a, uint32_t b)
{
uint32_t res = a + b;
@@ -1082,6 +1101,7 @@ void HELPER(exception_return)(CPUARMState *env)
"AArch64 EL%d PC 0x%" PRIx64 "\n",
cur_el, new_el, env->pc);
}
+ aarch64_sve_change_el(env, cur_el, new_el);
qemu_mutex_lock_iothread();
arm_call_el_change_hook(arm_env_get_cpu(env));
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 0f98097253..8cbc6516ab 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -19,6 +19,7 @@
#include "qemu/osdep.h"
#include "cpu.h"
+#include "internals.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "exec/helper-proto.h"
@@ -1688,6 +1689,47 @@ static void swap_memmove(void *vd, void *vs, size_t n)
}
}
+/* Similarly for memset of 0. */
+static void swap_memzero(void *vd, size_t n)
+{
+ uintptr_t d = (uintptr_t)vd;
+ uintptr_t o = (d | n) & 7;
+ size_t i;
+
+ /* Usually, the first bit of a predicate is set, so N is 0. */
+ if (likely(n == 0)) {
+ return;
+ }
+
+#ifndef HOST_WORDS_BIGENDIAN
+ o = 0;
+#endif
+ switch (o) {
+ case 0:
+ memset(vd, 0, n);
+ break;
+
+ case 4:
+ for (i = 0; i < n; i += 4) {
+ *(uint32_t *)H1_4(d + i) = 0;
+ }
+ break;
+
+ case 2:
+ case 6:
+ for (i = 0; i < n; i += 2) {
+ *(uint16_t *)H1_2(d + i) = 0;
+ }
+ break;
+
+ default:
+ for (i = 0; i < n; i++) {
+ *(uint8_t *)H1(d + i) = 0;
+ }
+ break;
+ }
+}
+
void HELPER(sve_ext)(void *vd, void *vn, void *vm, uint32_t desc)
{
intptr_t opr_sz = simd_oprsz(desc);
@@ -3927,161 +3969,471 @@ void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc)
/*
* Load contiguous data, protected by a governing predicate.
*/
-#define DO_LD1(NAME, FN, TYPEE, TYPEM, H) \
-static void do_##NAME(CPUARMState *env, void *vd, void *vg, \
- target_ulong addr, intptr_t oprsz, \
- uintptr_t ra) \
-{ \
- intptr_t i = 0; \
- do { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- TYPEM m = 0; \
- if (pg & 1) { \
- m = FN(env, addr, ra); \
- } \
- *(TYPEE *)(vd + H(i)) = m; \
- i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \
- addr += sizeof(TYPEM); \
- } while (i & 15); \
- } while (i < oprsz); \
-} \
-void HELPER(NAME)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- do_##NAME(env, &env->vfp.zregs[simd_data(desc)], vg, \
- addr, simd_oprsz(desc), GETPC()); \
+
+/*
+ * Load elements into @vd, controlled by @vg, from @host + @mem_ofs.
+ * Memory is valid through @host + @mem_max. The register element
+ * indicies are inferred from @mem_ofs, as modified by the types for
+ * which the helper is built. Return the @mem_ofs of the first element
+ * not loaded (which is @mem_max if they are all loaded).
+ *
+ * For softmmu, we have fully validated the guest page. For user-only,
+ * we cannot fully validate without taking the mmap lock, but since we
+ * know the access is within one host page, if any access is valid they
+ * all must be valid. However, when @vg is all false, it may be that
+ * no access is valid.
+ */
+typedef intptr_t sve_ld1_host_fn(void *vd, void *vg, void *host,
+ intptr_t mem_ofs, intptr_t mem_max);
+
+/*
+ * Load one element into @vd + @reg_off from (@env, @vaddr, @ra).
+ * The controlling predicate is known to be true.
+ */
+typedef void sve_ld1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off,
+ target_ulong vaddr, TCGMemOpIdx oi, uintptr_t ra);
+typedef sve_ld1_tlb_fn sve_st1_tlb_fn;
+
+/*
+ * Generate the above primitives.
+ */
+
+#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \
+static intptr_t sve_##NAME##_host(void *vd, void *vg, void *host, \
+ intptr_t mem_off, const intptr_t mem_max) \
+{ \
+ intptr_t reg_off = mem_off * (sizeof(TYPEE) / sizeof(TYPEM)); \
+ uint64_t *pg = vg; \
+ while (mem_off + sizeof(TYPEM) <= mem_max) { \
+ TYPEM val = 0; \
+ if (likely((pg[reg_off >> 6] >> (reg_off & 63)) & 1)) { \
+ val = HOST(host + mem_off); \
+ } \
+ *(TYPEE *)(vd + H(reg_off)) = val; \
+ mem_off += sizeof(TYPEM), reg_off += sizeof(TYPEE); \
+ } \
+ return mem_off; \
+}
+
+#ifdef CONFIG_SOFTMMU
+#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB) \
+static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \
+ target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \
+{ \
+ TYPEM val = TLB(env, addr, oi, ra); \
+ *(TYPEE *)(vd + H(reg_off)) = val; \
}
+#else
+#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB) \
+static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \
+ target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \
+{ \
+ TYPEM val = HOST(g2h(addr)); \
+ *(TYPEE *)(vd + H(reg_off)) = val; \
+}
+#endif
-#define DO_LD2(NAME, FN, TYPEE, TYPEM, H) \
-void HELPER(NAME)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- intptr_t i, oprsz = simd_oprsz(desc); \
- intptr_t ra = GETPC(); \
- unsigned rd = simd_data(desc); \
- void *d1 = &env->vfp.zregs[rd]; \
- void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \
- for (i = 0; i < oprsz; ) { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- TYPEM m1 = 0, m2 = 0; \
- if (pg & 1) { \
- m1 = FN(env, addr, ra); \
- m2 = FN(env, addr + sizeof(TYPEM), ra); \
- } \
- *(TYPEE *)(d1 + H(i)) = m1; \
- *(TYPEE *)(d2 + H(i)) = m2; \
- i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \
- addr += 2 * sizeof(TYPEM); \
- } while (i & 15); \
- } \
+#define DO_LD_PRIM_1(NAME, H, TE, TM) \
+ DO_LD_HOST(NAME, H, TE, TM, ldub_p) \
+ DO_LD_TLB(NAME, H, TE, TM, ldub_p, 0, helper_ret_ldub_mmu)
+
+DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t)
+DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t)
+DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t)
+DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t)
+DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t)
+DO_LD_PRIM_1(ld1bdu, , uint64_t, uint8_t)
+DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t)
+
+#define DO_LD_PRIM_2(NAME, end, MOEND, H, TE, TM, PH, PT) \
+ DO_LD_HOST(NAME##_##end, H, TE, TM, PH##_##end##_p) \
+ DO_LD_TLB(NAME##_##end, H, TE, TM, PH##_##end##_p, \
+ MOEND, helper_##end##_##PT##_mmu)
+
+DO_LD_PRIM_2(ld1hh, le, MO_LE, H1_2, uint16_t, uint16_t, lduw, lduw)
+DO_LD_PRIM_2(ld1hsu, le, MO_LE, H1_4, uint32_t, uint16_t, lduw, lduw)
+DO_LD_PRIM_2(ld1hss, le, MO_LE, H1_4, uint32_t, int16_t, lduw, lduw)
+DO_LD_PRIM_2(ld1hdu, le, MO_LE, , uint64_t, uint16_t, lduw, lduw)
+DO_LD_PRIM_2(ld1hds, le, MO_LE, , uint64_t, int16_t, lduw, lduw)
+
+DO_LD_PRIM_2(ld1ss, le, MO_LE, H1_4, uint32_t, uint32_t, ldl, ldul)
+DO_LD_PRIM_2(ld1sdu, le, MO_LE, , uint64_t, uint32_t, ldl, ldul)
+DO_LD_PRIM_2(ld1sds, le, MO_LE, , uint64_t, int32_t, ldl, ldul)
+
+DO_LD_PRIM_2(ld1dd, le, MO_LE, , uint64_t, uint64_t, ldq, ldq)
+
+DO_LD_PRIM_2(ld1hh, be, MO_BE, H1_2, uint16_t, uint16_t, lduw, lduw)
+DO_LD_PRIM_2(ld1hsu, be, MO_BE, H1_4, uint32_t, uint16_t, lduw, lduw)
+DO_LD_PRIM_2(ld1hss, be, MO_BE, H1_4, uint32_t, int16_t, lduw, lduw)
+DO_LD_PRIM_2(ld1hdu, be, MO_BE, , uint64_t, uint16_t, lduw, lduw)
+DO_LD_PRIM_2(ld1hds, be, MO_BE, , uint64_t, int16_t, lduw, lduw)
+
+DO_LD_PRIM_2(ld1ss, be, MO_BE, H1_4, uint32_t, uint32_t, ldl, ldul)
+DO_LD_PRIM_2(ld1sdu, be, MO_BE, , uint64_t, uint32_t, ldl, ldul)
+DO_LD_PRIM_2(ld1sds, be, MO_BE, , uint64_t, int32_t, ldl, ldul)
+
+DO_LD_PRIM_2(ld1dd, be, MO_BE, , uint64_t, uint64_t, ldq, ldq)
+
+#undef DO_LD_TLB
+#undef DO_LD_HOST
+#undef DO_LD_PRIM_1
+#undef DO_LD_PRIM_2
+
+/*
+ * Skip through a sequence of inactive elements in the guarding predicate @vg,
+ * beginning at @reg_off bounded by @reg_max. Return the offset of the active
+ * element >= @reg_off, or @reg_max if there were no active elements at all.
+ */
+static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off,
+ intptr_t reg_max, int esz)
+{
+ uint64_t pg_mask = pred_esz_masks[esz];
+ uint64_t pg = (vg[reg_off >> 6] & pg_mask) >> (reg_off & 63);
+
+ /* In normal usage, the first element is active. */
+ if (likely(pg & 1)) {
+ return reg_off;
+ }
+
+ if (pg == 0) {
+ reg_off &= -64;
+ do {
+ reg_off += 64;
+ if (unlikely(reg_off >= reg_max)) {
+ /* The entire predicate was false. */
+ return reg_max;
+ }
+ pg = vg[reg_off >> 6] & pg_mask;
+ } while (pg == 0);
+ }
+ reg_off += ctz64(pg);
+
+ /* We should never see an out of range predicate bit set. */
+ tcg_debug_assert(reg_off < reg_max);
+ return reg_off;
}
-#define DO_LD3(NAME, FN, TYPEE, TYPEM, H) \
-void HELPER(NAME)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- intptr_t i, oprsz = simd_oprsz(desc); \
- intptr_t ra = GETPC(); \
- unsigned rd = simd_data(desc); \
- void *d1 = &env->vfp.zregs[rd]; \
- void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \
- void *d3 = &env->vfp.zregs[(rd + 2) & 31]; \
- for (i = 0; i < oprsz; ) { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- TYPEM m1 = 0, m2 = 0, m3 = 0; \
- if (pg & 1) { \
- m1 = FN(env, addr, ra); \
- m2 = FN(env, addr + sizeof(TYPEM), ra); \
- m3 = FN(env, addr + 2 * sizeof(TYPEM), ra); \
- } \
- *(TYPEE *)(d1 + H(i)) = m1; \
- *(TYPEE *)(d2 + H(i)) = m2; \
- *(TYPEE *)(d3 + H(i)) = m3; \
- i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \
- addr += 3 * sizeof(TYPEM); \
- } while (i & 15); \
- } \
+/*
+ * Return the maximum offset <= @mem_max which is still within the page
+ * referenced by @base + @mem_off.
+ */
+static intptr_t max_for_page(target_ulong base, intptr_t mem_off,
+ intptr_t mem_max)
+{
+ target_ulong addr = base + mem_off;
+ intptr_t split = -(intptr_t)(addr | TARGET_PAGE_MASK);
+ return MIN(split, mem_max - mem_off) + mem_off;
}
-#define DO_LD4(NAME, FN, TYPEE, TYPEM, H) \
-void HELPER(NAME)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- intptr_t i, oprsz = simd_oprsz(desc); \
- intptr_t ra = GETPC(); \
- unsigned rd = simd_data(desc); \
- void *d1 = &env->vfp.zregs[rd]; \
- void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \
- void *d3 = &env->vfp.zregs[(rd + 2) & 31]; \
- void *d4 = &env->vfp.zregs[(rd + 3) & 31]; \
- for (i = 0; i < oprsz; ) { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- TYPEM m1 = 0, m2 = 0, m3 = 0, m4 = 0; \
- if (pg & 1) { \
- m1 = FN(env, addr, ra); \
- m2 = FN(env, addr + sizeof(TYPEM), ra); \
- m3 = FN(env, addr + 2 * sizeof(TYPEM), ra); \
- m4 = FN(env, addr + 3 * sizeof(TYPEM), ra); \
- } \
- *(TYPEE *)(d1 + H(i)) = m1; \
- *(TYPEE *)(d2 + H(i)) = m2; \
- *(TYPEE *)(d3 + H(i)) = m3; \
- *(TYPEE *)(d4 + H(i)) = m4; \
- i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \
- addr += 4 * sizeof(TYPEM); \
- } while (i & 15); \
- } \
+static inline void set_helper_retaddr(uintptr_t ra)
+{
+#ifdef CONFIG_USER_ONLY
+ helper_retaddr = ra;
+#endif
}
-DO_LD1(sve_ld1bhu_r, cpu_ldub_data_ra, uint16_t, uint8_t, H1_2)
-DO_LD1(sve_ld1bhs_r, cpu_ldsb_data_ra, uint16_t, int8_t, H1_2)
-DO_LD1(sve_ld1bsu_r, cpu_ldub_data_ra, uint32_t, uint8_t, H1_4)
-DO_LD1(sve_ld1bss_r, cpu_ldsb_data_ra, uint32_t, int8_t, H1_4)
-DO_LD1(sve_ld1bdu_r, cpu_ldub_data_ra, uint64_t, uint8_t, )
-DO_LD1(sve_ld1bds_r, cpu_ldsb_data_ra, uint64_t, int8_t, )
+/*
+ * The result of tlb_vaddr_to_host for user-only is just g2h(x),
+ * which is always non-null. Elide the useless test.
+ */
+static inline bool test_host_page(void *host)
+{
+#ifdef CONFIG_USER_ONLY
+ return true;
+#else
+ return likely(host != NULL);
+#endif
+}
-DO_LD1(sve_ld1hsu_r, cpu_lduw_data_ra, uint32_t, uint16_t, H1_4)
-DO_LD1(sve_ld1hss_r, cpu_ldsw_data_ra, uint32_t, int16_t, H1_4)
-DO_LD1(sve_ld1hdu_r, cpu_lduw_data_ra, uint64_t, uint16_t, )
-DO_LD1(sve_ld1hds_r, cpu_ldsw_data_ra, uint64_t, int16_t, )
+/*
+ * Common helper for all contiguous one-register predicated loads.
+ */
+static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr,
+ uint32_t desc, const uintptr_t retaddr,
+ const int esz, const int msz,
+ sve_ld1_host_fn *host_fn,
+ sve_ld1_tlb_fn *tlb_fn)
+{
+ const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+ const int mmu_idx = get_mmuidx(oi);
+ const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+ void *vd = &env->vfp.zregs[rd];
+ const int diffsz = esz - msz;
+ const intptr_t reg_max = simd_oprsz(desc);
+ const intptr_t mem_max = reg_max >> diffsz;
+ ARMVectorReg scratch;
+ void *host;
+ intptr_t split, reg_off, mem_off;
+
+ /* Find the first active element. */
+ reg_off = find_next_active(vg, 0, reg_max, esz);
+ if (unlikely(reg_off == reg_max)) {
+ /* The entire predicate was false; no load occurs. */
+ memset(vd, 0, reg_max);
+ return;
+ }
+ mem_off = reg_off >> diffsz;
+ set_helper_retaddr(retaddr);
+
+ /*
+ * If the (remaining) load is entirely within a single page, then:
+ * For softmmu, and the tlb hits, then no faults will occur;
+ * For user-only, either the first load will fault or none will.
+ * We can thus perform the load directly to the destination and
+ * Vd will be unmodified on any exception path.
+ */
+ split = max_for_page(addr, mem_off, mem_max);
+ if (likely(split == mem_max)) {
+ host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
+ if (test_host_page(host)) {
+ mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max);
+ tcg_debug_assert(mem_off == mem_max);
+ set_helper_retaddr(0);
+ /* After having taken any fault, zero leading inactive elements. */
+ swap_memzero(vd, reg_off);
+ return;
+ }
+ }
-DO_LD1(sve_ld1sdu_r, cpu_ldl_data_ra, uint64_t, uint32_t, )
-DO_LD1(sve_ld1sds_r, cpu_ldl_data_ra, uint64_t, int32_t, )
+ /*
+ * Perform the predicated read into a temporary, thus ensuring
+ * if the load of the last element faults, Vd is not modified.
+ */
+#ifdef CONFIG_USER_ONLY
+ swap_memzero(&scratch, reg_off);
+ host_fn(&scratch, vg, g2h(addr), mem_off, mem_max);
+#else
+ memset(&scratch, 0, reg_max);
+ goto start;
+ while (1) {
+ reg_off = find_next_active(vg, reg_off, reg_max, esz);
+ if (reg_off >= reg_max) {
+ break;
+ }
+ mem_off = reg_off >> diffsz;
+ split = max_for_page(addr, mem_off, mem_max);
+
+ start:
+ if (split - mem_off >= (1 << msz)) {
+ /* At least one whole element on this page. */
+ host = tlb_vaddr_to_host(env, addr + mem_off,
+ MMU_DATA_LOAD, mmu_idx);
+ if (host) {
+ mem_off = host_fn(&scratch, vg, host - mem_off,
+ mem_off, split);
+ reg_off = mem_off << diffsz;
+ continue;
+ }
+ }
-DO_LD1(sve_ld1bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1)
-DO_LD2(sve_ld2bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1)
-DO_LD3(sve_ld3bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1)
-DO_LD4(sve_ld4bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1)
+ /*
+ * Perform one normal read. This may fault, longjmping out to the
+ * main loop in order to raise an exception. It may succeed, and
+ * as a side-effect load the TLB entry for the next round. Finally,
+ * in the extremely unlikely case we're performing this operation
+ * on I/O memory, it may succeed but not bring in the TLB entry.
+ * But even then we have still made forward progress.
+ */
+ tlb_fn(env, &scratch, reg_off, addr + mem_off, oi, retaddr);
+ reg_off += 1 << esz;
+ }
+#endif
-DO_LD1(sve_ld1hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2)
-DO_LD2(sve_ld2hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2)
-DO_LD3(sve_ld3hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2)
-DO_LD4(sve_ld4hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2)
+ set_helper_retaddr(0);
+ memcpy(vd, &scratch, reg_max);
+}
-DO_LD1(sve_ld1ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4)
-DO_LD2(sve_ld2ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4)
-DO_LD3(sve_ld3ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4)
-DO_LD4(sve_ld4ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4)
+#define DO_LD1_1(NAME, ESZ) \
+void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \
+ sve_##NAME##_host, sve_##NAME##_tlb); \
+}
+
+#define DO_LD1_2(NAME, ESZ, MSZ) \
+void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \
+ sve_##NAME##_le_host, sve_##NAME##_le_tlb); \
+} \
+void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \
+ sve_##NAME##_be_host, sve_##NAME##_be_tlb); \
+}
+
+DO_LD1_1(ld1bb, 0)
+DO_LD1_1(ld1bhu, 1)
+DO_LD1_1(ld1bhs, 1)
+DO_LD1_1(ld1bsu, 2)
+DO_LD1_1(ld1bss, 2)
+DO_LD1_1(ld1bdu, 3)
+DO_LD1_1(ld1bds, 3)
-DO_LD1(sve_ld1dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, )
-DO_LD2(sve_ld2dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, )
-DO_LD3(sve_ld3dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, )
-DO_LD4(sve_ld4dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, )
+DO_LD1_2(ld1hh, 1, 1)
+DO_LD1_2(ld1hsu, 2, 1)
+DO_LD1_2(ld1hss, 2, 1)
+DO_LD1_2(ld1hdu, 3, 1)
+DO_LD1_2(ld1hds, 3, 1)
-#undef DO_LD1
-#undef DO_LD2
-#undef DO_LD3
-#undef DO_LD4
+DO_LD1_2(ld1ss, 2, 2)
+DO_LD1_2(ld1sdu, 3, 2)
+DO_LD1_2(ld1sds, 3, 2)
+
+DO_LD1_2(ld1dd, 3, 3)
+
+#undef DO_LD1_1
+#undef DO_LD1_2
/*
- * Load contiguous data, first-fault and no-fault.
+ * Common helpers for all contiguous 2,3,4-register predicated loads.
*/
+static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr,
+ uint32_t desc, int size, uintptr_t ra,
+ sve_ld1_tlb_fn *tlb_fn)
+{
+ const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+ const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+ intptr_t i, oprsz = simd_oprsz(desc);
+ ARMVectorReg scratch[2] = { };
-#ifdef CONFIG_USER_ONLY
+ set_helper_retaddr(ra);
+ for (i = 0; i < oprsz; ) {
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+ do {
+ if (pg & 1) {
+ tlb_fn(env, &scratch[0], i, addr, oi, ra);
+ tlb_fn(env, &scratch[1], i, addr + size, oi, ra);
+ }
+ i += size, pg >>= size;
+ addr += 2 * size;
+ } while (i & 15);
+ }
+ set_helper_retaddr(0);
+
+ /* Wait until all exceptions have been raised to write back. */
+ memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
+ memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz);
+}
+
+static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr,
+ uint32_t desc, int size, uintptr_t ra,
+ sve_ld1_tlb_fn *tlb_fn)
+{
+ const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+ const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+ intptr_t i, oprsz = simd_oprsz(desc);
+ ARMVectorReg scratch[3] = { };
+
+ set_helper_retaddr(ra);
+ for (i = 0; i < oprsz; ) {
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+ do {
+ if (pg & 1) {
+ tlb_fn(env, &scratch[0], i, addr, oi, ra);
+ tlb_fn(env, &scratch[1], i, addr + size, oi, ra);
+ tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra);
+ }
+ i += size, pg >>= size;
+ addr += 3 * size;
+ } while (i & 15);
+ }
+ set_helper_retaddr(0);
+
+ /* Wait until all exceptions have been raised to write back. */
+ memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
+ memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz);
+ memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz);
+}
+
+static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr,
+ uint32_t desc, int size, uintptr_t ra,
+ sve_ld1_tlb_fn *tlb_fn)
+{
+ const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+ const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+ intptr_t i, oprsz = simd_oprsz(desc);
+ ARMVectorReg scratch[4] = { };
+
+ set_helper_retaddr(ra);
+ for (i = 0; i < oprsz; ) {
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+ do {
+ if (pg & 1) {
+ tlb_fn(env, &scratch[0], i, addr, oi, ra);
+ tlb_fn(env, &scratch[1], i, addr + size, oi, ra);
+ tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra);
+ tlb_fn(env, &scratch[3], i, addr + 3 * size, oi, ra);
+ }
+ i += size, pg >>= size;
+ addr += 4 * size;
+ } while (i & 15);
+ }
+ set_helper_retaddr(0);
+
+ /* Wait until all exceptions have been raised to write back. */
+ memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
+ memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz);
+ memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz);
+ memcpy(&env->vfp.zregs[(rd + 3) & 31], &scratch[3], oprsz);
+}
+
+#define DO_LDN_1(N) \
+void __attribute__((flatten)) HELPER(sve_ld##N##bb_r) \
+ (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
+{ \
+ sve_ld##N##_r(env, vg, addr, desc, 1, GETPC(), sve_ld1bb_tlb); \
+}
+
+#define DO_LDN_2(N, SUFF, SIZE) \
+void __attribute__((flatten)) HELPER(sve_ld##N##SUFF##_le_r) \
+ (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
+{ \
+ sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \
+ sve_ld1##SUFF##_le_tlb); \
+} \
+void __attribute__((flatten)) HELPER(sve_ld##N##SUFF##_be_r) \
+ (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
+{ \
+ sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \
+ sve_ld1##SUFF##_be_tlb); \
+}
+
+DO_LDN_1(2)
+DO_LDN_1(3)
+DO_LDN_1(4)
+
+DO_LDN_2(2, hh, 2)
+DO_LDN_2(3, hh, 2)
+DO_LDN_2(4, hh, 2)
+
+DO_LDN_2(2, ss, 4)
+DO_LDN_2(3, ss, 4)
+DO_LDN_2(4, ss, 4)
+
+DO_LDN_2(2, dd, 8)
+DO_LDN_2(3, dd, 8)
+DO_LDN_2(4, dd, 8)
+
+#undef DO_LDN_1
+#undef DO_LDN_2
+
+/*
+ * Load contiguous data, first-fault and no-fault.
+ *
+ * For user-only, one could argue that we should hold the mmap_lock during
+ * the operation so that there is no race between page_check_range and the
+ * load operation. However, unmapping pages out from under a running thread
+ * is extraordinarily unlikely. This theoretical race condition also affects
+ * linux-user/ in its get_user/put_user macros.
+ *
+ * TODO: Construct some helpers, written in assembly, that interact with
+ * handle_cpu_signal to produce memory ops which can properly report errors
+ * without racing.
+ */
/* Fault on byte I. All bits in FFR from I are cleared. The vector
* result from I is CONSTRAINED UNPREDICTABLE; we choose the MERGE
@@ -4100,573 +4452,932 @@ static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz)
}
}
-/* Hold the mmap lock during the operation so that there is no race
- * between page_check_range and the load operation. We expect the
- * usual case to have no faults at all, so we check the whole range
- * first and if successful defer to the normal load operation.
- *
- * TODO: Change mmap_lock to a rwlock so that multiple readers
- * can run simultaneously. This will probably help other uses
- * within QEMU as well.
+/*
+ * Common helper for all contiguous first-fault loads.
*/
-#define DO_LDFF1(PART, FN, TYPEE, TYPEM, H) \
-static void do_sve_ldff1##PART(CPUARMState *env, void *vd, void *vg, \
- target_ulong addr, intptr_t oprsz, \
- bool first, uintptr_t ra) \
-{ \
- intptr_t i = 0; \
- do { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- TYPEM m = 0; \
- if (pg & 1) { \
- if (!first && \
- unlikely(page_check_range(addr, sizeof(TYPEM), \
- PAGE_READ))) { \
- record_fault(env, i, oprsz); \
- return; \
- } \
- m = FN(env, addr, ra); \
- first = false; \
- } \
- *(TYPEE *)(vd + H(i)) = m; \
- i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \
- addr += sizeof(TYPEM); \
- } while (i & 15); \
- } while (i < oprsz); \
-} \
-void HELPER(sve_ldff1##PART)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- intptr_t oprsz = simd_oprsz(desc); \
- unsigned rd = simd_data(desc); \
- void *vd = &env->vfp.zregs[rd]; \
- mmap_lock(); \
- if (likely(page_check_range(addr, oprsz, PAGE_READ) == 0)) { \
- do_sve_ld1##PART(env, vd, vg, addr, oprsz, GETPC()); \
- } else { \
- do_sve_ldff1##PART(env, vd, vg, addr, oprsz, true, GETPC()); \
- } \
- mmap_unlock(); \
-}
+static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr,
+ uint32_t desc, const uintptr_t retaddr,
+ const int esz, const int msz,
+ sve_ld1_host_fn *host_fn,
+ sve_ld1_tlb_fn *tlb_fn)
+{
+ const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+ const int mmu_idx = get_mmuidx(oi);
+ const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+ void *vd = &env->vfp.zregs[rd];
+ const int diffsz = esz - msz;
+ const intptr_t reg_max = simd_oprsz(desc);
+ const intptr_t mem_max = reg_max >> diffsz;
+ intptr_t split, reg_off, mem_off;
+ void *host;
+
+ /* Skip to the first active element. */
+ reg_off = find_next_active(vg, 0, reg_max, esz);
+ if (unlikely(reg_off == reg_max)) {
+ /* The entire predicate was false; no load occurs. */
+ memset(vd, 0, reg_max);
+ return;
+ }
+ mem_off = reg_off >> diffsz;
+ set_helper_retaddr(retaddr);
+
+ /*
+ * If the (remaining) load is entirely within a single page, then:
+ * For softmmu, and the tlb hits, then no faults will occur;
+ * For user-only, either the first load will fault or none will.
+ * We can thus perform the load directly to the destination and
+ * Vd will be unmodified on any exception path.
+ */
+ split = max_for_page(addr, mem_off, mem_max);
+ if (likely(split == mem_max)) {
+ host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
+ if (test_host_page(host)) {
+ mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max);
+ tcg_debug_assert(mem_off == mem_max);
+ set_helper_retaddr(0);
+ /* After any fault, zero any leading inactive elements. */
+ swap_memzero(vd, reg_off);
+ return;
+ }
+ }
-/* No-fault loads are like first-fault loads without the
- * first faulting special case.
- */
-#define DO_LDNF1(PART) \
-void HELPER(sve_ldnf1##PART)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- intptr_t oprsz = simd_oprsz(desc); \
- unsigned rd = simd_data(desc); \
- void *vd = &env->vfp.zregs[rd]; \
- mmap_lock(); \
- if (likely(page_check_range(addr, oprsz, PAGE_READ) == 0)) { \
- do_sve_ld1##PART(env, vd, vg, addr, oprsz, GETPC()); \
- } else { \
- do_sve_ldff1##PART(env, vd, vg, addr, oprsz, false, GETPC()); \
- } \
- mmap_unlock(); \
-}
+#ifdef CONFIG_USER_ONLY
+ /*
+ * The page(s) containing this first element at ADDR+MEM_OFF must
+ * be valid. Considering that this first element may be misaligned
+ * and cross a page boundary itself, take the rest of the page from
+ * the last byte of the element.
+ */
+ split = max_for_page(addr, mem_off + (1 << msz) - 1, mem_max);
+ mem_off = host_fn(vd, vg, g2h(addr), mem_off, split);
+ /* After any fault, zero any leading inactive elements. */
+ swap_memzero(vd, reg_off);
+ reg_off = mem_off << diffsz;
#else
+ /*
+ * Perform one normal read, which will fault or not.
+ * But it is likely to bring the page into the tlb.
+ */
+ tlb_fn(env, vd, reg_off, addr + mem_off, oi, retaddr);
+
+ /* After any fault, zero any leading predicated false elts. */
+ swap_memzero(vd, reg_off);
+ mem_off += 1 << msz;
+ reg_off += 1 << esz;
+
+ /* Try again to read the balance of the page. */
+ split = max_for_page(addr, mem_off - 1, mem_max);
+ if (split >= (1 << msz)) {
+ host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
+ if (host) {
+ mem_off = host_fn(vd, vg, host - mem_off, mem_off, split);
+ reg_off = mem_off << diffsz;
+ }
+ }
+#endif
-/* TODO: System mode is not yet supported.
- * This would probably use tlb_vaddr_to_host.
- */
-#define DO_LDFF1(PART, FN, TYPEE, TYPEM, H) \
-void HELPER(sve_ldff1##PART)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- g_assert_not_reached(); \
+ set_helper_retaddr(0);
+ record_fault(env, reg_off, reg_max);
}
-#define DO_LDNF1(PART) \
-void HELPER(sve_ldnf1##PART)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- g_assert_not_reached(); \
-}
+/*
+ * Common helper for all contiguous no-fault loads.
+ */
+static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr,
+ uint32_t desc, const int esz, const int msz,
+ sve_ld1_host_fn *host_fn)
+{
+ const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+ void *vd = &env->vfp.zregs[rd];
+ const int diffsz = esz - msz;
+ const intptr_t reg_max = simd_oprsz(desc);
+ const intptr_t mem_max = reg_max >> diffsz;
+ const int mmu_idx = cpu_mmu_index(env, false);
+ intptr_t split, reg_off, mem_off;
+ void *host;
+#ifdef CONFIG_USER_ONLY
+ host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx);
+ if (likely(page_check_range(addr, mem_max, PAGE_READ) == 0)) {
+ /* The entire operation is valid and will not fault. */
+ host_fn(vd, vg, host, 0, mem_max);
+ return;
+ }
#endif
-DO_LDFF1(bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1)
-DO_LDFF1(bhu_r, cpu_ldub_data_ra, uint16_t, uint8_t, H1_2)
-DO_LDFF1(bhs_r, cpu_ldsb_data_ra, uint16_t, int8_t, H1_2)
-DO_LDFF1(bsu_r, cpu_ldub_data_ra, uint32_t, uint8_t, H1_4)
-DO_LDFF1(bss_r, cpu_ldsb_data_ra, uint32_t, int8_t, H1_4)
-DO_LDFF1(bdu_r, cpu_ldub_data_ra, uint64_t, uint8_t, )
-DO_LDFF1(bds_r, cpu_ldsb_data_ra, uint64_t, int8_t, )
+ /* There will be no fault, so we may modify in advance. */
+ memset(vd, 0, reg_max);
-DO_LDFF1(hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2)
-DO_LDFF1(hsu_r, cpu_lduw_data_ra, uint32_t, uint16_t, H1_4)
-DO_LDFF1(hss_r, cpu_ldsw_data_ra, uint32_t, int8_t, H1_4)
-DO_LDFF1(hdu_r, cpu_lduw_data_ra, uint64_t, uint16_t, )
-DO_LDFF1(hds_r, cpu_ldsw_data_ra, uint64_t, int16_t, )
+ /* Skip to the first active element. */
+ reg_off = find_next_active(vg, 0, reg_max, esz);
+ if (unlikely(reg_off == reg_max)) {
+ /* The entire predicate was false; no load occurs. */
+ return;
+ }
+ mem_off = reg_off >> diffsz;
-DO_LDFF1(ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4)
-DO_LDFF1(sdu_r, cpu_ldl_data_ra, uint64_t, uint32_t, )
-DO_LDFF1(sds_r, cpu_ldl_data_ra, uint64_t, int32_t, )
+#ifdef CONFIG_USER_ONLY
+ if (page_check_range(addr + mem_off, 1 << msz, PAGE_READ) == 0) {
+ /* At least one load is valid; take the rest of the page. */
+ split = max_for_page(addr, mem_off + (1 << msz) - 1, mem_max);
+ mem_off = host_fn(vd, vg, host, mem_off, split);
+ reg_off = mem_off << diffsz;
+ }
+#else
+ /*
+ * If the address is not in the TLB, we have no way to bring the
+ * entry into the TLB without also risking a fault. Note that
+ * the corollary is that we never load from an address not in RAM.
+ *
+ * This last is out of spec, in a weird corner case.
+ * Per the MemNF/MemSingleNF pseudocode, a NF load from Device memory
+ * must not actually hit the bus -- it returns UNKNOWN data instead.
+ * But if you map non-RAM with Normal memory attributes and do a NF
+ * load then it should access the bus. (Nobody ought actually do this
+ * in the real world, obviously.)
+ *
+ * Then there are the annoying special cases with watchpoints...
+ *
+ * TODO: Add a form of tlb_fill that does not raise an exception,
+ * with a form of tlb_vaddr_to_host and a set of loads to match.
+ * The non_fault_vaddr_to_host would handle everything, usually,
+ * and the loads would handle the iomem path for watchpoints.
+ */
+ host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
+ split = max_for_page(addr, mem_off, mem_max);
+ if (host && split >= (1 << msz)) {
+ mem_off = host_fn(vd, vg, host - mem_off, mem_off, split);
+ reg_off = mem_off << diffsz;
+ }
+#endif
-DO_LDFF1(dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, )
+ record_fault(env, reg_off, reg_max);
+}
-#undef DO_LDFF1
+#define DO_LDFF1_LDNF1_1(PART, ESZ) \
+void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \
+ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \
+} \
+void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnf1_r(env, vg, addr, desc, ESZ, 0, sve_ld1##PART##_host); \
+}
-DO_LDNF1(bb_r)
-DO_LDNF1(bhu_r)
-DO_LDNF1(bhs_r)
-DO_LDNF1(bsu_r)
-DO_LDNF1(bss_r)
-DO_LDNF1(bdu_r)
-DO_LDNF1(bds_r)
+#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \
+void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \
+ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \
+} \
+void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_le_host); \
+} \
+void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \
+ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \
+} \
+void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_be_host); \
+}
+
+DO_LDFF1_LDNF1_1(bb, 0)
+DO_LDFF1_LDNF1_1(bhu, 1)
+DO_LDFF1_LDNF1_1(bhs, 1)
+DO_LDFF1_LDNF1_1(bsu, 2)
+DO_LDFF1_LDNF1_1(bss, 2)
+DO_LDFF1_LDNF1_1(bdu, 3)
+DO_LDFF1_LDNF1_1(bds, 3)
-DO_LDNF1(hh_r)
-DO_LDNF1(hsu_r)
-DO_LDNF1(hss_r)
-DO_LDNF1(hdu_r)
-DO_LDNF1(hds_r)
+DO_LDFF1_LDNF1_2(hh, 1, 1)
+DO_LDFF1_LDNF1_2(hsu, 2, 1)
+DO_LDFF1_LDNF1_2(hss, 2, 1)
+DO_LDFF1_LDNF1_2(hdu, 3, 1)
+DO_LDFF1_LDNF1_2(hds, 3, 1)
-DO_LDNF1(ss_r)
-DO_LDNF1(sdu_r)
-DO_LDNF1(sds_r)
+DO_LDFF1_LDNF1_2(ss, 2, 2)
+DO_LDFF1_LDNF1_2(sdu, 3, 2)
+DO_LDFF1_LDNF1_2(sds, 3, 2)
-DO_LDNF1(dd_r)
+DO_LDFF1_LDNF1_2(dd, 3, 3)
-#undef DO_LDNF1
+#undef DO_LDFF1_LDNF1_1
+#undef DO_LDFF1_LDNF1_2
/*
* Store contiguous data, protected by a governing predicate.
*/
-#define DO_ST1(NAME, FN, TYPEE, TYPEM, H) \
-void HELPER(NAME)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- intptr_t i, oprsz = simd_oprsz(desc); \
- intptr_t ra = GETPC(); \
- unsigned rd = simd_data(desc); \
- void *vd = &env->vfp.zregs[rd]; \
- for (i = 0; i < oprsz; ) { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- if (pg & 1) { \
- TYPEM m = *(TYPEE *)(vd + H(i)); \
- FN(env, addr, m, ra); \
- } \
- i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \
- addr += sizeof(TYPEM); \
- } while (i & 15); \
- } \
-}
-#define DO_ST1_D(NAME, FN, TYPEM) \
-void HELPER(NAME)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- intptr_t i, oprsz = simd_oprsz(desc) / 8; \
- intptr_t ra = GETPC(); \
- unsigned rd = simd_data(desc); \
- uint64_t *d = &env->vfp.zregs[rd].d[0]; \
- uint8_t *pg = vg; \
- for (i = 0; i < oprsz; i += 1) { \
- if (pg[H1(i)] & 1) { \
- FN(env, addr, d[i], ra); \
- } \
- addr += sizeof(TYPEM); \
- } \
+#ifdef CONFIG_SOFTMMU
+#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \
+static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \
+ target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \
+{ \
+ TLB(env, addr, *(TYPEM *)(vd + H(reg_off)), oi, ra); \
}
-
-#define DO_ST2(NAME, FN, TYPEE, TYPEM, H) \
-void HELPER(NAME)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- intptr_t i, oprsz = simd_oprsz(desc); \
- intptr_t ra = GETPC(); \
- unsigned rd = simd_data(desc); \
- void *d1 = &env->vfp.zregs[rd]; \
- void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \
- for (i = 0; i < oprsz; ) { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- if (pg & 1) { \
- TYPEM m1 = *(TYPEE *)(d1 + H(i)); \
- TYPEM m2 = *(TYPEE *)(d2 + H(i)); \
- FN(env, addr, m1, ra); \
- FN(env, addr + sizeof(TYPEM), m2, ra); \
- } \
- i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \
- addr += 2 * sizeof(TYPEM); \
- } while (i & 15); \
- } \
+#else
+#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \
+static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \
+ target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \
+{ \
+ HOST(g2h(addr), *(TYPEM *)(vd + H(reg_off))); \
}
+#endif
-#define DO_ST3(NAME, FN, TYPEE, TYPEM, H) \
-void HELPER(NAME)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- intptr_t i, oprsz = simd_oprsz(desc); \
- intptr_t ra = GETPC(); \
- unsigned rd = simd_data(desc); \
- void *d1 = &env->vfp.zregs[rd]; \
- void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \
- void *d3 = &env->vfp.zregs[(rd + 2) & 31]; \
- for (i = 0; i < oprsz; ) { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- if (pg & 1) { \
- TYPEM m1 = *(TYPEE *)(d1 + H(i)); \
- TYPEM m2 = *(TYPEE *)(d2 + H(i)); \
- TYPEM m3 = *(TYPEE *)(d3 + H(i)); \
- FN(env, addr, m1, ra); \
- FN(env, addr + sizeof(TYPEM), m2, ra); \
- FN(env, addr + 2 * sizeof(TYPEM), m3, ra); \
- } \
- i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \
- addr += 3 * sizeof(TYPEM); \
- } while (i & 15); \
- } \
-}
+DO_ST_TLB(st1bb, H1, uint8_t, stb_p, 0, helper_ret_stb_mmu)
+DO_ST_TLB(st1bh, H1_2, uint16_t, stb_p, 0, helper_ret_stb_mmu)
+DO_ST_TLB(st1bs, H1_4, uint32_t, stb_p, 0, helper_ret_stb_mmu)
+DO_ST_TLB(st1bd, , uint64_t, stb_p, 0, helper_ret_stb_mmu)
-#define DO_ST4(NAME, FN, TYPEE, TYPEM, H) \
-void HELPER(NAME)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- intptr_t i, oprsz = simd_oprsz(desc); \
- intptr_t ra = GETPC(); \
- unsigned rd = simd_data(desc); \
- void *d1 = &env->vfp.zregs[rd]; \
- void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \
- void *d3 = &env->vfp.zregs[(rd + 2) & 31]; \
- void *d4 = &env->vfp.zregs[(rd + 3) & 31]; \
- for (i = 0; i < oprsz; ) { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- if (pg & 1) { \
- TYPEM m1 = *(TYPEE *)(d1 + H(i)); \
- TYPEM m2 = *(TYPEE *)(d2 + H(i)); \
- TYPEM m3 = *(TYPEE *)(d3 + H(i)); \
- TYPEM m4 = *(TYPEE *)(d4 + H(i)); \
- FN(env, addr, m1, ra); \
- FN(env, addr + sizeof(TYPEM), m2, ra); \
- FN(env, addr + 2 * sizeof(TYPEM), m3, ra); \
- FN(env, addr + 3 * sizeof(TYPEM), m4, ra); \
- } \
- i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \
- addr += 4 * sizeof(TYPEM); \
- } while (i & 15); \
- } \
-}
+DO_ST_TLB(st1hh_le, H1_2, uint16_t, stw_le_p, MO_LE, helper_le_stw_mmu)
+DO_ST_TLB(st1hs_le, H1_4, uint32_t, stw_le_p, MO_LE, helper_le_stw_mmu)
+DO_ST_TLB(st1hd_le, , uint64_t, stw_le_p, MO_LE, helper_le_stw_mmu)
-DO_ST1(sve_st1bh_r, cpu_stb_data_ra, uint16_t, uint8_t, H1_2)
-DO_ST1(sve_st1bs_r, cpu_stb_data_ra, uint32_t, uint8_t, H1_4)
-DO_ST1_D(sve_st1bd_r, cpu_stb_data_ra, uint8_t)
+DO_ST_TLB(st1ss_le, H1_4, uint32_t, stl_le_p, MO_LE, helper_le_stl_mmu)
+DO_ST_TLB(st1sd_le, , uint64_t, stl_le_p, MO_LE, helper_le_stl_mmu)
-DO_ST1(sve_st1hs_r, cpu_stw_data_ra, uint32_t, uint16_t, H1_4)
-DO_ST1_D(sve_st1hd_r, cpu_stw_data_ra, uint16_t)
+DO_ST_TLB(st1dd_le, , uint64_t, stq_le_p, MO_LE, helper_le_stq_mmu)
-DO_ST1_D(sve_st1sd_r, cpu_stl_data_ra, uint32_t)
+DO_ST_TLB(st1hh_be, H1_2, uint16_t, stw_be_p, MO_BE, helper_be_stw_mmu)
+DO_ST_TLB(st1hs_be, H1_4, uint32_t, stw_be_p, MO_BE, helper_be_stw_mmu)
+DO_ST_TLB(st1hd_be, , uint64_t, stw_be_p, MO_BE, helper_be_stw_mmu)
-DO_ST1(sve_st1bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1)
-DO_ST2(sve_st2bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1)
-DO_ST3(sve_st3bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1)
-DO_ST4(sve_st4bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1)
+DO_ST_TLB(st1ss_be, H1_4, uint32_t, stl_be_p, MO_BE, helper_be_stl_mmu)
+DO_ST_TLB(st1sd_be, , uint64_t, stl_be_p, MO_BE, helper_be_stl_mmu)
-DO_ST1(sve_st1hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2)
-DO_ST2(sve_st2hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2)
-DO_ST3(sve_st3hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2)
-DO_ST4(sve_st4hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2)
+DO_ST_TLB(st1dd_be, , uint64_t, stq_be_p, MO_BE, helper_be_stq_mmu)
-DO_ST1(sve_st1ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4)
-DO_ST2(sve_st2ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4)
-DO_ST3(sve_st3ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4)
-DO_ST4(sve_st4ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4)
+#undef DO_ST_TLB
-DO_ST1_D(sve_st1dd_r, cpu_stq_data_ra, uint64_t)
+/*
+ * Common helpers for all contiguous 1,2,3,4-register predicated stores.
+ */
+static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr,
+ uint32_t desc, const uintptr_t ra,
+ const int esize, const int msize,
+ sve_st1_tlb_fn *tlb_fn)
+{
+ const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+ const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+ intptr_t i, oprsz = simd_oprsz(desc);
+ void *vd = &env->vfp.zregs[rd];
-void HELPER(sve_st2dd_r)(CPUARMState *env, void *vg,
- target_ulong addr, uint32_t desc)
+ set_helper_retaddr(ra);
+ for (i = 0; i < oprsz; ) {
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+ do {
+ if (pg & 1) {
+ tlb_fn(env, vd, i, addr, oi, ra);
+ }
+ i += esize, pg >>= esize;
+ addr += msize;
+ } while (i & 15);
+ }
+ set_helper_retaddr(0);
+}
+
+static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr,
+ uint32_t desc, const uintptr_t ra,
+ const int esize, const int msize,
+ sve_st1_tlb_fn *tlb_fn)
{
- intptr_t i, oprsz = simd_oprsz(desc) / 8;
- intptr_t ra = GETPC();
- unsigned rd = simd_data(desc);
- uint64_t *d1 = &env->vfp.zregs[rd].d[0];
- uint64_t *d2 = &env->vfp.zregs[(rd + 1) & 31].d[0];
- uint8_t *pg = vg;
+ const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+ const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+ intptr_t i, oprsz = simd_oprsz(desc);
+ void *d1 = &env->vfp.zregs[rd];
+ void *d2 = &env->vfp.zregs[(rd + 1) & 31];
- for (i = 0; i < oprsz; i += 1) {
- if (pg[H1(i)] & 1) {
- cpu_stq_data_ra(env, addr, d1[i], ra);
- cpu_stq_data_ra(env, addr + 8, d2[i], ra);
- }
- addr += 2 * 8;
+ set_helper_retaddr(ra);
+ for (i = 0; i < oprsz; ) {
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+ do {
+ if (pg & 1) {
+ tlb_fn(env, d1, i, addr, oi, ra);
+ tlb_fn(env, d2, i, addr + msize, oi, ra);
+ }
+ i += esize, pg >>= esize;
+ addr += 2 * msize;
+ } while (i & 15);
}
+ set_helper_retaddr(0);
}
-void HELPER(sve_st3dd_r)(CPUARMState *env, void *vg,
- target_ulong addr, uint32_t desc)
+static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr,
+ uint32_t desc, const uintptr_t ra,
+ const int esize, const int msize,
+ sve_st1_tlb_fn *tlb_fn)
{
- intptr_t i, oprsz = simd_oprsz(desc) / 8;
- intptr_t ra = GETPC();
- unsigned rd = simd_data(desc);
- uint64_t *d1 = &env->vfp.zregs[rd].d[0];
- uint64_t *d2 = &env->vfp.zregs[(rd + 1) & 31].d[0];
- uint64_t *d3 = &env->vfp.zregs[(rd + 2) & 31].d[0];
- uint8_t *pg = vg;
+ const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+ const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+ intptr_t i, oprsz = simd_oprsz(desc);
+ void *d1 = &env->vfp.zregs[rd];
+ void *d2 = &env->vfp.zregs[(rd + 1) & 31];
+ void *d3 = &env->vfp.zregs[(rd + 2) & 31];
- for (i = 0; i < oprsz; i += 1) {
- if (pg[H1(i)] & 1) {
- cpu_stq_data_ra(env, addr, d1[i], ra);
- cpu_stq_data_ra(env, addr + 8, d2[i], ra);
- cpu_stq_data_ra(env, addr + 16, d3[i], ra);
- }
- addr += 3 * 8;
+ set_helper_retaddr(ra);
+ for (i = 0; i < oprsz; ) {
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+ do {
+ if (pg & 1) {
+ tlb_fn(env, d1, i, addr, oi, ra);
+ tlb_fn(env, d2, i, addr + msize, oi, ra);
+ tlb_fn(env, d3, i, addr + 2 * msize, oi, ra);
+ }
+ i += esize, pg >>= esize;
+ addr += 3 * msize;
+ } while (i & 15);
}
+ set_helper_retaddr(0);
}
-void HELPER(sve_st4dd_r)(CPUARMState *env, void *vg,
- target_ulong addr, uint32_t desc)
+static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr,
+ uint32_t desc, const uintptr_t ra,
+ const int esize, const int msize,
+ sve_st1_tlb_fn *tlb_fn)
{
- intptr_t i, oprsz = simd_oprsz(desc) / 8;
- intptr_t ra = GETPC();
- unsigned rd = simd_data(desc);
- uint64_t *d1 = &env->vfp.zregs[rd].d[0];
- uint64_t *d2 = &env->vfp.zregs[(rd + 1) & 31].d[0];
- uint64_t *d3 = &env->vfp.zregs[(rd + 2) & 31].d[0];
- uint64_t *d4 = &env->vfp.zregs[(rd + 3) & 31].d[0];
- uint8_t *pg = vg;
+ const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+ const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+ intptr_t i, oprsz = simd_oprsz(desc);
+ void *d1 = &env->vfp.zregs[rd];
+ void *d2 = &env->vfp.zregs[(rd + 1) & 31];
+ void *d3 = &env->vfp.zregs[(rd + 2) & 31];
+ void *d4 = &env->vfp.zregs[(rd + 3) & 31];
- for (i = 0; i < oprsz; i += 1) {
- if (pg[H1(i)] & 1) {
- cpu_stq_data_ra(env, addr, d1[i], ra);
- cpu_stq_data_ra(env, addr + 8, d2[i], ra);
- cpu_stq_data_ra(env, addr + 16, d3[i], ra);
- cpu_stq_data_ra(env, addr + 24, d4[i], ra);
- }
- addr += 4 * 8;
+ set_helper_retaddr(ra);
+ for (i = 0; i < oprsz; ) {
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+ do {
+ if (pg & 1) {
+ tlb_fn(env, d1, i, addr, oi, ra);
+ tlb_fn(env, d2, i, addr + msize, oi, ra);
+ tlb_fn(env, d3, i, addr + 2 * msize, oi, ra);
+ tlb_fn(env, d4, i, addr + 3 * msize, oi, ra);
+ }
+ i += esize, pg >>= esize;
+ addr += 4 * msize;
+ } while (i & 15);
}
+ set_helper_retaddr(0);
}
-/* Loads with a vector index. */
+#define DO_STN_1(N, NAME, ESIZE) \
+void __attribute__((flatten)) HELPER(sve_st##N##NAME##_r) \
+ (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
+{ \
+ sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, 1, \
+ sve_st1##NAME##_tlb); \
+}
-#define DO_LD1_ZPZ_S(NAME, TYPEI, TYPEM, FN) \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \
- target_ulong base, uint32_t desc) \
-{ \
- intptr_t i, oprsz = simd_oprsz(desc); \
- unsigned scale = simd_data(desc); \
- uintptr_t ra = GETPC(); \
- for (i = 0; i < oprsz; ) { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- TYPEM m = 0; \
- if (pg & 1) { \
- target_ulong off = *(TYPEI *)(vm + H1_4(i)); \
- m = FN(env, base + (off << scale), ra); \
- } \
- *(uint32_t *)(vd + H1_4(i)) = m; \
- i += 4, pg >>= 4; \
- } while (i & 15); \
- } \
+#define DO_STN_2(N, NAME, ESIZE, MSIZE) \
+void __attribute__((flatten)) HELPER(sve_st##N##NAME##_le_r) \
+ (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
+{ \
+ sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \
+ sve_st1##NAME##_le_tlb); \
+} \
+void __attribute__((flatten)) HELPER(sve_st##N##NAME##_be_r) \
+ (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
+{ \
+ sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \
+ sve_st1##NAME##_be_tlb); \
+}
+
+DO_STN_1(1, bb, 1)
+DO_STN_1(1, bh, 2)
+DO_STN_1(1, bs, 4)
+DO_STN_1(1, bd, 8)
+DO_STN_1(2, bb, 1)
+DO_STN_1(3, bb, 1)
+DO_STN_1(4, bb, 1)
+
+DO_STN_2(1, hh, 2, 2)
+DO_STN_2(1, hs, 4, 2)
+DO_STN_2(1, hd, 8, 2)
+DO_STN_2(2, hh, 2, 2)
+DO_STN_2(3, hh, 2, 2)
+DO_STN_2(4, hh, 2, 2)
+
+DO_STN_2(1, ss, 4, 4)
+DO_STN_2(1, sd, 8, 4)
+DO_STN_2(2, ss, 4, 4)
+DO_STN_2(3, ss, 4, 4)
+DO_STN_2(4, ss, 4, 4)
+
+DO_STN_2(1, dd, 8, 8)
+DO_STN_2(2, dd, 8, 8)
+DO_STN_2(3, dd, 8, 8)
+DO_STN_2(4, dd, 8, 8)
+
+#undef DO_STN_1
+#undef DO_STN_2
+
+/*
+ * Loads with a vector index.
+ */
+
+/*
+ * Load the element at @reg + @reg_ofs, sign or zero-extend as needed.
+ */
+typedef target_ulong zreg_off_fn(void *reg, intptr_t reg_ofs);
+
+static target_ulong off_zsu_s(void *reg, intptr_t reg_ofs)
+{
+ return *(uint32_t *)(reg + H1_4(reg_ofs));
}
-#define DO_LD1_ZPZ_D(NAME, TYPEI, TYPEM, FN) \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \
- target_ulong base, uint32_t desc) \
-{ \
- intptr_t i, oprsz = simd_oprsz(desc) / 8; \
- unsigned scale = simd_data(desc); \
- uintptr_t ra = GETPC(); \
- uint64_t *d = vd, *m = vm; uint8_t *pg = vg; \
- for (i = 0; i < oprsz; i++) { \
- TYPEM mm = 0; \
- if (pg[H1(i)] & 1) { \
- target_ulong off = (TYPEI)m[i]; \
- mm = FN(env, base + (off << scale), ra); \
- } \
- d[i] = mm; \
- } \
+static target_ulong off_zss_s(void *reg, intptr_t reg_ofs)
+{
+ return *(int32_t *)(reg + H1_4(reg_ofs));
}
-DO_LD1_ZPZ_S(sve_ldbsu_zsu, uint32_t, uint8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_S(sve_ldhsu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_S(sve_ldssu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra)
-DO_LD1_ZPZ_S(sve_ldbss_zsu, uint32_t, int8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_S(sve_ldhss_zsu, uint32_t, int16_t, cpu_lduw_data_ra)
-
-DO_LD1_ZPZ_S(sve_ldbsu_zss, int32_t, uint8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_S(sve_ldhsu_zss, int32_t, uint16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_S(sve_ldssu_zss, int32_t, uint32_t, cpu_ldl_data_ra)
-DO_LD1_ZPZ_S(sve_ldbss_zss, int32_t, int8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_S(sve_ldhss_zss, int32_t, int16_t, cpu_lduw_data_ra)
-
-DO_LD1_ZPZ_D(sve_ldbdu_zsu, uint32_t, uint8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhdu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsdu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra)
-DO_LD1_ZPZ_D(sve_ldddu_zsu, uint32_t, uint64_t, cpu_ldq_data_ra)
-DO_LD1_ZPZ_D(sve_ldbds_zsu, uint32_t, int8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhds_zsu, uint32_t, int16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsds_zsu, uint32_t, int32_t, cpu_ldl_data_ra)
-
-DO_LD1_ZPZ_D(sve_ldbdu_zss, int32_t, uint8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhdu_zss, int32_t, uint16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsdu_zss, int32_t, uint32_t, cpu_ldl_data_ra)
-DO_LD1_ZPZ_D(sve_ldddu_zss, int32_t, uint64_t, cpu_ldq_data_ra)
-DO_LD1_ZPZ_D(sve_ldbds_zss, int32_t, int8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhds_zss, int32_t, int16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsds_zss, int32_t, int32_t, cpu_ldl_data_ra)
-
-DO_LD1_ZPZ_D(sve_ldbdu_zd, uint64_t, uint8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhdu_zd, uint64_t, uint16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsdu_zd, uint64_t, uint32_t, cpu_ldl_data_ra)
-DO_LD1_ZPZ_D(sve_ldddu_zd, uint64_t, uint64_t, cpu_ldq_data_ra)
-DO_LD1_ZPZ_D(sve_ldbds_zd, uint64_t, int8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhds_zd, uint64_t, int16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsds_zd, uint64_t, int32_t, cpu_ldl_data_ra)
+static target_ulong off_zsu_d(void *reg, intptr_t reg_ofs)
+{
+ return (uint32_t)*(uint64_t *)(reg + reg_ofs);
+}
-/* First fault loads with a vector index. */
+static target_ulong off_zss_d(void *reg, intptr_t reg_ofs)
+{
+ return (int32_t)*(uint64_t *)(reg + reg_ofs);
+}
-#ifdef CONFIG_USER_ONLY
+static target_ulong off_zd_d(void *reg, intptr_t reg_ofs)
+{
+ return *(uint64_t *)(reg + reg_ofs);
+}
-#define DO_LDFF1_ZPZ(NAME, TYPEE, TYPEI, TYPEM, FN, H) \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \
- target_ulong base, uint32_t desc) \
-{ \
- intptr_t i, oprsz = simd_oprsz(desc); \
- unsigned scale = simd_data(desc); \
- uintptr_t ra = GETPC(); \
- bool first = true; \
- mmap_lock(); \
- for (i = 0; i < oprsz; ) { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- TYPEM m = 0; \
- if (pg & 1) { \
- target_ulong off = *(TYPEI *)(vm + H(i)); \
- target_ulong addr = base + (off << scale); \
- if (!first && \
- page_check_range(addr, sizeof(TYPEM), PAGE_READ)) { \
- record_fault(env, i, oprsz); \
- goto exit; \
- } \
- m = FN(env, addr, ra); \
- first = false; \
- } \
- *(TYPEE *)(vd + H(i)) = m; \
- i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \
- } while (i & 15); \
- } \
- exit: \
- mmap_unlock(); \
+static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
+ target_ulong base, uint32_t desc, uintptr_t ra,
+ zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
+{
+ const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+ const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+ intptr_t i, oprsz = simd_oprsz(desc);
+ ARMVectorReg scratch = { };
+
+ set_helper_retaddr(ra);
+ for (i = 0; i < oprsz; ) {
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+ do {
+ if (likely(pg & 1)) {
+ target_ulong off = off_fn(vm, i);
+ tlb_fn(env, &scratch, i, base + (off << scale), oi, ra);
+ }
+ i += 4, pg >>= 4;
+ } while (i & 15);
+ }
+ set_helper_retaddr(0);
+
+ /* Wait until all exceptions have been raised to write back. */
+ memcpy(vd, &scratch, oprsz);
}
+static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
+ target_ulong base, uint32_t desc, uintptr_t ra,
+ zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
+{
+ const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+ const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+ intptr_t i, oprsz = simd_oprsz(desc) / 8;
+ ARMVectorReg scratch = { };
+
+ set_helper_retaddr(ra);
+ for (i = 0; i < oprsz; i++) {
+ uint8_t pg = *(uint8_t *)(vg + H1(i));
+ if (likely(pg & 1)) {
+ target_ulong off = off_fn(vm, i * 8);
+ tlb_fn(env, &scratch, i * 8, base + (off << scale), oi, ra);
+ }
+ }
+ set_helper_retaddr(0);
+
+ /* Wait until all exceptions have been raised to write back. */
+ memcpy(vd, &scratch, oprsz * 8);
+}
+
+#define DO_LD1_ZPZ_S(MEM, OFS) \
+void __attribute__((flatten)) HELPER(sve_ld##MEM##_##OFS) \
+ (CPUARMState *env, void *vd, void *vg, void *vm, \
+ target_ulong base, uint32_t desc) \
+{ \
+ sve_ld1_zs(env, vd, vg, vm, base, desc, GETPC(), \
+ off_##OFS##_s, sve_ld1##MEM##_tlb); \
+}
+
+#define DO_LD1_ZPZ_D(MEM, OFS) \
+void __attribute__((flatten)) HELPER(sve_ld##MEM##_##OFS) \
+ (CPUARMState *env, void *vd, void *vg, void *vm, \
+ target_ulong base, uint32_t desc) \
+{ \
+ sve_ld1_zd(env, vd, vg, vm, base, desc, GETPC(), \
+ off_##OFS##_d, sve_ld1##MEM##_tlb); \
+}
+
+DO_LD1_ZPZ_S(bsu, zsu)
+DO_LD1_ZPZ_S(bsu, zss)
+DO_LD1_ZPZ_D(bdu, zsu)
+DO_LD1_ZPZ_D(bdu, zss)
+DO_LD1_ZPZ_D(bdu, zd)
+
+DO_LD1_ZPZ_S(bss, zsu)
+DO_LD1_ZPZ_S(bss, zss)
+DO_LD1_ZPZ_D(bds, zsu)
+DO_LD1_ZPZ_D(bds, zss)
+DO_LD1_ZPZ_D(bds, zd)
+
+DO_LD1_ZPZ_S(hsu_le, zsu)
+DO_LD1_ZPZ_S(hsu_le, zss)
+DO_LD1_ZPZ_D(hdu_le, zsu)
+DO_LD1_ZPZ_D(hdu_le, zss)
+DO_LD1_ZPZ_D(hdu_le, zd)
+
+DO_LD1_ZPZ_S(hsu_be, zsu)
+DO_LD1_ZPZ_S(hsu_be, zss)
+DO_LD1_ZPZ_D(hdu_be, zsu)
+DO_LD1_ZPZ_D(hdu_be, zss)
+DO_LD1_ZPZ_D(hdu_be, zd)
+
+DO_LD1_ZPZ_S(hss_le, zsu)
+DO_LD1_ZPZ_S(hss_le, zss)
+DO_LD1_ZPZ_D(hds_le, zsu)
+DO_LD1_ZPZ_D(hds_le, zss)
+DO_LD1_ZPZ_D(hds_le, zd)
+
+DO_LD1_ZPZ_S(hss_be, zsu)
+DO_LD1_ZPZ_S(hss_be, zss)
+DO_LD1_ZPZ_D(hds_be, zsu)
+DO_LD1_ZPZ_D(hds_be, zss)
+DO_LD1_ZPZ_D(hds_be, zd)
+
+DO_LD1_ZPZ_S(ss_le, zsu)
+DO_LD1_ZPZ_S(ss_le, zss)
+DO_LD1_ZPZ_D(sdu_le, zsu)
+DO_LD1_ZPZ_D(sdu_le, zss)
+DO_LD1_ZPZ_D(sdu_le, zd)
+
+DO_LD1_ZPZ_S(ss_be, zsu)
+DO_LD1_ZPZ_S(ss_be, zss)
+DO_LD1_ZPZ_D(sdu_be, zsu)
+DO_LD1_ZPZ_D(sdu_be, zss)
+DO_LD1_ZPZ_D(sdu_be, zd)
+
+DO_LD1_ZPZ_D(sds_le, zsu)
+DO_LD1_ZPZ_D(sds_le, zss)
+DO_LD1_ZPZ_D(sds_le, zd)
+
+DO_LD1_ZPZ_D(sds_be, zsu)
+DO_LD1_ZPZ_D(sds_be, zss)
+DO_LD1_ZPZ_D(sds_be, zd)
+
+DO_LD1_ZPZ_D(dd_le, zsu)
+DO_LD1_ZPZ_D(dd_le, zss)
+DO_LD1_ZPZ_D(dd_le, zd)
+
+DO_LD1_ZPZ_D(dd_be, zsu)
+DO_LD1_ZPZ_D(dd_be, zss)
+DO_LD1_ZPZ_D(dd_be, zd)
+
+#undef DO_LD1_ZPZ_S
+#undef DO_LD1_ZPZ_D
+
+/* First fault loads with a vector index. */
+
+/* Load one element into VD+REG_OFF from (ENV,VADDR) without faulting.
+ * The controlling predicate is known to be true. Return true if the
+ * load was successful.
+ */
+typedef bool sve_ld1_nf_fn(CPUARMState *env, void *vd, intptr_t reg_off,
+ target_ulong vaddr, int mmu_idx);
+
+#ifdef CONFIG_SOFTMMU
+#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \
+static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \
+ target_ulong addr, int mmu_idx) \
+{ \
+ target_ulong next_page = -(addr | TARGET_PAGE_MASK); \
+ if (likely(next_page - addr >= sizeof(TYPEM))) { \
+ void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); \
+ if (likely(host)) { \
+ TYPEM val = HOST(host); \
+ *(TYPEE *)(vd + H(reg_off)) = val; \
+ return true; \
+ } \
+ } \
+ return false; \
+}
#else
+#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \
+static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \
+ target_ulong addr, int mmu_idx) \
+{ \
+ if (likely(page_check_range(addr, sizeof(TYPEM), PAGE_READ))) { \
+ TYPEM val = HOST(g2h(addr)); \
+ *(TYPEE *)(vd + H(reg_off)) = val; \
+ return true; \
+ } \
+ return false; \
+}
+#endif
-#define DO_LDFF1_ZPZ(NAME, TYPEE, TYPEI, TYPEM, FN, H) \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \
- target_ulong base, uint32_t desc) \
-{ \
- g_assert_not_reached(); \
+DO_LD_NF(bsu, H1_4, uint32_t, uint8_t, ldub_p)
+DO_LD_NF(bss, H1_4, uint32_t, int8_t, ldsb_p)
+DO_LD_NF(bdu, , uint64_t, uint8_t, ldub_p)
+DO_LD_NF(bds, , uint64_t, int8_t, ldsb_p)
+
+DO_LD_NF(hsu_le, H1_4, uint32_t, uint16_t, lduw_le_p)
+DO_LD_NF(hss_le, H1_4, uint32_t, int16_t, ldsw_le_p)
+DO_LD_NF(hsu_be, H1_4, uint32_t, uint16_t, lduw_be_p)
+DO_LD_NF(hss_be, H1_4, uint32_t, int16_t, ldsw_be_p)
+DO_LD_NF(hdu_le, , uint64_t, uint16_t, lduw_le_p)
+DO_LD_NF(hds_le, , uint64_t, int16_t, ldsw_le_p)
+DO_LD_NF(hdu_be, , uint64_t, uint16_t, lduw_be_p)
+DO_LD_NF(hds_be, , uint64_t, int16_t, ldsw_be_p)
+
+DO_LD_NF(ss_le, H1_4, uint32_t, uint32_t, ldl_le_p)
+DO_LD_NF(ss_be, H1_4, uint32_t, uint32_t, ldl_be_p)
+DO_LD_NF(sdu_le, , uint64_t, uint32_t, ldl_le_p)
+DO_LD_NF(sds_le, , uint64_t, int32_t, ldl_le_p)
+DO_LD_NF(sdu_be, , uint64_t, uint32_t, ldl_be_p)
+DO_LD_NF(sds_be, , uint64_t, int32_t, ldl_be_p)
+
+DO_LD_NF(dd_le, , uint64_t, uint64_t, ldq_le_p)
+DO_LD_NF(dd_be, , uint64_t, uint64_t, ldq_be_p)
+
+/*
+ * Common helper for all gather first-faulting loads.
+ */
+static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
+ target_ulong base, uint32_t desc, uintptr_t ra,
+ zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn,
+ sve_ld1_nf_fn *nonfault_fn)
+{
+ const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+ const int mmu_idx = get_mmuidx(oi);
+ const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+ intptr_t reg_off, reg_max = simd_oprsz(desc);
+ target_ulong addr;
+
+ /* Skip to the first true predicate. */
+ reg_off = find_next_active(vg, 0, reg_max, MO_32);
+ if (likely(reg_off < reg_max)) {
+ /* Perform one normal read, which will fault or not. */
+ set_helper_retaddr(ra);
+ addr = off_fn(vm, reg_off);
+ addr = base + (addr << scale);
+ tlb_fn(env, vd, reg_off, addr, oi, ra);
+
+ /* The rest of the reads will be non-faulting. */
+ set_helper_retaddr(0);
+ }
+
+ /* After any fault, zero the leading predicated false elements. */
+ swap_memzero(vd, reg_off);
+
+ while (likely((reg_off += 4) < reg_max)) {
+ uint64_t pg = *(uint64_t *)(vg + (reg_off >> 6) * 8);
+ if (likely((pg >> (reg_off & 63)) & 1)) {
+ addr = off_fn(vm, reg_off);
+ addr = base + (addr << scale);
+ if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) {
+ record_fault(env, reg_off, reg_max);
+ break;
+ }
+ } else {
+ *(uint32_t *)(vd + H1_4(reg_off)) = 0;
+ }
+ }
}
-#endif
+static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
+ target_ulong base, uint32_t desc, uintptr_t ra,
+ zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn,
+ sve_ld1_nf_fn *nonfault_fn)
+{
+ const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+ const int mmu_idx = get_mmuidx(oi);
+ const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+ intptr_t reg_off, reg_max = simd_oprsz(desc);
+ target_ulong addr;
+
+ /* Skip to the first true predicate. */
+ reg_off = find_next_active(vg, 0, reg_max, MO_64);
+ if (likely(reg_off < reg_max)) {
+ /* Perform one normal read, which will fault or not. */
+ set_helper_retaddr(ra);
+ addr = off_fn(vm, reg_off);
+ addr = base + (addr << scale);
+ tlb_fn(env, vd, reg_off, addr, oi, ra);
+
+ /* The rest of the reads will be non-faulting. */
+ set_helper_retaddr(0);
+ }
-#define DO_LDFF1_ZPZ_S(NAME, TYPEI, TYPEM, FN) \
- DO_LDFF1_ZPZ(NAME, uint32_t, TYPEI, TYPEM, FN, H1_4)
-#define DO_LDFF1_ZPZ_D(NAME, TYPEI, TYPEM, FN) \
- DO_LDFF1_ZPZ(NAME, uint64_t, TYPEI, TYPEM, FN, )
-
-DO_LDFF1_ZPZ_S(sve_ldffbsu_zsu, uint32_t, uint8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffhsu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffssu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffbss_zsu, uint32_t, int8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffhss_zsu, uint32_t, int16_t, cpu_lduw_data_ra)
-
-DO_LDFF1_ZPZ_S(sve_ldffbsu_zss, int32_t, uint8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffhsu_zss, int32_t, uint16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffssu_zss, int32_t, uint32_t, cpu_ldl_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffbss_zss, int32_t, int8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffhss_zss, int32_t, int16_t, cpu_lduw_data_ra)
-
-DO_LDFF1_ZPZ_D(sve_ldffbdu_zsu, uint32_t, uint8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhdu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsdu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffddu_zsu, uint32_t, uint64_t, cpu_ldq_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffbds_zsu, uint32_t, int8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhds_zsu, uint32_t, int16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsds_zsu, uint32_t, int32_t, cpu_ldl_data_ra)
-
-DO_LDFF1_ZPZ_D(sve_ldffbdu_zss, int32_t, uint8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhdu_zss, int32_t, uint16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsdu_zss, int32_t, uint32_t, cpu_ldl_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffddu_zss, int32_t, uint64_t, cpu_ldq_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffbds_zss, int32_t, int8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhds_zss, int32_t, int16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsds_zss, int32_t, int32_t, cpu_ldl_data_ra)
-
-DO_LDFF1_ZPZ_D(sve_ldffbdu_zd, uint64_t, uint8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhdu_zd, uint64_t, uint16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsdu_zd, uint64_t, uint32_t, cpu_ldl_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffddu_zd, uint64_t, uint64_t, cpu_ldq_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffbds_zd, uint64_t, int8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhds_zd, uint64_t, int16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsds_zd, uint64_t, int32_t, cpu_ldl_data_ra)
+ /* After any fault, zero the leading predicated false elements. */
+ swap_memzero(vd, reg_off);
-/* Stores with a vector index. */
+ while (likely((reg_off += 8) < reg_max)) {
+ uint8_t pg = *(uint8_t *)(vg + H1(reg_off >> 3));
+ if (likely(pg & 1)) {
+ addr = off_fn(vm, reg_off);
+ addr = base + (addr << scale);
+ if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) {
+ record_fault(env, reg_off, reg_max);
+ break;
+ }
+ } else {
+ *(uint64_t *)(vd + reg_off) = 0;
+ }
+ }
+}
-#define DO_ST1_ZPZ_S(NAME, TYPEI, FN) \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \
- target_ulong base, uint32_t desc) \
+#define DO_LDFF1_ZPZ_S(MEM, OFS) \
+void HELPER(sve_ldff##MEM##_##OFS) \
+ (CPUARMState *env, void *vd, void *vg, void *vm, \
+ target_ulong base, uint32_t desc) \
{ \
- intptr_t i, oprsz = simd_oprsz(desc); \
- unsigned scale = simd_data(desc); \
- uintptr_t ra = GETPC(); \
- for (i = 0; i < oprsz; ) { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- if (likely(pg & 1)) { \
- target_ulong off = *(TYPEI *)(vm + H1_4(i)); \
- uint32_t d = *(uint32_t *)(vd + H1_4(i)); \
- FN(env, base + (off << scale), d, ra); \
- } \
- i += sizeof(uint32_t), pg >>= sizeof(uint32_t); \
- } while (i & 15); \
- } \
+ sve_ldff1_zs(env, vd, vg, vm, base, desc, GETPC(), \
+ off_##OFS##_s, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \
}
-#define DO_ST1_ZPZ_D(NAME, TYPEI, FN) \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \
- target_ulong base, uint32_t desc) \
+#define DO_LDFF1_ZPZ_D(MEM, OFS) \
+void HELPER(sve_ldff##MEM##_##OFS) \
+ (CPUARMState *env, void *vd, void *vg, void *vm, \
+ target_ulong base, uint32_t desc) \
{ \
- intptr_t i, oprsz = simd_oprsz(desc) / 8; \
- unsigned scale = simd_data(desc); \
- uintptr_t ra = GETPC(); \
- uint64_t *d = vd, *m = vm; uint8_t *pg = vg; \
- for (i = 0; i < oprsz; i++) { \
- if (likely(pg[H1(i)] & 1)) { \
- target_ulong off = (target_ulong)(TYPEI)m[i] << scale; \
- FN(env, base + off, d[i], ra); \
- } \
- } \
-}
+ sve_ldff1_zd(env, vd, vg, vm, base, desc, GETPC(), \
+ off_##OFS##_d, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \
+}
+
+DO_LDFF1_ZPZ_S(bsu, zsu)
+DO_LDFF1_ZPZ_S(bsu, zss)
+DO_LDFF1_ZPZ_D(bdu, zsu)
+DO_LDFF1_ZPZ_D(bdu, zss)
+DO_LDFF1_ZPZ_D(bdu, zd)
+
+DO_LDFF1_ZPZ_S(bss, zsu)
+DO_LDFF1_ZPZ_S(bss, zss)
+DO_LDFF1_ZPZ_D(bds, zsu)
+DO_LDFF1_ZPZ_D(bds, zss)
+DO_LDFF1_ZPZ_D(bds, zd)
+
+DO_LDFF1_ZPZ_S(hsu_le, zsu)
+DO_LDFF1_ZPZ_S(hsu_le, zss)
+DO_LDFF1_ZPZ_D(hdu_le, zsu)
+DO_LDFF1_ZPZ_D(hdu_le, zss)
+DO_LDFF1_ZPZ_D(hdu_le, zd)
+
+DO_LDFF1_ZPZ_S(hsu_be, zsu)
+DO_LDFF1_ZPZ_S(hsu_be, zss)
+DO_LDFF1_ZPZ_D(hdu_be, zsu)
+DO_LDFF1_ZPZ_D(hdu_be, zss)
+DO_LDFF1_ZPZ_D(hdu_be, zd)
+
+DO_LDFF1_ZPZ_S(hss_le, zsu)
+DO_LDFF1_ZPZ_S(hss_le, zss)
+DO_LDFF1_ZPZ_D(hds_le, zsu)
+DO_LDFF1_ZPZ_D(hds_le, zss)
+DO_LDFF1_ZPZ_D(hds_le, zd)
+
+DO_LDFF1_ZPZ_S(hss_be, zsu)
+DO_LDFF1_ZPZ_S(hss_be, zss)
+DO_LDFF1_ZPZ_D(hds_be, zsu)
+DO_LDFF1_ZPZ_D(hds_be, zss)
+DO_LDFF1_ZPZ_D(hds_be, zd)
+
+DO_LDFF1_ZPZ_S(ss_le, zsu)
+DO_LDFF1_ZPZ_S(ss_le, zss)
+DO_LDFF1_ZPZ_D(sdu_le, zsu)
+DO_LDFF1_ZPZ_D(sdu_le, zss)
+DO_LDFF1_ZPZ_D(sdu_le, zd)
+
+DO_LDFF1_ZPZ_S(ss_be, zsu)
+DO_LDFF1_ZPZ_S(ss_be, zss)
+DO_LDFF1_ZPZ_D(sdu_be, zsu)
+DO_LDFF1_ZPZ_D(sdu_be, zss)
+DO_LDFF1_ZPZ_D(sdu_be, zd)
+
+DO_LDFF1_ZPZ_D(sds_le, zsu)
+DO_LDFF1_ZPZ_D(sds_le, zss)
+DO_LDFF1_ZPZ_D(sds_le, zd)
+
+DO_LDFF1_ZPZ_D(sds_be, zsu)
+DO_LDFF1_ZPZ_D(sds_be, zss)
+DO_LDFF1_ZPZ_D(sds_be, zd)
+
+DO_LDFF1_ZPZ_D(dd_le, zsu)
+DO_LDFF1_ZPZ_D(dd_le, zss)
+DO_LDFF1_ZPZ_D(dd_le, zd)
+
+DO_LDFF1_ZPZ_D(dd_be, zsu)
+DO_LDFF1_ZPZ_D(dd_be, zss)
+DO_LDFF1_ZPZ_D(dd_be, zd)
-DO_ST1_ZPZ_S(sve_stbs_zsu, uint32_t, cpu_stb_data_ra)
-DO_ST1_ZPZ_S(sve_sths_zsu, uint32_t, cpu_stw_data_ra)
-DO_ST1_ZPZ_S(sve_stss_zsu, uint32_t, cpu_stl_data_ra)
+/* Stores with a vector index. */
-DO_ST1_ZPZ_S(sve_stbs_zss, int32_t, cpu_stb_data_ra)
-DO_ST1_ZPZ_S(sve_sths_zss, int32_t, cpu_stw_data_ra)
-DO_ST1_ZPZ_S(sve_stss_zss, int32_t, cpu_stl_data_ra)
+static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
+ target_ulong base, uint32_t desc, uintptr_t ra,
+ zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
+{
+ const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+ const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+ intptr_t i, oprsz = simd_oprsz(desc);
-DO_ST1_ZPZ_D(sve_stbd_zsu, uint32_t, cpu_stb_data_ra)
-DO_ST1_ZPZ_D(sve_sthd_zsu, uint32_t, cpu_stw_data_ra)
-DO_ST1_ZPZ_D(sve_stsd_zsu, uint32_t, cpu_stl_data_ra)
-DO_ST1_ZPZ_D(sve_stdd_zsu, uint32_t, cpu_stq_data_ra)
+ set_helper_retaddr(ra);
+ for (i = 0; i < oprsz; ) {
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+ do {
+ if (likely(pg & 1)) {
+ target_ulong off = off_fn(vm, i);
+ tlb_fn(env, vd, i, base + (off << scale), oi, ra);
+ }
+ i += 4, pg >>= 4;
+ } while (i & 15);
+ }
+ set_helper_retaddr(0);
+}
-DO_ST1_ZPZ_D(sve_stbd_zss, int32_t, cpu_stb_data_ra)
-DO_ST1_ZPZ_D(sve_sthd_zss, int32_t, cpu_stw_data_ra)
-DO_ST1_ZPZ_D(sve_stsd_zss, int32_t, cpu_stl_data_ra)
-DO_ST1_ZPZ_D(sve_stdd_zss, int32_t, cpu_stq_data_ra)
+static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
+ target_ulong base, uint32_t desc, uintptr_t ra,
+ zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
+{
+ const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+ const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+ intptr_t i, oprsz = simd_oprsz(desc) / 8;
-DO_ST1_ZPZ_D(sve_stbd_zd, uint64_t, cpu_stb_data_ra)
-DO_ST1_ZPZ_D(sve_sthd_zd, uint64_t, cpu_stw_data_ra)
-DO_ST1_ZPZ_D(sve_stsd_zd, uint64_t, cpu_stl_data_ra)
-DO_ST1_ZPZ_D(sve_stdd_zd, uint64_t, cpu_stq_data_ra)
+ set_helper_retaddr(ra);
+ for (i = 0; i < oprsz; i++) {
+ uint8_t pg = *(uint8_t *)(vg + H1(i));
+ if (likely(pg & 1)) {
+ target_ulong off = off_fn(vm, i * 8);
+ tlb_fn(env, vd, i * 8, base + (off << scale), oi, ra);
+ }
+ }
+ set_helper_retaddr(0);
+}
+
+#define DO_ST1_ZPZ_S(MEM, OFS) \
+void __attribute__((flatten)) HELPER(sve_st##MEM##_##OFS) \
+ (CPUARMState *env, void *vd, void *vg, void *vm, \
+ target_ulong base, uint32_t desc) \
+{ \
+ sve_st1_zs(env, vd, vg, vm, base, desc, GETPC(), \
+ off_##OFS##_s, sve_st1##MEM##_tlb); \
+}
+
+#define DO_ST1_ZPZ_D(MEM, OFS) \
+void __attribute__((flatten)) HELPER(sve_st##MEM##_##OFS) \
+ (CPUARMState *env, void *vd, void *vg, void *vm, \
+ target_ulong base, uint32_t desc) \
+{ \
+ sve_st1_zd(env, vd, vg, vm, base, desc, GETPC(), \
+ off_##OFS##_d, sve_st1##MEM##_tlb); \
+}
+
+DO_ST1_ZPZ_S(bs, zsu)
+DO_ST1_ZPZ_S(hs_le, zsu)
+DO_ST1_ZPZ_S(hs_be, zsu)
+DO_ST1_ZPZ_S(ss_le, zsu)
+DO_ST1_ZPZ_S(ss_be, zsu)
+
+DO_ST1_ZPZ_S(bs, zss)
+DO_ST1_ZPZ_S(hs_le, zss)
+DO_ST1_ZPZ_S(hs_be, zss)
+DO_ST1_ZPZ_S(ss_le, zss)
+DO_ST1_ZPZ_S(ss_be, zss)
+
+DO_ST1_ZPZ_D(bd, zsu)
+DO_ST1_ZPZ_D(hd_le, zsu)
+DO_ST1_ZPZ_D(hd_be, zsu)
+DO_ST1_ZPZ_D(sd_le, zsu)
+DO_ST1_ZPZ_D(sd_be, zsu)
+DO_ST1_ZPZ_D(dd_le, zsu)
+DO_ST1_ZPZ_D(dd_be, zsu)
+
+DO_ST1_ZPZ_D(bd, zss)
+DO_ST1_ZPZ_D(hd_le, zss)
+DO_ST1_ZPZ_D(hd_be, zss)
+DO_ST1_ZPZ_D(sd_le, zss)
+DO_ST1_ZPZ_D(sd_be, zss)
+DO_ST1_ZPZ_D(dd_le, zss)
+DO_ST1_ZPZ_D(dd_be, zss)
+
+DO_ST1_ZPZ_D(bd, zd)
+DO_ST1_ZPZ_D(hd_le, zd)
+DO_ST1_ZPZ_D(hd_be, zd)
+DO_ST1_ZPZ_D(sd_le, zd)
+DO_ST1_ZPZ_D(sd_be, zd)
+DO_ST1_ZPZ_D(dd_le, zd)
+DO_ST1_ZPZ_D(dd_be, zd)
+
+#undef DO_ST1_ZPZ_S
+#undef DO_ST1_ZPZ_D
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 8ca3876707..8a24278d79 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -166,11 +166,15 @@ void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
cpu_fprintf(f, "\n");
return;
}
+ if (fp_exception_el(env, el) != 0) {
+ cpu_fprintf(f, " FPU disabled\n");
+ return;
+ }
cpu_fprintf(f, " FPCR=%08x FPSR=%08x\n",
vfp_get_fpcr(env), vfp_get_fpsr(env));
- if (arm_feature(env, ARM_FEATURE_SVE)) {
- int j, zcr_len = env->vfp.zcr_el[1] & 0xf; /* fix for system mode */
+ if (arm_feature(env, ARM_FEATURE_SVE) && sve_exception_el(env, el) == 0) {
+ int j, zcr_len = sve_zcr_len_for_el(env, el);
for (i = 0; i <= FFR_PRED_NUM; i++) {
bool eol;
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 667879564f..fe7aebdc19 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -4600,62 +4600,97 @@ static const uint8_t dtype_esz[16] = {
3, 2, 1, 3
};
+static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype)
+{
+ return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s));
+}
+
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
- gen_helper_gvec_mem *fn)
+ int dtype, gen_helper_gvec_mem *fn)
{
unsigned vsz = vec_full_reg_size(s);
TCGv_ptr t_pg;
- TCGv_i32 desc;
+ TCGv_i32 t_desc;
+ int desc;
/* For e.g. LD4, there are not enough arguments to pass all 4
* registers as pointers, so encode the regno into the data field.
* For consistency, do this even for LD1.
*/
- desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
+ desc = sve_memopidx(s, dtype);
+ desc |= zt << MEMOPIDX_SHIFT;
+ desc = simd_desc(vsz, vsz, desc);
+ t_desc = tcg_const_i32(desc);
t_pg = tcg_temp_new_ptr();
tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
- fn(cpu_env, t_pg, addr, desc);
+ fn(cpu_env, t_pg, addr, t_desc);
tcg_temp_free_ptr(t_pg);
- tcg_temp_free_i32(desc);
+ tcg_temp_free_i32(t_desc);
}
static void do_ld_zpa(DisasContext *s, int zt, int pg,
TCGv_i64 addr, int dtype, int nreg)
{
- static gen_helper_gvec_mem * const fns[16][4] = {
- { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
- gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
- { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
-
- { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
- gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
- { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
-
- { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
- gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
- { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
-
- { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
- gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
+ static gen_helper_gvec_mem * const fns[2][16][4] = {
+ /* Little-endian */
+ { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
+ gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
+ { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
+ gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
+ { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
+ gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
+ { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
+ gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
+
+ /* Big-endian */
+ { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
+ gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
+ { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
+ gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
+ { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
+ gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
+ { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
+ gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
};
- gen_helper_gvec_mem *fn = fns[dtype][nreg];
+ gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];
/* While there are holes in the table, they are not
* accessible via the instruction encoding.
*/
assert(fn != NULL);
- do_mem_zpa(s, zt, pg, addr, fn);
+ do_mem_zpa(s, zt, pg, addr, dtype, fn);
}
static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
@@ -4689,59 +4724,104 @@ static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
{
- static gen_helper_gvec_mem * const fns[16] = {
- gen_helper_sve_ldff1bb_r,
- gen_helper_sve_ldff1bhu_r,
- gen_helper_sve_ldff1bsu_r,
- gen_helper_sve_ldff1bdu_r,
-
- gen_helper_sve_ldff1sds_r,
- gen_helper_sve_ldff1hh_r,
- gen_helper_sve_ldff1hsu_r,
- gen_helper_sve_ldff1hdu_r,
-
- gen_helper_sve_ldff1hds_r,
- gen_helper_sve_ldff1hss_r,
- gen_helper_sve_ldff1ss_r,
- gen_helper_sve_ldff1sdu_r,
-
- gen_helper_sve_ldff1bds_r,
- gen_helper_sve_ldff1bss_r,
- gen_helper_sve_ldff1bhs_r,
- gen_helper_sve_ldff1dd_r,
+ static gen_helper_gvec_mem * const fns[2][16] = {
+ /* Little-endian */
+ { gen_helper_sve_ldff1bb_r,
+ gen_helper_sve_ldff1bhu_r,
+ gen_helper_sve_ldff1bsu_r,
+ gen_helper_sve_ldff1bdu_r,
+
+ gen_helper_sve_ldff1sds_le_r,
+ gen_helper_sve_ldff1hh_le_r,
+ gen_helper_sve_ldff1hsu_le_r,
+ gen_helper_sve_ldff1hdu_le_r,
+
+ gen_helper_sve_ldff1hds_le_r,
+ gen_helper_sve_ldff1hss_le_r,
+ gen_helper_sve_ldff1ss_le_r,
+ gen_helper_sve_ldff1sdu_le_r,
+
+ gen_helper_sve_ldff1bds_r,
+ gen_helper_sve_ldff1bss_r,
+ gen_helper_sve_ldff1bhs_r,
+ gen_helper_sve_ldff1dd_le_r },
+
+ /* Big-endian */
+ { gen_helper_sve_ldff1bb_r,
+ gen_helper_sve_ldff1bhu_r,
+ gen_helper_sve_ldff1bsu_r,
+ gen_helper_sve_ldff1bdu_r,
+
+ gen_helper_sve_ldff1sds_be_r,
+ gen_helper_sve_ldff1hh_be_r,
+ gen_helper_sve_ldff1hsu_be_r,
+ gen_helper_sve_ldff1hdu_be_r,
+
+ gen_helper_sve_ldff1hds_be_r,
+ gen_helper_sve_ldff1hss_be_r,
+ gen_helper_sve_ldff1ss_be_r,
+ gen_helper_sve_ldff1sdu_be_r,
+
+ gen_helper_sve_ldff1bds_r,
+ gen_helper_sve_ldff1bss_r,
+ gen_helper_sve_ldff1bhs_r,
+ gen_helper_sve_ldff1dd_be_r },
};
if (sve_access_check(s)) {
TCGv_i64 addr = new_tmp_a64(s);
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
- do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
+ do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
+ fns[s->be_data == MO_BE][a->dtype]);
}
return true;
}
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
{
- static gen_helper_gvec_mem * const fns[16] = {
- gen_helper_sve_ldnf1bb_r,
- gen_helper_sve_ldnf1bhu_r,
- gen_helper_sve_ldnf1bsu_r,
- gen_helper_sve_ldnf1bdu_r,
-
- gen_helper_sve_ldnf1sds_r,
- gen_helper_sve_ldnf1hh_r,
- gen_helper_sve_ldnf1hsu_r,
- gen_helper_sve_ldnf1hdu_r,
-
- gen_helper_sve_ldnf1hds_r,
- gen_helper_sve_ldnf1hss_r,
- gen_helper_sve_ldnf1ss_r,
- gen_helper_sve_ldnf1sdu_r,
-
- gen_helper_sve_ldnf1bds_r,
- gen_helper_sve_ldnf1bss_r,
- gen_helper_sve_ldnf1bhs_r,
- gen_helper_sve_ldnf1dd_r,
+ static gen_helper_gvec_mem * const fns[2][16] = {
+ /* Little-endian */
+ { gen_helper_sve_ldnf1bb_r,
+ gen_helper_sve_ldnf1bhu_r,
+ gen_helper_sve_ldnf1bsu_r,
+ gen_helper_sve_ldnf1bdu_r,
+
+ gen_helper_sve_ldnf1sds_le_r,
+ gen_helper_sve_ldnf1hh_le_r,
+ gen_helper_sve_ldnf1hsu_le_r,
+ gen_helper_sve_ldnf1hdu_le_r,
+
+ gen_helper_sve_ldnf1hds_le_r,
+ gen_helper_sve_ldnf1hss_le_r,
+ gen_helper_sve_ldnf1ss_le_r,
+ gen_helper_sve_ldnf1sdu_le_r,
+
+ gen_helper_sve_ldnf1bds_r,
+ gen_helper_sve_ldnf1bss_r,
+ gen_helper_sve_ldnf1bhs_r,
+ gen_helper_sve_ldnf1dd_le_r },
+
+ /* Big-endian */
+ { gen_helper_sve_ldnf1bb_r,
+ gen_helper_sve_ldnf1bhu_r,
+ gen_helper_sve_ldnf1bsu_r,
+ gen_helper_sve_ldnf1bdu_r,
+
+ gen_helper_sve_ldnf1sds_be_r,
+ gen_helper_sve_ldnf1hh_be_r,
+ gen_helper_sve_ldnf1hsu_be_r,
+ gen_helper_sve_ldnf1hdu_be_r,
+
+ gen_helper_sve_ldnf1hds_be_r,
+ gen_helper_sve_ldnf1hss_be_r,
+ gen_helper_sve_ldnf1ss_be_r,
+ gen_helper_sve_ldnf1sdu_be_r,
+
+ gen_helper_sve_ldnf1bds_r,
+ gen_helper_sve_ldnf1bss_r,
+ gen_helper_sve_ldnf1bhs_r,
+ gen_helper_sve_ldnf1dd_be_r },
};
if (sve_access_check(s)) {
@@ -4751,30 +4831,57 @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
TCGv_i64 addr = new_tmp_a64(s);
tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
- do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
+ do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
+ fns[s->be_data == MO_BE][a->dtype]);
}
return true;
}
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
{
- static gen_helper_gvec_mem * const fns[4] = {
- gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
- gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
+ static gen_helper_gvec_mem * const fns[2][4] = {
+ { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r,
+ gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
+ { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r,
+ gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
};
unsigned vsz = vec_full_reg_size(s);
TCGv_ptr t_pg;
- TCGv_i32 desc;
+ TCGv_i32 t_desc;
+ int desc, poff;
/* Load the first quadword using the normal predicated load helpers. */
- desc = tcg_const_i32(simd_desc(16, 16, zt));
+ desc = sve_memopidx(s, msz_dtype(msz));
+ desc |= zt << MEMOPIDX_SHIFT;
+ desc = simd_desc(16, 16, desc);
+ t_desc = tcg_const_i32(desc);
+
+ poff = pred_full_reg_offset(s, pg);
+ if (vsz > 16) {
+ /*
+ * Zero-extend the first 16 bits of the predicate into a temporary.
+ * This avoids triggering an assert making sure we don't have bits
+ * set within a predicate beyond VQ, but we have lowered VQ to 1
+ * for this load operation.
+ */
+ TCGv_i64 tmp = tcg_temp_new_i64();
+#ifdef HOST_WORDS_BIGENDIAN
+ poff += 6;
+#endif
+ tcg_gen_ld16u_i64(tmp, cpu_env, poff);
+
+ poff = offsetof(CPUARMState, vfp.preg_tmp);
+ tcg_gen_st_i64(tmp, cpu_env, poff);
+ tcg_temp_free_i64(tmp);
+ }
+
t_pg = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(t_pg, cpu_env, poff);
- tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
- fns[msz](cpu_env, t_pg, addr, desc);
+ fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
tcg_temp_free_ptr(t_pg);
- tcg_temp_free_i32(desc);
+ tcg_temp_free_i32(t_desc);
/* Replicate that first quadword. */
if (vsz > 16) {
@@ -4860,35 +4967,73 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
int msz, int esz, int nreg)
{
- static gen_helper_gvec_mem * const fn_single[4][4] = {
- { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
- gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
- { NULL, gen_helper_sve_st1hh_r,
- gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
- { NULL, NULL,
- gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
- { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
+ static gen_helper_gvec_mem * const fn_single[2][4][4] = {
+ { { gen_helper_sve_st1bb_r,
+ gen_helper_sve_st1bh_r,
+ gen_helper_sve_st1bs_r,
+ gen_helper_sve_st1bd_r },
+ { NULL,
+ gen_helper_sve_st1hh_le_r,
+ gen_helper_sve_st1hs_le_r,
+ gen_helper_sve_st1hd_le_r },
+ { NULL, NULL,
+ gen_helper_sve_st1ss_le_r,
+ gen_helper_sve_st1sd_le_r },
+ { NULL, NULL, NULL,
+ gen_helper_sve_st1dd_le_r } },
+ { { gen_helper_sve_st1bb_r,
+ gen_helper_sve_st1bh_r,
+ gen_helper_sve_st1bs_r,
+ gen_helper_sve_st1bd_r },
+ { NULL,
+ gen_helper_sve_st1hh_be_r,
+ gen_helper_sve_st1hs_be_r,
+ gen_helper_sve_st1hd_be_r },
+ { NULL, NULL,
+ gen_helper_sve_st1ss_be_r,
+ gen_helper_sve_st1sd_be_r },
+ { NULL, NULL, NULL,
+ gen_helper_sve_st1dd_be_r } },
};
- static gen_helper_gvec_mem * const fn_multiple[3][4] = {
- { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
- gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
- { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
- gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
- { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
- gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
+ static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
+ { { gen_helper_sve_st2bb_r,
+ gen_helper_sve_st2hh_le_r,
+ gen_helper_sve_st2ss_le_r,
+ gen_helper_sve_st2dd_le_r },
+ { gen_helper_sve_st3bb_r,
+ gen_helper_sve_st3hh_le_r,
+ gen_helper_sve_st3ss_le_r,
+ gen_helper_sve_st3dd_le_r },
+ { gen_helper_sve_st4bb_r,
+ gen_helper_sve_st4hh_le_r,
+ gen_helper_sve_st4ss_le_r,
+ gen_helper_sve_st4dd_le_r } },
+ { { gen_helper_sve_st2bb_r,
+ gen_helper_sve_st2hh_be_r,
+ gen_helper_sve_st2ss_be_r,
+ gen_helper_sve_st2dd_be_r },
+ { gen_helper_sve_st3bb_r,
+ gen_helper_sve_st3hh_be_r,
+ gen_helper_sve_st3ss_be_r,
+ gen_helper_sve_st3dd_be_r },
+ { gen_helper_sve_st4bb_r,
+ gen_helper_sve_st4hh_be_r,
+ gen_helper_sve_st4ss_be_r,
+ gen_helper_sve_st4dd_be_r } },
};
gen_helper_gvec_mem *fn;
+ int be = s->be_data == MO_BE;
if (nreg == 0) {
/* ST1 */
- fn = fn_single[msz][esz];
+ fn = fn_single[be][msz][esz];
} else {
/* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
assert(msz == esz);
- fn = fn_multiple[nreg - 1][msz];
+ fn = fn_multiple[be][nreg - 1][msz];
}
assert(fn != NULL);
- do_mem_zpa(s, zt, pg, addr, fn);
+ do_mem_zpa(s, zt, pg, addr, msz_dtype(msz), fn);
}
static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
@@ -4926,111 +5071,203 @@ static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
*** SVE gather loads / scatter stores
*/
-static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
- TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
+static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
+ int scale, TCGv_i64 scalar, int msz,
+ gen_helper_gvec_mem_scatter *fn)
{
unsigned vsz = vec_full_reg_size(s);
- TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
TCGv_ptr t_zm = tcg_temp_new_ptr();
TCGv_ptr t_pg = tcg_temp_new_ptr();
TCGv_ptr t_zt = tcg_temp_new_ptr();
+ TCGv_i32 t_desc;
+ int desc;
+
+ desc = sve_memopidx(s, msz_dtype(msz));
+ desc |= scale << MEMOPIDX_SHIFT;
+ desc = simd_desc(vsz, vsz, desc);
+ t_desc = tcg_const_i32(desc);
tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
- fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);
+ fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
tcg_temp_free_ptr(t_zt);
tcg_temp_free_ptr(t_zm);
tcg_temp_free_ptr(t_pg);
- tcg_temp_free_i32(desc);
+ tcg_temp_free_i32(t_desc);
}
-/* Indexed by [ff][xs][u][msz]. */
-static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = {
- { { { gen_helper_sve_ldbss_zsu,
- gen_helper_sve_ldhss_zsu,
- NULL, },
- { gen_helper_sve_ldbsu_zsu,
- gen_helper_sve_ldhsu_zsu,
- gen_helper_sve_ldssu_zsu, } },
- { { gen_helper_sve_ldbss_zss,
- gen_helper_sve_ldhss_zss,
- NULL, },
- { gen_helper_sve_ldbsu_zss,
- gen_helper_sve_ldhsu_zss,
- gen_helper_sve_ldssu_zss, } } },
-
- { { { gen_helper_sve_ldffbss_zsu,
- gen_helper_sve_ldffhss_zsu,
- NULL, },
- { gen_helper_sve_ldffbsu_zsu,
- gen_helper_sve_ldffhsu_zsu,
- gen_helper_sve_ldffssu_zsu, } },
- { { gen_helper_sve_ldffbss_zss,
- gen_helper_sve_ldffhss_zss,
- NULL, },
- { gen_helper_sve_ldffbsu_zss,
- gen_helper_sve_ldffhsu_zss,
- gen_helper_sve_ldffssu_zss, } } }
+/* Indexed by [be][ff][xs][u][msz]. */
+static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
+ /* Little-endian */
+ { { { { gen_helper_sve_ldbss_zsu,
+ gen_helper_sve_ldhss_le_zsu,
+ NULL, },
+ { gen_helper_sve_ldbsu_zsu,
+ gen_helper_sve_ldhsu_le_zsu,
+ gen_helper_sve_ldss_le_zsu, } },
+ { { gen_helper_sve_ldbss_zss,
+ gen_helper_sve_ldhss_le_zss,
+ NULL, },
+ { gen_helper_sve_ldbsu_zss,
+ gen_helper_sve_ldhsu_le_zss,
+ gen_helper_sve_ldss_le_zss, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbss_zsu,
+ gen_helper_sve_ldffhss_le_zsu,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zsu,
+ gen_helper_sve_ldffhsu_le_zsu,
+ gen_helper_sve_ldffss_le_zsu, } },
+ { { gen_helper_sve_ldffbss_zss,
+ gen_helper_sve_ldffhss_le_zss,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zss,
+ gen_helper_sve_ldffhsu_le_zss,
+ gen_helper_sve_ldffss_le_zss, } } } },
+
+ /* Big-endian */
+ { { { { gen_helper_sve_ldbss_zsu,
+ gen_helper_sve_ldhss_be_zsu,
+ NULL, },
+ { gen_helper_sve_ldbsu_zsu,
+ gen_helper_sve_ldhsu_be_zsu,
+ gen_helper_sve_ldss_be_zsu, } },
+ { { gen_helper_sve_ldbss_zss,
+ gen_helper_sve_ldhss_be_zss,
+ NULL, },
+ { gen_helper_sve_ldbsu_zss,
+ gen_helper_sve_ldhsu_be_zss,
+ gen_helper_sve_ldss_be_zss, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbss_zsu,
+ gen_helper_sve_ldffhss_be_zsu,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zsu,
+ gen_helper_sve_ldffhsu_be_zsu,
+ gen_helper_sve_ldffss_be_zsu, } },
+ { { gen_helper_sve_ldffbss_zss,
+ gen_helper_sve_ldffhss_be_zss,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zss,
+ gen_helper_sve_ldffhsu_be_zss,
+ gen_helper_sve_ldffss_be_zss, } } } },
};
/* Note that we overload xs=2 to indicate 64-bit offset. */
-static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = {
- { { { gen_helper_sve_ldbds_zsu,
- gen_helper_sve_ldhds_zsu,
- gen_helper_sve_ldsds_zsu,
- NULL, },
- { gen_helper_sve_ldbdu_zsu,
- gen_helper_sve_ldhdu_zsu,
- gen_helper_sve_ldsdu_zsu,
- gen_helper_sve_ldddu_zsu, } },
- { { gen_helper_sve_ldbds_zss,
- gen_helper_sve_ldhds_zss,
- gen_helper_sve_ldsds_zss,
- NULL, },
- { gen_helper_sve_ldbdu_zss,
- gen_helper_sve_ldhdu_zss,
- gen_helper_sve_ldsdu_zss,
- gen_helper_sve_ldddu_zss, } },
- { { gen_helper_sve_ldbds_zd,
- gen_helper_sve_ldhds_zd,
- gen_helper_sve_ldsds_zd,
- NULL, },
- { gen_helper_sve_ldbdu_zd,
- gen_helper_sve_ldhdu_zd,
- gen_helper_sve_ldsdu_zd,
- gen_helper_sve_ldddu_zd, } } },
-
- { { { gen_helper_sve_ldffbds_zsu,
- gen_helper_sve_ldffhds_zsu,
- gen_helper_sve_ldffsds_zsu,
- NULL, },
- { gen_helper_sve_ldffbdu_zsu,
- gen_helper_sve_ldffhdu_zsu,
- gen_helper_sve_ldffsdu_zsu,
- gen_helper_sve_ldffddu_zsu, } },
- { { gen_helper_sve_ldffbds_zss,
- gen_helper_sve_ldffhds_zss,
- gen_helper_sve_ldffsds_zss,
- NULL, },
- { gen_helper_sve_ldffbdu_zss,
- gen_helper_sve_ldffhdu_zss,
- gen_helper_sve_ldffsdu_zss,
- gen_helper_sve_ldffddu_zss, } },
- { { gen_helper_sve_ldffbds_zd,
- gen_helper_sve_ldffhds_zd,
- gen_helper_sve_ldffsds_zd,
- NULL, },
- { gen_helper_sve_ldffbdu_zd,
- gen_helper_sve_ldffhdu_zd,
- gen_helper_sve_ldffsdu_zd,
- gen_helper_sve_ldffddu_zd, } } }
+static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
+ /* Little-endian */
+ { { { { gen_helper_sve_ldbds_zsu,
+ gen_helper_sve_ldhds_le_zsu,
+ gen_helper_sve_ldsds_le_zsu,
+ NULL, },
+ { gen_helper_sve_ldbdu_zsu,
+ gen_helper_sve_ldhdu_le_zsu,
+ gen_helper_sve_ldsdu_le_zsu,
+ gen_helper_sve_lddd_le_zsu, } },
+ { { gen_helper_sve_ldbds_zss,
+ gen_helper_sve_ldhds_le_zss,
+ gen_helper_sve_ldsds_le_zss,
+ NULL, },
+ { gen_helper_sve_ldbdu_zss,
+ gen_helper_sve_ldhdu_le_zss,
+ gen_helper_sve_ldsdu_le_zss,
+ gen_helper_sve_lddd_le_zss, } },
+ { { gen_helper_sve_ldbds_zd,
+ gen_helper_sve_ldhds_le_zd,
+ gen_helper_sve_ldsds_le_zd,
+ NULL, },
+ { gen_helper_sve_ldbdu_zd,
+ gen_helper_sve_ldhdu_le_zd,
+ gen_helper_sve_ldsdu_le_zd,
+ gen_helper_sve_lddd_le_zd, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbds_zsu,
+ gen_helper_sve_ldffhds_le_zsu,
+ gen_helper_sve_ldffsds_le_zsu,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zsu,
+ gen_helper_sve_ldffhdu_le_zsu,
+ gen_helper_sve_ldffsdu_le_zsu,
+ gen_helper_sve_ldffdd_le_zsu, } },
+ { { gen_helper_sve_ldffbds_zss,
+ gen_helper_sve_ldffhds_le_zss,
+ gen_helper_sve_ldffsds_le_zss,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zss,
+ gen_helper_sve_ldffhdu_le_zss,
+ gen_helper_sve_ldffsdu_le_zss,
+ gen_helper_sve_ldffdd_le_zss, } },
+ { { gen_helper_sve_ldffbds_zd,
+ gen_helper_sve_ldffhds_le_zd,
+ gen_helper_sve_ldffsds_le_zd,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zd,
+ gen_helper_sve_ldffhdu_le_zd,
+ gen_helper_sve_ldffsdu_le_zd,
+ gen_helper_sve_ldffdd_le_zd, } } } },
+
+ /* Big-endian */
+ { { { { gen_helper_sve_ldbds_zsu,
+ gen_helper_sve_ldhds_be_zsu,
+ gen_helper_sve_ldsds_be_zsu,
+ NULL, },
+ { gen_helper_sve_ldbdu_zsu,
+ gen_helper_sve_ldhdu_be_zsu,
+ gen_helper_sve_ldsdu_be_zsu,
+ gen_helper_sve_lddd_be_zsu, } },
+ { { gen_helper_sve_ldbds_zss,
+ gen_helper_sve_ldhds_be_zss,
+ gen_helper_sve_ldsds_be_zss,
+ NULL, },
+ { gen_helper_sve_ldbdu_zss,
+ gen_helper_sve_ldhdu_be_zss,
+ gen_helper_sve_ldsdu_be_zss,
+ gen_helper_sve_lddd_be_zss, } },
+ { { gen_helper_sve_ldbds_zd,
+ gen_helper_sve_ldhds_be_zd,
+ gen_helper_sve_ldsds_be_zd,
+ NULL, },
+ { gen_helper_sve_ldbdu_zd,
+ gen_helper_sve_ldhdu_be_zd,
+ gen_helper_sve_ldsdu_be_zd,
+ gen_helper_sve_lddd_be_zd, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbds_zsu,
+ gen_helper_sve_ldffhds_be_zsu,
+ gen_helper_sve_ldffsds_be_zsu,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zsu,
+ gen_helper_sve_ldffhdu_be_zsu,
+ gen_helper_sve_ldffsdu_be_zsu,
+ gen_helper_sve_ldffdd_be_zsu, } },
+ { { gen_helper_sve_ldffbds_zss,
+ gen_helper_sve_ldffhds_be_zss,
+ gen_helper_sve_ldffsds_be_zss,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zss,
+ gen_helper_sve_ldffhdu_be_zss,
+ gen_helper_sve_ldffsdu_be_zss,
+ gen_helper_sve_ldffdd_be_zss, } },
+ { { gen_helper_sve_ldffbds_zd,
+ gen_helper_sve_ldffhds_be_zd,
+ gen_helper_sve_ldffsds_be_zd,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zd,
+ gen_helper_sve_ldffhdu_be_zd,
+ gen_helper_sve_ldffsdu_be_zd,
+ gen_helper_sve_ldffdd_be_zd, } } } },
};
static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
{
gen_helper_gvec_mem_scatter *fn = NULL;
+ int be = s->be_data == MO_BE;
if (!sve_access_check(s)) {
return true;
@@ -5038,22 +5275,23 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
switch (a->esz) {
case MO_32:
- fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz];
+ fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
break;
case MO_64:
- fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz];
+ fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
break;
}
assert(fn != NULL);
do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
- cpu_reg_sp(s, a->rn), fn);
+ cpu_reg_sp(s, a->rn), a->msz, fn);
return true;
}
static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
{
gen_helper_gvec_mem_scatter *fn = NULL;
+ int be = s->be_data == MO_BE;
TCGv_i64 imm;
if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
@@ -5065,10 +5303,10 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
switch (a->esz) {
case MO_32:
- fn = gather_load_fn32[a->ff][0][a->u][a->msz];
+ fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
break;
case MO_64:
- fn = gather_load_fn64[a->ff][2][a->u][a->msz];
+ fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
break;
}
assert(fn != NULL);
@@ -5077,40 +5315,63 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
* by loading the immediate into the scalar parameter.
*/
imm = tcg_const_i64(a->imm << a->msz);
- do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
+ do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
tcg_temp_free_i64(imm);
return true;
}
-/* Indexed by [xs][msz]. */
-static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][3] = {
- { gen_helper_sve_stbs_zsu,
- gen_helper_sve_sths_zsu,
- gen_helper_sve_stss_zsu, },
- { gen_helper_sve_stbs_zss,
- gen_helper_sve_sths_zss,
- gen_helper_sve_stss_zss, },
+/* Indexed by [be][xs][msz]. */
+static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = {
+ /* Little-endian */
+ { { gen_helper_sve_stbs_zsu,
+ gen_helper_sve_sths_le_zsu,
+ gen_helper_sve_stss_le_zsu, },
+ { gen_helper_sve_stbs_zss,
+ gen_helper_sve_sths_le_zss,
+ gen_helper_sve_stss_le_zss, } },
+ /* Big-endian */
+ { { gen_helper_sve_stbs_zsu,
+ gen_helper_sve_sths_be_zsu,
+ gen_helper_sve_stss_be_zsu, },
+ { gen_helper_sve_stbs_zss,
+ gen_helper_sve_sths_be_zss,
+ gen_helper_sve_stss_be_zss, } },
};
/* Note that we overload xs=2 to indicate 64-bit offset. */
-static gen_helper_gvec_mem_scatter * const scatter_store_fn64[3][4] = {
- { gen_helper_sve_stbd_zsu,
- gen_helper_sve_sthd_zsu,
- gen_helper_sve_stsd_zsu,
- gen_helper_sve_stdd_zsu, },
- { gen_helper_sve_stbd_zss,
- gen_helper_sve_sthd_zss,
- gen_helper_sve_stsd_zss,
- gen_helper_sve_stdd_zss, },
- { gen_helper_sve_stbd_zd,
- gen_helper_sve_sthd_zd,
- gen_helper_sve_stsd_zd,
- gen_helper_sve_stdd_zd, },
+static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
+ /* Little-endian */
+ { { gen_helper_sve_stbd_zsu,
+ gen_helper_sve_sthd_le_zsu,
+ gen_helper_sve_stsd_le_zsu,
+ gen_helper_sve_stdd_le_zsu, },
+ { gen_helper_sve_stbd_zss,
+ gen_helper_sve_sthd_le_zss,
+ gen_helper_sve_stsd_le_zss,
+ gen_helper_sve_stdd_le_zss, },
+ { gen_helper_sve_stbd_zd,
+ gen_helper_sve_sthd_le_zd,
+ gen_helper_sve_stsd_le_zd,
+ gen_helper_sve_stdd_le_zd, } },
+ /* Big-endian */
+ { { gen_helper_sve_stbd_zsu,
+ gen_helper_sve_sthd_be_zsu,
+ gen_helper_sve_stsd_be_zsu,
+ gen_helper_sve_stdd_be_zsu, },
+ { gen_helper_sve_stbd_zss,
+ gen_helper_sve_sthd_be_zss,
+ gen_helper_sve_stsd_be_zss,
+ gen_helper_sve_stdd_be_zss, },
+ { gen_helper_sve_stbd_zd,
+ gen_helper_sve_sthd_be_zd,
+ gen_helper_sve_stsd_be_zd,
+ gen_helper_sve_stdd_be_zd, } },
};
static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
{
gen_helper_gvec_mem_scatter *fn;
+ int be = s->be_data == MO_BE;
if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
return false;
@@ -5120,22 +5381,23 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
}
switch (a->esz) {
case MO_32:
- fn = scatter_store_fn32[a->xs][a->msz];
+ fn = scatter_store_fn32[be][a->xs][a->msz];
break;
case MO_64:
- fn = scatter_store_fn64[a->xs][a->msz];
+ fn = scatter_store_fn64[be][a->xs][a->msz];
break;
default:
g_assert_not_reached();
}
do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
- cpu_reg_sp(s, a->rn), fn);
+ cpu_reg_sp(s, a->rn), a->msz, fn);
return true;
}
static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
{
gen_helper_gvec_mem_scatter *fn = NULL;
+ int be = s->be_data == MO_BE;
TCGv_i64 imm;
if (a->esz < a->msz) {
@@ -5147,10 +5409,10 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
switch (a->esz) {
case MO_32:
- fn = scatter_store_fn32[0][a->msz];
+ fn = scatter_store_fn32[be][0][a->msz];
break;
case MO_64:
- fn = scatter_store_fn64[2][a->msz];
+ fn = scatter_store_fn64[be][2][a->msz];
break;
}
assert(fn != NULL);
@@ -5159,7 +5421,7 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
* by loading the immediate into the scalar parameter.
*/
imm = tcg_const_i64(a->imm << a->msz);
- do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
+ do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
tcg_temp_free_i64(imm);
return true;
}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index c6a5d2ac44..1b4bacb522 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -239,6 +239,23 @@ static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
tcg_temp_free_i32(var);
}
+/*
+ * Variant of store_reg which applies v8M stack-limit checks before updating
+ * SP. If the check fails this will result in an exception being taken.
+ * We disable the stack checks for CONFIG_USER_ONLY because we have
+ * no idea what the stack limits should be in that case.
+ * If stack checking is not being done this just acts like store_reg().
+ */
+static void store_sp_checked(DisasContext *s, TCGv_i32 var)
+{
+#ifndef CONFIG_USER_ONLY
+ if (s->v8m_stackcheck) {
+ gen_helper_v8m_stackcheck(cpu_env, var);
+ }
+#endif
+ store_reg(s, 13, var);
+}
+
/* Value extensions. */
#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
@@ -4212,6 +4229,18 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
if (insn & (1 << 24)) /* pre-decrement */
tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
+ if (s->v8m_stackcheck && rn == 13 && w) {
+ /*
+ * Here 'addr' is the lowest address we will store to,
+ * and is either the old SP (if post-increment) or
+ * the new SP (if pre-decrement). For post-increment
+ * where the old value is below the limit and the new
+ * value is above, it is UNKNOWN whether the limit check
+ * triggers; we choose to trigger.
+ */
+ gen_helper_v8m_stackcheck(cpu_env, addr);
+ }
+
if (dp)
offset = 8;
else
@@ -10261,6 +10290,8 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
* 0b1111_1001_x11x_xxxx_xxxx_xxxx_xxxx_xxxx
* - load/store dual (pre-indexed)
*/
+ bool wback = extract32(insn, 21, 1);
+
if (rn == 15) {
if (insn & (1 << 21)) {
/* UNPREDICTABLE */
@@ -10272,8 +10303,29 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
addr = load_reg(s, rn);
}
offset = (insn & 0xff) * 4;
- if ((insn & (1 << 23)) == 0)
+ if ((insn & (1 << 23)) == 0) {
offset = -offset;
+ }
+
+ if (s->v8m_stackcheck && rn == 13 && wback) {
+ /*
+ * Here 'addr' is the current SP; if offset is +ve we're
+ * moving SP up, else down. It is UNKNOWN whether the limit
+ * check triggers when SP starts below the limit and ends
+ * up above it; check whichever of the current and final
+ * SP is lower, so QEMU will trigger in that situation.
+ */
+ if ((int32_t)offset < 0) {
+ TCGv_i32 newsp = tcg_temp_new_i32();
+
+ tcg_gen_addi_i32(newsp, addr, offset);
+ gen_helper_v8m_stackcheck(cpu_env, newsp);
+ tcg_temp_free_i32(newsp);
+ } else {
+ gen_helper_v8m_stackcheck(cpu_env, addr);
+ }
+ }
+
if (insn & (1 << 24)) {
tcg_gen_addi_i32(addr, addr, offset);
offset = 0;
@@ -10297,7 +10349,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
gen_aa32_st32(s, tmp, addr, get_mem_index(s));
tcg_temp_free_i32(tmp);
}
- if (insn & (1 << 21)) {
+ if (wback) {
/* Base writeback. */
tcg_gen_addi_i32(addr, addr, offset - 4);
store_reg(s, rn, addr);
@@ -10484,6 +10536,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
} else {
int i, loaded_base = 0;
TCGv_i32 loaded_var;
+ bool wback = extract32(insn, 21, 1);
/* Load/store multiple. */
addr = load_reg(s, rn);
offset = 0;
@@ -10491,10 +10544,26 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
if (insn & (1 << i))
offset += 4;
}
+
if (insn & (1 << 24)) {
tcg_gen_addi_i32(addr, addr, -offset);
}
+ if (s->v8m_stackcheck && rn == 13 && wback) {
+ /*
+ * If the writeback is incrementing SP rather than
+ * decrementing it, and the initial SP is below the
+ * stack limit but the final written-back SP would
+ * be above, then then we must not perform any memory
+ * accesses, but it is IMPDEF whether we generate
+ * an exception. We choose to do so in this case.
+ * At this point 'addr' is the lowest address, so
+ * either the original SP (if incrementing) or our
+ * final SP (if decrementing), so that's what we check.
+ */
+ gen_helper_v8m_stackcheck(cpu_env, addr);
+ }
+
loaded_var = NULL;
for (i = 0; i < 16; i++) {
if ((insn & (1 << i)) == 0)
@@ -10522,7 +10591,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
if (loaded_base) {
store_reg(s, rn, loaded_var);
}
- if (insn & (1 << 21)) {
+ if (wback) {
/* Base register writeback. */
if (insn & (1 << 24)) {
tcg_gen_addi_i32(addr, addr, -offset);
@@ -10583,7 +10652,13 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2))
goto illegal_op;
tcg_temp_free_i32(tmp2);
- if (rd != 15) {
+ if (rd == 13 &&
+ ((op == 2 && rn == 15) ||
+ (op == 8 && rn == 13) ||
+ (op == 13 && rn == 13))) {
+ /* MOV SP, ... or ADD SP, SP, ... or SUB SP, SP, ... */
+ store_sp_checked(s, tmp);
+ } else if (rd != 15) {
store_reg(s, rd, tmp);
} else {
tcg_temp_free_i32(tmp);
@@ -10600,6 +10675,10 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
tmp2 = load_reg(s, rm);
if ((insn & 0x70) != 0)
goto illegal_op;
+ /*
+ * 0b1111_1010_0xxx_xxxx_1111_xxxx_0000_xxxx:
+ * - MOV, MOVS (register-shifted register), flagsetting
+ */
op = (insn >> 21) & 3;
logic_cc = (insn & (1 << 20)) != 0;
gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
@@ -11267,8 +11346,15 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
gen_jmp(s, s->pc + offset);
}
} else {
- /* Data processing immediate. */
+ /*
+ * 0b1111_0xxx_xxxx_0xxx_xxxx_xxxx
+ * - Data-processing (modified immediate, plain binary immediate)
+ */
if (insn & (1 << 25)) {
+ /*
+ * 0b1111_0x1x_xxxx_0xxx_xxxx_xxxx
+ * - Data-processing (plain binary immediate)
+ */
if (insn & (1 << 24)) {
if (insn & (1 << 20))
goto illegal_op;
@@ -11364,6 +11450,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, imm);
}
+ store_reg(s, rd, tmp);
} else {
/* Add/sub 12-bit immediate. */
if (rn == 15) {
@@ -11374,17 +11461,27 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
offset += imm;
tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, offset);
+ store_reg(s, rd, tmp);
} else {
tmp = load_reg(s, rn);
if (insn & (1 << 23))
tcg_gen_subi_i32(tmp, tmp, imm);
else
tcg_gen_addi_i32(tmp, tmp, imm);
+ if (rn == 13 && rd == 13) {
+ /* ADD SP, SP, imm or SUB SP, SP, imm */
+ store_sp_checked(s, tmp);
+ } else {
+ store_reg(s, rd, tmp);
+ }
}
}
- store_reg(s, rd, tmp);
}
} else {
+ /*
+ * 0b1111_0x0x_xxxx_0xxx_xxxx_xxxx
+ * - Data-processing (modified immediate)
+ */
int shifter_out = 0;
/* modified 12-bit immediate. */
shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12);
@@ -11426,7 +11523,11 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
goto illegal_op;
tcg_temp_free_i32(tmp2);
rd = (insn >> 8) & 0xf;
- if (rd != 15) {
+ if (rd == 13 && rn == 13
+ && (op == 8 || op == 13)) {
+ /* ADD(S) SP, SP, imm or SUB(S) SP, SP, imm */
+ store_sp_checked(s, tmp);
+ } else if (rd != 15) {
store_reg(s, rd, tmp);
} else {
tcg_temp_free_i32(tmp);
@@ -11535,7 +11636,6 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
imm = -imm;
/* Fall through. */
case 0xf: /* Pre-increment. */
- tcg_gen_addi_i32(addr, addr, imm);
writeback = 1;
break;
default:
@@ -11547,6 +11647,28 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
issinfo = writeback ? ISSInvalid : rs;
+ if (s->v8m_stackcheck && rn == 13 && writeback) {
+ /*
+ * Stackcheck. Here we know 'addr' is the current SP;
+ * if imm is +ve we're moving SP up, else down. It is
+ * UNKNOWN whether the limit check triggers when SP starts
+ * below the limit and ends up above it; we chose to do so.
+ */
+ if ((int32_t)imm < 0) {
+ TCGv_i32 newsp = tcg_temp_new_i32();
+
+ tcg_gen_addi_i32(newsp, addr, imm);
+ gen_helper_v8m_stackcheck(cpu_env, newsp);
+ tcg_temp_free_i32(newsp);
+ } else {
+ gen_helper_v8m_stackcheck(cpu_env, addr);
+ }
+ }
+
+ if (writeback && !postinc) {
+ tcg_gen_addi_i32(addr, addr, imm);
+ }
+
if (insn & (1 << 20)) {
/* Load. */
tmp = tcg_temp_new_i32();
@@ -11629,7 +11751,11 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn)
rd = insn & 7;
op = (insn >> 11) & 3;
if (op == 3) {
- /* add/subtract */
+ /*
+ * 0b0001_1xxx_xxxx_xxxx
+ * - Add, subtract (three low registers)
+ * - Add, subtract (two low registers and immediate)
+ */
rn = (insn >> 3) & 7;
tmp = load_reg(s, rn);
if (insn & (1 << 10)) {
@@ -11666,7 +11792,10 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn)
}
break;
case 2: case 3:
- /* arithmetic large immediate */
+ /*
+ * 0b001x_xxxx_xxxx_xxxx
+ * - Add, subtract, compare, move (one low register and immediate)
+ */
op = (insn >> 11) & 3;
rd = (insn >> 8) & 0x7;
if (op == 0) { /* mov */
@@ -11732,7 +11861,12 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn)
tmp2 = load_reg(s, rm);
tcg_gen_add_i32(tmp, tmp, tmp2);
tcg_temp_free_i32(tmp2);
- store_reg(s, rd, tmp);
+ if (rd == 13) {
+ /* ADD SP, SP, reg */
+ store_sp_checked(s, tmp);
+ } else {
+ store_reg(s, rd, tmp);
+ }
break;
case 1: /* cmp */
tmp = load_reg(s, rd);
@@ -11743,7 +11877,12 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn)
break;
case 2: /* mov/cpy */
tmp = load_reg(s, rm);
- store_reg(s, rd, tmp);
+ if (rd == 13) {
+ /* MOV SP, reg */
+ store_sp_checked(s, tmp);
+ } else {
+ store_reg(s, rd, tmp);
+ }
break;
case 3:
{
@@ -11793,7 +11932,10 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn)
break;
}
- /* data processing register */
+ /*
+ * 0b0100_00xx_xxxx_xxxx
+ * - Data-processing (two low registers)
+ */
rd = insn & 7;
rm = (insn >> 3) & 7;
op = (insn >> 6) & 0xf;
@@ -12071,7 +12213,10 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn)
break;
case 10:
- /* add to high reg */
+ /*
+ * 0b1010_xxxx_xxxx_xxxx
+ * - Add PC/SP (immediate)
+ */
rd = (insn >> 8) & 7;
if (insn & (1 << 11)) {
/* SP */
@@ -12091,13 +12236,17 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn)
op = (insn >> 8) & 0xf;
switch (op) {
case 0:
- /* adjust stack pointer */
+ /*
+ * 0b1011_0000_xxxx_xxxx
+ * - ADD (SP plus immediate)
+ * - SUB (SP minus immediate)
+ */
tmp = load_reg(s, 13);
val = (insn & 0x7f) * 4;
if (insn & (1 << 7))
val = -(int32_t)val;
tcg_gen_addi_i32(tmp, tmp, val);
- store_reg(s, 13, tmp);
+ store_sp_checked(s, tmp);
break;
case 2: /* sign/zero extend. */
@@ -12114,7 +12263,10 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn)
store_reg(s, rd, tmp);
break;
case 4: case 5: case 0xc: case 0xd:
- /* push/pop */
+ /*
+ * 0b1011_x10x_xxxx_xxxx
+ * - push/pop
+ */
addr = load_reg(s, 13);
if (insn & (1 << 8))
offset = 4;
@@ -12127,6 +12279,17 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn)
if ((insn & (1 << 11)) == 0) {
tcg_gen_addi_i32(addr, addr, -offset);
}
+
+ if (s->v8m_stackcheck) {
+ /*
+ * Here 'addr' is the lower of "old SP" and "new SP";
+ * if this is a pop that starts below the limit and ends
+ * above it, it is UNKNOWN whether the limit check triggers;
+ * we choose to trigger.
+ */
+ gen_helper_v8m_stackcheck(cpu_env, addr);
+ }
+
for (i = 0; i < 8; i++) {
if (insn & (1 << i)) {
if (insn & (1 << 11)) {
@@ -12451,6 +12614,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
dc->v7m_handler_mode = ARM_TBFLAG_HANDLER(dc->base.tb->flags);
dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
regime_is_secure(env, dc->mmu_idx);
+ dc->v8m_stackcheck = ARM_TBFLAG_STACKCHECK(dc->base.tb->flags);
dc->cp_regs = cpu->cp_regs;
dc->features = env->features;
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 45f04244be..c1b65f3efb 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -38,6 +38,7 @@ typedef struct DisasContext {
int vec_stride;
bool v7m_handler_mode;
bool v8m_secure; /* true if v8M and we're in Secure mode */
+ bool v8m_stackcheck; /* true if we need to perform v8M stack limit checks */
/* Immediate value in AArch32 SVC insn; must be set if is_jmp == DISAS_SWI
* so that top level loop can generate correct syndrome information.
*/