aboutsummaryrefslogtreecommitdiff
path: root/target/arm/sve_helper.c
diff options
context:
space:
mode:
Diffstat (limited to 'target/arm/sve_helper.c')
-rw-r--r--target/arm/sve_helper.c211
1 files changed, 211 insertions, 0 deletions
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 0d22a57a22..bd874e6fa2 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -3120,3 +3120,214 @@ DO_LDNF1(sds_r)
DO_LDNF1(dd_r)
#undef DO_LDNF1
+
+/*
+ * Store contiguous data, protected by a governing predicate.
+ */
+#define DO_ST1(NAME, FN, TYPEE, TYPEM, H) \
+void HELPER(NAME)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ intptr_t ra = GETPC(); \
+ unsigned rd = simd_data(desc); \
+ void *vd = &env->vfp.zregs[rd]; \
+ for (i = 0; i < oprsz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ if (pg & 1) { \
+ TYPEM m = *(TYPEE *)(vd + H(i)); \
+ FN(env, addr, m, ra); \
+ } \
+ i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \
+ addr += sizeof(TYPEM); \
+ } while (i & 15); \
+ } \
+}
+
+#define DO_ST1_D(NAME, FN, TYPEM) \
+void HELPER(NAME)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc) / 8; \
+ intptr_t ra = GETPC(); \
+ unsigned rd = simd_data(desc); \
+ uint64_t *d = &env->vfp.zregs[rd].d[0]; \
+ uint8_t *pg = vg; \
+ for (i = 0; i < oprsz; i += 1) { \
+ if (pg[H1(i)] & 1) { \
+ FN(env, addr, d[i], ra); \
+ } \
+ addr += sizeof(TYPEM); \
+ } \
+}
+
+#define DO_ST2(NAME, FN, TYPEE, TYPEM, H) \
+void HELPER(NAME)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ intptr_t ra = GETPC(); \
+ unsigned rd = simd_data(desc); \
+ void *d1 = &env->vfp.zregs[rd]; \
+ void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \
+ for (i = 0; i < oprsz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ if (pg & 1) { \
+ TYPEM m1 = *(TYPEE *)(d1 + H(i)); \
+ TYPEM m2 = *(TYPEE *)(d2 + H(i)); \
+ FN(env, addr, m1, ra); \
+ FN(env, addr + sizeof(TYPEM), m2, ra); \
+ } \
+ i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \
+ addr += 2 * sizeof(TYPEM); \
+ } while (i & 15); \
+ } \
+}
+
+#define DO_ST3(NAME, FN, TYPEE, TYPEM, H) \
+void HELPER(NAME)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ intptr_t ra = GETPC(); \
+ unsigned rd = simd_data(desc); \
+ void *d1 = &env->vfp.zregs[rd]; \
+ void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \
+ void *d3 = &env->vfp.zregs[(rd + 2) & 31]; \
+ for (i = 0; i < oprsz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ if (pg & 1) { \
+ TYPEM m1 = *(TYPEE *)(d1 + H(i)); \
+ TYPEM m2 = *(TYPEE *)(d2 + H(i)); \
+ TYPEM m3 = *(TYPEE *)(d3 + H(i)); \
+ FN(env, addr, m1, ra); \
+ FN(env, addr + sizeof(TYPEM), m2, ra); \
+ FN(env, addr + 2 * sizeof(TYPEM), m3, ra); \
+ } \
+ i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \
+ addr += 3 * sizeof(TYPEM); \
+ } while (i & 15); \
+ } \
+}
+
+#define DO_ST4(NAME, FN, TYPEE, TYPEM, H) \
+void HELPER(NAME)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ intptr_t ra = GETPC(); \
+ unsigned rd = simd_data(desc); \
+ void *d1 = &env->vfp.zregs[rd]; \
+ void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \
+ void *d3 = &env->vfp.zregs[(rd + 2) & 31]; \
+ void *d4 = &env->vfp.zregs[(rd + 3) & 31]; \
+ for (i = 0; i < oprsz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ if (pg & 1) { \
+ TYPEM m1 = *(TYPEE *)(d1 + H(i)); \
+ TYPEM m2 = *(TYPEE *)(d2 + H(i)); \
+ TYPEM m3 = *(TYPEE *)(d3 + H(i)); \
+ TYPEM m4 = *(TYPEE *)(d4 + H(i)); \
+ FN(env, addr, m1, ra); \
+ FN(env, addr + sizeof(TYPEM), m2, ra); \
+ FN(env, addr + 2 * sizeof(TYPEM), m3, ra); \
+ FN(env, addr + 3 * sizeof(TYPEM), m4, ra); \
+ } \
+ i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \
+ addr += 4 * sizeof(TYPEM); \
+ } while (i & 15); \
+ } \
+}
+
+DO_ST1(sve_st1bh_r, cpu_stb_data_ra, uint16_t, uint8_t, H1_2)
+DO_ST1(sve_st1bs_r, cpu_stb_data_ra, uint32_t, uint8_t, H1_4)
+DO_ST1_D(sve_st1bd_r, cpu_stb_data_ra, uint8_t)
+
+DO_ST1(sve_st1hs_r, cpu_stw_data_ra, uint32_t, uint16_t, H1_4)
+DO_ST1_D(sve_st1hd_r, cpu_stw_data_ra, uint16_t)
+
+DO_ST1_D(sve_st1sd_r, cpu_stl_data_ra, uint32_t)
+
+DO_ST1(sve_st1bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1)
+DO_ST2(sve_st2bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1)
+DO_ST3(sve_st3bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1)
+DO_ST4(sve_st4bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1)
+
+DO_ST1(sve_st1hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2)
+DO_ST2(sve_st2hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2)
+DO_ST3(sve_st3hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2)
+DO_ST4(sve_st4hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2)
+
+DO_ST1(sve_st1ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4)
+DO_ST2(sve_st2ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4)
+DO_ST3(sve_st3ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4)
+DO_ST4(sve_st4ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4)
+
+DO_ST1_D(sve_st1dd_r, cpu_stq_data_ra, uint64_t)
+
+void HELPER(sve_st2dd_r)(CPUARMState *env, void *vg,
+ target_ulong addr, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc) / 8;
+ intptr_t ra = GETPC();
+ unsigned rd = simd_data(desc);
+ uint64_t *d1 = &env->vfp.zregs[rd].d[0];
+ uint64_t *d2 = &env->vfp.zregs[(rd + 1) & 31].d[0];
+ uint8_t *pg = vg;
+
+ for (i = 0; i < oprsz; i += 1) {
+ if (pg[H1(i)] & 1) {
+ cpu_stq_data_ra(env, addr, d1[i], ra);
+ cpu_stq_data_ra(env, addr + 8, d2[i], ra);
+ }
+ addr += 2 * 8;
+ }
+}
+
+void HELPER(sve_st3dd_r)(CPUARMState *env, void *vg,
+ target_ulong addr, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc) / 8;
+ intptr_t ra = GETPC();
+ unsigned rd = simd_data(desc);
+ uint64_t *d1 = &env->vfp.zregs[rd].d[0];
+ uint64_t *d2 = &env->vfp.zregs[(rd + 1) & 31].d[0];
+ uint64_t *d3 = &env->vfp.zregs[(rd + 2) & 31].d[0];
+ uint8_t *pg = vg;
+
+ for (i = 0; i < oprsz; i += 1) {
+ if (pg[H1(i)] & 1) {
+ cpu_stq_data_ra(env, addr, d1[i], ra);
+ cpu_stq_data_ra(env, addr + 8, d2[i], ra);
+ cpu_stq_data_ra(env, addr + 16, d3[i], ra);
+ }
+ addr += 3 * 8;
+ }
+}
+
+void HELPER(sve_st4dd_r)(CPUARMState *env, void *vg,
+ target_ulong addr, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc) / 8;
+ intptr_t ra = GETPC();
+ unsigned rd = simd_data(desc);
+ uint64_t *d1 = &env->vfp.zregs[rd].d[0];
+ uint64_t *d2 = &env->vfp.zregs[(rd + 1) & 31].d[0];
+ uint64_t *d3 = &env->vfp.zregs[(rd + 2) & 31].d[0];
+ uint64_t *d4 = &env->vfp.zregs[(rd + 3) & 31].d[0];
+ uint8_t *pg = vg;
+
+ for (i = 0; i < oprsz; i += 1) {
+ if (pg[H1(i)] & 1) {
+ cpu_stq_data_ra(env, addr, d1[i], ra);
+ cpu_stq_data_ra(env, addr + 8, d2[i], ra);
+ cpu_stq_data_ra(env, addr + 16, d3[i], ra);
+ cpu_stq_data_ra(env, addr + 24, d4[i], ra);
+ }
+ addr += 4 * 8;
+ }
+}