aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-04-30 19:09:38 +0100
committerPeter Maydell <peter.maydell@linaro.org>2020-05-04 12:57:56 +0100
commita27b46304352a0eced45e560e96515dbe3cc174f (patch)
tree85c1c9a2c735362bb4383073289b486d19be6969
parentd27e82f7d02f35e5919bd9cbbcb157f3537069a0 (diff)
target/arm: Convert Neon load/store multiple structures to decodetree
Convert the Neon "load/store multiple structures" insns to decodetree. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200430181003.21682-12-peter.maydell@linaro.org
-rw-r--r--target/arm/neon-ls.decode7
-rw-r--r--target/arm/translate-neon.inc.c124
-rw-r--r--target/arm/translate.c91
3 files changed, 133 insertions, 89 deletions
diff --git a/target/arm/neon-ls.decode b/target/arm/neon-ls.decode
index 2b16c9256d..dd03d5a37b 100644
--- a/target/arm/neon-ls.decode
+++ b/target/arm/neon-ls.decode
@@ -27,3 +27,10 @@
# 0b1111_1001_xxx0_xxxx_xxxx_xxxx_xxxx_xxxx
# This file works on the A32 encoding only; calling code for T32 has to
# transform the insn into the A32 version first.
+
+%vd_dp 22:1 12:4
+
+# Neon load/store multiple structures
+
+VLDST_multiple 1111 0100 0 . l:1 0 rn:4 .... itype:4 size:2 align:2 rm:4 \
+ vd=%vd_dp
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index b06542b8b8..966c0d9201 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -274,3 +274,127 @@ static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
gen_helper_gvec_fmlal_idx_a32);
return true;
}
+
+static struct {
+ int nregs;
+ int interleave;
+ int spacing;
+} const neon_ls_element_type[11] = {
+ {1, 4, 1},
+ {1, 4, 2},
+ {4, 1, 1},
+ {2, 2, 2},
+ {1, 3, 1},
+ {1, 3, 2},
+ {3, 1, 1},
+ {1, 1, 1},
+ {1, 2, 1},
+ {1, 2, 2},
+ {2, 1, 1}
+};
+
+static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
+ int stride)
+{
+ if (rm != 15) {
+ TCGv_i32 base;
+
+ base = load_reg(s, rn);
+ if (rm == 13) {
+ tcg_gen_addi_i32(base, base, stride);
+ } else {
+ TCGv_i32 index;
+ index = load_reg(s, rm);
+ tcg_gen_add_i32(base, base, index);
+ tcg_temp_free_i32(index);
+ }
+ store_reg(s, rn, base);
+ }
+}
+
+static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
+{
+ /* Neon load/store multiple structures */
+ int nregs, interleave, spacing, reg, n;
+ MemOp endian = s->be_data;
+ int mmu_idx = get_mem_index(s);
+ int size = a->size;
+ TCGv_i64 tmp64;
+ TCGv_i32 addr, tmp;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
+ return false;
+ }
+ if (a->itype > 10) {
+ return false;
+ }
+ /* Catch UNDEF cases for bad values of align field */
+ switch (a->itype & 0xc) {
+ case 4:
+ if (a->align >= 2) {
+ return false;
+ }
+ break;
+ case 8:
+ if (a->align == 3) {
+ return false;
+ }
+ break;
+ default:
+ break;
+ }
+ nregs = neon_ls_element_type[a->itype].nregs;
+ interleave = neon_ls_element_type[a->itype].interleave;
+ spacing = neon_ls_element_type[a->itype].spacing;
+ if (size == 3 && (interleave | spacing) != 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /* For our purposes, bytes are always little-endian. */
+ if (size == 0) {
+ endian = MO_LE;
+ }
+ /*
+ * Consecutive little-endian elements from a single register
+ * can be promoted to a larger little-endian operation.
+ */
+ if (interleave == 1 && endian == MO_LE) {
+ size = 3;
+ }
+ tmp64 = tcg_temp_new_i64();
+ addr = tcg_temp_new_i32();
+ tmp = tcg_const_i32(1 << size);
+ load_reg_var(s, addr, a->rn);
+ for (reg = 0; reg < nregs; reg++) {
+ for (n = 0; n < 8 >> size; n++) {
+ int xs;
+ for (xs = 0; xs < interleave; xs++) {
+ int tt = a->vd + reg + spacing * xs;
+
+ if (a->l) {
+ gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
+ neon_store_element64(tt, n, size, tmp64);
+ } else {
+ neon_load_element64(tmp64, tt, n, size);
+ gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
+ }
+ tcg_gen_add_i32(addr, addr, tmp);
+ }
+ }
+ }
+ tcg_temp_free_i32(addr);
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i64(tmp64);
+
+ gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
+ return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index e269642a48..be56cbb061 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -3214,45 +3214,19 @@ static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
}
-static struct {
- int nregs;
- int interleave;
- int spacing;
-} const neon_ls_element_type[11] = {
- {1, 4, 1},
- {1, 4, 2},
- {4, 1, 1},
- {2, 2, 2},
- {1, 3, 1},
- {1, 3, 2},
- {3, 1, 1},
- {1, 1, 1},
- {1, 2, 1},
- {1, 2, 2},
- {2, 1, 1}
-};
-
/* Translate a NEON load/store element instruction. Return nonzero if the
instruction is invalid. */
static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
{
int rd, rn, rm;
- int op;
int nregs;
- int interleave;
- int spacing;
int stride;
int size;
int reg;
int load;
- int n;
int vec_size;
- int mmu_idx;
- MemOp endian;
TCGv_i32 addr;
TCGv_i32 tmp;
- TCGv_i32 tmp2;
- TCGv_i64 tmp64;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return 1;
@@ -3274,70 +3248,9 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
rn = (insn >> 16) & 0xf;
rm = insn & 0xf;
load = (insn & (1 << 21)) != 0;
- endian = s->be_data;
- mmu_idx = get_mem_index(s);
if ((insn & (1 << 23)) == 0) {
- /* Load store all elements. */
- op = (insn >> 8) & 0xf;
- size = (insn >> 6) & 3;
- if (op > 10)
- return 1;
- /* Catch UNDEF cases for bad values of align field */
- switch (op & 0xc) {
- case 4:
- if (((insn >> 5) & 1) == 1) {
- return 1;
- }
- break;
- case 8:
- if (((insn >> 4) & 3) == 3) {
- return 1;
- }
- break;
- default:
- break;
- }
- nregs = neon_ls_element_type[op].nregs;
- interleave = neon_ls_element_type[op].interleave;
- spacing = neon_ls_element_type[op].spacing;
- if (size == 3 && (interleave | spacing) != 1) {
- return 1;
- }
- /* For our purposes, bytes are always little-endian. */
- if (size == 0) {
- endian = MO_LE;
- }
- /* Consecutive little-endian elements from a single register
- * can be promoted to a larger little-endian operation.
- */
- if (interleave == 1 && endian == MO_LE) {
- size = 3;
- }
- tmp64 = tcg_temp_new_i64();
- addr = tcg_temp_new_i32();
- tmp2 = tcg_const_i32(1 << size);
- load_reg_var(s, addr, rn);
- for (reg = 0; reg < nregs; reg++) {
- for (n = 0; n < 8 >> size; n++) {
- int xs;
- for (xs = 0; xs < interleave; xs++) {
- int tt = rd + reg + spacing * xs;
-
- if (load) {
- gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
- neon_store_element64(tt, n, size, tmp64);
- } else {
- neon_load_element64(tmp64, tt, n, size);
- gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
- }
- tcg_gen_add_i32(addr, addr, tmp2);
- }
- }
- }
- tcg_temp_free_i32(addr);
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i64(tmp64);
- stride = nregs * interleave * 8;
+ /* Load store all elements -- handled already by decodetree */
+ return 1;
} else {
size = (insn >> 10) & 3;
if (size == 3) {