diff options
Diffstat (limited to 'target-arm/translate.c')
-rw-r--r-- | target-arm/translate.c | 84 |
1 files changed, 59 insertions, 25 deletions
diff --git a/target-arm/translate.c b/target-arm/translate.c index f69912f20c..1dfd4823f5 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -2648,6 +2648,28 @@ static void gen_neon_dup_high16(TCGv var) tcg_temp_free_i32(tmp); } +static TCGv gen_load_and_replicate(DisasContext *s, TCGv addr, int size) +{ + /* Load a single Neon element and replicate into a 32 bit TCG reg */ + TCGv tmp; + switch (size) { + case 0: + tmp = gen_ld8u(addr, IS_USER(s)); + gen_neon_dup_u8(tmp, 0); + break; + case 1: + tmp = gen_ld16u(addr, IS_USER(s)); + gen_neon_dup_low16(tmp); + break; + case 2: + tmp = gen_ld32(addr, IS_USER(s)); + break; + default: /* Avoid compiler warnings. */ + abort(); + } + return tmp; +} + /* Disassemble a VFP instruction. Returns nonzero if an error occured (ie. an undefined instruction). */ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -3890,36 +3912,48 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) size = (insn >> 10) & 3; if (size == 3) { /* Load single element to all lanes. */ - if (!load) + int a = (insn >> 4) & 1; + if (!load) { return 1; + } size = (insn >> 6) & 3; nregs = ((insn >> 8) & 3) + 1; - stride = (insn & (1 << 5)) ? 2 : 1; - load_reg_var(s, addr, rn); - for (reg = 0; reg < nregs; reg++) { - switch (size) { - case 0: - tmp = gen_ld8u(addr, IS_USER(s)); - gen_neon_dup_u8(tmp, 0); - break; - case 1: - tmp = gen_ld16u(addr, IS_USER(s)); - gen_neon_dup_low16(tmp); - break; - case 2: - tmp = gen_ld32(addr, IS_USER(s)); - break; - case 3: + + if (size == 3) { + if (nregs != 4 || a == 0) { return 1; - default: /* Avoid compiler warnings. */ - abort(); } - tcg_gen_addi_i32(addr, addr, 1 << size); - tmp2 = tcg_temp_new_i32(); - tcg_gen_mov_i32(tmp2, tmp); - neon_store_reg(rd, 0, tmp2); - neon_store_reg(rd, 1, tmp); - rd += stride; + /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */ + size = 2; + } + if (nregs == 1 && a == 1 && size == 0) { + return 1; + } + if (nregs == 3 && a == 1) { + return 1; + } + load_reg_var(s, addr, rn); + if (nregs == 1) { + /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */ + tmp = gen_load_and_replicate(s, addr, size); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); + if (insn & (1 << 5)) { + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0)); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1)); + } + tcg_temp_free_i32(tmp); + } else { + /* VLD2/3/4 to all lanes: bit 5 indicates register stride */ + stride = (insn & (1 << 5)) ? 2 : 1; + for (reg = 0; reg < nregs; reg++) { + tmp = gen_load_and_replicate(s, addr, size); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); + tcg_temp_free_i32(tmp); + tcg_gen_addi_i32(addr, addr, 1 << size); + rd += stride; + } } stride = (1 << size) * nregs; } else { |