diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2020-11-03 10:38:05 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2020-11-03 10:38:05 +0000 |
commit | c7a7a877b716cf14848f1fd5c754d293e2f8d852 (patch) | |
tree | afbab679f98b164f9aeecb10d635199c48bb0169 | |
parent | 8545ae485b1e8e43cc0137310c4c68dbece59990 (diff) | |
parent | ffb4fbf90a2f63c9cb33e4bb9f854c79bf04ca4a (diff) |
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20201102' into staging
target-arm queue:
* target/arm: Fix Neon emulation bugs on big-endian hosts
* target/arm: fix handling of HCR.FB
* target/arm: fix LORID_EL1 access check
* disas/capstone: Fix monitor disassembly of >32 bytes
* hw/arm/smmuv3: Fix potential integer overflow (CID 1432363)
* hw/arm/boot: fix SVE for EL3 direct kernel boot
* hw/display/omap_lcdc: Fix potential NULL pointer dereference
* hw/display/exynos4210_fimd: Fix potential NULL pointer dereference
* target/arm: Get correct MMU index for other-security-state
* configure: Test that gio libs from pkg-config work
* hw/intc/arm_gicv3_cpuif: Make GIC maintenance interrupts work
* docs: Fix building with Sphinx 3
* tests/qtest/npcm7xx_rng-test: Disable randomness tests
# gpg: Signature made Mon 02 Nov 2020 17:09:00 GMT
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE
* remotes/pmaydell/tags/pull-target-arm-20201102: (26 commits)
tests/qtest/npcm7xx_rng-test: Disable randomness tests
qemu-option-trace.rst.inc: Don't use option:: markup
scripts/kerneldoc: For Sphinx 3 use c:macro for macros with arguments
hw/intc/arm_gicv3_cpuif: Make GIC maintenance interrupts work
configure: Test that gio libs from pkg-config work
target/arm: Get correct MMU index for other-security-state
hw/display/exynos4210_fimd: Fix potential NULL pointer dereference
hw/display/omap_lcdc: Fix potential NULL pointer dereference
hw/arm/boot: fix SVE for EL3 direct kernel boot
hw/arm/smmuv3: Fix potential integer overflow (CID 1432363)
disas/capstone: Fix monitor disassembly of >32 bytes
target/arm: fix LORID_EL1 access check
target/arm: fix handling of HCR.FB
target/arm: Fix VUDOT/VSDOT (scalar) on big-endian hosts
target/arm: Fix float16 pairwise Neon ops on big-endian hosts
target/arm: Improve do_prewiden_3d
target/arm: Simplify do_long_3d and do_2scalar_long
target/arm: Rename neon_load_reg64 to vfp_load_reg64
target/arm: Add read/write_neon_element64
target/arm: Rename neon_load_reg32 to vfp_load_reg32
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rwxr-xr-x | configure | 10 | ||||
-rw-r--r-- | disas/capstone.c | 2 | ||||
-rw-r--r-- | docs/qemu-option-trace.rst.inc | 6 | ||||
-rw-r--r-- | hw/arm/boot.c | 3 | ||||
-rw-r--r-- | hw/arm/smmuv3.c | 3 | ||||
-rw-r--r-- | hw/display/exynos4210_fimd.c | 4 | ||||
-rw-r--r-- | hw/display/omap_lcdc.c | 10 | ||||
-rw-r--r-- | hw/intc/arm_gicv3_cpuif.c | 5 | ||||
-rw-r--r-- | include/hw/intc/arm_gicv3_common.h | 1 | ||||
-rwxr-xr-x | scripts/kernel-doc | 18 | ||||
-rw-r--r-- | target/arm/helper.c | 24 | ||||
-rw-r--r-- | target/arm/m_helper.c | 3 | ||||
-rw-r--r-- | target/arm/translate-neon.c.inc | 472 | ||||
-rw-r--r-- | target/arm/translate-vfp.c.inc | 341 | ||||
-rw-r--r-- | target/arm/translate.c | 139 | ||||
-rw-r--r-- | target/arm/vec_helper.c | 12 | ||||
-rw-r--r-- | tests/qtest/npcm7xx_rng-test.c | 14 |
17 files changed, 581 insertions, 486 deletions
@@ -3489,13 +3489,21 @@ if test "$static" = yes && test "$mingw32" = yes; then fi if $pkg_config --atleast-version=$glib_req_ver gio-2.0; then - gio=yes gio_cflags=$($pkg_config --cflags gio-2.0) gio_libs=$($pkg_config --libs gio-2.0) gdbus_codegen=$($pkg_config --variable=gdbus_codegen gio-2.0) if [ ! -x "$gdbus_codegen" ]; then gdbus_codegen= fi + # Check that the libraries actually work -- Ubuntu 18.04 ships + # with pkg-config --static --libs data for gio-2.0 that is missing + # -lblkid and will give a link error. + write_c_skeleton + if compile_prog "" "gio_libs" ; then + gio=yes + else + gio=no + fi else gio=no fi diff --git a/disas/capstone.c b/disas/capstone.c index 0a9ef9c892..7462c0e305 100644 --- a/disas/capstone.c +++ b/disas/capstone.c @@ -286,7 +286,7 @@ bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count) /* Make certain that we can make progress. */ assert(tsize != 0); - info->read_memory_func(pc, cap_buf + csize, tsize, info); + info->read_memory_func(pc + csize, cap_buf + csize, tsize, info); csize += tsize; if (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) { diff --git a/docs/qemu-option-trace.rst.inc b/docs/qemu-option-trace.rst.inc index 7e09773a9c..d7acbe67f7 100644 --- a/docs/qemu-option-trace.rst.inc +++ b/docs/qemu-option-trace.rst.inc @@ -1,7 +1,7 @@ Specify tracing options. -.. option:: [enable=]PATTERN +``[enable=]PATTERN`` Immediately enable events matching *PATTERN* (either event name or a globbing pattern). This option is only @@ -11,7 +11,7 @@ Specify tracing options. Use :option:`-trace help` to print a list of names of trace points. -.. option:: events=FILE +``events=FILE`` Immediately enable events listed in *FILE*. The file must contain one event name (as listed in the ``trace-events-all`` @@ -19,7 +19,7 @@ Specify tracing options. available if QEMU has been compiled with the ``simple``, ``log`` or ``ftrace`` tracing backend. -.. option:: file=FILE +``file=FILE`` Log output traces to *FILE*. This option is only available if QEMU has been compiled with diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 3e9816af80..cf97600a91 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -742,6 +742,9 @@ static void do_cpu_reset(void *opaque) if (cpu_isar_feature(aa64_mte, cpu)) { env->cp15.scr_el3 |= SCR_ATA; } + if (cpu_isar_feature(aa64_sve, cpu)) { + env->cp15.cptr_el[3] |= CPTR_EZ; + } /* AArch64 kernels never boot in secure mode */ assert(!info->secure_boot); /* This hook is only supported for AArch32 currently: diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c index 2017ba7a5a..22607c3784 100644 --- a/hw/arm/smmuv3.c +++ b/hw/arm/smmuv3.c @@ -17,6 +17,7 @@ */ #include "qemu/osdep.h" +#include "qemu/bitops.h" #include "hw/irq.h" #include "hw/sysbus.h" #include "migration/vmstate.h" @@ -864,7 +865,7 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd) scale = CMD_SCALE(cmd); num = CMD_NUM(cmd); ttl = CMD_TTL(cmd); - num_pages = (num + 1) * (1 << (scale)); + num_pages = (num + 1) * BIT_ULL(scale); } if (type == SMMU_CMD_TLBI_NH_VA) { diff --git a/hw/display/exynos4210_fimd.c b/hw/display/exynos4210_fimd.c index 4c16e1f5a0..34a960a976 100644 --- a/hw/display/exynos4210_fimd.c +++ b/hw/display/exynos4210_fimd.c @@ -1275,12 +1275,14 @@ static void exynos4210_fimd_update(void *opaque) bool blend = false; uint8_t *host_fb_addr; bool is_dirty = false; - const int global_width = (s->vidtcon[2] & FIMD_VIDTCON2_SIZE_MASK) + 1; + int global_width; if (!s || !s->console || !s->enabled || surface_bits_per_pixel(qemu_console_surface(s->console)) == 0) { return; } + + global_width = (s->vidtcon[2] & FIMD_VIDTCON2_SIZE_MASK) + 1; exynos4210_update_resolution(s); surface = qemu_console_surface(s->console); diff --git a/hw/display/omap_lcdc.c b/hw/display/omap_lcdc.c index fa4a381db6..58e659c94f 100644 --- a/hw/display/omap_lcdc.c +++ b/hw/display/omap_lcdc.c @@ -78,14 +78,18 @@ static void omap_lcd_interrupts(struct omap_lcd_panel_s *s) static void omap_update_display(void *opaque) { struct omap_lcd_panel_s *omap_lcd = (struct omap_lcd_panel_s *) opaque; - DisplaySurface *surface = qemu_console_surface(omap_lcd->con); + DisplaySurface *surface; draw_line_func draw_line; int size, height, first, last; int width, linesize, step, bpp, frame_offset; hwaddr frame_base; - if (!omap_lcd || omap_lcd->plm == 1 || !omap_lcd->enable || - !surface_bits_per_pixel(surface)) { + if (!omap_lcd || omap_lcd->plm == 1 || !omap_lcd->enable) { + return; + } + + surface = qemu_console_surface(omap_lcd->con); + if (!surface_bits_per_pixel(surface)) { return; } diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c index 08e000e33c..43ef1d7a84 100644 --- a/hw/intc/arm_gicv3_cpuif.c +++ b/hw/intc/arm_gicv3_cpuif.c @@ -399,6 +399,7 @@ static void gicv3_cpuif_virt_update(GICv3CPUState *cs) int irqlevel = 0; int fiqlevel = 0; int maintlevel = 0; + ARMCPU *cpu = ARM_CPU(cs->cpu); idx = hppvi_index(cs); trace_gicv3_cpuif_virt_update(gicv3_redist_affid(cs), idx); @@ -424,7 +425,7 @@ static void gicv3_cpuif_virt_update(GICv3CPUState *cs) qemu_set_irq(cs->parent_vfiq, fiqlevel); qemu_set_irq(cs->parent_virq, irqlevel); - qemu_set_irq(cs->maintenance_irq, maintlevel); + qemu_set_irq(cpu->gicv3_maintenance_interrupt, maintlevel); } static uint64_t icv_ap_read(CPUARMState *env, const ARMCPRegInfo *ri) @@ -2624,8 +2625,6 @@ void gicv3_init_cpuif(GICv3State *s) && cpu->gic_num_lrs) { int j; - cs->maintenance_irq = cpu->gicv3_maintenance_interrupt; - cs->num_list_regs = cpu->gic_num_lrs; cs->vpribits = cpu->gic_vpribits; cs->vprebits = cpu->gic_vprebits; diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h index 0331b0ffdb..91491a2f66 100644 --- a/include/hw/intc/arm_gicv3_common.h +++ b/include/hw/intc/arm_gicv3_common.h @@ -153,7 +153,6 @@ struct GICv3CPUState { qemu_irq parent_fiq; qemu_irq parent_virq; qemu_irq parent_vfiq; - qemu_irq maintenance_irq; /* Redistributor */ uint32_t level; /* Current IRQ level */ diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 0ff62bb6a2..4fbaaa05e3 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -839,7 +839,23 @@ sub output_function_rst(%) { output_highlight_rst($args{'purpose'}); $start = "\n\n**Syntax**\n\n ``"; } else { - print ".. c:function:: "; + if ((split(/\./, $sphinx_version))[0] >= 3) { + # Sphinx 3 and later distinguish macros and functions and + # complain if you use c:function with something that's not + # syntactically valid as a function declaration. + # We assume that anything with a return type is a function + # and anything without is a macro. + if ($args{'functiontype'} ne "") { + print ".. c:function:: "; + } else { + print ".. c:macro:: "; + } + } else { + # Older Sphinx don't support documenting macros that take + # arguments with c:macro, and don't complain about the use + # of c:function for this. + print ".. c:function:: "; + } } if ($args{'functiontype'} ne "") { $start .= $args{'functiontype'} . " " . $args{'function'} . " ("; diff --git a/target/arm/helper.c b/target/arm/helper.c index 97bb6b8c01..6854591986 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -731,13 +731,12 @@ static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri, /* * Non-IS variants of TLB operations are upgraded to - * IS versions if we are at NS EL1 and HCR_EL2.FB is set to + * IS versions if we are at EL1 and HCR_EL2.FB is effectively set to * force broadcast of these operations. */ static bool tlb_force_broadcast(CPUARMState *env) { - return (env->cp15.hcr_el2 & HCR_FB) && - arm_current_el(env) == 1 && arm_is_secure_below_el3(env); + return arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_FB); } static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -6680,9 +6679,10 @@ static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri) #endif /* Shared logic between LORID and the rest of the LOR* registers. - * Secure state has already been delt with. + * Secure state exclusion has already been dealt with. */ -static CPAccessResult access_lor_ns(CPUARMState *env) +static CPAccessResult access_lor_ns(CPUARMState *env, + const ARMCPRegInfo *ri, bool isread) { int el = arm_current_el(env); @@ -6695,16 +6695,6 @@ static CPAccessResult access_lor_ns(CPUARMState *env) return CP_ACCESS_OK; } -static CPAccessResult access_lorid(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - if (arm_is_secure_below_el3(env)) { - /* Access ok in secure mode. */ - return CP_ACCESS_OK; - } - return access_lor_ns(env); -} - static CPAccessResult access_lor_other(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { @@ -6712,7 +6702,7 @@ static CPAccessResult access_lor_other(CPUARMState *env, /* Access denied in secure mode. */ return CP_ACCESS_TRAP; } - return access_lor_ns(env); + return access_lor_ns(env, ri, isread); } /* @@ -6739,7 +6729,7 @@ static const ARMCPRegInfo lor_reginfo[] = { .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "LORID_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 4, .opc2 = 7, - .access = PL1_R, .accessfn = access_lorid, + .access = PL1_R, .accessfn = access_lor_ns, .type = ARM_CP_CONST, .resetvalue = 0 }, REGINFO_SENTINEL }; diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c index 036454234c..aad01ea012 100644 --- a/target/arm/m_helper.c +++ b/target/arm/m_helper.c @@ -2719,7 +2719,8 @@ ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env, /* Return the MMU index for a v7M CPU in the specified security state */ ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate) { - bool priv = arm_current_el(env) != 0; + bool priv = arm_v7m_is_handler_mode(env) || + !(env->v7m.control[secstate] & 1); return arm_v7m_mmu_idx_for_secstate_and_priv(env, secstate, priv); } diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc index 4d1a292981..59368cb243 100644 --- a/target/arm/translate-neon.c.inc +++ b/target/arm/translate-neon.c.inc @@ -60,25 +60,6 @@ static inline int neon_3same_fp_size(DisasContext *s, int x) #include "decode-neon-ls.c.inc" #include "decode-neon-shared.c.inc" -/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE, - * where 0 is the least significant end of the register. - */ -static inline long -neon_element_offset(int reg, int element, MemOp size) -{ - int element_size = 1 << size; - int ofs = element * element_size; -#ifdef HOST_WORDS_BIGENDIAN - /* Calculate the offset assuming fully little-endian, - * then XOR to account for the order of the 8-byte units. - */ - if (element_size < 8) { - ofs ^= 8 - element_size; - } -#endif - return neon_reg_offset(reg, 0) + ofs; -} - static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop) { long offset = neon_element_offset(reg, ele, mop & MO_SIZE); @@ -585,12 +566,12 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) * We cannot write 16 bytes at once because the * destination is unaligned. */ - tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0), + tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd), 8, 8, tmp); - tcg_gen_gvec_mov(0, neon_reg_offset(vd + 1, 0), - neon_reg_offset(vd, 0), 8, 8); + tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1), + neon_full_reg_offset(vd), 8, 8); } else { - tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0), + tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd), vec_size, vec_size, tmp); } tcg_gen_addi_i32(addr, addr, 1 << size); @@ -691,9 +672,9 @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn) { int vec_size = a->q ? 16 : 8; - int rd_ofs = neon_reg_offset(a->vd, 0); - int rn_ofs = neon_reg_offset(a->vn, 0); - int rm_ofs = neon_reg_offset(a->vm, 0); + int rd_ofs = neon_full_reg_offset(a->vd); + int rn_ofs = neon_full_reg_offset(a->vn); + int rm_ofs = neon_full_reg_offset(a->vm); if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { return false; @@ -975,18 +956,24 @@ static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn) * early. Since Q is 0 there are always just two passes, so instead * of a complicated loop over each pass we just unroll. */ - tmp = neon_load_reg(a->vn, 0); - tmp2 = neon_load_reg(a->vn, 1); + tmp = tcg_temp_new_i32(); + tmp2 = tcg_temp_new_i32(); + tmp3 = tcg_temp_new_i32(); + + read_neon_element32(tmp, a->vn, 0, MO_32); + read_neon_element32(tmp2, a->vn, 1, MO_32); fn(tmp, tmp, tmp2); - tcg_temp_free_i32(tmp2); - tmp3 = neon_load_reg(a->vm, 0); - tmp2 = neon_load_reg(a->vm, 1); + read_neon_element32(tmp3, a->vm, 0, MO_32); + read_neon_element32(tmp2, a->vm, 1, MO_32); fn(tmp3, tmp3, tmp2); - tcg_temp_free_i32(tmp2); - neon_store_reg(a->vd, 0, tmp); - neon_store_reg(a->vd, 1, tmp3); + write_neon_element32(tmp, a->vd, 0, MO_32); + write_neon_element32(tmp3, a->vd, 1, MO_32); + + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp3); return true; } @@ -1177,8 +1164,8 @@ static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn) { /* Handle a 2-reg-shift insn which can be vectorized. */ int vec_size = a->q ? 16 : 8; - int rd_ofs = neon_reg_offset(a->vd, 0); - int rm_ofs = neon_reg_offset(a->vm, 0); + int rd_ofs = neon_full_reg_offset(a->vd); + int rm_ofs = neon_full_reg_offset(a->vm); if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { return false; @@ -1278,9 +1265,9 @@ static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a, for (pass = 0; pass < a->q + 1; pass++) { TCGv_i64 tmp = tcg_temp_new_i64(); - neon_load_reg64(tmp, a->vm + pass); + read_neon_element64(tmp, a->vm, pass, MO_64); fn(tmp, cpu_env, tmp, constimm); - neon_store_reg64(tmp, a->vd + pass); + write_neon_element64(tmp, a->vd, pass, MO_64); tcg_temp_free_i64(tmp); } tcg_temp_free_i64(constimm); @@ -1294,7 +1281,7 @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a, * 2-reg-and-shift operations, size < 3 case, where the * helper needs to be passed cpu_env. */ - TCGv_i32 constimm; + TCGv_i32 constimm, tmp; int pass; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { @@ -1320,12 +1307,14 @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a, * by immediate using the variable shift operations. */ constimm = tcg_const_i32(dup_const(a->size, a->shift)); + tmp = tcg_temp_new_i32(); for (pass = 0; pass < (a->q ? 4 : 2); pass++) { - TCGv_i32 tmp = neon_load_reg(a->vm, pass); + read_neon_element32(tmp, a->vm, pass, MO_32); fn(tmp, cpu_env, tmp, constimm); - neon_store_reg(a->vd, pass, tmp); + write_neon_element32(tmp, a->vd, pass, MO_32); } + tcg_temp_free_i32(tmp); tcg_temp_free_i32(constimm); return true; } @@ -1383,21 +1372,21 @@ static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a, constimm = tcg_const_i64(-a->shift); rm1 = tcg_temp_new_i64(); rm2 = tcg_temp_new_i64(); + rd = tcg_temp_new_i32(); /* Load both inputs first to avoid potential overwrite if rm == rd */ - neon_load_reg64(rm1, a->vm); - neon_load_reg64(rm2, a->vm + 1); + read_neon_element64(rm1, a->vm, 0, MO_64); + read_neon_element64(rm2, a->vm, 1, MO_64); shiftfn(rm1, rm1, constimm); - rd = tcg_temp_new_i32(); narrowfn(rd, cpu_env, rm1); - neon_store_reg(a->vd, 0, rd); + write_neon_element32(rd, a->vd, 0, MO_32); shiftfn(rm2, rm2, constimm); - rd = tcg_temp_new_i32(); narrowfn(rd, cpu_env, rm2); - neon_store_reg(a->vd, 1, rd); + write_neon_element32(rd, a->vd, 1, MO_32); + tcg_temp_free_i32(rd); tcg_temp_free_i64(rm1); tcg_temp_free_i64(rm2); tcg_temp_free_i64(constimm); @@ -1447,10 +1436,14 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a, constimm = tcg_const_i32(imm); /* Load all inputs first to avoid potential overwrite */ - rm1 = neon_load_reg(a->vm, 0); - rm2 = neon_load_reg(a->vm, 1); - rm3 = neon_load_reg(a->vm + 1, 0); - rm4 = neon_load_reg(a->vm + 1, 1); + rm1 = tcg_temp_new_i32(); + rm2 = tcg_temp_new_i32(); + rm3 = tcg_temp_new_i32(); + rm4 = tcg_temp_new_i32(); + read_neon_element32(rm1, a->vm, 0, MO_32); + read_neon_element32(rm2, a->vm, 1, MO_32); + read_neon_element32(rm3, a->vm, 2, MO_32); + read_neon_element32(rm4, a->vm, 3, MO_32); rtmp = tcg_temp_new_i64(); shiftfn(rm1, rm1, constimm); @@ -1460,7 +1453,8 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a, tcg_temp_free_i32(rm2); narrowfn(rm1, cpu_env, rtmp); - neon_store_reg(a->vd, 0, rm1); + write_neon_element32(rm1, a->vd, 0, MO_32); + tcg_temp_free_i32(rm1); shiftfn(rm3, rm3, constimm); shiftfn(rm4, rm4, constimm); @@ -1471,7 +1465,8 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a, narrowfn(rm3, cpu_env, rtmp); tcg_temp_free_i64(rtmp); - neon_store_reg(a->vd, 1, rm3); + write_neon_element32(rm3, a->vd, 1, MO_32); + tcg_temp_free_i32(rm3); return true; } @@ -1572,8 +1567,10 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a, widen_mask = dup_const(a->size + 1, widen_mask); } - rm0 = neon_load_reg(a->vm, 0); - rm1 = neon_load_reg(a->vm, 1); + rm0 = tcg_temp_new_i32(); + rm1 = tcg_temp_new_i32(); + read_neon_element32(rm0, a->vm, 0, MO_32); + read_neon_element32(rm1, a->vm, 1, MO_32); tmp = tcg_temp_new_i64(); widenfn(tmp, rm0); @@ -1582,7 +1579,7 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a, tcg_gen_shli_i64(tmp, tmp, a->shift); tcg_gen_andi_i64(tmp, tmp, ~widen_mask); } - neon_store_reg64(tmp, a->vd); + write_neon_element64(tmp, a->vd, 0, MO_64); widenfn(tmp, rm1); tcg_temp_free_i32(rm1); @@ -1590,7 +1587,7 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a, tcg_gen_shli_i64(tmp, tmp, a->shift); tcg_gen_andi_i64(tmp, tmp, ~widen_mask); } - neon_store_reg64(tmp, a->vd + 1); + write_neon_element64(tmp, a->vd, 1, MO_64); tcg_temp_free_i64(tmp); return true; } @@ -1620,8 +1617,8 @@ static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a, { /* FP operations in 2-reg-and-shift group */ int vec_size = a->q ? 16 : 8; - int rd_ofs = neon_reg_offset(a->vd, 0); - int rm_ofs = neon_reg_offset(a->vm, 0); + int rd_ofs = neon_full_reg_offset(a->vd); + int rm_ofs = neon_full_reg_offset(a->vm); TCGv_ptr fpst; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { @@ -1756,7 +1753,7 @@ static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a, return true; } - reg_ofs = neon_reg_offset(a->vd, 0); + reg_ofs = neon_full_reg_offset(a->vd); vec_size = a->q ? 16 : 8; imm = asimd_imm_const(a->imm, a->cmode, a->op); @@ -1791,11 +1788,10 @@ static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a) static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, NeonGenWidenFn *widenfn, NeonGenTwo64OpFn *opfn, - bool src1_wide) + int src1_mop, int src2_mop) { /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */ TCGv_i64 rn0_64, rn1_64, rm_64; - TCGv_i32 rm; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { return false; @@ -1807,12 +1803,12 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, return false; } - if (!widenfn || !opfn) { + if (!opfn) { /* size == 3 case, which is an entirely different insn group */ return false; } - if ((a->vd & 1) || (src1_wide && (a->vn & 1))) { + if ((a->vd & 1) || (src1_mop == MO_Q && (a->vn & 1))) { return false; } @@ -1824,38 +1820,50 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, rn1_64 = tcg_temp_new_i64(); rm_64 = tcg_temp_new_i64(); - if (src1_wide) { - neon_load_reg64(rn0_64, a->vn); + if (src1_mop >= 0) { + read_neon_element64(rn0_64, a->vn, 0, src1_mop); } else { - TCGv_i32 tmp = neon_load_reg(a->vn, 0); + TCGv_i32 tmp = tcg_temp_new_i32(); + read_neon_element32(tmp, a->vn, 0, MO_32); widenfn(rn0_64, tmp); tcg_temp_free_i32(tmp); } - rm = neon_load_reg(a->vm, 0); + if (src2_mop >= 0) { + read_neon_element64(rm_64, a->vm, 0, src2_mop); + } else { + TCGv_i32 tmp = tcg_temp_new_i32(); + read_neon_element32(tmp, a->vm, 0, MO_32); + widenfn(rm_64, tmp); + tcg_temp_free_i32(tmp); + } - widenfn(rm_64, rm); - tcg_temp_free_i32(rm); opfn(rn0_64, rn0_64, rm_64); /* * Load second pass inputs before storing the first pass result, to * avoid incorrect results if a narrow input overlaps with the result. */ - if (src1_wide) { - neon_load_reg64(rn1_64, a->vn + 1); + if (src1_mop >= 0) { + read_neon_element64(rn1_64, a->vn, 1, src1_mop); } else { - TCGv_i32 tmp = neon_load_reg(a->vn, 1); + TCGv_i32 tmp = tcg_temp_new_i32(); + read_neon_element32(tmp, a->vn, 1, MO_32); widenfn(rn1_64, tmp); tcg_temp_free_i32(tmp); } - rm = neon_load_reg(a->vm, 1); + if (src2_mop >= 0) { + read_neon_element64(rm_64, a->vm, 1, src2_mop); + } else { + TCGv_i32 tmp = tcg_temp_new_i32(); + read_neon_element32(tmp, a->vm, 1, MO_32); + widenfn(rm_64, tmp); + tcg_temp_free_i32(tmp); + } - neon_store_reg64(rn0_64, a->vd); + write_neon_element64(rn0_64, a->vd, 0, MO_64); - widenfn(rm_64, rm); - tcg_temp_free_i32(rm); opfn(rn1_64, rn1_64, rm_64); - neon_store_reg64(rn1_64, a->vd + 1); + write_neon_element64(rn1_64, a->vd, 1, MO_64); tcg_temp_free_i64(rn0_64); tcg_temp_free_i64(rn1_64); @@ -1864,14 +1872,13 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, return true; } -#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \ +#define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN) \ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ { \ static NeonGenWidenFn * const widenfn[] = { \ gen_helper_neon_widen_##S##8, \ gen_helper_neon_widen_##S##16, \ - tcg_gen_##EXT##_i32_i64, \ - NULL, \ + NULL, NULL, \ }; \ static NeonGenTwo64OpFn * const addfn[] = { \ gen_helper_neon_##OP##l_u16, \ @@ -1879,18 +1886,20 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, tcg_gen_##OP##_i64, \ NULL, \ }; \ - return do_prewiden_3d(s, a, widenfn[a->size], \ - addfn[a->size], SRC1WIDE); \ + int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1; \ + return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size], \ + SRC1WIDE ? MO_Q : narrow_mop, \ + narrow_mop); \ } -DO_PREWIDEN(VADDL_S, s, ext, add, false) -DO_PREWIDEN(VADDL_U, u, extu, add, false) -DO_PREWIDEN(VSUBL_S, s, ext, sub, false) -DO_PREWIDEN(VSUBL_U, u, extu, sub, false) -DO_PREWIDEN(VADDW_S, s, ext, add, true) -DO_PREWIDEN(VADDW_U, u, extu, add, true) -DO_PREWIDEN(VSUBW_S, s, ext, sub, true) -DO_PREWIDEN(VSUBW_U, u, extu, sub, true) +DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN) +DO_PREWIDEN(VADDL_U, u, add, false, 0) +DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN) +DO_PREWIDEN(VSUBL_U, u, sub, false, 0) +DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN) +DO_PREWIDEN(VADDW_U, u, add, true, 0) +DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN) +DO_PREWIDEN(VSUBW_U, u, sub, true, 0) static bool do_narrow_3d(DisasContext *s, arg_3diff *a, NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn) @@ -1927,23 +1936,25 @@ static bool do_narrow_3d(DisasContext *s, arg_3diff *a, rd0 = tcg_temp_new_i32(); rd1 = tcg_temp_new_i32(); - neon_load_reg64(rn_64, a->vn); - neon_load_reg64(rm_64, a->vm); + read_neon_element64(rn_64, a->vn, 0, MO_64); + read_neon_element64(rm_64, a->vm, 0, MO_64); opfn(rn_64, rn_64, rm_64); narrowfn(rd0, rn_64); - neon_load_reg64(rn_64, a->vn + 1); - neon_load_reg64(rm_64, a->vm + 1); + read_neon_element64(rn_64, a->vn, 1, MO_64); + read_neon_element64(rm_64, a->vm, 1, MO_64); opfn(rn_64, rn_64, rm_64); narrowfn(rd1, rn_64); - neon_store_reg(a->vd, 0, rd0); - neon_store_reg(a->vd, 1, rd1); + write_neon_element32(rd0, a->vd, 0, MO_32); + write_neon_element32(rd1, a->vd, 1, MO_32); + tcg_temp_free_i32(rd0); + tcg_temp_free_i32(rd1); tcg_temp_free_i64(rn_64); tcg_temp_free_i64(rm_64); @@ -2018,14 +2029,14 @@ static bool do_long_3d(DisasContext *s, arg_3diff *a, rd0 = tcg_temp_new_i64(); rd1 = tcg_temp_new_i64(); - rn = neon_load_reg(a->vn, 0); - rm = neon_load_reg(a->vm, 0); + rn = tcg_temp_new_i32(); + rm = tcg_temp_new_i32(); + read_neon_element32(rn, a->vn, 0, MO_32); + read_neon_element32(rm, a->vm, 0, MO_32); opfn(rd0, rn, rm); - tcg_temp_free_i32(rn); - tcg_temp_free_i32(rm); - rn = neon_load_reg(a->vn, 1); - rm = neon_load_reg(a->vm, 1); + read_neon_element32(rn, a->vn, 1, MO_32); + read_neon_element32(rm, a->vm, 1, MO_32); opfn(rd1, rn, rm); tcg_temp_free_i32(rn); tcg_temp_free_i32(rm); @@ -2033,18 +2044,15 @@ static bool do_long_3d(DisasContext *s, arg_3diff *a, /* Don't store results until after all loads: they might overlap */ if (accfn) { tmp = tcg_temp_new_i64(); - neon_load_reg64(tmp, a->vd); - accfn(tmp, tmp, rd0); - neon_store_reg64(tmp, a->vd); - neon_load_reg64(tmp, a->vd + 1); - accfn(tmp, tmp, rd1); - neon_store_reg64(tmp, a->vd + 1); + read_neon_element64(tmp, a->vd, 0, MO_64); + accfn(rd0, tmp, rd0); + read_neon_element64(tmp, a->vd, 1, MO_64); + accfn(rd1, tmp, rd1); tcg_temp_free_i64(tmp); - } else { - neon_store_reg64(rd0, a->vd); - neon_store_reg64(rd1, a->vd + 1); } + write_neon_element64(rd0, a->vd, 0, MO_64); + write_neon_element64(rd1, a->vd, 1, MO_64); tcg_temp_free_i64(rd0); tcg_temp_free_i64(rd1); @@ -2300,9 +2308,9 @@ static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a) return true; } - tcg_gen_gvec_3_ool(neon_reg_offset(a->vd, 0), - neon_reg_offset(a->vn, 0), - neon_reg_offset(a->vm, 0), + tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd), + neon_full_reg_offset(a->vn), + neon_full_reg_offset(a->vm), 16, 16, 0, fn_gvec); return true; } @@ -2327,16 +2335,16 @@ static void gen_neon_dup_high16(TCGv_i32 var) static inline TCGv_i32 neon_get_scalar(int size, int reg) { - TCGv_i32 tmp; - if (size == 1) { - tmp = neon_load_reg(reg & 7, reg >> 4); + TCGv_i32 tmp = tcg_temp_new_i32(); + if (size == MO_16) { + read_neon_element32(tmp, reg & 7, reg >> 4, MO_32); if (reg & 8) { gen_neon_dup_high16(tmp); } else { gen_neon_dup_low16(tmp); } } else { - tmp = neon_load_reg(reg & 15, reg >> 4); + read_neon_element32(tmp, reg & 15, reg >> 4, MO_32); } return tmp; } @@ -2350,7 +2358,7 @@ static bool do_2scalar(DisasContext *s, arg_2scalar *a, * perform an accumulation operation of that result into the * destination. */ - TCGv_i32 scalar; + TCGv_i32 scalar, tmp; int pass; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { @@ -2377,17 +2385,20 @@ static bool do_2scalar(DisasContext *s, arg_2scalar *a, } scalar = neon_get_scalar(a->size, a->vm); + tmp = tcg_temp_new_i32(); for (pass = 0; pass < (a->q ? 4 : 2); pass++) { - TCGv_i32 tmp = neon_load_reg(a->vn, pass); + read_neon_element32(tmp, a->vn, pass, MO_32); opfn(tmp, tmp, scalar); if (accfn) { - TCGv_i32 rd = neon_load_reg(a->vd, pass); + TCGv_i32 rd = tcg_temp_new_i32(); + read_neon_element32(rd, a->vd, pass, MO_32); accfn(tmp, rd, tmp); tcg_temp_free_i32(rd); } - neon_store_reg(a->vd, pass, tmp); + write_neon_element32(tmp, a->vd, pass, MO_32); } + tcg_temp_free_i32(tmp); tcg_temp_free_i32(scalar); return true; } @@ -2445,8 +2456,8 @@ static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a, { /* Two registers and a scalar, using gvec */ int vec_size = a->q ? 16 : 8; - int rd_ofs = neon_reg_offset(a->vd, 0); - int rn_ofs = neon_reg_offset(a->vn, 0); + int rd_ofs = neon_full_reg_offset(a->vd); + int rn_ofs = neon_full_reg_offset(a->vn); int rm_ofs; int idx; TCGv_ptr fpstatus; @@ -2477,7 +2488,7 @@ static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a, /* a->vm is M:Vm, which encodes both register and index */ idx = extract32(a->vm, a->size + 2, 2); a->vm = extract32(a->vm, 0, a->size + 2); - rm_ofs = neon_reg_offset(a->vm, 0); + rm_ofs = neon_full_reg_offset(a->vm); fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD); tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus, @@ -2542,7 +2553,7 @@ static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a, * performs a kind of fused op-then-accumulate using a helper * function that takes all of rd, rn and the scalar at once. */ - TCGv_i32 scalar; + TCGv_i32 scalar, rn, rd; int pass; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { @@ -2573,14 +2584,17 @@ static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a, } scalar = neon_get_scalar(a->size, a->vm); + rn = tcg_temp_new_i32(); + rd = tcg_temp_new_i32(); for (pass = 0; pass < (a->q ? 4 : 2); pass++) { - TCGv_i32 rn = neon_load_reg(a->vn, pass); - TCGv_i32 rd = neon_load_reg(a->vd, pass); + read_neon_element32(rn, a->vn, pass, MO_32); + read_neon_element32(rd, a->vd, pass, MO_32); opfn(rd, cpu_env, rn, scalar, rd); - tcg_temp_free_i32(rn); - neon_store_reg(a->vd, pass, rd); + write_neon_element32(rd, a->vd, pass, MO_32); } + tcg_temp_free_i32(rn); + tcg_temp_free_i32(rd); tcg_temp_free_i32(scalar); return true; @@ -2647,12 +2661,12 @@ static bool do_2scalar_long(DisasContext *s, arg_2scalar *a, scalar = neon_get_scalar(a->size, a->vm); /* Load all inputs before writing any outputs, in case of overlap */ - rn = neon_load_reg(a->vn, 0); + rn = tcg_temp_new_i32(); + read_neon_element32(rn, a->vn, 0, MO_32); rn0_64 = tcg_temp_new_i64(); opfn(rn0_64, rn, scalar); - tcg_temp_free_i32(rn); - rn = neon_load_reg(a->vn, 1); + read_neon_element32(rn, a->vn, 1, MO_32); rn1_64 = tcg_temp_new_i64(); opfn(rn1_64, rn, scalar); tcg_temp_free_i32(rn); @@ -2660,17 +2674,15 @@ static bool do_2scalar_long(DisasContext *s, arg_2scalar *a, if (accfn) { TCGv_i64 t64 = tcg_temp_new_i64(); - neon_load_reg64(t64, a->vd); - accfn(t64, t64, rn0_64); - neon_store_reg64(t64, a->vd); - neon_load_reg64(t64, a->vd + 1); - accfn(t64, t64, rn1_64); - neon_store_reg64(t64, a->vd + 1); + read_neon_element64(t64, a->vd, 0, MO_64); + accfn(rn0_64, t64, rn0_64); + read_neon_element64(t64, a->vd, 1, MO_64); + accfn(rn1_64, t64, rn1_64); tcg_temp_free_i64(t64); - } else { - neon_store_reg64(rn0_64, a->vd); - neon_store_reg64(rn1_64, a->vd + 1); } + + write_neon_element64(rn0_64, a->vd, 0, MO_64); + write_neon_element64(rn1_64, a->vd, 1, MO_64); tcg_temp_free_i64(rn0_64); tcg_temp_free_i64(rn1_64); return true; @@ -2803,10 +2815,10 @@ static bool trans_VEXT(DisasContext *s, arg_VEXT *a) right = tcg_temp_new_i64(); dest = tcg_temp_new_i64(); - neon_load_reg64(right, a->vn); - neon_load_reg64(left, a->vm); + read_neon_element64(right, a->vn, 0, MO_64); + read_neon_element64(left, a->vm, 0, MO_64); tcg_gen_extract2_i64(dest, right, left, a->imm * 8); - neon_store_reg64(dest, a->vd); + write_neon_element64(dest, a->vd, 0, MO_64); tcg_temp_free_i64(left); tcg_temp_free_i64(right); @@ -2822,21 +2834,21 @@ static bool trans_VEXT(DisasContext *s, arg_VEXT *a) destright = tcg_temp_new_i64(); if (a->imm < 8) { - neon_load_reg64(right, a->vn); - neon_load_reg64(middle, a->vn + 1); + read_neon_element64(right, a->vn, 0, MO_64); + read_neon_element64(middle, a->vn, 1, MO_64); tcg_gen_extract2_i64(destright, right, middle, a->imm * 8); - neon_load_reg64(left, a->vm); + read_neon_element64(left, a->vm, 0, MO_64); tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8); } else { - neon_load_reg64(right, a->vn + 1); - neon_load_reg64(middle, a->vm); + read_neon_element64(right, a->vn, 1, MO_64); + read_neon_element64(middle, a->vm, 0, MO_64); tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8); - neon_load_reg64(left, a->vm + 1); + read_neon_element64(left, a->vm, 1, MO_64); tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8); } - neon_store_reg64(destright, a->vd); - neon_store_reg64(destleft, a->vd + 1); + write_neon_element64(destright, a->vd, 0, MO_64); + write_neon_element64(destleft, a->vd, 1, MO_64); tcg_temp_free_i64(destright); tcg_temp_free_i64(destleft); @@ -2876,30 +2888,34 @@ static bool trans_VTBL(DisasContext *s, arg_VTBL *a) return false; } n <<= 3; + tmp = tcg_temp_new_i32(); if (a->op) { - tmp = neon_load_reg(a->vd, 0); + read_neon_element32(tmp, a->vd, 0, MO_32); } else { - tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, 0); } - tmp2 = neon_load_reg(a->vm, 0); + tmp2 = tcg_temp_new_i32(); + read_neon_element32(tmp2, a->vm, 0, MO_32); ptr1 = vfp_reg_ptr(true, a->vn); tmp4 = tcg_const_i32(n); gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp4); - tcg_temp_free_i32(tmp); + if (a->op) { - tmp = neon_load_reg(a->vd, 1); + read_neon_element32(tmp, a->vd, 1, MO_32); } else { - tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, 0); } - tmp3 = neon_load_reg(a->vm, 1); + tmp3 = tcg_temp_new_i32(); + read_neon_element32(tmp3, a->vm, 1, MO_32); gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp4); + tcg_temp_free_i32(tmp); tcg_temp_free_i32(tmp4); tcg_temp_free_ptr(ptr1); - neon_store_reg(a->vd, 0, tmp2); - neon_store_reg(a->vd, 1, tmp3); - tcg_temp_free_i32(tmp); + + write_neon_element32(tmp2, a->vd, 0, MO_32); + write_neon_element32(tmp3, a->vd, 1, MO_32); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp3); return true; } @@ -2923,7 +2939,7 @@ static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a) return true; } - tcg_gen_gvec_dup_mem(a->size, neon_reg_offset(a->vd, 0), + tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd), neon_element_offset(a->vm, a->index, a->size), a->q ? 16 : 8, a->q ? 16 : 8); return true; @@ -2932,6 +2948,7 @@ static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a) static bool trans_VREV64(DisasContext *s, arg_VREV64 *a) { int pass, half; + TCGv_i32 tmp[2]; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { return false; @@ -2955,11 +2972,12 @@ static bool trans_VREV64(DisasContext *s, arg_VREV64 *a) return true; } - for (pass = 0; pass < (a->q ? 2 : 1); pass++) { - TCGv_i32 tmp[2]; + tmp[0] = tcg_temp_new_i32(); + tmp[1] = tcg_temp_new_i32(); + for (pass = 0; pass < (a->q ? 2 : 1); pass++) { for (half = 0; half < 2; half++) { - tmp[half] = neon_load_reg(a->vm, pass * 2 + half); + read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32); switch (a->size) { case 0: tcg_gen_bswap32_i32(tmp[half], tmp[half]); @@ -2973,9 +2991,12 @@ static bool trans_VREV64(DisasContext *s, arg_VREV64 *a) g_assert_not_reached(); } } - neon_store_reg(a->vd, pass * 2, tmp[1]); - neon_store_reg(a->vd, pass * 2 + 1, tmp[0]); + write_neon_element32(tmp[1], a->vd, pass * 2, MO_32); + write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32); } + + tcg_temp_free_i32(tmp[0]); + tcg_temp_free_i32(tmp[1]); return true; } @@ -3020,23 +3041,25 @@ static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a, rm0_64 = tcg_temp_new_i64(); rm1_64 = tcg_temp_new_i64(); rd_64 = tcg_temp_new_i64(); - tmp = neon_load_reg(a->vm, pass * 2); + + tmp = tcg_temp_new_i32(); + read_neon_element32(tmp, a->vm, pass * 2, MO_32); widenfn(rm0_64, tmp); - tcg_temp_free_i32(tmp); - tmp = neon_load_reg(a->vm, pass * 2 + 1); + read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32); widenfn(rm1_64, tmp); tcg_temp_free_i32(tmp); + opfn(rd_64, rm0_64, rm1_64); tcg_temp_free_i64(rm0_64); tcg_temp_free_i64(rm1_64); if (accfn) { TCGv_i64 tmp64 = tcg_temp_new_i64(); - neon_load_reg64(tmp64, a->vd + pass); + read_neon_element64(tmp64, a->vd, pass, MO_64); accfn(rd_64, tmp64, rd_64); tcg_temp_free_i64(tmp64); } - neon_store_reg64(rd_64, a->vd + pass); + write_neon_element64(rd_64, a->vd, pass, MO_64); tcg_temp_free_i64(rd_64); } return true; @@ -3234,12 +3257,14 @@ static bool do_vmovn(DisasContext *s, arg_2misc *a, rd0 = tcg_temp_new_i32(); rd1 = tcg_temp_new_i32(); - neon_load_reg64(rm, a->vm); + read_neon_element64(rm, a->vm, 0, MO_64); narrowfn(rd0, cpu_env, rm); - neon_load_reg64(rm, a->vm + 1); + read_neon_element64(rm, a->vm, 1, MO_64); narrowfn(rd1, cpu_env, rm); - neon_store_reg(a->vd, 0, rd0); - neon_store_reg(a->vd, 1, rd1); + write_neon_element32(rd0, a->vd, 0, MO_32); + write_neon_element32(rd1, a->vd, 1, MO_32); + tcg_temp_free_i32(rd0); + tcg_temp_free_i32(rd1); tcg_temp_free_i64(rm); return true; } @@ -3296,16 +3321,18 @@ static bool trans_VSHLL(DisasContext *s, arg_2misc *a) } rd = tcg_temp_new_i64(); + rm0 = tcg_temp_new_i32(); + rm1 = tcg_temp_new_i32(); - rm0 = neon_load_reg(a->vm, 0); - rm1 = neon_load_reg(a->vm, 1); + read_neon_element32(rm0, a->vm, 0, MO_32); + read_neon_element32(rm1, a->vm, 1, MO_32); widenfn(rd, rm0); tcg_gen_shli_i64(rd, rd, 8 << a->size); - neon_store_reg64(rd, a->vd); + write_neon_element64(rd, a->vd, 0, MO_64); widenfn(rd, rm1); tcg_gen_shli_i64(rd, rd, 8 << a->size); - neon_store_reg64(rd, a->vd + 1); + write_neon_element64(rd, a->vd, 1, MO_64); tcg_temp_free_i64(rd); tcg_temp_free_i32(rm0); @@ -3339,21 +3366,25 @@ static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a) fpst = fpstatus_ptr(FPST_STD); ahp = get_ahp_flag(); - tmp = neon_load_reg(a->vm, 0); + tmp = tcg_temp_new_i32(); + read_neon_element32(tmp, a->vm, 0, MO_32); gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); - tmp2 = neon_load_reg(a->vm, 1); + tmp2 = tcg_temp_new_i32(); + read_neon_element32(tmp2, a->vm, 1, MO_32); gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp); tcg_gen_shli_i32(tmp2, tmp2, 16); tcg_gen_or_i32(tmp2, tmp2, tmp); - tcg_temp_free_i32(tmp); - tmp = neon_load_reg(a->vm, 2); + read_neon_element32(tmp, a->vm, 2, MO_32); gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); - tmp3 = neon_load_reg(a->vm, 3); - neon_store_reg(a->vd, 0, tmp2); + tmp3 = tcg_temp_new_i32(); + read_neon_element32(tmp3, a->vm, 3, MO_32); + write_neon_element32(tmp2, a->vd, 0, MO_32); + tcg_temp_free_i32(tmp2); gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp); tcg_gen_shli_i32(tmp3, tmp3, 16); tcg_gen_or_i32(tmp3, tmp3, tmp); - neon_store_reg(a->vd, 1, tmp3); + write_neon_element32(tmp3, a->vd, 1, MO_32); + tcg_temp_free_i32(tmp3); tcg_temp_free_i32(tmp); tcg_temp_free_i32(ahp); tcg_temp_free_ptr(fpst); @@ -3388,21 +3419,25 @@ static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a) fpst = fpstatus_ptr(FPST_STD); ahp = get_ahp_flag(); tmp3 = tcg_temp_new_i32(); - tmp = neon_load_reg(a->vm, 0); - tmp2 = neon_load_reg(a->vm, 1); + tmp2 = tcg_temp_new_i32(); + tmp = tcg_temp_new_i32(); + read_neon_element32(tmp, a->vm, 0, MO_32); + read_neon_element32(tmp2, a->vm, 1, MO_32); tcg_gen_ext16u_i32(tmp3, tmp); gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp); - neon_store_reg(a->vd, 0, tmp3); + write_neon_element32(tmp3, a->vd, 0, MO_32); tcg_gen_shri_i32(tmp, tmp, 16); gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp); - neon_store_reg(a->vd, 1, tmp); - tmp3 = tcg_temp_new_i32(); + write_neon_element32(tmp, a->vd, 1, MO_32); + tcg_temp_free_i32(tmp); tcg_gen_ext16u_i32(tmp3, tmp2); gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp); - neon_store_reg(a->vd, 2, tmp3); + write_neon_element32(tmp3, a->vd, 2, MO_32); + tcg_temp_free_i32(tmp3); tcg_gen_shri_i32(tmp2, tmp2, 16); gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp); - neon_store_reg(a->vd, 3, tmp2); + write_neon_element32(tmp2, a->vd, 3, MO_32); + tcg_temp_free_i32(tmp2); tcg_temp_free_i32(ahp); tcg_temp_free_ptr(fpst); @@ -3412,8 +3447,8 @@ static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a) static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn) { int vec_size = a->q ? 16 : 8; - int rd_ofs = neon_reg_offset(a->vd, 0); - int rm_ofs = neon_reg_offset(a->vm, 0); + int rd_ofs = neon_full_reg_offset(a->vd); + int rm_ofs = neon_full_reg_offset(a->vm); if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { return false; @@ -3508,6 +3543,7 @@ DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2) static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn) { + TCGv_i32 tmp; int pass; /* Handle a 2-reg-misc operation by iterating 32 bits at a time */ @@ -3533,11 +3569,13 @@ static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn) return true; } + tmp = tcg_temp_new_i32(); for (pass = 0; pass < (a->q ? 4 : 2); pass++) { - TCGv_i32 tmp = neon_load_reg(a->vm, pass); + read_neon_element32(tmp, a->vm, pass, MO_32); fn(tmp, tmp); - neon_store_reg(a->vd, pass, tmp); + write_neon_element32(tmp, a->vd, pass, MO_32); } + tcg_temp_free_i32(tmp); return true; } @@ -3812,10 +3850,10 @@ static bool trans_VSWP(DisasContext *s, arg_2misc *a) rm = tcg_temp_new_i64(); rd = tcg_temp_new_i64(); for (pass = 0; pass < (a->q ? 2 : 1); pass++) { - neon_load_reg64(rm, a->vm + pass); - neon_load_reg64(rd, a->vd + pass); - neon_store_reg64(rm, a->vd + pass); - neon_store_reg64(rd, a->vm + pass); + read_neon_element64(rm, a->vm, pass, MO_64); + read_neon_element64(rd, a->vd, pass, MO_64); + write_neon_element64(rm, a->vd, pass, MO_64); + write_neon_element64(rd, a->vm, pass, MO_64); } tcg_temp_free_i64(rm); tcg_temp_free_i64(rd); @@ -3890,25 +3928,29 @@ static bool trans_VTRN(DisasContext *s, arg_2misc *a) return true; } - if (a->size == 2) { + tmp = tcg_temp_new_i32(); + tmp2 = tcg_temp_new_i32(); + if (a->size == MO_32) { for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) { - tmp = neon_load_reg(a->vm, pass); - tmp2 = neon_load_reg(a->vd, pass + 1); - neon_store_reg(a->vm, pass, tmp2); - neon_store_reg(a->vd, pass + 1, tmp); + read_neon_element32(tmp, a->vm, pass, MO_32); + read_neon_element32(tmp2, a->vd, pass + 1, MO_32); + write_neon_element32(tmp2, a->vm, pass, MO_32); + write_neon_element32(tmp, a->vd, pass + 1, MO_32); } } else { for (pass = 0; pass < (a->q ? 4 : 2); pass++) { - tmp = neon_load_reg(a->vm, pass); - tmp2 = neon_load_reg(a->vd, pass); - if (a->size == 0) { + read_neon_element32(tmp, a->vm, pass, MO_32); + read_neon_element32(tmp2, a->vd, pass, MO_32); + if (a->size == MO_8) { gen_neon_trn_u8(tmp, tmp2); } else { gen_neon_trn_u16(tmp, tmp2); } - neon_store_reg(a->vm, pass, tmp2); - neon_store_reg(a->vd, pass, tmp); + write_neon_element32(tmp2, a->vm, pass, MO_32); + write_neon_element32(tmp, a->vd, pass, MO_32); } } + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); return true; } diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc index a7ed9bc81b..f966de5b1f 100644 --- a/target/arm/translate-vfp.c.inc +++ b/target/arm/translate-vfp.c.inc @@ -236,8 +236,8 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a) tcg_gen_ext_i32_i64(nf, cpu_NF); tcg_gen_ext_i32_i64(vf, cpu_VF); - neon_load_reg64(frn, rn); - neon_load_reg64(frm, rm); + vfp_load_reg64(frn, rn); + vfp_load_reg64(frm, rm); switch (a->cc) { case 0: /* eq: Z */ tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero, @@ -264,7 +264,7 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a) tcg_temp_free_i64(tmp); break; } - neon_store_reg64(dest, rd); + vfp_store_reg64(dest, rd); tcg_temp_free_i64(frn); tcg_temp_free_i64(frm); tcg_temp_free_i64(dest); @@ -283,8 +283,8 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a) frn = tcg_temp_new_i32(); frm = tcg_temp_new_i32(); dest = tcg_temp_new_i32(); - neon_load_reg32(frn, rn); - neon_load_reg32(frm, rm); + vfp_load_reg32(frn, rn); + vfp_load_reg32(frm, rm); switch (a->cc) { case 0: /* eq: Z */ tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero, @@ -315,7 +315,7 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a) if (sz == 1) { tcg_gen_andi_i32(dest, dest, 0xffff); } - neon_store_reg32(dest, rd); + vfp_store_reg32(dest, rd); tcg_temp_free_i32(frn); tcg_temp_free_i32(frm); tcg_temp_free_i32(dest); @@ -385,9 +385,9 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) TCGv_i64 tcg_res; tcg_op = tcg_temp_new_i64(); tcg_res = tcg_temp_new_i64(); - neon_load_reg64(tcg_op, rm); + vfp_load_reg64(tcg_op, rm); gen_helper_rintd(tcg_res, tcg_op, fpst); - neon_store_reg64(tcg_res, rd); + vfp_store_reg64(tcg_res, rd); tcg_temp_free_i64(tcg_op); tcg_temp_free_i64(tcg_res); } else { @@ -395,13 +395,13 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) TCGv_i32 tcg_res; tcg_op = tcg_temp_new_i32(); tcg_res = tcg_temp_new_i32(); - neon_load_reg32(tcg_op, rm); + vfp_load_reg32(tcg_op, rm); if (sz == 1) { gen_helper_rinth(tcg_res, tcg_op, fpst); } else { gen_helper_rints(tcg_res, tcg_op, fpst); } - neon_store_reg32(tcg_res, rd); + vfp_store_reg32(tcg_res, rd); tcg_temp_free_i32(tcg_op); tcg_temp_free_i32(tcg_res); } @@ -463,14 +463,14 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a) tcg_double = tcg_temp_new_i64(); tcg_res = tcg_temp_new_i64(); tcg_tmp = tcg_temp_new_i32(); - neon_load_reg64(tcg_double, rm); + vfp_load_reg64(tcg_double, rm); if (is_signed) { gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst); } else { gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst); } tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res); - neon_store_reg32(tcg_tmp, rd); + vfp_store_reg32(tcg_tmp, rd); tcg_temp_free_i32(tcg_tmp); tcg_temp_free_i64(tcg_res); tcg_temp_free_i64(tcg_double); @@ -478,7 +478,7 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a) TCGv_i32 tcg_single, tcg_res; tcg_single = tcg_temp_new_i32(); tcg_res = tcg_temp_new_i32(); - neon_load_reg32(tcg_single, rm); + vfp_load_reg32(tcg_single, rm); if (sz == 1) { if (is_signed) { gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst); @@ -492,7 +492,7 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a) gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst); } } - neon_store_reg32(tcg_res, rd); + vfp_store_reg32(tcg_res, rd); tcg_temp_free_i32(tcg_res); tcg_temp_free_i32(tcg_single); } @@ -511,11 +511,9 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a) { /* VMOV scalar to general purpose register */ TCGv_i32 tmp; - int pass; - uint32_t offset; - /* SIZE == 2 is a VFP instruction; otherwise NEON. */ - if (a->size == 2 + /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */ + if (a->size == MO_32 ? !dc_isar_feature(aa32_fpsp_v2, s) : !arm_dc_feature(s, ARM_FEATURE_NEON)) { return false; @@ -526,44 +524,12 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a) return false; } - offset = a->index << a->size; - pass = extract32(offset, 2, 1); - offset = extract32(offset, 0, 2) * 8; - if (!vfp_access_check(s)) { return true; } - tmp = neon_load_reg(a->vn, pass); - switch (a->size) { - case 0: - if (offset) { - tcg_gen_shri_i32(tmp, tmp, offset); - } - if (a->u) { - gen_uxtb(tmp); - } else { - gen_sxtb(tmp); - } - break; - case 1: - if (a->u) { - if (offset) { - tcg_gen_shri_i32(tmp, tmp, 16); - } else { - gen_uxth(tmp); - } - } else { - if (offset) { - tcg_gen_sari_i32(tmp, tmp, 16); - } else { - gen_sxth(tmp); - } - } - break; - case 2: - break; - } + tmp = tcg_temp_new_i32(); + read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN)); store_reg(s, a->rt, tmp); return true; @@ -572,12 +538,10 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a) static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a) { /* VMOV general purpose register to scalar */ - TCGv_i32 tmp, tmp2; - int pass; - uint32_t offset; + TCGv_i32 tmp; - /* SIZE == 2 is a VFP instruction; otherwise NEON. */ - if (a->size == 2 + /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */ + if (a->size == MO_32 ? !dc_isar_feature(aa32_fpsp_v2, s) : !arm_dc_feature(s, ARM_FEATURE_NEON)) { return false; @@ -588,30 +552,13 @@ static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a) return false; } - offset = a->index << a->size; - pass = extract32(offset, 2, 1); - offset = extract32(offset, 0, 2) * 8; - if (!vfp_access_check(s)) { return true; } tmp = load_reg(s, a->rt); - switch (a->size) { - case 0: - tmp2 = neon_load_reg(a->vn, pass); - tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8); - tcg_temp_free_i32(tmp2); - break; - case 1: - tmp2 = neon_load_reg(a->vn, pass); - tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16); - tcg_temp_free_i32(tmp2); - break; - case 2: - break; - } - neon_store_reg(a->vn, pass, tmp); + write_neon_element32(tmp, a->vn, a->index, a->size); + tcg_temp_free_i32(tmp); return true; } @@ -653,7 +600,7 @@ static bool trans_VDUP(DisasContext *s, arg_VDUP *a) } tmp = load_reg(s, a->rt); - tcg_gen_gvec_dup_i32(size, neon_reg_offset(a->vn, 0), + tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn), vec_size, vec_size, tmp); tcg_temp_free_i32(tmp); @@ -829,14 +776,14 @@ static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a) if (a->l) { /* VFP to general purpose register */ tmp = tcg_temp_new_i32(); - neon_load_reg32(tmp, a->vn); + vfp_load_reg32(tmp, a->vn); tcg_gen_andi_i32(tmp, tmp, 0xffff); store_reg(s, a->rt, tmp); } else { /* general purpose register to VFP */ tmp = load_reg(s, a->rt); tcg_gen_andi_i32(tmp, tmp, 0xffff); - neon_store_reg32(tmp, a->vn); + vfp_store_reg32(tmp, a->vn); tcg_temp_free_i32(tmp); } @@ -858,7 +805,7 @@ static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a) if (a->l) { /* VFP to general purpose register */ tmp = tcg_temp_new_i32(); - neon_load_reg32(tmp, a->vn); + vfp_load_reg32(tmp, a->vn); if (a->rt == 15) { /* Set the 4 flag bits in the CPSR. */ gen_set_nzcv(tmp); @@ -869,7 +816,7 @@ static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a) } else { /* general purpose register to VFP */ tmp = load_reg(s, a->rt); - neon_store_reg32(tmp, a->vn); + vfp_store_reg32(tmp, a->vn); tcg_temp_free_i32(tmp); } @@ -895,18 +842,18 @@ static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a) if (a->op) { /* fpreg to gpreg */ tmp = tcg_temp_new_i32(); - neon_load_reg32(tmp, a->vm); + vfp_load_reg32(tmp, a->vm); store_reg(s, a->rt, tmp); tmp = tcg_temp_new_i32(); - neon_load_reg32(tmp, a->vm + 1); + vfp_load_reg32(tmp, a->vm + 1); store_reg(s, a->rt2, tmp); } else { /* gpreg to fpreg */ tmp = load_reg(s, a->rt); - neon_store_reg32(tmp, a->vm); + vfp_store_reg32(tmp, a->vm); tcg_temp_free_i32(tmp); tmp = load_reg(s, a->rt2); - neon_store_reg32(tmp, a->vm + 1); + vfp_store_reg32(tmp, a->vm + 1); tcg_temp_free_i32(tmp); } @@ -938,18 +885,18 @@ static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a) if (a->op) { /* fpreg to gpreg */ tmp = tcg_temp_new_i32(); - neon_load_reg32(tmp, a->vm * 2); + vfp_load_reg32(tmp, a->vm * 2); store_reg(s, a->rt, tmp); tmp = tcg_temp_new_i32(); - neon_load_reg32(tmp, a->vm * 2 + 1); + vfp_load_reg32(tmp, a->vm * 2 + 1); store_reg(s, a->rt2, tmp); } else { /* gpreg to fpreg */ tmp = load_reg(s, a->rt); - neon_store_reg32(tmp, a->vm * 2); + vfp_store_reg32(tmp, a->vm * 2); tcg_temp_free_i32(tmp); tmp = load_reg(s, a->rt2); - neon_store_reg32(tmp, a->vm * 2 + 1); + vfp_store_reg32(tmp, a->vm * 2 + 1); tcg_temp_free_i32(tmp); } @@ -980,9 +927,9 @@ static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a) tmp = tcg_temp_new_i32(); if (a->l) { gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - neon_store_reg32(tmp, a->vd); + vfp_store_reg32(tmp, a->vd); } else { - neon_load_reg32(tmp, a->vd); + vfp_load_reg32(tmp, a->vd); gen_aa32_st16(s, tmp, addr, get_mem_index(s)); } tcg_temp_free_i32(tmp); @@ -1014,9 +961,9 @@ static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a) tmp = tcg_temp_new_i32(); if (a->l) { gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - neon_store_reg32(tmp, a->vd); + vfp_store_reg32(tmp, a->vd); } else { - neon_load_reg32(tmp, a->vd); + vfp_load_reg32(tmp, a->vd); gen_aa32_st32(s, tmp, addr, get_mem_index(s)); } tcg_temp_free_i32(tmp); @@ -1055,9 +1002,9 @@ static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a) tmp = tcg_temp_new_i64(); if (a->l) { gen_aa32_ld64(s, tmp, addr, get_mem_index(s)); - neon_store_reg64(tmp, a->vd); + vfp_store_reg64(tmp, a->vd); } else { - neon_load_reg64(tmp, a->vd); + vfp_load_reg64(tmp, a->vd); gen_aa32_st64(s, tmp, addr, get_mem_index(s)); } tcg_temp_free_i64(tmp); @@ -1119,10 +1066,10 @@ static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a) if (a->l) { /* load */ gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - neon_store_reg32(tmp, a->vd + i); + vfp_store_reg32(tmp, a->vd + i); } else { /* store */ - neon_load_reg32(tmp, a->vd + i); + vfp_load_reg32(tmp, a->vd + i); gen_aa32_st32(s, tmp, addr, get_mem_index(s)); } tcg_gen_addi_i32(addr, addr, offset); @@ -1202,10 +1149,10 @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a) if (a->l) { /* load */ gen_aa32_ld64(s, tmp, addr, get_mem_index(s)); - neon_store_reg64(tmp, a->vd + i); + vfp_store_reg64(tmp, a->vd + i); } else { /* store */ - neon_load_reg64(tmp, a->vd + i); + vfp_load_reg64(tmp, a->vd + i); gen_aa32_st64(s, tmp, addr, get_mem_index(s)); } tcg_gen_addi_i32(addr, addr, offset); @@ -1338,15 +1285,15 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn, fd = tcg_temp_new_i32(); fpst = fpstatus_ptr(FPST_FPCR); - neon_load_reg32(f0, vn); - neon_load_reg32(f1, vm); + vfp_load_reg32(f0, vn); + vfp_load_reg32(f1, vm); for (;;) { if (reads_vd) { - neon_load_reg32(fd, vd); + vfp_load_reg32(fd, vd); } fn(fd, f0, f1, fpst); - neon_store_reg32(fd, vd); + vfp_store_reg32(fd, vd); if (veclen == 0) { break; @@ -1356,10 +1303,10 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn, veclen--; vd = vfp_advance_sreg(vd, delta_d); vn = vfp_advance_sreg(vn, delta_d); - neon_load_reg32(f0, vn); + vfp_load_reg32(f0, vn); if (delta_m) { vm = vfp_advance_sreg(vm, delta_m); - neon_load_reg32(f1, vm); + vfp_load_reg32(f1, vm); } } @@ -1402,14 +1349,14 @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn, fd = tcg_temp_new_i32(); fpst = fpstatus_ptr(FPST_FPCR_F16); - neon_load_reg32(f0, vn); - neon_load_reg32(f1, vm); + vfp_load_reg32(f0, vn); + vfp_load_reg32(f1, vm); if (reads_vd) { - neon_load_reg32(fd, vd); + vfp_load_reg32(fd, vd); } fn(fd, f0, f1, fpst); - neon_store_reg32(fd, vd); + vfp_store_reg32(fd, vd); tcg_temp_free_i32(f0); tcg_temp_free_i32(f1); @@ -1469,15 +1416,15 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn, fd = tcg_temp_new_i64(); fpst = fpstatus_ptr(FPST_FPCR); - neon_load_reg64(f0, vn); - neon_load_reg64(f1, vm); + vfp_load_reg64(f0, vn); + vfp_load_reg64(f1, vm); for (;;) { if (reads_vd) { - neon_load_reg64(fd, vd); + vfp_load_reg64(fd, vd); } fn(fd, f0, f1, fpst); - neon_store_reg64(fd, vd); + vfp_store_reg64(fd, vd); if (veclen == 0) { break; @@ -1486,10 +1433,10 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn, veclen--; vd = vfp_advance_dreg(vd, delta_d); vn = vfp_advance_dreg(vn, delta_d); - neon_load_reg64(f0, vn); + vfp_load_reg64(f0, vn); if (delta_m) { vm = vfp_advance_dreg(vm, delta_m); - neon_load_reg64(f1, vm); + vfp_load_reg64(f1, vm); } } @@ -1542,11 +1489,11 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) f0 = tcg_temp_new_i32(); fd = tcg_temp_new_i32(); - neon_load_reg32(f0, vm); + vfp_load_reg32(f0, vm); for (;;) { fn(fd, f0); - neon_store_reg32(fd, vd); + vfp_store_reg32(fd, vd); if (veclen == 0) { break; @@ -1556,7 +1503,7 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) /* single source one-many */ while (veclen--) { vd = vfp_advance_sreg(vd, delta_d); - neon_store_reg32(fd, vd); + vfp_store_reg32(fd, vd); } break; } @@ -1565,7 +1512,7 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) veclen--; vd = vfp_advance_sreg(vd, delta_d); vm = vfp_advance_sreg(vm, delta_m); - neon_load_reg32(f0, vm); + vfp_load_reg32(f0, vm); } tcg_temp_free_i32(f0); @@ -1598,9 +1545,9 @@ static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) } f0 = tcg_temp_new_i32(); - neon_load_reg32(f0, vm); + vfp_load_reg32(f0, vm); fn(f0, f0); - neon_store_reg32(f0, vd); + vfp_store_reg32(f0, vd); tcg_temp_free_i32(f0); return true; @@ -1652,11 +1599,11 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm) f0 = tcg_temp_new_i64(); fd = tcg_temp_new_i64(); - neon_load_reg64(f0, vm); + vfp_load_reg64(f0, vm); for (;;) { fn(fd, f0); - neon_store_reg64(fd, vd); + vfp_store_reg64(fd, vd); if (veclen == 0) { break; @@ -1666,7 +1613,7 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm) /* single source one-many */ while (veclen--) { vd = vfp_advance_dreg(vd, delta_d); - neon_store_reg64(fd, vd); + vfp_store_reg64(fd, vd); } break; } @@ -1675,7 +1622,7 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm) veclen--; vd = vfp_advance_dreg(vd, delta_d); vd = vfp_advance_dreg(vm, delta_m); - neon_load_reg64(f0, vm); + vfp_load_reg64(f0, vm); } tcg_temp_free_i64(f0); @@ -2090,20 +2037,20 @@ static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) vm = tcg_temp_new_i32(); vd = tcg_temp_new_i32(); - neon_load_reg32(vn, a->vn); - neon_load_reg32(vm, a->vm); + vfp_load_reg32(vn, a->vn); + vfp_load_reg32(vm, a->vm); if (neg_n) { /* VFNMS, VFMS */ gen_helper_vfp_negh(vn, vn); } - neon_load_reg32(vd, a->vd); + vfp_load_reg32(vd, a->vd); if (neg_d) { /* VFNMA, VFNMS */ gen_helper_vfp_negh(vd, vd); } fpst = fpstatus_ptr(FPST_FPCR_F16); gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst); - neon_store_reg32(vd, a->vd); + vfp_store_reg32(vd, a->vd); tcg_temp_free_ptr(fpst); tcg_temp_free_i32(vn); @@ -2155,20 +2102,20 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) vm = tcg_temp_new_i32(); vd = tcg_temp_new_i32(); - neon_load_reg32(vn, a->vn); - neon_load_reg32(vm, a->vm); + vfp_load_reg32(vn, a->vn); + vfp_load_reg32(vm, a->vm); if (neg_n) { /* VFNMS, VFMS */ gen_helper_vfp_negs(vn, vn); } - neon_load_reg32(vd, a->vd); + vfp_load_reg32(vd, a->vd); if (neg_d) { /* VFNMA, VFNMS */ gen_helper_vfp_negs(vd, vd); } fpst = fpstatus_ptr(FPST_FPCR); gen_helper_vfp_muladds(vd, vn, vm, vd, fpst); - neon_store_reg32(vd, a->vd); + vfp_store_reg32(vd, a->vd); tcg_temp_free_ptr(fpst); tcg_temp_free_i32(vn); @@ -2226,20 +2173,20 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) vm = tcg_temp_new_i64(); vd = tcg_temp_new_i64(); - neon_load_reg64(vn, a->vn); - neon_load_reg64(vm, a->vm); + vfp_load_reg64(vn, a->vn); + vfp_load_reg64(vm, a->vm); if (neg_n) { /* VFNMS, VFMS */ gen_helper_vfp_negd(vn, vn); } - neon_load_reg64(vd, a->vd); + vfp_load_reg64(vd, a->vd); if (neg_d) { /* VFNMA, VFNMS */ gen_helper_vfp_negd(vd, vd); } fpst = fpstatus_ptr(FPST_FPCR); gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst); - neon_store_reg64(vd, a->vd); + vfp_store_reg64(vd, a->vd); tcg_temp_free_ptr(fpst); tcg_temp_free_i64(vn); @@ -2283,7 +2230,7 @@ static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a) } fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm)); - neon_store_reg32(fd, a->vd); + vfp_store_reg32(fd, a->vd); tcg_temp_free_i32(fd); return true; } @@ -2323,7 +2270,7 @@ static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a) fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm)); for (;;) { - neon_store_reg32(fd, vd); + vfp_store_reg32(fd, vd); if (veclen == 0) { break; @@ -2378,7 +2325,7 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a) fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm)); for (;;) { - neon_store_reg64(fd, vd); + vfp_store_reg64(fd, vd); if (veclen == 0) { break; @@ -2450,11 +2397,11 @@ static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a) vd = tcg_temp_new_i32(); vm = tcg_temp_new_i32(); - neon_load_reg32(vd, a->vd); + vfp_load_reg32(vd, a->vd); if (a->z) { tcg_gen_movi_i32(vm, 0); } else { - neon_load_reg32(vm, a->vm); + vfp_load_reg32(vm, a->vm); } if (a->e) { @@ -2489,11 +2436,11 @@ static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a) vd = tcg_temp_new_i32(); vm = tcg_temp_new_i32(); - neon_load_reg32(vd, a->vd); + vfp_load_reg32(vd, a->vd); if (a->z) { tcg_gen_movi_i32(vm, 0); } else { - neon_load_reg32(vm, a->vm); + vfp_load_reg32(vm, a->vm); } if (a->e) { @@ -2533,11 +2480,11 @@ static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a) vd = tcg_temp_new_i64(); vm = tcg_temp_new_i64(); - neon_load_reg64(vd, a->vd); + vfp_load_reg64(vd, a->vd); if (a->z) { tcg_gen_movi_i64(vm, 0); } else { - neon_load_reg64(vm, a->vm); + vfp_load_reg64(vm, a->vm); } if (a->e) { @@ -2572,7 +2519,7 @@ static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a) /* The T bit tells us if we want the low or high 16 bits of Vm */ tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t)); gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode); - neon_store_reg32(tmp, a->vd); + vfp_store_reg32(tmp, a->vd); tcg_temp_free_i32(ahp_mode); tcg_temp_free_ptr(fpst); tcg_temp_free_i32(tmp); @@ -2610,7 +2557,7 @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a) tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t)); vd = tcg_temp_new_i64(); gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode); - neon_store_reg64(vd, a->vd); + vfp_store_reg64(vd, a->vd); tcg_temp_free_i32(ahp_mode); tcg_temp_free_ptr(fpst); tcg_temp_free_i32(tmp); @@ -2636,7 +2583,7 @@ static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a) ahp_mode = get_ahp_flag(); tmp = tcg_temp_new_i32(); - neon_load_reg32(tmp, a->vm); + vfp_load_reg32(tmp, a->vm); gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode); tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t)); tcg_temp_free_i32(ahp_mode); @@ -2674,7 +2621,7 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a) tmp = tcg_temp_new_i32(); vm = tcg_temp_new_i64(); - neon_load_reg64(vm, a->vm); + vfp_load_reg64(vm, a->vm); gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode); tcg_temp_free_i64(vm); tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t)); @@ -2698,10 +2645,10 @@ static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a) } tmp = tcg_temp_new_i32(); - neon_load_reg32(tmp, a->vm); + vfp_load_reg32(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR_F16); gen_helper_rinth(tmp, tmp, fpst); - neon_store_reg32(tmp, a->vd); + vfp_store_reg32(tmp, a->vd); tcg_temp_free_ptr(fpst); tcg_temp_free_i32(tmp); return true; @@ -2721,10 +2668,10 @@ static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a) } tmp = tcg_temp_new_i32(); - neon_load_reg32(tmp, a->vm); + vfp_load_reg32(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR); gen_helper_rints(tmp, tmp, fpst); - neon_store_reg32(tmp, a->vd); + vfp_store_reg32(tmp, a->vd); tcg_temp_free_ptr(fpst); tcg_temp_free_i32(tmp); return true; @@ -2753,10 +2700,10 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a) } tmp = tcg_temp_new_i64(); - neon_load_reg64(tmp, a->vm); + vfp_load_reg64(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR); gen_helper_rintd(tmp, tmp, fpst); - neon_store_reg64(tmp, a->vd); + vfp_store_reg64(tmp, a->vd); tcg_temp_free_ptr(fpst); tcg_temp_free_i64(tmp); return true; @@ -2777,13 +2724,13 @@ static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a) } tmp = tcg_temp_new_i32(); - neon_load_reg32(tmp, a->vm); + vfp_load_reg32(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR_F16); tcg_rmode = tcg_const_i32(float_round_to_zero); gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); gen_helper_rinth(tmp, tmp, fpst); gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); - neon_store_reg32(tmp, a->vd); + vfp_store_reg32(tmp, a->vd); tcg_temp_free_ptr(fpst); tcg_temp_free_i32(tcg_rmode); tcg_temp_free_i32(tmp); @@ -2805,13 +2752,13 @@ static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a) } tmp = tcg_temp_new_i32(); - neon_load_reg32(tmp, a->vm); + vfp_load_reg32(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR); tcg_rmode = tcg_const_i32(float_round_to_zero); gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); gen_helper_rints(tmp, tmp, fpst); gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); - neon_store_reg32(tmp, a->vd); + vfp_store_reg32(tmp, a->vd); tcg_temp_free_ptr(fpst); tcg_temp_free_i32(tcg_rmode); tcg_temp_free_i32(tmp); @@ -2842,13 +2789,13 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a) } tmp = tcg_temp_new_i64(); - neon_load_reg64(tmp, a->vm); + vfp_load_reg64(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR); tcg_rmode = tcg_const_i32(float_round_to_zero); gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); gen_helper_rintd(tmp, tmp, fpst); gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); - neon_store_reg64(tmp, a->vd); + vfp_store_reg64(tmp, a->vd); tcg_temp_free_ptr(fpst); tcg_temp_free_i64(tmp); tcg_temp_free_i32(tcg_rmode); @@ -2869,10 +2816,10 @@ static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a) } tmp = tcg_temp_new_i32(); - neon_load_reg32(tmp, a->vm); + vfp_load_reg32(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR_F16); gen_helper_rinth_exact(tmp, tmp, fpst); - neon_store_reg32(tmp, a->vd); + vfp_store_reg32(tmp, a->vd); tcg_temp_free_ptr(fpst); tcg_temp_free_i32(tmp); return true; @@ -2892,10 +2839,10 @@ static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a) } tmp = tcg_temp_new_i32(); - neon_load_reg32(tmp, a->vm); + vfp_load_reg32(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR); gen_helper_rints_exact(tmp, tmp, fpst); - neon_store_reg32(tmp, a->vd); + vfp_store_reg32(tmp, a->vd); tcg_temp_free_ptr(fpst); tcg_temp_free_i32(tmp); return true; @@ -2924,10 +2871,10 @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a) } tmp = tcg_temp_new_i64(); - neon_load_reg64(tmp, a->vm); + vfp_load_reg64(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR); gen_helper_rintd_exact(tmp, tmp, fpst); - neon_store_reg64(tmp, a->vd); + vfp_store_reg64(tmp, a->vd); tcg_temp_free_ptr(fpst); tcg_temp_free_i64(tmp); return true; @@ -2953,9 +2900,9 @@ static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a) vm = tcg_temp_new_i32(); vd = tcg_temp_new_i64(); - neon_load_reg32(vm, a->vm); + vfp_load_reg32(vm, a->vm); gen_helper_vfp_fcvtds(vd, vm, cpu_env); - neon_store_reg64(vd, a->vd); + vfp_store_reg64(vd, a->vd); tcg_temp_free_i32(vm); tcg_temp_free_i64(vd); return true; @@ -2981,9 +2928,9 @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a) vd = tcg_temp_new_i32(); vm = tcg_temp_new_i64(); - neon_load_reg64(vm, a->vm); + vfp_load_reg64(vm, a->vm); gen_helper_vfp_fcvtsd(vd, vm, cpu_env); - neon_store_reg32(vd, a->vd); + vfp_store_reg32(vd, a->vd); tcg_temp_free_i32(vd); tcg_temp_free_i64(vm); return true; @@ -3003,7 +2950,7 @@ static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a) } vm = tcg_temp_new_i32(); - neon_load_reg32(vm, a->vm); + vfp_load_reg32(vm, a->vm); fpst = fpstatus_ptr(FPST_FPCR_F16); if (a->s) { /* i32 -> f16 */ @@ -3012,7 +2959,7 @@ static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a) /* u32 -> f16 */ gen_helper_vfp_uitoh(vm, vm, fpst); } - neon_store_reg32(vm, a->vd); + vfp_store_reg32(vm, a->vd); tcg_temp_free_i32(vm); tcg_temp_free_ptr(fpst); return true; @@ -3032,7 +2979,7 @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a) } vm = tcg_temp_new_i32(); - neon_load_reg32(vm, a->vm); + vfp_load_reg32(vm, a->vm); fpst = fpstatus_ptr(FPST_FPCR); if (a->s) { /* i32 -> f32 */ @@ -3041,7 +2988,7 @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a) /* u32 -> f32 */ gen_helper_vfp_uitos(vm, vm, fpst); } - neon_store_reg32(vm, a->vd); + vfp_store_reg32(vm, a->vd); tcg_temp_free_i32(vm); tcg_temp_free_ptr(fpst); return true; @@ -3068,7 +3015,7 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a) vm = tcg_temp_new_i32(); vd = tcg_temp_new_i64(); - neon_load_reg32(vm, a->vm); + vfp_load_reg32(vm, a->vm); fpst = fpstatus_ptr(FPST_FPCR); if (a->s) { /* i32 -> f64 */ @@ -3077,7 +3024,7 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a) /* u32 -> f64 */ gen_helper_vfp_uitod(vd, vm, fpst); } - neon_store_reg64(vd, a->vd); + vfp_store_reg64(vd, a->vd); tcg_temp_free_i32(vm); tcg_temp_free_i64(vd); tcg_temp_free_ptr(fpst); @@ -3108,9 +3055,9 @@ static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a) vm = tcg_temp_new_i64(); vd = tcg_temp_new_i32(); - neon_load_reg64(vm, a->vm); + vfp_load_reg64(vm, a->vm); gen_helper_vjcvt(vd, vm, cpu_env); - neon_store_reg32(vd, a->vd); + vfp_store_reg32(vd, a->vd); tcg_temp_free_i64(vm); tcg_temp_free_i32(vd); return true; @@ -3133,7 +3080,7 @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a) frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm); vd = tcg_temp_new_i32(); - neon_load_reg32(vd, a->vd); + vfp_load_reg32(vd, a->vd); fpst = fpstatus_ptr(FPST_FPCR_F16); shift = tcg_const_i32(frac_bits); @@ -3168,7 +3115,7 @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a) g_assert_not_reached(); } - neon_store_reg32(vd, a->vd); + vfp_store_reg32(vd, a->vd); tcg_temp_free_i32(vd); tcg_temp_free_i32(shift); tcg_temp_free_ptr(fpst); @@ -3192,7 +3139,7 @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a) frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm); vd = tcg_temp_new_i32(); - neon_load_reg32(vd, a->vd); + vfp_load_reg32(vd, a->vd); fpst = fpstatus_ptr(FPST_FPCR); shift = tcg_const_i32(frac_bits); @@ -3227,7 +3174,7 @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a) g_assert_not_reached(); } - neon_store_reg32(vd, a->vd); + vfp_store_reg32(vd, a->vd); tcg_temp_free_i32(vd); tcg_temp_free_i32(shift); tcg_temp_free_ptr(fpst); @@ -3257,7 +3204,7 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a) frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm); vd = tcg_temp_new_i64(); - neon_load_reg64(vd, a->vd); + vfp_load_reg64(vd, a->vd); fpst = fpstatus_ptr(FPST_FPCR); shift = tcg_const_i32(frac_bits); @@ -3292,7 +3239,7 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a) g_assert_not_reached(); } - neon_store_reg64(vd, a->vd); + vfp_store_reg64(vd, a->vd); tcg_temp_free_i64(vd); tcg_temp_free_i32(shift); tcg_temp_free_ptr(fpst); @@ -3314,7 +3261,7 @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a) fpst = fpstatus_ptr(FPST_FPCR_F16); vm = tcg_temp_new_i32(); - neon_load_reg32(vm, a->vm); + vfp_load_reg32(vm, a->vm); if (a->s) { if (a->rz) { @@ -3329,7 +3276,7 @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a) gen_helper_vfp_touih(vm, vm, fpst); } } - neon_store_reg32(vm, a->vd); + vfp_store_reg32(vm, a->vd); tcg_temp_free_i32(vm); tcg_temp_free_ptr(fpst); return true; @@ -3350,7 +3297,7 @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a) fpst = fpstatus_ptr(FPST_FPCR); vm = tcg_temp_new_i32(); - neon_load_reg32(vm, a->vm); + vfp_load_reg32(vm, a->vm); if (a->s) { if (a->rz) { @@ -3365,7 +3312,7 @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a) gen_helper_vfp_touis(vm, vm, fpst); } } - neon_store_reg32(vm, a->vd); + vfp_store_reg32(vm, a->vd); tcg_temp_free_i32(vm); tcg_temp_free_ptr(fpst); return true; @@ -3393,7 +3340,7 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a) fpst = fpstatus_ptr(FPST_FPCR); vm = tcg_temp_new_i64(); vd = tcg_temp_new_i32(); - neon_load_reg64(vm, a->vm); + vfp_load_reg64(vm, a->vm); if (a->s) { if (a->rz) { @@ -3408,7 +3355,7 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a) gen_helper_vfp_touid(vd, vm, fpst); } } - neon_store_reg32(vd, a->vd); + vfp_store_reg32(vd, a->vd); tcg_temp_free_i32(vd); tcg_temp_free_i64(vm); tcg_temp_free_ptr(fpst); @@ -3521,10 +3468,10 @@ static bool trans_VINS(DisasContext *s, arg_VINS *a) /* Insert low half of Vm into high half of Vd */ rm = tcg_temp_new_i32(); rd = tcg_temp_new_i32(); - neon_load_reg32(rm, a->vm); - neon_load_reg32(rd, a->vd); + vfp_load_reg32(rm, a->vm); + vfp_load_reg32(rd, a->vd); tcg_gen_deposit_i32(rd, rd, rm, 16, 16); - neon_store_reg32(rd, a->vd); + vfp_store_reg32(rd, a->vd); tcg_temp_free_i32(rm); tcg_temp_free_i32(rd); return true; @@ -3548,9 +3495,9 @@ static bool trans_VMOVX(DisasContext *s, arg_VINS *a) /* Set Vd to high half of Vm */ rm = tcg_temp_new_i32(); - neon_load_reg32(rm, a->vm); + vfp_load_reg32(rm, a->vm); tcg_gen_shri_i32(rm, rm, 16); - neon_store_reg32(rm, a->vd); + vfp_store_reg32(rm, a->vd); tcg_temp_free_i32(rm); return true; } diff --git a/target/arm/translate.c b/target/arm/translate.c index 38371db540..29ea1eb781 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -1094,62 +1094,139 @@ static inline void gen_hlt(DisasContext *s, int imm) unallocated_encoding(s); } -static inline long vfp_reg_offset(bool dp, unsigned reg) +/* + * Return the offset of a "full" NEON Dreg. + */ +static long neon_full_reg_offset(unsigned reg) +{ + return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]); +} + +/* + * Return the offset of a 2**SIZE piece of a NEON register, at index ELE, + * where 0 is the least significant end of the register. + */ +static long neon_element_offset(int reg, int element, MemOp memop) +{ + int element_size = 1 << (memop & MO_SIZE); + int ofs = element * element_size; +#ifdef HOST_WORDS_BIGENDIAN + /* + * Calculate the offset assuming fully little-endian, + * then XOR to account for the order of the 8-byte units. + */ + if (element_size < 8) { + ofs ^= 8 - element_size; + } +#endif + return neon_full_reg_offset(reg) + ofs; +} + +/* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */ +static long vfp_reg_offset(bool dp, unsigned reg) { if (dp) { - return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]); + return neon_element_offset(reg, 0, MO_64); } else { - long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]); - if (reg & 1) { - ofs += offsetof(CPU_DoubleU, l.upper); - } else { - ofs += offsetof(CPU_DoubleU, l.lower); - } - return ofs; + return neon_element_offset(reg >> 1, reg & 1, MO_32); } } -/* Return the offset of a 32-bit piece of a NEON register. - zero is the least significant end of the register. */ -static inline long -neon_reg_offset (int reg, int n) +static inline void vfp_load_reg64(TCGv_i64 var, int reg) { - int sreg; - sreg = reg * 2 + n; - return vfp_reg_offset(0, sreg); + tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg)); } -static TCGv_i32 neon_load_reg(int reg, int pass) +static inline void vfp_store_reg64(TCGv_i64 var, int reg) { - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass)); - return tmp; + tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg)); } -static void neon_store_reg(int reg, int pass, TCGv_i32 var) +static inline void vfp_load_reg32(TCGv_i32 var, int reg) { - tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass)); - tcg_temp_free_i32(var); + tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg)); } -static inline void neon_load_reg64(TCGv_i64 var, int reg) +static inline void vfp_store_reg32(TCGv_i32 var, int reg) { - tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg)); + tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg)); } -static inline void neon_store_reg64(TCGv_i64 var, int reg) +static void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop) { - tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg)); + long off = neon_element_offset(reg, ele, memop); + + switch (memop) { + case MO_SB: + tcg_gen_ld8s_i32(dest, cpu_env, off); + break; + case MO_UB: + tcg_gen_ld8u_i32(dest, cpu_env, off); + break; + case MO_SW: + tcg_gen_ld16s_i32(dest, cpu_env, off); + break; + case MO_UW: + tcg_gen_ld16u_i32(dest, cpu_env, off); + break; + case MO_UL: + case MO_SL: + tcg_gen_ld_i32(dest, cpu_env, off); + break; + default: + g_assert_not_reached(); + } } -static inline void neon_load_reg32(TCGv_i32 var, int reg) +static void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop) { - tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg)); + long off = neon_element_offset(reg, ele, memop); + + switch (memop) { + case MO_SL: + tcg_gen_ld32s_i64(dest, cpu_env, off); + break; + case MO_UL: + tcg_gen_ld32u_i64(dest, cpu_env, off); + break; + case MO_Q: + tcg_gen_ld_i64(dest, cpu_env, off); + break; + default: + g_assert_not_reached(); + } } -static inline void neon_store_reg32(TCGv_i32 var, int reg) +static void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop) { - tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg)); + long off = neon_element_offset(reg, ele, memop); + + switch (memop) { + case MO_8: + tcg_gen_st8_i32(src, cpu_env, off); + break; + case MO_16: + tcg_gen_st16_i32(src, cpu_env, off); + break; + case MO_32: + tcg_gen_st_i32(src, cpu_env, off); + break; + default: + g_assert_not_reached(); + } +} + +static void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop) +{ + long off = neon_element_offset(reg, ele, memop); + + switch (memop) { + case MO_64: + tcg_gen_st_i64(src, cpu_env, off); + break; + default: + g_assert_not_reached(); + } } static TCGv_ptr vfp_reg_ptr(bool dp, int reg) diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index a973454e4f..0f33127c4c 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -293,7 +293,7 @@ void HELPER(gvec_sdot_idx_b)(void *vd, void *vn, void *vm, uint32_t desc) intptr_t index = simd_data(desc); uint32_t *d = vd; int8_t *n = vn; - int8_t *m_indexed = (int8_t *)vm + index * 4; + int8_t *m_indexed = (int8_t *)vm + H4(index) * 4; /* Notice the special case of opr_sz == 8, from aa64/aa32 advsimd. * Otherwise opr_sz is a multiple of 16. @@ -324,7 +324,7 @@ void HELPER(gvec_udot_idx_b)(void *vd, void *vn, void *vm, uint32_t desc) intptr_t index = simd_data(desc); uint32_t *d = vd; uint8_t *n = vn; - uint8_t *m_indexed = (uint8_t *)vm + index * 4; + uint8_t *m_indexed = (uint8_t *)vm + H4(index) * 4; /* Notice the special case of opr_sz == 8, from aa64/aa32 advsimd. * Otherwise opr_sz is a multiple of 16. @@ -1858,10 +1858,10 @@ DO_ABA(gvec_uaba_d, uint64_t) r2 = float16_##OP(m[H2(0)], m[H2(1)], fpst); \ r3 = float16_##OP(m[H2(2)], m[H2(3)], fpst); \ \ - d[H4(0)] = r0; \ - d[H4(1)] = r1; \ - d[H4(2)] = r2; \ - d[H4(3)] = r3; \ + d[H2(0)] = r0; \ + d[H2(1)] = r1; \ + d[H2(2)] = r2; \ + d[H2(3)] = r3; \ } DO_NEON_PAIRWISE(neon_padd, add) diff --git a/tests/qtest/npcm7xx_rng-test.c b/tests/qtest/npcm7xx_rng-test.c index da6e639bf6..e7cde85fbb 100644 --- a/tests/qtest/npcm7xx_rng-test.c +++ b/tests/qtest/npcm7xx_rng-test.c @@ -265,10 +265,16 @@ int main(int argc, char **argv) qtest_add_func("npcm7xx_rng/enable_disable", test_enable_disable); qtest_add_func("npcm7xx_rng/rosel", test_rosel); - qtest_add_func("npcm7xx_rng/continuous/monobit", test_continuous_monobit); - qtest_add_func("npcm7xx_rng/continuous/runs", test_continuous_runs); - qtest_add_func("npcm7xx_rng/first_byte/monobit", test_first_byte_monobit); - qtest_add_func("npcm7xx_rng/first_byte/runs", test_first_byte_runs); + /* + * These tests fail intermittently; only run them on explicit + * request until we figure out why. + */ + if (getenv("QEMU_TEST_FLAKY_RNG_TESTS")) { + qtest_add_func("npcm7xx_rng/continuous/monobit", test_continuous_monobit); + qtest_add_func("npcm7xx_rng/continuous/runs", test_continuous_runs); + qtest_add_func("npcm7xx_rng/first_byte/monobit", test_first_byte_monobit); + qtest_add_func("npcm7xx_rng/first_byte/runs", test_first_byte_runs); + } qtest_start("-machine npcm750-evb"); ret = g_test_run(); |