aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peter Maydell <peter.maydell@linaro.org> 2020-11-03 10:38:05 +0000
committer Peter Maydell <peter.maydell@linaro.org> 2020-11-03 10:38:05 +0000
commit c7a7a877b716cf14848f1fd5c754d293e2f8d852 (patch)
tree afbab679f98b164f9aeecb10d635199c48bb0169
parent 8545ae485b1e8e43cc0137310c4c68dbece59990 (diff)
parent ffb4fbf90a2f63c9cb33e4bb9f854c79bf04ca4a (diff)
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20201102' into staging
target-arm queue:
 * target/arm: Fix Neon emulation bugs on big-endian hosts
 * target/arm: fix handling of HCR.FB
 * target/arm: fix LORID_EL1 access check
 * disas/capstone: Fix monitor disassembly of >32 bytes
 * hw/arm/smmuv3: Fix potential integer overflow (CID 1432363)
 * hw/arm/boot: fix SVE for EL3 direct kernel boot
 * hw/display/omap_lcdc: Fix potential NULL pointer dereference
 * hw/display/exynos4210_fimd: Fix potential NULL pointer dereference
 * target/arm: Get correct MMU index for other-security-state
 * configure: Test that gio libs from pkg-config work
 * hw/intc/arm_gicv3_cpuif: Make GIC maintenance interrupts work
 * docs: Fix building with Sphinx 3
 * tests/qtest/npcm7xx_rng-test: Disable randomness tests

# gpg: Signature made Mon 02 Nov 2020 17:09:00 GMT
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20201102: (26 commits)
  tests/qtest/npcm7xx_rng-test: Disable randomness tests
  qemu-option-trace.rst.inc: Don't use option:: markup
  scripts/kerneldoc: For Sphinx 3 use c:macro for macros with arguments
  hw/intc/arm_gicv3_cpuif: Make GIC maintenance interrupts work
  configure: Test that gio libs from pkg-config work
  target/arm: Get correct MMU index for other-security-state
  hw/display/exynos4210_fimd: Fix potential NULL pointer dereference
  hw/display/omap_lcdc: Fix potential NULL pointer dereference
  hw/arm/boot: fix SVE for EL3 direct kernel boot
  hw/arm/smmuv3: Fix potential integer overflow (CID 1432363)
  disas/capstone: Fix monitor disassembly of >32 bytes
  target/arm: fix LORID_EL1 access check
  target/arm: fix handling of HCR.FB
  target/arm: Fix VUDOT/VSDOT (scalar) on big-endian hosts
  target/arm: Fix float16 pairwise Neon ops on big-endian hosts
  target/arm: Improve do_prewiden_3d
  target/arm: Simplify do_long_3d and do_2scalar_long
  target/arm: Rename neon_load_reg64 to vfp_load_reg64
  target/arm: Add read/write_neon_element64
  target/arm: Rename neon_load_reg32 to vfp_load_reg32
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rwxr-xr-x configure 10
-rw-r--r-- disas/capstone.c 2
-rw-r--r-- docs/qemu-option-trace.rst.inc 6
-rw-r--r-- hw/arm/boot.c 3
-rw-r--r-- hw/arm/smmuv3.c 3
-rw-r--r-- hw/display/exynos4210_fimd.c 4
-rw-r--r-- hw/display/omap_lcdc.c 10
-rw-r--r-- hw/intc/arm_gicv3_cpuif.c 5
-rw-r--r-- include/hw/intc/arm_gicv3_common.h 1
-rwxr-xr-x scripts/kernel-doc 18
-rw-r--r-- target/arm/helper.c 24
-rw-r--r-- target/arm/m_helper.c 3
-rw-r--r-- target/arm/translate-neon.c.inc 472
-rw-r--r-- target/arm/translate-vfp.c.inc 341
-rw-r--r-- target/arm/translate.c 139
-rw-r--r-- target/arm/vec_helper.c 12
-rw-r--r-- tests/qtest/npcm7xx_rng-test.c 14
17 files changed, 581 insertions, 486 deletions
diff --git a/configure b/configure
index 6df4306c88..2c3c69f118 100755
--- a/configure
+++ b/configure
@@ -3489,13 +3489,21 @@ if test "$static" = yes && test "$mingw32" = yes; then
fi
if $pkg_config --atleast-version=$glib_req_ver gio-2.0; then
- gio=yes
gio_cflags=$($pkg_config --cflags gio-2.0)
gio_libs=$($pkg_config --libs gio-2.0)
gdbus_codegen=$($pkg_config --variable=gdbus_codegen gio-2.0)
if [ ! -x "$gdbus_codegen" ]; then
gdbus_codegen=
fi
+ # Check that the libraries actually work -- Ubuntu 18.04 ships
+ # with pkg-config --static --libs data for gio-2.0 that is missing
+ # -lblkid and will give a link error.
+ write_c_skeleton
+ if compile_prog "" "gio_libs" ; then
+ gio=yes
+ else
+ gio=no
+ fi
else
gio=no
fi
diff --git a/disas/capstone.c b/disas/capstone.c
index 0a9ef9c892..7462c0e305 100644
--- a/disas/capstone.c
+++ b/disas/capstone.c
@@ -286,7 +286,7 @@ bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count)
/* Make certain that we can make progress. */
assert(tsize != 0);
- info->read_memory_func(pc, cap_buf + csize, tsize, info);
+ info->read_memory_func(pc + csize, cap_buf + csize, tsize, info);
csize += tsize;
if (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
diff --git a/docs/qemu-option-trace.rst.inc b/docs/qemu-option-trace.rst.inc
index 7e09773a9c..d7acbe67f7 100644
--- a/docs/qemu-option-trace.rst.inc
+++ b/docs/qemu-option-trace.rst.inc
@@ -1,7 +1,7 @@
Specify tracing options.
-.. option:: [enable=]PATTERN
+``[enable=]PATTERN``
Immediately enable events matching *PATTERN*
(either event name or a globbing pattern). This option is only
@@ -11,7 +11,7 @@ Specify tracing options.
Use :option:`-trace help` to print a list of names of trace points.
-.. option:: events=FILE
+``events=FILE``
Immediately enable events listed in *FILE*.
The file must contain one event name (as listed in the ``trace-events-all``
@@ -19,7 +19,7 @@ Specify tracing options.
available if QEMU has been compiled with the ``simple``, ``log`` or
``ftrace`` tracing backend.
-.. option:: file=FILE
+``file=FILE``
Log output traces to *FILE*.
This option is only available if QEMU has been compiled with
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 3e9816af80..cf97600a91 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -742,6 +742,9 @@ static void do_cpu_reset(void *opaque)
if (cpu_isar_feature(aa64_mte, cpu)) {
env->cp15.scr_el3 |= SCR_ATA;
}
+ if (cpu_isar_feature(aa64_sve, cpu)) {
+ env->cp15.cptr_el[3] |= CPTR_EZ;
+ }
/* AArch64 kernels never boot in secure mode */
assert(!info->secure_boot);
/* This hook is only supported for AArch32 currently:
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 2017ba7a5a..22607c3784 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -17,6 +17,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/bitops.h"
#include "hw/irq.h"
#include "hw/sysbus.h"
#include "migration/vmstate.h"
@@ -864,7 +865,7 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
scale = CMD_SCALE(cmd);
num = CMD_NUM(cmd);
ttl = CMD_TTL(cmd);
- num_pages = (num + 1) * (1 << (scale));
+ num_pages = (num + 1) * BIT_ULL(scale);
}
if (type == SMMU_CMD_TLBI_NH_VA) {
diff --git a/hw/display/exynos4210_fimd.c b/hw/display/exynos4210_fimd.c
index 4c16e1f5a0..34a960a976 100644
--- a/hw/display/exynos4210_fimd.c
+++ b/hw/display/exynos4210_fimd.c
@@ -1275,12 +1275,14 @@ static void exynos4210_fimd_update(void *opaque)
bool blend = false;
uint8_t *host_fb_addr;
bool is_dirty = false;
- const int global_width = (s->vidtcon[2] & FIMD_VIDTCON2_SIZE_MASK) + 1;
+ int global_width;
if (!s || !s->console || !s->enabled ||
surface_bits_per_pixel(qemu_console_surface(s->console)) == 0) {
return;
}
+
+ global_width = (s->vidtcon[2] & FIMD_VIDTCON2_SIZE_MASK) + 1;
exynos4210_update_resolution(s);
surface = qemu_console_surface(s->console);
diff --git a/hw/display/omap_lcdc.c b/hw/display/omap_lcdc.c
index fa4a381db6..58e659c94f 100644
--- a/hw/display/omap_lcdc.c
+++ b/hw/display/omap_lcdc.c
@@ -78,14 +78,18 @@ static void omap_lcd_interrupts(struct omap_lcd_panel_s *s)
static void omap_update_display(void *opaque)
{
struct omap_lcd_panel_s *omap_lcd = (struct omap_lcd_panel_s *) opaque;
- DisplaySurface *surface = qemu_console_surface(omap_lcd->con);
+ DisplaySurface *surface;
draw_line_func draw_line;
int size, height, first, last;
int width, linesize, step, bpp, frame_offset;
hwaddr frame_base;
- if (!omap_lcd || omap_lcd->plm == 1 || !omap_lcd->enable ||
- !surface_bits_per_pixel(surface)) {
+ if (!omap_lcd || omap_lcd->plm == 1 || !omap_lcd->enable) {
+ return;
+ }
+
+ surface = qemu_console_surface(omap_lcd->con);
+ if (!surface_bits_per_pixel(surface)) {
return;
}
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index 08e000e33c..43ef1d7a84 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -399,6 +399,7 @@ static void gicv3_cpuif_virt_update(GICv3CPUState *cs)
int irqlevel = 0;
int fiqlevel = 0;
int maintlevel = 0;
+ ARMCPU *cpu = ARM_CPU(cs->cpu);
idx = hppvi_index(cs);
trace_gicv3_cpuif_virt_update(gicv3_redist_affid(cs), idx);
@@ -424,7 +425,7 @@ static void gicv3_cpuif_virt_update(GICv3CPUState *cs)
qemu_set_irq(cs->parent_vfiq, fiqlevel);
qemu_set_irq(cs->parent_virq, irqlevel);
- qemu_set_irq(cs->maintenance_irq, maintlevel);
+ qemu_set_irq(cpu->gicv3_maintenance_interrupt, maintlevel);
}
static uint64_t icv_ap_read(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -2624,8 +2625,6 @@ void gicv3_init_cpuif(GICv3State *s)
&& cpu->gic_num_lrs) {
int j;
- cs->maintenance_irq = cpu->gicv3_maintenance_interrupt;
-
cs->num_list_regs = cpu->gic_num_lrs;
cs->vpribits = cpu->gic_vpribits;
cs->vprebits = cpu->gic_vprebits;
diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
index 0331b0ffdb..91491a2f66 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -153,7 +153,6 @@ struct GICv3CPUState {
qemu_irq parent_fiq;
qemu_irq parent_virq;
qemu_irq parent_vfiq;
- qemu_irq maintenance_irq;
/* Redistributor */
uint32_t level; /* Current IRQ level */
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 0ff62bb6a2..4fbaaa05e3 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -839,7 +839,23 @@ sub output_function_rst(%) {
output_highlight_rst($args{'purpose'});
$start = "\n\n**Syntax**\n\n ``";
} else {
- print ".. c:function:: ";
+ if ((split(/\./, $sphinx_version))[0] >= 3) {
+ # Sphinx 3 and later distinguish macros and functions and
+ # complain if you use c:function with something that's not
+ # syntactically valid as a function declaration.
+ # We assume that anything with a return type is a function
+ # and anything without is a macro.
+ if ($args{'functiontype'} ne "") {
+ print ".. c:function:: ";
+ } else {
+ print ".. c:macro:: ";
+ }
+ } else {
+ # Older Sphinx don't support documenting macros that take
+ # arguments with c:macro, and don't complain about the use
+ # of c:function for this.
+ print ".. c:function:: ";
+ }
}
if ($args{'functiontype'} ne "") {
$start .= $args{'functiontype'} . " " . $args{'function'} . " (";
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 97bb6b8c01..6854591986 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -731,13 +731,12 @@ static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
/*
* Non-IS variants of TLB operations are upgraded to
- * IS versions if we are at NS EL1 and HCR_EL2.FB is set to
+ * IS versions if we are at EL1 and HCR_EL2.FB is effectively set to
* force broadcast of these operations.
*/
static bool tlb_force_broadcast(CPUARMState *env)
{
- return (env->cp15.hcr_el2 & HCR_FB) &&
- arm_current_el(env) == 1 && arm_is_secure_below_el3(env);
+ return arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_FB);
}
static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -6680,9 +6679,10 @@ static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri)
#endif
/* Shared logic between LORID and the rest of the LOR* registers.
- * Secure state has already been delt with.
+ * Secure state exclusion has already been dealt with.
*/
-static CPAccessResult access_lor_ns(CPUARMState *env)
+static CPAccessResult access_lor_ns(CPUARMState *env,
+ const ARMCPRegInfo *ri, bool isread)
{
int el = arm_current_el(env);
@@ -6695,16 +6695,6 @@ static CPAccessResult access_lor_ns(CPUARMState *env)
return CP_ACCESS_OK;
}
-static CPAccessResult access_lorid(CPUARMState *env, const ARMCPRegInfo *ri,
- bool isread)
-{
- if (arm_is_secure_below_el3(env)) {
- /* Access ok in secure mode. */
- return CP_ACCESS_OK;
- }
- return access_lor_ns(env);
-}
-
static CPAccessResult access_lor_other(CPUARMState *env,
const ARMCPRegInfo *ri, bool isread)
{
@@ -6712,7 +6702,7 @@ static CPAccessResult access_lor_other(CPUARMState *env,
/* Access denied in secure mode. */
return CP_ACCESS_TRAP;
}
- return access_lor_ns(env);
+ return access_lor_ns(env, ri, isread);
}
/*
@@ -6739,7 +6729,7 @@ static const ARMCPRegInfo lor_reginfo[] = {
.type = ARM_CP_CONST, .resetvalue = 0 },
{ .name = "LORID_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 10, .crm = 4, .opc2 = 7,
- .access = PL1_R, .accessfn = access_lorid,
+ .access = PL1_R, .accessfn = access_lor_ns,
.type = ARM_CP_CONST, .resetvalue = 0 },
REGINFO_SENTINEL
};
diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c
index 036454234c..aad01ea012 100644
--- a/target/arm/m_helper.c
+++ b/target/arm/m_helper.c
@@ -2719,7 +2719,8 @@ ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env,
/* Return the MMU index for a v7M CPU in the specified security state */
ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate)
{
- bool priv = arm_current_el(env) != 0;
+ bool priv = arm_v7m_is_handler_mode(env) ||
+ !(env->v7m.control[secstate] & 1);
return arm_v7m_mmu_idx_for_secstate_and_priv(env, secstate, priv);
}
diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc
index 4d1a292981..59368cb243 100644
--- a/target/arm/translate-neon.c.inc
+++ b/target/arm/translate-neon.c.inc
@@ -60,25 +60,6 @@ static inline int neon_3same_fp_size(DisasContext *s, int x)
#include "decode-neon-ls.c.inc"
#include "decode-neon-shared.c.inc"
-/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
- * where 0 is the least significant end of the register.
- */
-static inline long
-neon_element_offset(int reg, int element, MemOp size)
-{
- int element_size = 1 << size;
- int ofs = element * element_size;
-#ifdef HOST_WORDS_BIGENDIAN
- /* Calculate the offset assuming fully little-endian,
- * then XOR to account for the order of the 8-byte units.
- */
- if (element_size < 8) {
- ofs ^= 8 - element_size;
- }
-#endif
- return neon_reg_offset(reg, 0) + ofs;
-}
-
static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
{
long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
@@ -585,12 +566,12 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
* We cannot write 16 bytes at once because the
* destination is unaligned.
*/
- tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
+ tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
8, 8, tmp);
- tcg_gen_gvec_mov(0, neon_reg_offset(vd + 1, 0),
- neon_reg_offset(vd, 0), 8, 8);
+ tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1),
+ neon_full_reg_offset(vd), 8, 8);
} else {
- tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
+ tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
vec_size, vec_size, tmp);
}
tcg_gen_addi_i32(addr, addr, 1 << size);
@@ -691,9 +672,9 @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
{
int vec_size = a->q ? 16 : 8;
- int rd_ofs = neon_reg_offset(a->vd, 0);
- int rn_ofs = neon_reg_offset(a->vn, 0);
- int rm_ofs = neon_reg_offset(a->vm, 0);
+ int rd_ofs = neon_full_reg_offset(a->vd);
+ int rn_ofs = neon_full_reg_offset(a->vn);
+ int rm_ofs = neon_full_reg_offset(a->vm);
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@@ -975,18 +956,24 @@ static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
* early. Since Q is 0 there are always just two passes, so instead
* of a complicated loop over each pass we just unroll.
*/
- tmp = neon_load_reg(a->vn, 0);
- tmp2 = neon_load_reg(a->vn, 1);
+ tmp = tcg_temp_new_i32();
+ tmp2 = tcg_temp_new_i32();
+ tmp3 = tcg_temp_new_i32();
+
+ read_neon_element32(tmp, a->vn, 0, MO_32);
+ read_neon_element32(tmp2, a->vn, 1, MO_32);
fn(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- tmp3 = neon_load_reg(a->vm, 0);
- tmp2 = neon_load_reg(a->vm, 1);
+ read_neon_element32(tmp3, a->vm, 0, MO_32);
+ read_neon_element32(tmp2, a->vm, 1, MO_32);
fn(tmp3, tmp3, tmp2);
- tcg_temp_free_i32(tmp2);
- neon_store_reg(a->vd, 0, tmp);
- neon_store_reg(a->vd, 1, tmp3);
+ write_neon_element32(tmp, a->vd, 0, MO_32);
+ write_neon_element32(tmp3, a->vd, 1, MO_32);
+
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp3);
return true;
}
@@ -1177,8 +1164,8 @@ static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
{
/* Handle a 2-reg-shift insn which can be vectorized. */
int vec_size = a->q ? 16 : 8;
- int rd_ofs = neon_reg_offset(a->vd, 0);
- int rm_ofs = neon_reg_offset(a->vm, 0);
+ int rd_ofs = neon_full_reg_offset(a->vd);
+ int rm_ofs = neon_full_reg_offset(a->vm);
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@@ -1278,9 +1265,9 @@ static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
for (pass = 0; pass < a->q + 1; pass++) {
TCGv_i64 tmp = tcg_temp_new_i64();
- neon_load_reg64(tmp, a->vm + pass);
+ read_neon_element64(tmp, a->vm, pass, MO_64);
fn(tmp, cpu_env, tmp, constimm);
- neon_store_reg64(tmp, a->vd + pass);
+ write_neon_element64(tmp, a->vd, pass, MO_64);
tcg_temp_free_i64(tmp);
}
tcg_temp_free_i64(constimm);
@@ -1294,7 +1281,7 @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
* 2-reg-and-shift operations, size < 3 case, where the
* helper needs to be passed cpu_env.
*/
- TCGv_i32 constimm;
+ TCGv_i32 constimm, tmp;
int pass;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@@ -1320,12 +1307,14 @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
* by immediate using the variable shift operations.
*/
constimm = tcg_const_i32(dup_const(a->size, a->shift));
+ tmp = tcg_temp_new_i32();
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
- TCGv_i32 tmp = neon_load_reg(a->vm, pass);
+ read_neon_element32(tmp, a->vm, pass, MO_32);
fn(tmp, cpu_env, tmp, constimm);
- neon_store_reg(a->vd, pass, tmp);
+ write_neon_element32(tmp, a->vd, pass, MO_32);
}
+ tcg_temp_free_i32(tmp);
tcg_temp_free_i32(constimm);
return true;
}
@@ -1383,21 +1372,21 @@ static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
constimm = tcg_const_i64(-a->shift);
rm1 = tcg_temp_new_i64();
rm2 = tcg_temp_new_i64();
+ rd = tcg_temp_new_i32();
/* Load both inputs first to avoid potential overwrite if rm == rd */
- neon_load_reg64(rm1, a->vm);
- neon_load_reg64(rm2, a->vm + 1);
+ read_neon_element64(rm1, a->vm, 0, MO_64);
+ read_neon_element64(rm2, a->vm, 1, MO_64);
shiftfn(rm1, rm1, constimm);
- rd = tcg_temp_new_i32();
narrowfn(rd, cpu_env, rm1);
- neon_store_reg(a->vd, 0, rd);
+ write_neon_element32(rd, a->vd, 0, MO_32);
shiftfn(rm2, rm2, constimm);
- rd = tcg_temp_new_i32();
narrowfn(rd, cpu_env, rm2);
- neon_store_reg(a->vd, 1, rd);
+ write_neon_element32(rd, a->vd, 1, MO_32);
+ tcg_temp_free_i32(rd);
tcg_temp_free_i64(rm1);
tcg_temp_free_i64(rm2);
tcg_temp_free_i64(constimm);
@@ -1447,10 +1436,14 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
constimm = tcg_const_i32(imm);
/* Load all inputs first to avoid potential overwrite */
- rm1 = neon_load_reg(a->vm, 0);
- rm2 = neon_load_reg(a->vm, 1);
- rm3 = neon_load_reg(a->vm + 1, 0);
- rm4 = neon_load_reg(a->vm + 1, 1);
+ rm1 = tcg_temp_new_i32();
+ rm2 = tcg_temp_new_i32();
+ rm3 = tcg_temp_new_i32();
+ rm4 = tcg_temp_new_i32();
+ read_neon_element32(rm1, a->vm, 0, MO_32);
+ read_neon_element32(rm2, a->vm, 1, MO_32);
+ read_neon_element32(rm3, a->vm, 2, MO_32);
+ read_neon_element32(rm4, a->vm, 3, MO_32);
rtmp = tcg_temp_new_i64();
shiftfn(rm1, rm1, constimm);
@@ -1460,7 +1453,8 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
tcg_temp_free_i32(rm2);
narrowfn(rm1, cpu_env, rtmp);
- neon_store_reg(a->vd, 0, rm1);
+ write_neon_element32(rm1, a->vd, 0, MO_32);
+ tcg_temp_free_i32(rm1);
shiftfn(rm3, rm3, constimm);
shiftfn(rm4, rm4, constimm);
@@ -1471,7 +1465,8 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
narrowfn(rm3, cpu_env, rtmp);
tcg_temp_free_i64(rtmp);
- neon_store_reg(a->vd, 1, rm3);
+ write_neon_element32(rm3, a->vd, 1, MO_32);
+ tcg_temp_free_i32(rm3);
return true;
}
@@ -1572,8 +1567,10 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
widen_mask = dup_const(a->size + 1, widen_mask);
}
- rm0 = neon_load_reg(a->vm, 0);
- rm1 = neon_load_reg(a->vm, 1);
+ rm0 = tcg_temp_new_i32();
+ rm1 = tcg_temp_new_i32();
+ read_neon_element32(rm0, a->vm, 0, MO_32);
+ read_neon_element32(rm1, a->vm, 1, MO_32);
tmp = tcg_temp_new_i64();
widenfn(tmp, rm0);
@@ -1582,7 +1579,7 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
tcg_gen_shli_i64(tmp, tmp, a->shift);
tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
}
- neon_store_reg64(tmp, a->vd);
+ write_neon_element64(tmp, a->vd, 0, MO_64);
widenfn(tmp, rm1);
tcg_temp_free_i32(rm1);
@@ -1590,7 +1587,7 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
tcg_gen_shli_i64(tmp, tmp, a->shift);
tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
}
- neon_store_reg64(tmp, a->vd + 1);
+ write_neon_element64(tmp, a->vd, 1, MO_64);
tcg_temp_free_i64(tmp);
return true;
}
@@ -1620,8 +1617,8 @@ static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
{
/* FP operations in 2-reg-and-shift group */
int vec_size = a->q ? 16 : 8;
- int rd_ofs = neon_reg_offset(a->vd, 0);
- int rm_ofs = neon_reg_offset(a->vm, 0);
+ int rd_ofs = neon_full_reg_offset(a->vd);
+ int rm_ofs = neon_full_reg_offset(a->vm);
TCGv_ptr fpst;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@@ -1756,7 +1753,7 @@ static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
return true;
}
- reg_ofs = neon_reg_offset(a->vd, 0);
+ reg_ofs = neon_full_reg_offset(a->vd);
vec_size = a->q ? 16 : 8;
imm = asimd_imm_const(a->imm, a->cmode, a->op);
@@ -1791,11 +1788,10 @@ static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
NeonGenWidenFn *widenfn,
NeonGenTwo64OpFn *opfn,
- bool src1_wide)
+ int src1_mop, int src2_mop)
{
/* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */
TCGv_i64 rn0_64, rn1_64, rm_64;
- TCGv_i32 rm;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@@ -1807,12 +1803,12 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
return false;
}
- if (!widenfn || !opfn) {
+ if (!opfn) {
/* size == 3 case, which is an entirely different insn group */
return false;
}
- if ((a->vd & 1) || (src1_wide && (a->vn & 1))) {
+ if ((a->vd & 1) || (src1_mop == MO_Q && (a->vn & 1))) {
return false;
}
@@ -1824,38 +1820,50 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
rn1_64 = tcg_temp_new_i64();
rm_64 = tcg_temp_new_i64();
- if (src1_wide) {
- neon_load_reg64(rn0_64, a->vn);
+ if (src1_mop >= 0) {
+ read_neon_element64(rn0_64, a->vn, 0, src1_mop);
} else {
- TCGv_i32 tmp = neon_load_reg(a->vn, 0);
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vn, 0, MO_32);
widenfn(rn0_64, tmp);
tcg_temp_free_i32(tmp);
}
- rm = neon_load_reg(a->vm, 0);
+ if (src2_mop >= 0) {
+ read_neon_element64(rm_64, a->vm, 0, src2_mop);
+ } else {
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vm, 0, MO_32);
+ widenfn(rm_64, tmp);
+ tcg_temp_free_i32(tmp);
+ }
- widenfn(rm_64, rm);
- tcg_temp_free_i32(rm);
opfn(rn0_64, rn0_64, rm_64);
/*
* Load second pass inputs before storing the first pass result, to
* avoid incorrect results if a narrow input overlaps with the result.
*/
- if (src1_wide) {
- neon_load_reg64(rn1_64, a->vn + 1);
+ if (src1_mop >= 0) {
+ read_neon_element64(rn1_64, a->vn, 1, src1_mop);
} else {
- TCGv_i32 tmp = neon_load_reg(a->vn, 1);
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vn, 1, MO_32);
widenfn(rn1_64, tmp);
tcg_temp_free_i32(tmp);
}
- rm = neon_load_reg(a->vm, 1);
+ if (src2_mop >= 0) {
+ read_neon_element64(rm_64, a->vm, 1, src2_mop);
+ } else {
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vm, 1, MO_32);
+ widenfn(rm_64, tmp);
+ tcg_temp_free_i32(tmp);
+ }
- neon_store_reg64(rn0_64, a->vd);
+ write_neon_element64(rn0_64, a->vd, 0, MO_64);
- widenfn(rm_64, rm);
- tcg_temp_free_i32(rm);
opfn(rn1_64, rn1_64, rm_64);
- neon_store_reg64(rn1_64, a->vd + 1);
+ write_neon_element64(rn1_64, a->vd, 1, MO_64);
tcg_temp_free_i64(rn0_64);
tcg_temp_free_i64(rn1_64);
@@ -1864,14 +1872,13 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
return true;
}
-#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \
+#define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN) \
static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
{ \
static NeonGenWidenFn * const widenfn[] = { \
gen_helper_neon_widen_##S##8, \
gen_helper_neon_widen_##S##16, \
- tcg_gen_##EXT##_i32_i64, \
- NULL, \
+ NULL, NULL, \
}; \
static NeonGenTwo64OpFn * const addfn[] = { \
gen_helper_neon_##OP##l_u16, \
@@ -1879,18 +1886,20 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
tcg_gen_##OP##_i64, \
NULL, \
}; \
- return do_prewiden_3d(s, a, widenfn[a->size], \
- addfn[a->size], SRC1WIDE); \
+ int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1; \
+ return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size], \
+ SRC1WIDE ? MO_Q : narrow_mop, \
+ narrow_mop); \
}
-DO_PREWIDEN(VADDL_S, s, ext, add, false)
-DO_PREWIDEN(VADDL_U, u, extu, add, false)
-DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
-DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
-DO_PREWIDEN(VADDW_S, s, ext, add, true)
-DO_PREWIDEN(VADDW_U, u, extu, add, true)
-DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
-DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
+DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
+DO_PREWIDEN(VADDL_U, u, add, false, 0)
+DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
+DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
+DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
+DO_PREWIDEN(VADDW_U, u, add, true, 0)
+DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
+DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
@@ -1927,23 +1936,25 @@ static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
rd0 = tcg_temp_new_i32();
rd1 = tcg_temp_new_i32();
- neon_load_reg64(rn_64, a->vn);
- neon_load_reg64(rm_64, a->vm);
+ read_neon_element64(rn_64, a->vn, 0, MO_64);
+ read_neon_element64(rm_64, a->vm, 0, MO_64);
opfn(rn_64, rn_64, rm_64);
narrowfn(rd0, rn_64);
- neon_load_reg64(rn_64, a->vn + 1);
- neon_load_reg64(rm_64, a->vm + 1);
+ read_neon_element64(rn_64, a->vn, 1, MO_64);
+ read_neon_element64(rm_64, a->vm, 1, MO_64);
opfn(rn_64, rn_64, rm_64);
narrowfn(rd1, rn_64);
- neon_store_reg(a->vd, 0, rd0);
- neon_store_reg(a->vd, 1, rd1);
+ write_neon_element32(rd0, a->vd, 0, MO_32);
+ write_neon_element32(rd1, a->vd, 1, MO_32);
+ tcg_temp_free_i32(rd0);
+ tcg_temp_free_i32(rd1);
tcg_temp_free_i64(rn_64);
tcg_temp_free_i64(rm_64);
@@ -2018,14 +2029,14 @@ static bool do_long_3d(DisasContext *s, arg_3diff *a,
rd0 = tcg_temp_new_i64();
rd1 = tcg_temp_new_i64();
- rn = neon_load_reg(a->vn, 0);
- rm = neon_load_reg(a->vm, 0);
+ rn = tcg_temp_new_i32();
+ rm = tcg_temp_new_i32();
+ read_neon_element32(rn, a->vn, 0, MO_32);
+ read_neon_element32(rm, a->vm, 0, MO_32);
opfn(rd0, rn, rm);
- tcg_temp_free_i32(rn);
- tcg_temp_free_i32(rm);
- rn = neon_load_reg(a->vn, 1);
- rm = neon_load_reg(a->vm, 1);
+ read_neon_element32(rn, a->vn, 1, MO_32);
+ read_neon_element32(rm, a->vm, 1, MO_32);
opfn(rd1, rn, rm);
tcg_temp_free_i32(rn);
tcg_temp_free_i32(rm);
@@ -2033,18 +2044,15 @@ static bool do_long_3d(DisasContext *s, arg_3diff *a,
/* Don't store results until after all loads: they might overlap */
if (accfn) {
tmp = tcg_temp_new_i64();
- neon_load_reg64(tmp, a->vd);
- accfn(tmp, tmp, rd0);
- neon_store_reg64(tmp, a->vd);
- neon_load_reg64(tmp, a->vd + 1);
- accfn(tmp, tmp, rd1);
- neon_store_reg64(tmp, a->vd + 1);
+ read_neon_element64(tmp, a->vd, 0, MO_64);
+ accfn(rd0, tmp, rd0);
+ read_neon_element64(tmp, a->vd, 1, MO_64);
+ accfn(rd1, tmp, rd1);
tcg_temp_free_i64(tmp);
- } else {
- neon_store_reg64(rd0, a->vd);
- neon_store_reg64(rd1, a->vd + 1);
}
+ write_neon_element64(rd0, a->vd, 0, MO_64);
+ write_neon_element64(rd1, a->vd, 1, MO_64);
tcg_temp_free_i64(rd0);
tcg_temp_free_i64(rd1);
@@ -2300,9 +2308,9 @@ static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
return true;
}
- tcg_gen_gvec_3_ool(neon_reg_offset(a->vd, 0),
- neon_reg_offset(a->vn, 0),
- neon_reg_offset(a->vm, 0),
+ tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd),
+ neon_full_reg_offset(a->vn),
+ neon_full_reg_offset(a->vm),
16, 16, 0, fn_gvec);
return true;
}
@@ -2327,16 +2335,16 @@ static void gen_neon_dup_high16(TCGv_i32 var)
static inline TCGv_i32 neon_get_scalar(int size, int reg)
{
- TCGv_i32 tmp;
- if (size == 1) {
- tmp = neon_load_reg(reg & 7, reg >> 4);
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ if (size == MO_16) {
+ read_neon_element32(tmp, reg & 7, reg >> 4, MO_32);
if (reg & 8) {
gen_neon_dup_high16(tmp);
} else {
gen_neon_dup_low16(tmp);
}
} else {
- tmp = neon_load_reg(reg & 15, reg >> 4);
+ read_neon_element32(tmp, reg & 15, reg >> 4, MO_32);
}
return tmp;
}
@@ -2350,7 +2358,7 @@ static bool do_2scalar(DisasContext *s, arg_2scalar *a,
* perform an accumulation operation of that result into the
* destination.
*/
- TCGv_i32 scalar;
+ TCGv_i32 scalar, tmp;
int pass;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@@ -2377,17 +2385,20 @@ static bool do_2scalar(DisasContext *s, arg_2scalar *a,
}
scalar = neon_get_scalar(a->size, a->vm);
+ tmp = tcg_temp_new_i32();
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
- TCGv_i32 tmp = neon_load_reg(a->vn, pass);
+ read_neon_element32(tmp, a->vn, pass, MO_32);
opfn(tmp, tmp, scalar);
if (accfn) {
- TCGv_i32 rd = neon_load_reg(a->vd, pass);
+ TCGv_i32 rd = tcg_temp_new_i32();
+ read_neon_element32(rd, a->vd, pass, MO_32);
accfn(tmp, rd, tmp);
tcg_temp_free_i32(rd);
}
- neon_store_reg(a->vd, pass, tmp);
+ write_neon_element32(tmp, a->vd, pass, MO_32);
}
+ tcg_temp_free_i32(tmp);
tcg_temp_free_i32(scalar);
return true;
}
@@ -2445,8 +2456,8 @@ static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
{
/* Two registers and a scalar, using gvec */
int vec_size = a->q ? 16 : 8;
- int rd_ofs = neon_reg_offset(a->vd, 0);
- int rn_ofs = neon_reg_offset(a->vn, 0);
+ int rd_ofs = neon_full_reg_offset(a->vd);
+ int rn_ofs = neon_full_reg_offset(a->vn);
int rm_ofs;
int idx;
TCGv_ptr fpstatus;
@@ -2477,7 +2488,7 @@ static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
/* a->vm is M:Vm, which encodes both register and index */
idx = extract32(a->vm, a->size + 2, 2);
a->vm = extract32(a->vm, 0, a->size + 2);
- rm_ofs = neon_reg_offset(a->vm, 0);
+ rm_ofs = neon_full_reg_offset(a->vm);
fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
@@ -2542,7 +2553,7 @@ static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
* performs a kind of fused op-then-accumulate using a helper
* function that takes all of rd, rn and the scalar at once.
*/
- TCGv_i32 scalar;
+ TCGv_i32 scalar, rn, rd;
int pass;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@@ -2573,14 +2584,17 @@ static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
}
scalar = neon_get_scalar(a->size, a->vm);
+ rn = tcg_temp_new_i32();
+ rd = tcg_temp_new_i32();
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
- TCGv_i32 rn = neon_load_reg(a->vn, pass);
- TCGv_i32 rd = neon_load_reg(a->vd, pass);
+ read_neon_element32(rn, a->vn, pass, MO_32);
+ read_neon_element32(rd, a->vd, pass, MO_32);
opfn(rd, cpu_env, rn, scalar, rd);
- tcg_temp_free_i32(rn);
- neon_store_reg(a->vd, pass, rd);
+ write_neon_element32(rd, a->vd, pass, MO_32);
}
+ tcg_temp_free_i32(rn);
+ tcg_temp_free_i32(rd);
tcg_temp_free_i32(scalar);
return true;
@@ -2647,12 +2661,12 @@ static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
scalar = neon_get_scalar(a->size, a->vm);
/* Load all inputs before writing any outputs, in case of overlap */
- rn = neon_load_reg(a->vn, 0);
+ rn = tcg_temp_new_i32();
+ read_neon_element32(rn, a->vn, 0, MO_32);
rn0_64 = tcg_temp_new_i64();
opfn(rn0_64, rn, scalar);
- tcg_temp_free_i32(rn);
- rn = neon_load_reg(a->vn, 1);
+ read_neon_element32(rn, a->vn, 1, MO_32);
rn1_64 = tcg_temp_new_i64();
opfn(rn1_64, rn, scalar);
tcg_temp_free_i32(rn);
@@ -2660,17 +2674,15 @@ static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
if (accfn) {
TCGv_i64 t64 = tcg_temp_new_i64();
- neon_load_reg64(t64, a->vd);
- accfn(t64, t64, rn0_64);
- neon_store_reg64(t64, a->vd);
- neon_load_reg64(t64, a->vd + 1);
- accfn(t64, t64, rn1_64);
- neon_store_reg64(t64, a->vd + 1);
+ read_neon_element64(t64, a->vd, 0, MO_64);
+ accfn(rn0_64, t64, rn0_64);
+ read_neon_element64(t64, a->vd, 1, MO_64);
+ accfn(rn1_64, t64, rn1_64);
tcg_temp_free_i64(t64);
- } else {
- neon_store_reg64(rn0_64, a->vd);
- neon_store_reg64(rn1_64, a->vd + 1);
}
+
+ write_neon_element64(rn0_64, a->vd, 0, MO_64);
+ write_neon_element64(rn1_64, a->vd, 1, MO_64);
tcg_temp_free_i64(rn0_64);
tcg_temp_free_i64(rn1_64);
return true;
@@ -2803,10 +2815,10 @@ static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
right = tcg_temp_new_i64();
dest = tcg_temp_new_i64();
- neon_load_reg64(right, a->vn);
- neon_load_reg64(left, a->vm);
+ read_neon_element64(right, a->vn, 0, MO_64);
+ read_neon_element64(left, a->vm, 0, MO_64);
tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
- neon_store_reg64(dest, a->vd);
+ write_neon_element64(dest, a->vd, 0, MO_64);
tcg_temp_free_i64(left);
tcg_temp_free_i64(right);
@@ -2822,21 +2834,21 @@ static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
destright = tcg_temp_new_i64();
if (a->imm < 8) {
- neon_load_reg64(right, a->vn);
- neon_load_reg64(middle, a->vn + 1);
+ read_neon_element64(right, a->vn, 0, MO_64);
+ read_neon_element64(middle, a->vn, 1, MO_64);
tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
- neon_load_reg64(left, a->vm);
+ read_neon_element64(left, a->vm, 0, MO_64);
tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
} else {
- neon_load_reg64(right, a->vn + 1);
- neon_load_reg64(middle, a->vm);
+ read_neon_element64(right, a->vn, 1, MO_64);
+ read_neon_element64(middle, a->vm, 0, MO_64);
tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
- neon_load_reg64(left, a->vm + 1);
+ read_neon_element64(left, a->vm, 1, MO_64);
tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
}
- neon_store_reg64(destright, a->vd);
- neon_store_reg64(destleft, a->vd + 1);
+ write_neon_element64(destright, a->vd, 0, MO_64);
+ write_neon_element64(destleft, a->vd, 1, MO_64);
tcg_temp_free_i64(destright);
tcg_temp_free_i64(destleft);
@@ -2876,30 +2888,34 @@ static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
return false;
}
n <<= 3;
+ tmp = tcg_temp_new_i32();
if (a->op) {
- tmp = neon_load_reg(a->vd, 0);
+ read_neon_element32(tmp, a->vd, 0, MO_32);
} else {
- tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, 0);
}
- tmp2 = neon_load_reg(a->vm, 0);
+ tmp2 = tcg_temp_new_i32();
+ read_neon_element32(tmp2, a->vm, 0, MO_32);
ptr1 = vfp_reg_ptr(true, a->vn);
tmp4 = tcg_const_i32(n);
gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp4);
- tcg_temp_free_i32(tmp);
+
if (a->op) {
- tmp = neon_load_reg(a->vd, 1);
+ read_neon_element32(tmp, a->vd, 1, MO_32);
} else {
- tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, 0);
}
- tmp3 = neon_load_reg(a->vm, 1);
+ tmp3 = tcg_temp_new_i32();
+ read_neon_element32(tmp3, a->vm, 1, MO_32);
gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp4);
+ tcg_temp_free_i32(tmp);
tcg_temp_free_i32(tmp4);
tcg_temp_free_ptr(ptr1);
- neon_store_reg(a->vd, 0, tmp2);
- neon_store_reg(a->vd, 1, tmp3);
- tcg_temp_free_i32(tmp);
+
+ write_neon_element32(tmp2, a->vd, 0, MO_32);
+ write_neon_element32(tmp3, a->vd, 1, MO_32);
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp3);
return true;
}
@@ -2923,7 +2939,7 @@ static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
return true;
}
- tcg_gen_gvec_dup_mem(a->size, neon_reg_offset(a->vd, 0),
+ tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd),
neon_element_offset(a->vm, a->index, a->size),
a->q ? 16 : 8, a->q ? 16 : 8);
return true;
@@ -2932,6 +2948,7 @@ static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
{
int pass, half;
+ TCGv_i32 tmp[2];
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@@ -2955,11 +2972,12 @@ static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
return true;
}
- for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
- TCGv_i32 tmp[2];
+ tmp[0] = tcg_temp_new_i32();
+ tmp[1] = tcg_temp_new_i32();
+ for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
for (half = 0; half < 2; half++) {
- tmp[half] = neon_load_reg(a->vm, pass * 2 + half);
+ read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
switch (a->size) {
case 0:
tcg_gen_bswap32_i32(tmp[half], tmp[half]);
@@ -2973,9 +2991,12 @@ static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
g_assert_not_reached();
}
}
- neon_store_reg(a->vd, pass * 2, tmp[1]);
- neon_store_reg(a->vd, pass * 2 + 1, tmp[0]);
+ write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
+ write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
}
+
+ tcg_temp_free_i32(tmp[0]);
+ tcg_temp_free_i32(tmp[1]);
return true;
}
@@ -3020,23 +3041,25 @@ static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
rm0_64 = tcg_temp_new_i64();
rm1_64 = tcg_temp_new_i64();
rd_64 = tcg_temp_new_i64();
- tmp = neon_load_reg(a->vm, pass * 2);
+
+ tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vm, pass * 2, MO_32);
widenfn(rm0_64, tmp);
- tcg_temp_free_i32(tmp);
- tmp = neon_load_reg(a->vm, pass * 2 + 1);
+ read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
widenfn(rm1_64, tmp);
tcg_temp_free_i32(tmp);
+
opfn(rd_64, rm0_64, rm1_64);
tcg_temp_free_i64(rm0_64);
tcg_temp_free_i64(rm1_64);
if (accfn) {
TCGv_i64 tmp64 = tcg_temp_new_i64();
- neon_load_reg64(tmp64, a->vd + pass);
+ read_neon_element64(tmp64, a->vd, pass, MO_64);
accfn(rd_64, tmp64, rd_64);
tcg_temp_free_i64(tmp64);
}
- neon_store_reg64(rd_64, a->vd + pass);
+ write_neon_element64(rd_64, a->vd, pass, MO_64);
tcg_temp_free_i64(rd_64);
}
return true;
@@ -3234,12 +3257,14 @@ static bool do_vmovn(DisasContext *s, arg_2misc *a,
rd0 = tcg_temp_new_i32();
rd1 = tcg_temp_new_i32();
- neon_load_reg64(rm, a->vm);
+ read_neon_element64(rm, a->vm, 0, MO_64);
narrowfn(rd0, cpu_env, rm);
- neon_load_reg64(rm, a->vm + 1);
+ read_neon_element64(rm, a->vm, 1, MO_64);
narrowfn(rd1, cpu_env, rm);
- neon_store_reg(a->vd, 0, rd0);
- neon_store_reg(a->vd, 1, rd1);
+ write_neon_element32(rd0, a->vd, 0, MO_32);
+ write_neon_element32(rd1, a->vd, 1, MO_32);
+ tcg_temp_free_i32(rd0);
+ tcg_temp_free_i32(rd1);
tcg_temp_free_i64(rm);
return true;
}
@@ -3296,16 +3321,18 @@ static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
}
rd = tcg_temp_new_i64();
+ rm0 = tcg_temp_new_i32();
+ rm1 = tcg_temp_new_i32();
- rm0 = neon_load_reg(a->vm, 0);
- rm1 = neon_load_reg(a->vm, 1);
+ read_neon_element32(rm0, a->vm, 0, MO_32);
+ read_neon_element32(rm1, a->vm, 1, MO_32);
widenfn(rd, rm0);
tcg_gen_shli_i64(rd, rd, 8 << a->size);
- neon_store_reg64(rd, a->vd);
+ write_neon_element64(rd, a->vd, 0, MO_64);
widenfn(rd, rm1);
tcg_gen_shli_i64(rd, rd, 8 << a->size);
- neon_store_reg64(rd, a->vd + 1);
+ write_neon_element64(rd, a->vd, 1, MO_64);
tcg_temp_free_i64(rd);
tcg_temp_free_i32(rm0);
@@ -3339,21 +3366,25 @@ static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
fpst = fpstatus_ptr(FPST_STD);
ahp = get_ahp_flag();
- tmp = neon_load_reg(a->vm, 0);
+ tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vm, 0, MO_32);
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
- tmp2 = neon_load_reg(a->vm, 1);
+ tmp2 = tcg_temp_new_i32();
+ read_neon_element32(tmp2, a->vm, 1, MO_32);
gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
tcg_gen_shli_i32(tmp2, tmp2, 16);
tcg_gen_or_i32(tmp2, tmp2, tmp);
- tcg_temp_free_i32(tmp);
- tmp = neon_load_reg(a->vm, 2);
+ read_neon_element32(tmp, a->vm, 2, MO_32);
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
- tmp3 = neon_load_reg(a->vm, 3);
- neon_store_reg(a->vd, 0, tmp2);
+ tmp3 = tcg_temp_new_i32();
+ read_neon_element32(tmp3, a->vm, 3, MO_32);
+ write_neon_element32(tmp2, a->vd, 0, MO_32);
+ tcg_temp_free_i32(tmp2);
gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
tcg_gen_shli_i32(tmp3, tmp3, 16);
tcg_gen_or_i32(tmp3, tmp3, tmp);
- neon_store_reg(a->vd, 1, tmp3);
+ write_neon_element32(tmp3, a->vd, 1, MO_32);
+ tcg_temp_free_i32(tmp3);
tcg_temp_free_i32(tmp);
tcg_temp_free_i32(ahp);
tcg_temp_free_ptr(fpst);
@@ -3388,21 +3419,25 @@ static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
fpst = fpstatus_ptr(FPST_STD);
ahp = get_ahp_flag();
tmp3 = tcg_temp_new_i32();
- tmp = neon_load_reg(a->vm, 0);
- tmp2 = neon_load_reg(a->vm, 1);
+ tmp2 = tcg_temp_new_i32();
+ tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vm, 0, MO_32);
+ read_neon_element32(tmp2, a->vm, 1, MO_32);
tcg_gen_ext16u_i32(tmp3, tmp);
gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
- neon_store_reg(a->vd, 0, tmp3);
+ write_neon_element32(tmp3, a->vd, 0, MO_32);
tcg_gen_shri_i32(tmp, tmp, 16);
gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
- neon_store_reg(a->vd, 1, tmp);
- tmp3 = tcg_temp_new_i32();
+ write_neon_element32(tmp, a->vd, 1, MO_32);
+ tcg_temp_free_i32(tmp);
tcg_gen_ext16u_i32(tmp3, tmp2);
gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
- neon_store_reg(a->vd, 2, tmp3);
+ write_neon_element32(tmp3, a->vd, 2, MO_32);
+ tcg_temp_free_i32(tmp3);
tcg_gen_shri_i32(tmp2, tmp2, 16);
gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
- neon_store_reg(a->vd, 3, tmp2);
+ write_neon_element32(tmp2, a->vd, 3, MO_32);
+ tcg_temp_free_i32(tmp2);
tcg_temp_free_i32(ahp);
tcg_temp_free_ptr(fpst);
@@ -3412,8 +3447,8 @@ static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
{
int vec_size = a->q ? 16 : 8;
- int rd_ofs = neon_reg_offset(a->vd, 0);
- int rm_ofs = neon_reg_offset(a->vm, 0);
+ int rd_ofs = neon_full_reg_offset(a->vd);
+ int rm_ofs = neon_full_reg_offset(a->vm);
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@@ -3508,6 +3543,7 @@ DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
{
+ TCGv_i32 tmp;
int pass;
/* Handle a 2-reg-misc operation by iterating 32 bits at a time */
@@ -3533,11 +3569,13 @@ static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
return true;
}
+ tmp = tcg_temp_new_i32();
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
- TCGv_i32 tmp = neon_load_reg(a->vm, pass);
+ read_neon_element32(tmp, a->vm, pass, MO_32);
fn(tmp, tmp);
- neon_store_reg(a->vd, pass, tmp);
+ write_neon_element32(tmp, a->vd, pass, MO_32);
}
+ tcg_temp_free_i32(tmp);
return true;
}
@@ -3812,10 +3850,10 @@ static bool trans_VSWP(DisasContext *s, arg_2misc *a)
rm = tcg_temp_new_i64();
rd = tcg_temp_new_i64();
for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
- neon_load_reg64(rm, a->vm + pass);
- neon_load_reg64(rd, a->vd + pass);
- neon_store_reg64(rm, a->vd + pass);
- neon_store_reg64(rd, a->vm + pass);
+ read_neon_element64(rm, a->vm, pass, MO_64);
+ read_neon_element64(rd, a->vd, pass, MO_64);
+ write_neon_element64(rm, a->vd, pass, MO_64);
+ write_neon_element64(rd, a->vm, pass, MO_64);
}
tcg_temp_free_i64(rm);
tcg_temp_free_i64(rd);
@@ -3890,25 +3928,29 @@ static bool trans_VTRN(DisasContext *s, arg_2misc *a)
return true;
}
- if (a->size == 2) {
+ tmp = tcg_temp_new_i32();
+ tmp2 = tcg_temp_new_i32();
+ if (a->size == MO_32) {
for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
- tmp = neon_load_reg(a->vm, pass);
- tmp2 = neon_load_reg(a->vd, pass + 1);
- neon_store_reg(a->vm, pass, tmp2);
- neon_store_reg(a->vd, pass + 1, tmp);
+ read_neon_element32(tmp, a->vm, pass, MO_32);
+ read_neon_element32(tmp2, a->vd, pass + 1, MO_32);
+ write_neon_element32(tmp2, a->vm, pass, MO_32);
+ write_neon_element32(tmp, a->vd, pass + 1, MO_32);
}
} else {
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
- tmp = neon_load_reg(a->vm, pass);
- tmp2 = neon_load_reg(a->vd, pass);
- if (a->size == 0) {
+ read_neon_element32(tmp, a->vm, pass, MO_32);
+ read_neon_element32(tmp2, a->vd, pass, MO_32);
+ if (a->size == MO_8) {
gen_neon_trn_u8(tmp, tmp2);
} else {
gen_neon_trn_u16(tmp, tmp2);
}
- neon_store_reg(a->vm, pass, tmp2);
- neon_store_reg(a->vd, pass, tmp);
+ write_neon_element32(tmp2, a->vm, pass, MO_32);
+ write_neon_element32(tmp, a->vd, pass, MO_32);
}
}
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(tmp2);
return true;
}
diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc
index a7ed9bc81b..f966de5b1f 100644
--- a/target/arm/translate-vfp.c.inc
+++ b/target/arm/translate-vfp.c.inc
@@ -236,8 +236,8 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
tcg_gen_ext_i32_i64(nf, cpu_NF);
tcg_gen_ext_i32_i64(vf, cpu_VF);
- neon_load_reg64(frn, rn);
- neon_load_reg64(frm, rm);
+ vfp_load_reg64(frn, rn);
+ vfp_load_reg64(frm, rm);
switch (a->cc) {
case 0: /* eq: Z */
tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
@@ -264,7 +264,7 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
tcg_temp_free_i64(tmp);
break;
}
- neon_store_reg64(dest, rd);
+ vfp_store_reg64(dest, rd);
tcg_temp_free_i64(frn);
tcg_temp_free_i64(frm);
tcg_temp_free_i64(dest);
@@ -283,8 +283,8 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
frn = tcg_temp_new_i32();
frm = tcg_temp_new_i32();
dest = tcg_temp_new_i32();
- neon_load_reg32(frn, rn);
- neon_load_reg32(frm, rm);
+ vfp_load_reg32(frn, rn);
+ vfp_load_reg32(frm, rm);
switch (a->cc) {
case 0: /* eq: Z */
tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
@@ -315,7 +315,7 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
if (sz == 1) {
tcg_gen_andi_i32(dest, dest, 0xffff);
}
- neon_store_reg32(dest, rd);
+ vfp_store_reg32(dest, rd);
tcg_temp_free_i32(frn);
tcg_temp_free_i32(frm);
tcg_temp_free_i32(dest);
@@ -385,9 +385,9 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
TCGv_i64 tcg_res;
tcg_op = tcg_temp_new_i64();
tcg_res = tcg_temp_new_i64();
- neon_load_reg64(tcg_op, rm);
+ vfp_load_reg64(tcg_op, rm);
gen_helper_rintd(tcg_res, tcg_op, fpst);
- neon_store_reg64(tcg_res, rd);
+ vfp_store_reg64(tcg_res, rd);
tcg_temp_free_i64(tcg_op);
tcg_temp_free_i64(tcg_res);
} else {
@@ -395,13 +395,13 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
TCGv_i32 tcg_res;
tcg_op = tcg_temp_new_i32();
tcg_res = tcg_temp_new_i32();
- neon_load_reg32(tcg_op, rm);
+ vfp_load_reg32(tcg_op, rm);
if (sz == 1) {
gen_helper_rinth(tcg_res, tcg_op, fpst);
} else {
gen_helper_rints(tcg_res, tcg_op, fpst);
}
- neon_store_reg32(tcg_res, rd);
+ vfp_store_reg32(tcg_res, rd);
tcg_temp_free_i32(tcg_op);
tcg_temp_free_i32(tcg_res);
}
@@ -463,14 +463,14 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
tcg_double = tcg_temp_new_i64();
tcg_res = tcg_temp_new_i64();
tcg_tmp = tcg_temp_new_i32();
- neon_load_reg64(tcg_double, rm);
+ vfp_load_reg64(tcg_double, rm);
if (is_signed) {
gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
} else {
gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
}
tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
- neon_store_reg32(tcg_tmp, rd);
+ vfp_store_reg32(tcg_tmp, rd);
tcg_temp_free_i32(tcg_tmp);
tcg_temp_free_i64(tcg_res);
tcg_temp_free_i64(tcg_double);
@@ -478,7 +478,7 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
TCGv_i32 tcg_single, tcg_res;
tcg_single = tcg_temp_new_i32();
tcg_res = tcg_temp_new_i32();
- neon_load_reg32(tcg_single, rm);
+ vfp_load_reg32(tcg_single, rm);
if (sz == 1) {
if (is_signed) {
gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
@@ -492,7 +492,7 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
}
}
- neon_store_reg32(tcg_res, rd);
+ vfp_store_reg32(tcg_res, rd);
tcg_temp_free_i32(tcg_res);
tcg_temp_free_i32(tcg_single);
}
@@ -511,11 +511,9 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
{
/* VMOV scalar to general purpose register */
TCGv_i32 tmp;
- int pass;
- uint32_t offset;
- /* SIZE == 2 is a VFP instruction; otherwise NEON. */
- if (a->size == 2
+ /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
+ if (a->size == MO_32
? !dc_isar_feature(aa32_fpsp_v2, s)
: !arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@@ -526,44 +524,12 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
return false;
}
- offset = a->index << a->size;
- pass = extract32(offset, 2, 1);
- offset = extract32(offset, 0, 2) * 8;
-
if (!vfp_access_check(s)) {
return true;
}
- tmp = neon_load_reg(a->vn, pass);
- switch (a->size) {
- case 0:
- if (offset) {
- tcg_gen_shri_i32(tmp, tmp, offset);
- }
- if (a->u) {
- gen_uxtb(tmp);
- } else {
- gen_sxtb(tmp);
- }
- break;
- case 1:
- if (a->u) {
- if (offset) {
- tcg_gen_shri_i32(tmp, tmp, 16);
- } else {
- gen_uxth(tmp);
- }
- } else {
- if (offset) {
- tcg_gen_sari_i32(tmp, tmp, 16);
- } else {
- gen_sxth(tmp);
- }
- }
- break;
- case 2:
- break;
- }
+ tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN));
store_reg(s, a->rt, tmp);
return true;
@@ -572,12 +538,10 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
{
/* VMOV general purpose register to scalar */
- TCGv_i32 tmp, tmp2;
- int pass;
- uint32_t offset;
+ TCGv_i32 tmp;
- /* SIZE == 2 is a VFP instruction; otherwise NEON. */
- if (a->size == 2
+ /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
+ if (a->size == MO_32
? !dc_isar_feature(aa32_fpsp_v2, s)
: !arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@@ -588,30 +552,13 @@ static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
return false;
}
- offset = a->index << a->size;
- pass = extract32(offset, 2, 1);
- offset = extract32(offset, 0, 2) * 8;
-
if (!vfp_access_check(s)) {
return true;
}
tmp = load_reg(s, a->rt);
- switch (a->size) {
- case 0:
- tmp2 = neon_load_reg(a->vn, pass);
- tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
- tcg_temp_free_i32(tmp2);
- break;
- case 1:
- tmp2 = neon_load_reg(a->vn, pass);
- tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
- tcg_temp_free_i32(tmp2);
- break;
- case 2:
- break;
- }
- neon_store_reg(a->vn, pass, tmp);
+ write_neon_element32(tmp, a->vn, a->index, a->size);
+ tcg_temp_free_i32(tmp);
return true;
}
@@ -653,7 +600,7 @@ static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
}
tmp = load_reg(s, a->rt);
- tcg_gen_gvec_dup_i32(size, neon_reg_offset(a->vn, 0),
+ tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
vec_size, vec_size, tmp);
tcg_temp_free_i32(tmp);
@@ -829,14 +776,14 @@ static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
if (a->l) {
/* VFP to general purpose register */
tmp = tcg_temp_new_i32();
- neon_load_reg32(tmp, a->vn);
+ vfp_load_reg32(tmp, a->vn);
tcg_gen_andi_i32(tmp, tmp, 0xffff);
store_reg(s, a->rt, tmp);
} else {
/* general purpose register to VFP */
tmp = load_reg(s, a->rt);
tcg_gen_andi_i32(tmp, tmp, 0xffff);
- neon_store_reg32(tmp, a->vn);
+ vfp_store_reg32(tmp, a->vn);
tcg_temp_free_i32(tmp);
}
@@ -858,7 +805,7 @@ static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
if (a->l) {
/* VFP to general purpose register */
tmp = tcg_temp_new_i32();
- neon_load_reg32(tmp, a->vn);
+ vfp_load_reg32(tmp, a->vn);
if (a->rt == 15) {
/* Set the 4 flag bits in the CPSR. */
gen_set_nzcv(tmp);
@@ -869,7 +816,7 @@ static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
} else {
/* general purpose register to VFP */
tmp = load_reg(s, a->rt);
- neon_store_reg32(tmp, a->vn);
+ vfp_store_reg32(tmp, a->vn);
tcg_temp_free_i32(tmp);
}
@@ -895,18 +842,18 @@ static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
if (a->op) {
/* fpreg to gpreg */
tmp = tcg_temp_new_i32();
- neon_load_reg32(tmp, a->vm);
+ vfp_load_reg32(tmp, a->vm);
store_reg(s, a->rt, tmp);
tmp = tcg_temp_new_i32();
- neon_load_reg32(tmp, a->vm + 1);
+ vfp_load_reg32(tmp, a->vm + 1);
store_reg(s, a->rt2, tmp);
} else {
/* gpreg to fpreg */
tmp = load_reg(s, a->rt);
- neon_store_reg32(tmp, a->vm);
+ vfp_store_reg32(tmp, a->vm);
tcg_temp_free_i32(tmp);
tmp = load_reg(s, a->rt2);
- neon_store_reg32(tmp, a->vm + 1);
+ vfp_store_reg32(tmp, a->vm + 1);
tcg_temp_free_i32(tmp);
}
@@ -938,18 +885,18 @@ static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
if (a->op) {
/* fpreg to gpreg */
tmp = tcg_temp_new_i32();
- neon_load_reg32(tmp, a->vm * 2);
+ vfp_load_reg32(tmp, a->vm * 2);
store_reg(s, a->rt, tmp);
tmp = tcg_temp_new_i32();
- neon_load_reg32(tmp, a->vm * 2 + 1);
+ vfp_load_reg32(tmp, a->vm * 2 + 1);
store_reg(s, a->rt2, tmp);
} else {
/* gpreg to fpreg */
tmp = load_reg(s, a->rt);
- neon_store_reg32(tmp, a->vm * 2);
+ vfp_store_reg32(tmp, a->vm * 2);
tcg_temp_free_i32(tmp);
tmp = load_reg(s, a->rt2);
- neon_store_reg32(tmp, a->vm * 2 + 1);
+ vfp_store_reg32(tmp, a->vm * 2 + 1);
tcg_temp_free_i32(tmp);
}
@@ -980,9 +927,9 @@ static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
tmp = tcg_temp_new_i32();
if (a->l) {
gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
- neon_store_reg32(tmp, a->vd);
+ vfp_store_reg32(tmp, a->vd);
} else {
- neon_load_reg32(tmp, a->vd);
+ vfp_load_reg32(tmp, a->vd);
gen_aa32_st16(s, tmp, addr, get_mem_index(s));
}
tcg_temp_free_i32(tmp);
@@ -1014,9 +961,9 @@ static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
tmp = tcg_temp_new_i32();
if (a->l) {
gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- neon_store_reg32(tmp, a->vd);
+ vfp_store_reg32(tmp, a->vd);
} else {
- neon_load_reg32(tmp, a->vd);
+ vfp_load_reg32(tmp, a->vd);
gen_aa32_st32(s, tmp, addr, get_mem_index(s));
}
tcg_temp_free_i32(tmp);
@@ -1055,9 +1002,9 @@ static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
tmp = tcg_temp_new_i64();
if (a->l) {
gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
- neon_store_reg64(tmp, a->vd);
+ vfp_store_reg64(tmp, a->vd);
} else {
- neon_load_reg64(tmp, a->vd);
+ vfp_load_reg64(tmp, a->vd);
gen_aa32_st64(s, tmp, addr, get_mem_index(s));
}
tcg_temp_free_i64(tmp);
@@ -1119,10 +1066,10 @@ static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
if (a->l) {
/* load */
gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- neon_store_reg32(tmp, a->vd + i);
+ vfp_store_reg32(tmp, a->vd + i);
} else {
/* store */
- neon_load_reg32(tmp, a->vd + i);
+ vfp_load_reg32(tmp, a->vd + i);
gen_aa32_st32(s, tmp, addr, get_mem_index(s));
}
tcg_gen_addi_i32(addr, addr, offset);
@@ -1202,10 +1149,10 @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
if (a->l) {
/* load */
gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
- neon_store_reg64(tmp, a->vd + i);
+ vfp_store_reg64(tmp, a->vd + i);
} else {
/* store */
- neon_load_reg64(tmp, a->vd + i);
+ vfp_load_reg64(tmp, a->vd + i);
gen_aa32_st64(s, tmp, addr, get_mem_index(s));
}
tcg_gen_addi_i32(addr, addr, offset);
@@ -1338,15 +1285,15 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
fd = tcg_temp_new_i32();
fpst = fpstatus_ptr(FPST_FPCR);
- neon_load_reg32(f0, vn);
- neon_load_reg32(f1, vm);
+ vfp_load_reg32(f0, vn);
+ vfp_load_reg32(f1, vm);
for (;;) {
if (reads_vd) {
- neon_load_reg32(fd, vd);
+ vfp_load_reg32(fd, vd);
}
fn(fd, f0, f1, fpst);
- neon_store_reg32(fd, vd);
+ vfp_store_reg32(fd, vd);
if (veclen == 0) {
break;
@@ -1356,10 +1303,10 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
veclen--;
vd = vfp_advance_sreg(vd, delta_d);
vn = vfp_advance_sreg(vn, delta_d);
- neon_load_reg32(f0, vn);
+ vfp_load_reg32(f0, vn);
if (delta_m) {
vm = vfp_advance_sreg(vm, delta_m);
- neon_load_reg32(f1, vm);
+ vfp_load_reg32(f1, vm);
}
}
@@ -1402,14 +1349,14 @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
fd = tcg_temp_new_i32();
fpst = fpstatus_ptr(FPST_FPCR_F16);
- neon_load_reg32(f0, vn);
- neon_load_reg32(f1, vm);
+ vfp_load_reg32(f0, vn);
+ vfp_load_reg32(f1, vm);
if (reads_vd) {
- neon_load_reg32(fd, vd);
+ vfp_load_reg32(fd, vd);
}
fn(fd, f0, f1, fpst);
- neon_store_reg32(fd, vd);
+ vfp_store_reg32(fd, vd);
tcg_temp_free_i32(f0);
tcg_temp_free_i32(f1);
@@ -1469,15 +1416,15 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
fd = tcg_temp_new_i64();
fpst = fpstatus_ptr(FPST_FPCR);
- neon_load_reg64(f0, vn);
- neon_load_reg64(f1, vm);
+ vfp_load_reg64(f0, vn);
+ vfp_load_reg64(f1, vm);
for (;;) {
if (reads_vd) {
- neon_load_reg64(fd, vd);
+ vfp_load_reg64(fd, vd);
}
fn(fd, f0, f1, fpst);
- neon_store_reg64(fd, vd);
+ vfp_store_reg64(fd, vd);
if (veclen == 0) {
break;
@@ -1486,10 +1433,10 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
veclen--;
vd = vfp_advance_dreg(vd, delta_d);
vn = vfp_advance_dreg(vn, delta_d);
- neon_load_reg64(f0, vn);
+ vfp_load_reg64(f0, vn);
if (delta_m) {
vm = vfp_advance_dreg(vm, delta_m);
- neon_load_reg64(f1, vm);
+ vfp_load_reg64(f1, vm);
}
}
@@ -1542,11 +1489,11 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
f0 = tcg_temp_new_i32();
fd = tcg_temp_new_i32();
- neon_load_reg32(f0, vm);
+ vfp_load_reg32(f0, vm);
for (;;) {
fn(fd, f0);
- neon_store_reg32(fd, vd);
+ vfp_store_reg32(fd, vd);
if (veclen == 0) {
break;
@@ -1556,7 +1503,7 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
/* single source one-many */
while (veclen--) {
vd = vfp_advance_sreg(vd, delta_d);
- neon_store_reg32(fd, vd);
+ vfp_store_reg32(fd, vd);
}
break;
}
@@ -1565,7 +1512,7 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
veclen--;
vd = vfp_advance_sreg(vd, delta_d);
vm = vfp_advance_sreg(vm, delta_m);
- neon_load_reg32(f0, vm);
+ vfp_load_reg32(f0, vm);
}
tcg_temp_free_i32(f0);
@@ -1598,9 +1545,9 @@ static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
}
f0 = tcg_temp_new_i32();
- neon_load_reg32(f0, vm);
+ vfp_load_reg32(f0, vm);
fn(f0, f0);
- neon_store_reg32(f0, vd);
+ vfp_store_reg32(f0, vd);
tcg_temp_free_i32(f0);
return true;
@@ -1652,11 +1599,11 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
f0 = tcg_temp_new_i64();
fd = tcg_temp_new_i64();
- neon_load_reg64(f0, vm);
+ vfp_load_reg64(f0, vm);
for (;;) {
fn(fd, f0);
- neon_store_reg64(fd, vd);
+ vfp_store_reg64(fd, vd);
if (veclen == 0) {
break;
@@ -1666,7 +1613,7 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
/* single source one-many */
while (veclen--) {
vd = vfp_advance_dreg(vd, delta_d);
- neon_store_reg64(fd, vd);
+ vfp_store_reg64(fd, vd);
}
break;
}
@@ -1675,7 +1622,7 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
veclen--;
vd = vfp_advance_dreg(vd, delta_d);
/* Step the source register; vd was already advanced on the line above. */
vm = vfp_advance_dreg(vm, delta_m);
- neon_load_reg64(f0, vm);
+ vfp_load_reg64(f0, vm);
}
tcg_temp_free_i64(f0);
@@ -2090,20 +2037,20 @@ static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
vm = tcg_temp_new_i32();
vd = tcg_temp_new_i32();
- neon_load_reg32(vn, a->vn);
- neon_load_reg32(vm, a->vm);
+ vfp_load_reg32(vn, a->vn);
+ vfp_load_reg32(vm, a->vm);
if (neg_n) {
/* VFNMS, VFMS */
gen_helper_vfp_negh(vn, vn);
}
- neon_load_reg32(vd, a->vd);
+ vfp_load_reg32(vd, a->vd);
if (neg_d) {
/* VFNMA, VFNMS */
gen_helper_vfp_negh(vd, vd);
}
fpst = fpstatus_ptr(FPST_FPCR_F16);
gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
- neon_store_reg32(vd, a->vd);
+ vfp_store_reg32(vd, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(vn);
@@ -2155,20 +2102,20 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
vm = tcg_temp_new_i32();
vd = tcg_temp_new_i32();
- neon_load_reg32(vn, a->vn);
- neon_load_reg32(vm, a->vm);
+ vfp_load_reg32(vn, a->vn);
+ vfp_load_reg32(vm, a->vm);
if (neg_n) {
/* VFNMS, VFMS */
gen_helper_vfp_negs(vn, vn);
}
- neon_load_reg32(vd, a->vd);
+ vfp_load_reg32(vd, a->vd);
if (neg_d) {
/* VFNMA, VFNMS */
gen_helper_vfp_negs(vd, vd);
}
fpst = fpstatus_ptr(FPST_FPCR);
gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
- neon_store_reg32(vd, a->vd);
+ vfp_store_reg32(vd, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(vn);
@@ -2226,20 +2173,20 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
vm = tcg_temp_new_i64();
vd = tcg_temp_new_i64();
- neon_load_reg64(vn, a->vn);
- neon_load_reg64(vm, a->vm);
+ vfp_load_reg64(vn, a->vn);
+ vfp_load_reg64(vm, a->vm);
if (neg_n) {
/* VFNMS, VFMS */
gen_helper_vfp_negd(vn, vn);
}
- neon_load_reg64(vd, a->vd);
+ vfp_load_reg64(vd, a->vd);
if (neg_d) {
/* VFNMA, VFNMS */
gen_helper_vfp_negd(vd, vd);
}
fpst = fpstatus_ptr(FPST_FPCR);
gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
- neon_store_reg64(vd, a->vd);
+ vfp_store_reg64(vd, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i64(vn);
@@ -2283,7 +2230,7 @@ static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
}
fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm));
- neon_store_reg32(fd, a->vd);
+ vfp_store_reg32(fd, a->vd);
tcg_temp_free_i32(fd);
return true;
}
@@ -2323,7 +2270,7 @@ static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));
for (;;) {
- neon_store_reg32(fd, vd);
+ vfp_store_reg32(fd, vd);
if (veclen == 0) {
break;
@@ -2378,7 +2325,7 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));
for (;;) {
- neon_store_reg64(fd, vd);
+ vfp_store_reg64(fd, vd);
if (veclen == 0) {
break;
@@ -2450,11 +2397,11 @@ static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
vd = tcg_temp_new_i32();
vm = tcg_temp_new_i32();
- neon_load_reg32(vd, a->vd);
+ vfp_load_reg32(vd, a->vd);
if (a->z) {
tcg_gen_movi_i32(vm, 0);
} else {
- neon_load_reg32(vm, a->vm);
+ vfp_load_reg32(vm, a->vm);
}
if (a->e) {
@@ -2489,11 +2436,11 @@ static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
vd = tcg_temp_new_i32();
vm = tcg_temp_new_i32();
- neon_load_reg32(vd, a->vd);
+ vfp_load_reg32(vd, a->vd);
if (a->z) {
tcg_gen_movi_i32(vm, 0);
} else {
- neon_load_reg32(vm, a->vm);
+ vfp_load_reg32(vm, a->vm);
}
if (a->e) {
@@ -2533,11 +2480,11 @@ static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
vd = tcg_temp_new_i64();
vm = tcg_temp_new_i64();
- neon_load_reg64(vd, a->vd);
+ vfp_load_reg64(vd, a->vd);
if (a->z) {
tcg_gen_movi_i64(vm, 0);
} else {
- neon_load_reg64(vm, a->vm);
+ vfp_load_reg64(vm, a->vm);
}
if (a->e) {
@@ -2572,7 +2519,7 @@ static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
/* The T bit tells us if we want the low or high 16 bits of Vm */
tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
- neon_store_reg32(tmp, a->vd);
+ vfp_store_reg32(tmp, a->vd);
tcg_temp_free_i32(ahp_mode);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tmp);
@@ -2610,7 +2557,7 @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
vd = tcg_temp_new_i64();
gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
- neon_store_reg64(vd, a->vd);
+ vfp_store_reg64(vd, a->vd);
tcg_temp_free_i32(ahp_mode);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tmp);
@@ -2636,7 +2583,7 @@ static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
ahp_mode = get_ahp_flag();
tmp = tcg_temp_new_i32();
- neon_load_reg32(tmp, a->vm);
+ vfp_load_reg32(tmp, a->vm);
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
tcg_temp_free_i32(ahp_mode);
@@ -2674,7 +2621,7 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
tmp = tcg_temp_new_i32();
vm = tcg_temp_new_i64();
- neon_load_reg64(vm, a->vm);
+ vfp_load_reg64(vm, a->vm);
gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
tcg_temp_free_i64(vm);
tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
@@ -2698,10 +2645,10 @@ static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
}
tmp = tcg_temp_new_i32();
- neon_load_reg32(tmp, a->vm);
+ vfp_load_reg32(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR_F16);
gen_helper_rinth(tmp, tmp, fpst);
- neon_store_reg32(tmp, a->vd);
+ vfp_store_reg32(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tmp);
return true;
@@ -2721,10 +2668,10 @@ static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
}
tmp = tcg_temp_new_i32();
- neon_load_reg32(tmp, a->vm);
+ vfp_load_reg32(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR);
gen_helper_rints(tmp, tmp, fpst);
- neon_store_reg32(tmp, a->vd);
+ vfp_store_reg32(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tmp);
return true;
@@ -2753,10 +2700,10 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
}
tmp = tcg_temp_new_i64();
- neon_load_reg64(tmp, a->vm);
+ vfp_load_reg64(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR);
gen_helper_rintd(tmp, tmp, fpst);
- neon_store_reg64(tmp, a->vd);
+ vfp_store_reg64(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i64(tmp);
return true;
@@ -2777,13 +2724,13 @@ static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
}
tmp = tcg_temp_new_i32();
- neon_load_reg32(tmp, a->vm);
+ vfp_load_reg32(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR_F16);
tcg_rmode = tcg_const_i32(float_round_to_zero);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
gen_helper_rinth(tmp, tmp, fpst);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- neon_store_reg32(tmp, a->vd);
+ vfp_store_reg32(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tcg_rmode);
tcg_temp_free_i32(tmp);
@@ -2805,13 +2752,13 @@ static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
}
tmp = tcg_temp_new_i32();
- neon_load_reg32(tmp, a->vm);
+ vfp_load_reg32(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR);
tcg_rmode = tcg_const_i32(float_round_to_zero);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
gen_helper_rints(tmp, tmp, fpst);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- neon_store_reg32(tmp, a->vd);
+ vfp_store_reg32(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tcg_rmode);
tcg_temp_free_i32(tmp);
@@ -2842,13 +2789,13 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
}
tmp = tcg_temp_new_i64();
- neon_load_reg64(tmp, a->vm);
+ vfp_load_reg64(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR);
tcg_rmode = tcg_const_i32(float_round_to_zero);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
gen_helper_rintd(tmp, tmp, fpst);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- neon_store_reg64(tmp, a->vd);
+ vfp_store_reg64(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i64(tmp);
tcg_temp_free_i32(tcg_rmode);
@@ -2869,10 +2816,10 @@ static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
}
tmp = tcg_temp_new_i32();
- neon_load_reg32(tmp, a->vm);
+ vfp_load_reg32(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR_F16);
gen_helper_rinth_exact(tmp, tmp, fpst);
- neon_store_reg32(tmp, a->vd);
+ vfp_store_reg32(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tmp);
return true;
@@ -2892,10 +2839,10 @@ static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
}
tmp = tcg_temp_new_i32();
- neon_load_reg32(tmp, a->vm);
+ vfp_load_reg32(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR);
gen_helper_rints_exact(tmp, tmp, fpst);
- neon_store_reg32(tmp, a->vd);
+ vfp_store_reg32(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tmp);
return true;
@@ -2924,10 +2871,10 @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
}
tmp = tcg_temp_new_i64();
- neon_load_reg64(tmp, a->vm);
+ vfp_load_reg64(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR);
gen_helper_rintd_exact(tmp, tmp, fpst);
- neon_store_reg64(tmp, a->vd);
+ vfp_store_reg64(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i64(tmp);
return true;
@@ -2953,9 +2900,9 @@ static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
vm = tcg_temp_new_i32();
vd = tcg_temp_new_i64();
- neon_load_reg32(vm, a->vm);
+ vfp_load_reg32(vm, a->vm);
gen_helper_vfp_fcvtds(vd, vm, cpu_env);
- neon_store_reg64(vd, a->vd);
+ vfp_store_reg64(vd, a->vd);
tcg_temp_free_i32(vm);
tcg_temp_free_i64(vd);
return true;
@@ -2981,9 +2928,9 @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
vd = tcg_temp_new_i32();
vm = tcg_temp_new_i64();
- neon_load_reg64(vm, a->vm);
+ vfp_load_reg64(vm, a->vm);
gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
- neon_store_reg32(vd, a->vd);
+ vfp_store_reg32(vd, a->vd);
tcg_temp_free_i32(vd);
tcg_temp_free_i64(vm);
return true;
@@ -3003,7 +2950,7 @@ static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
}
vm = tcg_temp_new_i32();
- neon_load_reg32(vm, a->vm);
+ vfp_load_reg32(vm, a->vm);
fpst = fpstatus_ptr(FPST_FPCR_F16);
if (a->s) {
/* i32 -> f16 */
@@ -3012,7 +2959,7 @@ static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
/* u32 -> f16 */
gen_helper_vfp_uitoh(vm, vm, fpst);
}
- neon_store_reg32(vm, a->vd);
+ vfp_store_reg32(vm, a->vd);
tcg_temp_free_i32(vm);
tcg_temp_free_ptr(fpst);
return true;
@@ -3032,7 +2979,7 @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
}
vm = tcg_temp_new_i32();
- neon_load_reg32(vm, a->vm);
+ vfp_load_reg32(vm, a->vm);
fpst = fpstatus_ptr(FPST_FPCR);
if (a->s) {
/* i32 -> f32 */
@@ -3041,7 +2988,7 @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
/* u32 -> f32 */
gen_helper_vfp_uitos(vm, vm, fpst);
}
- neon_store_reg32(vm, a->vd);
+ vfp_store_reg32(vm, a->vd);
tcg_temp_free_i32(vm);
tcg_temp_free_ptr(fpst);
return true;
@@ -3068,7 +3015,7 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
vm = tcg_temp_new_i32();
vd = tcg_temp_new_i64();
- neon_load_reg32(vm, a->vm);
+ vfp_load_reg32(vm, a->vm);
fpst = fpstatus_ptr(FPST_FPCR);
if (a->s) {
/* i32 -> f64 */
@@ -3077,7 +3024,7 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
/* u32 -> f64 */
gen_helper_vfp_uitod(vd, vm, fpst);
}
- neon_store_reg64(vd, a->vd);
+ vfp_store_reg64(vd, a->vd);
tcg_temp_free_i32(vm);
tcg_temp_free_i64(vd);
tcg_temp_free_ptr(fpst);
@@ -3108,9 +3055,9 @@ static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
vm = tcg_temp_new_i64();
vd = tcg_temp_new_i32();
- neon_load_reg64(vm, a->vm);
+ vfp_load_reg64(vm, a->vm);
gen_helper_vjcvt(vd, vm, cpu_env);
- neon_store_reg32(vd, a->vd);
+ vfp_store_reg32(vd, a->vd);
tcg_temp_free_i64(vm);
tcg_temp_free_i32(vd);
return true;
@@ -3133,7 +3080,7 @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
vd = tcg_temp_new_i32();
- neon_load_reg32(vd, a->vd);
+ vfp_load_reg32(vd, a->vd);
fpst = fpstatus_ptr(FPST_FPCR_F16);
shift = tcg_const_i32(frac_bits);
@@ -3168,7 +3115,7 @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
g_assert_not_reached();
}
- neon_store_reg32(vd, a->vd);
+ vfp_store_reg32(vd, a->vd);
tcg_temp_free_i32(vd);
tcg_temp_free_i32(shift);
tcg_temp_free_ptr(fpst);
@@ -3192,7 +3139,7 @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
vd = tcg_temp_new_i32();
- neon_load_reg32(vd, a->vd);
+ vfp_load_reg32(vd, a->vd);
fpst = fpstatus_ptr(FPST_FPCR);
shift = tcg_const_i32(frac_bits);
@@ -3227,7 +3174,7 @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
g_assert_not_reached();
}
- neon_store_reg32(vd, a->vd);
+ vfp_store_reg32(vd, a->vd);
tcg_temp_free_i32(vd);
tcg_temp_free_i32(shift);
tcg_temp_free_ptr(fpst);
@@ -3257,7 +3204,7 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
vd = tcg_temp_new_i64();
- neon_load_reg64(vd, a->vd);
+ vfp_load_reg64(vd, a->vd);
fpst = fpstatus_ptr(FPST_FPCR);
shift = tcg_const_i32(frac_bits);
@@ -3292,7 +3239,7 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
g_assert_not_reached();
}
- neon_store_reg64(vd, a->vd);
+ vfp_store_reg64(vd, a->vd);
tcg_temp_free_i64(vd);
tcg_temp_free_i32(shift);
tcg_temp_free_ptr(fpst);
@@ -3314,7 +3261,7 @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
fpst = fpstatus_ptr(FPST_FPCR_F16);
vm = tcg_temp_new_i32();
- neon_load_reg32(vm, a->vm);
+ vfp_load_reg32(vm, a->vm);
if (a->s) {
if (a->rz) {
@@ -3329,7 +3276,7 @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
gen_helper_vfp_touih(vm, vm, fpst);
}
}
- neon_store_reg32(vm, a->vd);
+ vfp_store_reg32(vm, a->vd);
tcg_temp_free_i32(vm);
tcg_temp_free_ptr(fpst);
return true;
@@ -3350,7 +3297,7 @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
fpst = fpstatus_ptr(FPST_FPCR);
vm = tcg_temp_new_i32();
- neon_load_reg32(vm, a->vm);
+ vfp_load_reg32(vm, a->vm);
if (a->s) {
if (a->rz) {
@@ -3365,7 +3312,7 @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
gen_helper_vfp_touis(vm, vm, fpst);
}
}
- neon_store_reg32(vm, a->vd);
+ vfp_store_reg32(vm, a->vd);
tcg_temp_free_i32(vm);
tcg_temp_free_ptr(fpst);
return true;
@@ -3393,7 +3340,7 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
fpst = fpstatus_ptr(FPST_FPCR);
vm = tcg_temp_new_i64();
vd = tcg_temp_new_i32();
- neon_load_reg64(vm, a->vm);
+ vfp_load_reg64(vm, a->vm);
if (a->s) {
if (a->rz) {
@@ -3408,7 +3355,7 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
gen_helper_vfp_touid(vd, vm, fpst);
}
}
- neon_store_reg32(vd, a->vd);
+ vfp_store_reg32(vd, a->vd);
tcg_temp_free_i32(vd);
tcg_temp_free_i64(vm);
tcg_temp_free_ptr(fpst);
@@ -3521,10 +3468,10 @@ static bool trans_VINS(DisasContext *s, arg_VINS *a)
/* Insert low half of Vm into high half of Vd */
rm = tcg_temp_new_i32();
rd = tcg_temp_new_i32();
- neon_load_reg32(rm, a->vm);
- neon_load_reg32(rd, a->vd);
+ vfp_load_reg32(rm, a->vm);
+ vfp_load_reg32(rd, a->vd);
tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
- neon_store_reg32(rd, a->vd);
+ vfp_store_reg32(rd, a->vd);
tcg_temp_free_i32(rm);
tcg_temp_free_i32(rd);
return true;
@@ -3548,9 +3495,9 @@ static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
/* Set Vd to high half of Vm */
rm = tcg_temp_new_i32();
- neon_load_reg32(rm, a->vm);
+ vfp_load_reg32(rm, a->vm);
tcg_gen_shri_i32(rm, rm, 16);
- neon_store_reg32(rm, a->vd);
+ vfp_store_reg32(rm, a->vd);
tcg_temp_free_i32(rm);
return true;
}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 38371db540..29ea1eb781 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -1094,62 +1094,139 @@ static inline void gen_hlt(DisasContext *s, int imm)
unallocated_encoding(s);
}
-static inline long vfp_reg_offset(bool dp, unsigned reg)
+/*
+ * Return the offset of a "full" NEON Dreg.
+ */
+static long neon_full_reg_offset(unsigned reg)
+{
+ return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
+}
+
+/*
+ * Return the offset of the ELEMENT'th (1 << (MEMOP & MO_SIZE))-byte piece
+ * of NEON register REG, where 0 is the least significant end of the register.
+ */
+static long neon_element_offset(int reg, int element, MemOp memop)
+{
+ int element_size = 1 << (memop & MO_SIZE);
+ int ofs = element * element_size;
+#ifdef HOST_WORDS_BIGENDIAN
+ /*
+ * Calculate the offset assuming fully little-endian,
+ * then XOR to account for the order of the 8-byte units.
+ */
+ if (element_size < 8) {
+ ofs ^= 8 - element_size;
+ }
+#endif
+ return neon_full_reg_offset(reg) + ofs;
+}
+
+/* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
+static long vfp_reg_offset(bool dp, unsigned reg)
{
if (dp) {
- return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
+ return neon_element_offset(reg, 0, MO_64);
} else {
- long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
- if (reg & 1) {
- ofs += offsetof(CPU_DoubleU, l.upper);
- } else {
- ofs += offsetof(CPU_DoubleU, l.lower);
- }
- return ofs;
+ return neon_element_offset(reg >> 1, reg & 1, MO_32);
}
}
-/* Return the offset of a 32-bit piece of a NEON register.
- zero is the least significant end of the register. */
-static inline long
-neon_reg_offset (int reg, int n)
+static inline void vfp_load_reg64(TCGv_i64 var, int reg)
{
- int sreg;
- sreg = reg * 2 + n;
- return vfp_reg_offset(0, sreg);
+ tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg));
}
-static TCGv_i32 neon_load_reg(int reg, int pass)
+static inline void vfp_store_reg64(TCGv_i64 var, int reg)
{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
- return tmp;
+ tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg));
}
-static void neon_store_reg(int reg, int pass, TCGv_i32 var)
+static inline void vfp_load_reg32(TCGv_i32 var, int reg)
{
- tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
- tcg_temp_free_i32(var);
+ tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
}
-static inline void neon_load_reg64(TCGv_i64 var, int reg)
+static inline void vfp_store_reg32(TCGv_i32 var, int reg)
{
- tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
+ tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
}
-static inline void neon_store_reg64(TCGv_i64 var, int reg)
+static void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
{
- tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
+ long off = neon_element_offset(reg, ele, memop);
+
+ switch (memop) {
+ case MO_SB:
+ tcg_gen_ld8s_i32(dest, cpu_env, off);
+ break;
+ case MO_UB:
+ tcg_gen_ld8u_i32(dest, cpu_env, off);
+ break;
+ case MO_SW:
+ tcg_gen_ld16s_i32(dest, cpu_env, off);
+ break;
+ case MO_UW:
+ tcg_gen_ld16u_i32(dest, cpu_env, off);
+ break;
+ case MO_UL:
+ case MO_SL:
+ tcg_gen_ld_i32(dest, cpu_env, off);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
-static inline void neon_load_reg32(TCGv_i32 var, int reg)
+static void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
{
- tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
+ long off = neon_element_offset(reg, ele, memop);
+
+ switch (memop) {
+ case MO_SL:
+ tcg_gen_ld32s_i64(dest, cpu_env, off);
+ break;
+ case MO_UL:
+ tcg_gen_ld32u_i64(dest, cpu_env, off);
+ break;
+ case MO_Q:
+ tcg_gen_ld_i64(dest, cpu_env, off);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
-static inline void neon_store_reg32(TCGv_i32 var, int reg)
+static void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
{
- tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
+ long off = neon_element_offset(reg, ele, memop);
+
+ switch (memop) {
+ case MO_8:
+ tcg_gen_st8_i32(src, cpu_env, off);
+ break;
+ case MO_16:
+ tcg_gen_st16_i32(src, cpu_env, off);
+ break;
+ case MO_32:
+ tcg_gen_st_i32(src, cpu_env, off);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
+{
+ long off = neon_element_offset(reg, ele, memop);
+
+ switch (memop) {
+ case MO_64:
+ tcg_gen_st_i64(src, cpu_env, off);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index a973454e4f..0f33127c4c 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -293,7 +293,7 @@ void HELPER(gvec_sdot_idx_b)(void *vd, void *vn, void *vm, uint32_t desc)
intptr_t index = simd_data(desc);
uint32_t *d = vd;
int8_t *n = vn;
- int8_t *m_indexed = (int8_t *)vm + index * 4;
+ int8_t *m_indexed = (int8_t *)vm + H4(index) * 4;
/* Notice the special case of opr_sz == 8, from aa64/aa32 advsimd.
* Otherwise opr_sz is a multiple of 16.
@@ -324,7 +324,7 @@ void HELPER(gvec_udot_idx_b)(void *vd, void *vn, void *vm, uint32_t desc)
intptr_t index = simd_data(desc);
uint32_t *d = vd;
uint8_t *n = vn;
- uint8_t *m_indexed = (uint8_t *)vm + index * 4;
+ uint8_t *m_indexed = (uint8_t *)vm + H4(index) * 4;
/* Notice the special case of opr_sz == 8, from aa64/aa32 advsimd.
* Otherwise opr_sz is a multiple of 16.
@@ -1858,10 +1858,10 @@ DO_ABA(gvec_uaba_d, uint64_t)
r2 = float16_##OP(m[H2(0)], m[H2(1)], fpst); \
r3 = float16_##OP(m[H2(2)], m[H2(3)], fpst); \
\
- d[H4(0)] = r0; \
- d[H4(1)] = r1; \
- d[H4(2)] = r2; \
- d[H4(3)] = r3; \
+ d[H2(0)] = r0; \
+ d[H2(1)] = r1; \
+ d[H2(2)] = r2; \
+ d[H2(3)] = r3; \
}
DO_NEON_PAIRWISE(neon_padd, add)
diff --git a/tests/qtest/npcm7xx_rng-test.c b/tests/qtest/npcm7xx_rng-test.c
index da6e639bf6..e7cde85fbb 100644
--- a/tests/qtest/npcm7xx_rng-test.c
+++ b/tests/qtest/npcm7xx_rng-test.c
@@ -265,10 +265,16 @@ int main(int argc, char **argv)
qtest_add_func("npcm7xx_rng/enable_disable", test_enable_disable);
qtest_add_func("npcm7xx_rng/rosel", test_rosel);
- qtest_add_func("npcm7xx_rng/continuous/monobit", test_continuous_monobit);
- qtest_add_func("npcm7xx_rng/continuous/runs", test_continuous_runs);
- qtest_add_func("npcm7xx_rng/first_byte/monobit", test_first_byte_monobit);
- qtest_add_func("npcm7xx_rng/first_byte/runs", test_first_byte_runs);
+ /*
+ * These tests fail intermittently; only run them on explicit
+ * request until we figure out why.
+ */
+ if (getenv("QEMU_TEST_FLAKY_RNG_TESTS")) {
+ qtest_add_func("npcm7xx_rng/continuous/monobit", test_continuous_monobit);
+ qtest_add_func("npcm7xx_rng/continuous/runs", test_continuous_runs);
+ qtest_add_func("npcm7xx_rng/first_byte/monobit", test_first_byte_monobit);
+ qtest_add_func("npcm7xx_rng/first_byte/runs", test_first_byte_runs);
+ }
qtest_start("-machine npcm750-evb");
ret = g_test_run();