Diffstat (limited to 'target/arm')
-rw-r--r--  target/arm/helper-sve.h       5
-rw-r--r--  target/arm/helper.c           1
-rw-r--r--  target/arm/helper.h          28
-rw-r--r--  target/arm/mte_helper.c      19
-rw-r--r--  target/arm/sve.decode        35
-rw-r--r--  target/arm/sve_helper.c      70
-rw-r--r--  target/arm/translate-a64.c  110
-rw-r--r--  target/arm/translate-sve.c  393
-rw-r--r--  target/arm/translate.h        1
-rw-r--r--  target/arm/vec_helper.c     182
10 files changed, 446 insertions, 398 deletions
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 63c4a087ca..4411c47120 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -269,11 +269,6 @@ DEF_HELPER_FLAGS_3(sve_uminv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_uminv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_uminv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
DEF_HELPER_FLAGS_4(sve_movz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_movz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_movz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 6b4f0eb533..44d666627a 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -5105,7 +5105,6 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = {
.access = PL2_RW,
.readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore },
{ .name = "HCR_EL2", .state = ARM_CP_STATE_BOTH,
- .type = ARM_CP_NO_RAW,
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
.access = PL2_RW,
.type = ARM_CP_CONST, .resetvalue = 0 },
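
The line removed here was dead code: the initializer assigned .type twice, and with C designated initializers a repeated field silently keeps only the last value, so the .type = ARM_CP_CONST below it always won. A minimal standalone illustration of that rule (demo struct, not the real ARMCPRegInfo):

    struct demo { int type; int access; };

    /* Valid C (GCC/Clang warn under -Woverride-init): per C99 6.7.8 the
       later initializer overrides the earlier one, so d.type == 2. */
    static const struct demo d = { .type = 1, .access = 0, .type = 2 };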
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 759639a63a..3ca73a1764 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -758,6 +758,34 @@ DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_mul_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_mul_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_mul_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_mla_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mla_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mla_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_mls_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mls_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mls_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(neon_sqdmulh_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqdmulh_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#include "helper-sve.h"
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index 104752041f..891306f5b0 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -514,11 +514,12 @@ void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val)
}
/* Record a tag check failure. */
-static void mte_check_fail(CPUARMState *env, int mmu_idx,
+static void mte_check_fail(CPUARMState *env, uint32_t desc,
uint64_t dirty_ptr, uintptr_t ra)
{
+ int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx);
- int el, reg_el, tcf, select;
+ int el, reg_el, tcf, select, is_write, syn;
uint64_t sctlr;
reg_el = regime_el(env, arm_mmu_idx);
@@ -546,9 +547,10 @@ static void mte_check_fail(CPUARMState *env, int mmu_idx,
*/
cpu_restore_state(env_cpu(env), ra, true);
env->exception.vaddress = dirty_ptr;
- raise_exception(env, EXCP_DATA_ABORT,
- syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, 0, 0x11),
- exception_target_el(env));
+
+ is_write = FIELD_EX32(desc, MTEDESC, WRITE);
+ syn = syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, is_write, 0x11);
+ raise_exception(env, EXCP_DATA_ABORT, syn, exception_target_el(env));
/* noreturn, but fall through to the assert anyway */
case 0:
@@ -639,8 +641,7 @@ uint64_t mte_check1(CPUARMState *env, uint32_t desc,
}
if (unlikely(!mte_probe1_int(env, desc, ptr, ra, bit55))) {
- int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
- mte_check_fail(env, mmu_idx, ptr, ra);
+ mte_check_fail(env, desc, ptr, ra);
}
return useronly_clean_ptr(ptr);
@@ -810,7 +811,7 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc,
fail_ofs = tag_first + n * TAG_GRANULE - ptr;
fail_ofs = ROUND_UP(fail_ofs, esize);
- mte_check_fail(env, mmu_idx, ptr + fail_ofs, ra);
+ mte_check_fail(env, desc, ptr + fail_ofs, ra);
}
done:
@@ -922,7 +923,7 @@ uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr)
fail:
/* Locate the first nibble that differs. */
i = ctz64(mem_tag ^ ptr_tag) >> 4;
- mte_check_fail(env, mmu_idx, align_ptr + i * TAG_GRANULE, ra);
+ mte_check_fail(env, desc, align_ptr + i * TAG_GRANULE, ra);
done:
return useronly_clean_ptr(ptr);
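
mte_check_fail now receives the whole 32-bit descriptor instead of a pre-extracted mmu_idx, so it can pull MIDX and WRITE out of it itself with FIELD_EX32. A sketch of the packed-descriptor idea (the bit positions here are invented for illustration and are not the real MTEDESC layout):

    #include <stdint.h>

    /* extract `len` bits of `desc` starting at bit `pos` */
    static inline uint32_t extract_field(uint32_t desc, int pos, int len)
    {
        return (desc >> pos) & ((1u << len) - 1);
    }

    /* hypothetical layout: mmu index in bits [3:0], write flag in bit 4 */
    static inline int demo_midx(uint32_t desc)     { return extract_field(desc, 0, 4); }
    static inline int demo_is_write(uint32_t desc) { return extract_field(desc, 4, 1); }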
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 4f580a25e7..6425396ac1 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -150,13 +150,17 @@
@rd_rn_i6 ........ ... rn:5 ..... imm:s6 rd:5 &rri
# Two register operand, one immediate operand, with predicate,
-# element size encoded as TSZHL. User must fill in imm.
-@rdn_pg_tszimm ........ .. ... ... ... pg:3 ..... rd:5 \
- &rpri_esz rn=%reg_movprfx esz=%tszimm_esz
+# element size encoded as TSZHL.
+@rdn_pg_tszimm_shl ........ .. ... ... ... pg:3 ..... rd:5 \
+ &rpri_esz rn=%reg_movprfx esz=%tszimm_esz imm=%tszimm_shl
+@rdn_pg_tszimm_shr ........ .. ... ... ... pg:3 ..... rd:5 \
+ &rpri_esz rn=%reg_movprfx esz=%tszimm_esz imm=%tszimm_shr
# Similarly without predicate.
-@rd_rn_tszimm ........ .. ... ... ...... rn:5 rd:5 \
- &rri_esz esz=%tszimm16_esz
+@rd_rn_tszimm_shl ........ .. ... ... ...... rn:5 rd:5 \
+ &rri_esz esz=%tszimm16_esz imm=%tszimm16_shl
+@rd_rn_tszimm_shr ........ .. ... ... ...... rn:5 rd:5 \
+ &rri_esz esz=%tszimm16_esz imm=%tszimm16_shr
# Two register operand, one immediate operand, with 4-bit predicate.
# User must fill in imm.
@@ -289,14 +293,10 @@ UMINV 00000100 .. 001 011 001 ... ..... ..... @rd_pg_rn
### SVE Shift by Immediate - Predicated Group
# SVE bitwise shift by immediate (predicated)
-ASR_zpzi 00000100 .. 000 000 100 ... .. ... ..... \
- @rdn_pg_tszimm imm=%tszimm_shr
-LSR_zpzi 00000100 .. 000 001 100 ... .. ... ..... \
- @rdn_pg_tszimm imm=%tszimm_shr
-LSL_zpzi 00000100 .. 000 011 100 ... .. ... ..... \
- @rdn_pg_tszimm imm=%tszimm_shl
-ASRD 00000100 .. 000 100 100 ... .. ... ..... \
- @rdn_pg_tszimm imm=%tszimm_shr
+ASR_zpzi 00000100 .. 000 000 100 ... .. ... ..... @rdn_pg_tszimm_shr
+LSR_zpzi 00000100 .. 000 001 100 ... .. ... ..... @rdn_pg_tszimm_shr
+LSL_zpzi 00000100 .. 000 011 100 ... .. ... ..... @rdn_pg_tszimm_shl
+ASRD 00000100 .. 000 100 100 ... .. ... ..... @rdn_pg_tszimm_shr
# SVE bitwise shift by vector (predicated)
ASR_zpzz 00000100 .. 010 000 100 ... ..... ..... @rdn_pg_rm
@@ -400,12 +400,9 @@ RDVL 00000100 101 11111 01010 imm:s6 rd:5
### SVE Bitwise Shift - Unpredicated Group
# SVE bitwise shift by immediate (unpredicated)
-ASR_zzi 00000100 .. 1 ..... 1001 00 ..... ..... \
- @rd_rn_tszimm imm=%tszimm16_shr
-LSR_zzi 00000100 .. 1 ..... 1001 01 ..... ..... \
- @rd_rn_tszimm imm=%tszimm16_shr
-LSL_zzi 00000100 .. 1 ..... 1001 11 ..... ..... \
- @rd_rn_tszimm imm=%tszimm16_shl
+ASR_zzi 00000100 .. 1 ..... 1001 00 ..... ..... @rd_rn_tszimm_shr
+LSR_zzi 00000100 .. 1 ..... 1001 01 ..... ..... @rd_rn_tszimm_shr
+LSL_zzi 00000100 .. 1 ..... 1001 11 ..... ..... @rd_rn_tszimm_shl
# SVE bitwise shift by wide elements (unpredicated)
# Note esz != 3
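
The new @rdn_pg_tszimm_shl/_shr formats fold the immediate extraction into the format itself instead of repeating imm=%tszimm_* at every pattern. The extractors decode the combined tsz:imm field: the element size is the index of the highest set bit of tsz, and the shift count falls out relative to the element width. Roughly (a sketch; the demo_ names and DisasContext-free signatures are simplifications):

    /* x is the combined tsz:imm3 value taken from the instruction. */
    static int demo_tszimm_esz(int x)
    {
        x >>= 3;                       /* discard imm3 */
        return 31 - __builtin_clz(x);  /* index of highest set bit of tsz */
    }

    static int demo_tszimm_shr(int x)  /* right shifts encode 1..esize */
    {
        return (16 << demo_tszimm_esz(x)) - x;
    }

    static int demo_tszimm_shl(int x)  /* left shifts encode 0..esize-1 */
    {
        return x - (8 << demo_tszimm_esz(x));
    }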
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 382fa82bc8..4758d46f34 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -956,85 +956,43 @@ uint32_t HELPER(sve_pnext)(void *vd, void *vg, uint32_t pred_desc)
return flags;
}
-/* Store zero into every active element of Zd. We will use this for two
- * and three-operand predicated instructions for which logic dictates a
- * zero result. In particular, logical shift by element size, which is
- * otherwise undefined on the host.
- *
- * For element sizes smaller than uint64_t, we use tables to expand
- * the N bits of the controlling predicate to a byte mask, and clear
- * those bytes.
+/*
+ * Copy Zn into Zd, and store zero into inactive elements.
+ * If inv, store zeros into the active elements.
*/
-void HELPER(sve_clr_b)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] &= ~expand_pred_b(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_clr_h)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] &= ~expand_pred_h(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_clr_s)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] &= ~expand_pred_s(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_clr_d)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- if (pg[H1(i)] & 1) {
- d[i] = 0;
- }
- }
-}
-
-/* Copy Zn into Zd, and store zero into inactive elements. */
void HELPER(sve_movz_b)(void *vd, void *vn, void *vg, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
uint64_t *d = vd, *n = vn;
uint8_t *pg = vg;
+
for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & expand_pred_b(pg[H1(i)]);
+ d[i] = n[i] & (expand_pred_b(pg[H1(i)]) ^ inv);
}
}
void HELPER(sve_movz_h)(void *vd, void *vn, void *vg, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
uint64_t *d = vd, *n = vn;
uint8_t *pg = vg;
+
for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & expand_pred_h(pg[H1(i)]);
+ d[i] = n[i] & (expand_pred_h(pg[H1(i)]) ^ inv);
}
}
void HELPER(sve_movz_s)(void *vd, void *vn, void *vg, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
uint64_t *d = vd, *n = vn;
uint8_t *pg = vg;
+
for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & expand_pred_s(pg[H1(i)]);
+ d[i] = n[i] & (expand_pred_s(pg[H1(i)]) ^ inv);
}
}
@@ -1043,8 +1001,10 @@ void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc)
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
uint64_t *d = vd, *n = vn;
uint8_t *pg = vg;
+ uint8_t inv = simd_data(desc);
+
for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & -(uint64_t)(pg[H1(i)] & 1);
+ d[i] = n[i] & -(uint64_t)((pg[H1(i)] ^ inv) & 1);
}
}
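
The sve_clr_* helpers become redundant because sve_movz_* now carries an invert bit in simd_data: -(uint64_t)(data & 1) is all-ones when inverting, and XOR with it flips which elements the expanded predicate mask keeps. A standalone demo of the trick (the loop below stands in for QEMU's table-driven expand_pred_b):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* expand each predicate bit to one byte of a 64-bit mask */
    static uint64_t expand_pred_b_demo(uint8_t pg)
    {
        uint64_t mask = 0;
        for (int i = 0; i < 8; i++) {
            if (pg & (1u << i)) {
                mask |= 0xffull << (i * 8);
            }
        }
        return mask;
    }

    int main(void)
    {
        uint64_t n = 0x1122334455667788ull;
        uint8_t pg = 0x0f;   /* low four byte elements active */
        uint64_t movz = n & expand_pred_b_demo(pg);                   /* inv = 0 */
        uint64_t clr  = n & (expand_pred_b_demo(pg) ^ -(uint64_t)1);  /* inv = 1 */
        printf("movz %016" PRIx64 "  clr %016" PRIx64 "\n", movz, clr);
        /* movz 0000000055667788  clr 1122334400000000 */
        return 0;
    }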
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 0fc5e12fab..4ba6918b60 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -678,6 +678,20 @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
tcg_temp_free_ptr(fpst);
}
+/* Expand a 3-operand + qc + operation using an out-of-line helper. */
+static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
+ int rm, gen_helper_gvec_3_ptr *fn)
+{
+ TCGv_ptr qc_ptr = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), qc_ptr,
+ is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
+ tcg_temp_free_ptr(qc_ptr);
+}
+
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
* than the 32 bit equivalent.
*/
@@ -1156,18 +1170,18 @@ static void do_vec_ld(DisasContext *s, int destidx, int element,
* unallocated-encoding checks (otherwise the syndrome information
* for the resulting exception will be incorrect).
*/
-static inline bool fp_access_check(DisasContext *s)
+static bool fp_access_check(DisasContext *s)
{
- assert(!s->fp_access_checked);
- s->fp_access_checked = true;
+ if (s->fp_excp_el) {
+ assert(!s->fp_access_checked);
+ s->fp_access_checked = true;
- if (!s->fp_excp_el) {
- return true;
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
+ syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
+ return false;
}
-
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
- syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
- return false;
+ s->fp_access_checked = true;
+ return true;
}
/* Check that SVE access is enabled. If it is, return true.
@@ -1176,10 +1190,14 @@ static inline bool fp_access_check(DisasContext *s)
bool sve_access_check(DisasContext *s)
{
if (s->sve_excp_el) {
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_sve_access_trap(),
- s->sve_excp_el);
+ assert(!s->sve_access_checked);
+ s->sve_access_checked = true;
+
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
+ syn_sve_access_trap(), s->sve_excp_el);
return false;
}
+ s->sve_access_checked = true;
return fp_access_check(s);
}
@@ -11730,6 +11748,15 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
}
return;
+ case 0x16: /* SQDMULH, SQRDMULH */
+ {
+ static gen_helper_gvec_3_ptr * const fns[2][2] = {
+ { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
+ { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
+ };
+ gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
+ }
+ return;
case 0x11:
if (!u) { /* CMTST */
gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
@@ -11841,16 +11868,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genenvfn = fns[size][u];
break;
}
- case 0x16: /* SQDMULH, SQRDMULH */
- {
- static NeonGenTwoOpEnvFn * const fns[2][2] = {
- { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
- { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
- };
- assert(size == 1 || size == 2);
- genenvfn = fns[size - 1][u];
- break;
- }
default:
g_assert_not_reached();
}
@@ -13484,6 +13501,56 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
data, gen_helper_gvec_fmlal_idx_a64);
}
return;
+
+ case 0x08: /* MUL */
+ if (!is_long && !is_scalar) {
+ static gen_helper_gvec_3 * const fns[3] = {
+ gen_helper_gvec_mul_idx_h,
+ gen_helper_gvec_mul_idx_s,
+ gen_helper_gvec_mul_idx_d,
+ };
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ index, fns[size - 1]);
+ return;
+ }
+ break;
+
+ case 0x10: /* MLA */
+ if (!is_long && !is_scalar) {
+ static gen_helper_gvec_4 * const fns[3] = {
+ gen_helper_gvec_mla_idx_h,
+ gen_helper_gvec_mla_idx_s,
+ gen_helper_gvec_mla_idx_d,
+ };
+ tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, rd),
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ index, fns[size - 1]);
+ return;
+ }
+ break;
+
+ case 0x14: /* MLS */
+ if (!is_long && !is_scalar) {
+ static gen_helper_gvec_4 * const fns[3] = {
+ gen_helper_gvec_mls_idx_h,
+ gen_helper_gvec_mls_idx_s,
+ gen_helper_gvec_mls_idx_d,
+ };
+ tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, rd),
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ index, fns[size - 1]);
+ return;
+ }
+ break;
}
if (size == 3) {
@@ -14529,6 +14596,7 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s)
s->base.pc_next += 4;
s->fp_access_checked = false;
+ s->sve_access_checked = false;
if (dc_isar_feature(aa64_bti, s)) {
if (s->base.num_insns == 1) {
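
The new MUL/MLA/MLS (by element) cases hand the element index to the helper as `data`. What the helper then computes, per 128-bit segment, looks like this (a standalone 16-bit sketch mirroring the DO_MUL_IDX macro added to vec_helper.c below):

    #include <stdint.h>

    static void demo_mul_idx_h(uint16_t *d, const uint16_t *n, const uint16_t *m,
                               int idx, int elems)  /* elems: multiple of 8 */
    {
        const int segment = 16 / sizeof(uint16_t);  /* 8 elements per 128 bits */
        for (int i = 0; i < elems; i += segment) {
            /* one m element from within the segment scales the whole segment */
            uint16_t mm = m[i + idx];
            for (int j = 0; j < segment; j++) {
                d[i + j] = n[i + j] * mm;
            }
        }
    }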
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index d97cb37d83..15ad6c7d32 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -142,35 +142,76 @@ static int pred_gvec_reg_size(DisasContext *s)
return size_for_gvec(pred_full_reg_size(s));
}
+/* Invoke an out-of-line helper on 2 Zregs. */
+static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
+ int rd, int rn, int data)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vsz, vsz, data, fn);
+}
+
+/* Invoke an out-of-line helper on 3 Zregs. */
+static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
+ int rd, int rn, int rm, int data)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vsz, vsz, data, fn);
+}
+
+/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
+static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
+ int rd, int rn, int pg, int data)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ pred_full_reg_offset(s, pg),
+ vsz, vsz, data, fn);
+}
+
+/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
+static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
+ int rd, int rn, int rm, int pg, int data)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ pred_full_reg_offset(s, pg),
+ vsz, vsz, data, fn);
+}
+
/* Invoke a vector expander on two Zregs. */
-static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
- int esz, int rd, int rn)
+static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
+ int esz, int rd, int rn)
{
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- gvec_fn(esz, vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn), vsz, vsz);
- }
- return true;
+ unsigned vsz = vec_full_reg_size(s);
+ gvec_fn(esz, vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn), vsz, vsz);
}
/* Invoke a vector expander on three Zregs. */
-static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
- int esz, int rd, int rn, int rm)
+static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
+ int esz, int rd, int rn, int rm)
{
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- gvec_fn(esz, vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm), vsz, vsz);
- }
- return true;
+ unsigned vsz = vec_full_reg_size(s);
+ gvec_fn(esz, vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), vsz, vsz);
}
/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
- return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
+ if (sve_access_check(s)) {
+ gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
+ }
+ return true;
}
/* Initialize a Zreg with replications of a 64-bit immediate. */
@@ -180,52 +221,27 @@ static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}
-/* Invoke a vector expander on two Pregs. */
-static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
- int esz, int rd, int rn)
-{
- if (sve_access_check(s)) {
- unsigned psz = pred_gvec_reg_size(s);
- gvec_fn(esz, pred_full_reg_offset(s, rd),
- pred_full_reg_offset(s, rn), psz, psz);
- }
- return true;
-}
-
/* Invoke a vector expander on three Pregs. */
-static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
- int esz, int rd, int rn, int rm)
+static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
+ int rd, int rn, int rm)
{
- if (sve_access_check(s)) {
- unsigned psz = pred_gvec_reg_size(s);
- gvec_fn(esz, pred_full_reg_offset(s, rd),
- pred_full_reg_offset(s, rn),
- pred_full_reg_offset(s, rm), psz, psz);
- }
- return true;
+ unsigned psz = pred_gvec_reg_size(s);
+ gvec_fn(MO_64, pred_full_reg_offset(s, rd),
+ pred_full_reg_offset(s, rn),
+ pred_full_reg_offset(s, rm), psz, psz);
}
-/* Invoke a vector operation on four Pregs. */
-static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
- int rd, int rn, int rm, int rg)
+/* Invoke a vector move on two Pregs. */
+static bool do_mov_p(DisasContext *s, int rd, int rn)
{
if (sve_access_check(s)) {
unsigned psz = pred_gvec_reg_size(s);
- tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
- pred_full_reg_offset(s, rn),
- pred_full_reg_offset(s, rm),
- pred_full_reg_offset(s, rg),
- psz, psz, gvec_op);
+ tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
+ pred_full_reg_offset(s, rn), psz, psz);
}
return true;
}
-/* Invoke a vector move on two Pregs. */
-static bool do_mov_p(DisasContext *s, int rd, int rn)
-{
- return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
-}
-
/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
@@ -273,24 +289,32 @@ const uint64_t pred_esz_masks[4] = {
*** SVE Logical - Unpredicated Group
*/
+static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
+{
+ if (sve_access_check(s)) {
+ gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
+ }
+ return true;
+}
+
static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_and);
}
static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_or);
}
static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_xor);
}
static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_andc);
}
/*
@@ -299,32 +323,32 @@ static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_add);
}
static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_sub);
}
static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
}
static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
}
static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
}
static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
}
/*
@@ -333,16 +357,11 @@ static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
- unsigned vsz = vec_full_reg_size(s);
if (fn == NULL) {
return false;
}
if (sve_access_check(s)) {
- tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- pred_full_reg_offset(s, a->pg),
- vsz, vsz, 0, fn);
+ gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
}
return true;
}
@@ -356,12 +375,7 @@ static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
};
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm),
- pred_full_reg_offset(s, pg),
- vsz, vsz, 0, fns[esz]);
+ gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}
#define DO_ZPZZ(NAME, name) \
@@ -433,11 +447,7 @@ static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
return false;
}
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- pred_full_reg_offset(s, a->pg),
- vsz, vsz, 0, fn);
+ gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
}
return true;
}
@@ -608,48 +618,29 @@ static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
*** SVE Shift by Immediate - Predicated Group
*/
-/* Store zero into every active element of Zd. We will use this for two
- * and three-operand predicated instructions for which logic dictates a
- * zero result.
+/*
+ * Copy Zn into Zd, storing zeros into inactive elements.
+ * If invert, store zeros into the active elements.
*/
-static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
-{
- static gen_helper_gvec_2 * const fns[4] = {
- gen_helper_sve_clr_b, gen_helper_sve_clr_h,
- gen_helper_sve_clr_s, gen_helper_sve_clr_d,
- };
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
- pred_full_reg_offset(s, pg),
- vsz, vsz, 0, fns[esz]);
- }
- return true;
-}
-
-/* Copy Zn into Zd, storing zeros into inactive elements. */
-static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
+static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
+ int esz, bool invert)
{
static gen_helper_gvec_3 * const fns[4] = {
gen_helper_sve_movz_b, gen_helper_sve_movz_h,
gen_helper_sve_movz_s, gen_helper_sve_movz_d,
};
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- pred_full_reg_offset(s, pg),
- vsz, vsz, 0, fns[esz]);
+
+ if (sve_access_check(s)) {
+ gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
+ }
+ return true;
}
static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
gen_helper_gvec_3 *fn)
{
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- pred_full_reg_offset(s, a->pg),
- vsz, vsz, a->imm, fn);
+ gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}
return true;
}
@@ -682,7 +673,7 @@ static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
/* Shift by element size is architecturally valid.
For logical shifts, it is a zeroing operation. */
if (a->imm >= (8 << a->esz)) {
- return do_clr_zp(s, a->rd, a->pg, a->esz);
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
} else {
return do_zpzi_ool(s, a, fns[a->esz]);
}
@@ -700,7 +691,7 @@ static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
/* Shift by element size is architecturally valid.
For logical shifts, it is a zeroing operation. */
if (a->imm >= (8 << a->esz)) {
- return do_clr_zp(s, a->rd, a->pg, a->esz);
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
} else {
return do_zpzi_ool(s, a, fns[a->esz]);
}
@@ -718,7 +709,7 @@ static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
/* Shift by element size is architecturally valid. For arithmetic
right shift for division, it is a zeroing operation. */
if (a->imm >= (8 << a->esz)) {
- return do_clr_zp(s, a->rd, a->pg, a->esz);
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
} else {
return do_zpzi_ool(s, a, fns[a->esz]);
}
@@ -799,11 +790,7 @@ static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
return false;
}
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, 0, fn);
+ gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
}
return true;
}
@@ -977,11 +964,7 @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, a->imm, fn);
+ gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}
return true;
}
@@ -1022,10 +1005,7 @@ static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
return false;
}
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vsz, vsz, 0, fns[a->esz]);
+ gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
}
return true;
}
@@ -1042,11 +1022,7 @@ static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
return false;
}
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, 0, fns[a->esz]);
+ gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
}
return true;
}
@@ -1068,6 +1044,11 @@ static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
int mofs = pred_full_reg_offset(s, a->rm);
int gofs = pred_full_reg_offset(s, a->pg);
+ if (!a->s) {
+ tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
+ return true;
+ }
+
if (psz == 8) {
/* Do the operation and the flags generation in temps. */
TCGv_i64 pd = tcg_temp_new_i64();
@@ -1127,19 +1108,24 @@ static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_and_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else if (a->rn == a->rm) {
- if (a->pg == a->rn) {
- return do_mov_p(s, a->rd, a->rn);
- } else {
- return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
+
+ if (!a->s) {
+ if (!sve_access_check(s)) {
+ return true;
+ }
+ if (a->rn == a->rm) {
+ if (a->pg == a->rn) {
+ do_mov_p(s, a->rd, a->rn);
+ } else {
+ gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
+ }
+ return true;
+ } else if (a->pg == a->rn || a->pg == a->rm) {
+ gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
+ return true;
}
- } else if (a->pg == a->rn || a->pg == a->rm) {
- return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
}
+ return do_pppp_flags(s, a, &op);
}
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1163,13 +1149,14 @@ static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_bic_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else if (a->pg == a->rn) {
- return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
+
+ if (!a->s && a->pg == a->rn) {
+ if (sve_access_check(s)) {
+ gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
+ }
+ return true;
}
+ return do_pppp_flags(s, a, &op);
}
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1193,41 +1180,22 @@ static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_eor_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
-}
-
-static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
-{
- tcg_gen_and_i64(pn, pn, pg);
- tcg_gen_andc_i64(pm, pm, pg);
- tcg_gen_or_i64(pd, pn, pm);
-}
-
-static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
- TCGv_vec pm, TCGv_vec pg)
-{
- tcg_gen_and_vec(vece, pn, pn, pg);
- tcg_gen_andc_vec(vece, pm, pm, pg);
- tcg_gen_or_vec(vece, pd, pn, pm);
+ return do_pppp_flags(s, a, &op);
}
static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
- static const GVecGen4 op = {
- .fni8 = gen_sel_pg_i64,
- .fniv = gen_sel_pg_vec,
- .fno = gen_helper_sve_sel_pppp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- };
if (a->s) {
return false;
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
}
+ if (sve_access_check(s)) {
+ unsigned psz = pred_gvec_reg_size(s);
+ tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
+ pred_full_reg_offset(s, a->pg),
+ pred_full_reg_offset(s, a->rn),
+ pred_full_reg_offset(s, a->rm), psz, psz);
+ }
+ return true;
}
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1251,13 +1219,11 @@ static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_orr_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else if (a->pg == a->rn && a->rn == a->rm) {
+
+ if (!a->s && a->pg == a->rn && a->rn == a->rm) {
return do_mov_p(s, a->rd, a->rn);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
}
+ return do_pppp_flags(s, a, &op);
}
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1281,11 +1247,7 @@ static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_orn_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
+ return do_pppp_flags(s, a, &op);
}
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1309,11 +1271,7 @@ static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_nor_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
+ return do_pppp_flags(s, a, &op);
}
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1337,11 +1295,7 @@ static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_nand_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
+ return do_pppp_flags(s, a, &op);
}
/*
@@ -2103,10 +2057,7 @@ static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
};
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vsz, vsz, 0, fns[a->esz]);
+ gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
}
return true;
}
@@ -2119,11 +2070,7 @@ static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
};
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, 0, fns[a->esz]);
+ gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
}
return true;
}
@@ -2296,11 +2243,7 @@ static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
gen_helper_gvec_3 *fn)
{
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, data, fn);
+ gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}
return true;
}
@@ -2745,12 +2688,8 @@ static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
{
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- pred_full_reg_offset(s, a->pg),
- vsz, vsz, a->esz, gen_helper_sve_splice);
+ gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
+ a->rd, a->rn, a->rm, a->pg, 0);
}
return true;
}
@@ -3429,11 +3368,7 @@ static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
};
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, 0, fns[a->u][a->sz]);
+ gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, 0);
}
return true;
}
@@ -3446,11 +3381,7 @@ static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
};
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, a->index, fns[a->u][a->sz]);
+ gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->index);
}
return true;
}
@@ -5093,8 +5024,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
/* Zero the inactive elements. */
gen_set_label(over);
- do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
- return true;
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
@@ -5877,8 +5807,5 @@ static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
- if (sve_access_check(s)) {
- do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
- }
- return true;
+ return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
}
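
trans_SEL_pppp above replaces the hand-rolled GVecGen4 with tcg_gen_gvec_bitsel; with the operands (rd, pg, rn, rm), active predicate bits select from rn and inactive ones from rm. Over one 64-bit chunk that is:

    /* d = (n & g) | (m & ~g), bitwise -- the same operation the removed
       gen_sel_pg_i64 built from three TCG ops. */
    static uint64_t demo_bitsel(uint64_t g, uint64_t n, uint64_t m)
    {
        return (n & g) | (m & ~g);
    }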
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 6d6d4c0f42..423b0e08df 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -64,6 +64,7 @@ typedef struct DisasContext {
* that it is set at the point where we actually touch the FP regs.
*/
bool fp_access_checked;
+ bool sve_access_checked;
/* ARMv8 single-step state (this is distinct from the QEMU gdbstub
* single-step support).
*/
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 7d76412ee0..a6c53d2ab6 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -37,19 +37,24 @@
#endif
/* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */
-static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2,
- int16_t src3, uint32_t *sat)
+static int16_t do_sqrdmlah_h(int16_t src1, int16_t src2, int16_t src3,
+ bool neg, bool round, uint32_t *sat)
{
- /* Simplify:
+ /*
+ * Simplify:
* = ((a3 << 16) + ((e1 * e2) << 1) + (1 << 15)) >> 16
* = ((a3 << 15) + (e1 * e2) + (1 << 14)) >> 15
*/
int32_t ret = (int32_t)src1 * src2;
- ret = ((int32_t)src3 << 15) + ret + (1 << 14);
+ if (neg) {
+ ret = -ret;
+ }
+ ret += ((int32_t)src3 << 15) + (round << 14);
ret >>= 15;
+
if (ret != (int16_t)ret) {
*sat = 1;
- ret = (ret < 0 ? -0x8000 : 0x7fff);
+ ret = (ret < 0 ? INT16_MIN : INT16_MAX);
}
return ret;
}
@@ -58,8 +63,9 @@ uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1,
uint32_t src2, uint32_t src3)
{
uint32_t *sat = &env->vfp.qc[0];
- uint16_t e1 = inl_qrdmlah_s16(src1, src2, src3, sat);
- uint16_t e2 = inl_qrdmlah_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat);
+ uint16_t e1 = do_sqrdmlah_h(src1, src2, src3, false, true, sat);
+ uint16_t e2 = do_sqrdmlah_h(src1 >> 16, src2 >> 16, src3 >> 16,
+ false, true, sat);
return deposit32(e1, 16, 16, e2);
}
@@ -73,35 +79,18 @@ void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm,
uintptr_t i;
for (i = 0; i < opr_sz / 2; ++i) {
- d[i] = inl_qrdmlah_s16(n[i], m[i], d[i], vq);
+ d[i] = do_sqrdmlah_h(n[i], m[i], d[i], false, true, vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
-/* Signed saturating rounding doubling multiply-subtract high half, 16-bit */
-static int16_t inl_qrdmlsh_s16(int16_t src1, int16_t src2,
- int16_t src3, uint32_t *sat)
-{
- /* Similarly, using subtraction:
- * = ((a3 << 16) - ((e1 * e2) << 1) + (1 << 15)) >> 16
- * = ((a3 << 15) - (e1 * e2) + (1 << 14)) >> 15
- */
- int32_t ret = (int32_t)src1 * src2;
- ret = ((int32_t)src3 << 15) - ret + (1 << 14);
- ret >>= 15;
- if (ret != (int16_t)ret) {
- *sat = 1;
- ret = (ret < 0 ? -0x8000 : 0x7fff);
- }
- return ret;
-}
-
uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1,
uint32_t src2, uint32_t src3)
{
uint32_t *sat = &env->vfp.qc[0];
- uint16_t e1 = inl_qrdmlsh_s16(src1, src2, src3, sat);
- uint16_t e2 = inl_qrdmlsh_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat);
+ uint16_t e1 = do_sqrdmlah_h(src1, src2, src3, true, true, sat);
+ uint16_t e2 = do_sqrdmlah_h(src1 >> 16, src2 >> 16, src3 >> 16,
+ true, true, sat);
return deposit32(e1, 16, 16, e2);
}
@@ -115,19 +104,47 @@ void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm,
uintptr_t i;
for (i = 0; i < opr_sz / 2; ++i) {
- d[i] = inl_qrdmlsh_s16(n[i], m[i], d[i], vq);
+ d[i] = do_sqrdmlah_h(n[i], m[i], d[i], true, true, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqdmulh_h)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int16_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, false, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqrdmulh_h)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int16_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, true, vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
/* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */
-static int32_t inl_qrdmlah_s32(int32_t src1, int32_t src2,
- int32_t src3, uint32_t *sat)
+static int32_t do_sqrdmlah_s(int32_t src1, int32_t src2, int32_t src3,
+ bool neg, bool round, uint32_t *sat)
{
/* Simplify similarly to int_qrdmlah_s16 above. */
int64_t ret = (int64_t)src1 * src2;
- ret = ((int64_t)src3 << 31) + ret + (1 << 30);
+ if (neg) {
+ ret = -ret;
+ }
+ ret += ((int64_t)src3 << 31) + (round << 30);
ret >>= 31;
+
if (ret != (int32_t)ret) {
*sat = 1;
ret = (ret < 0 ? INT32_MIN : INT32_MAX);
@@ -139,7 +156,7 @@ uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1,
int32_t src2, int32_t src3)
{
uint32_t *sat = &env->vfp.qc[0];
- return inl_qrdmlah_s32(src1, src2, src3, sat);
+ return do_sqrdmlah_s(src1, src2, src3, false, true, sat);
}
void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm,
@@ -152,31 +169,16 @@ void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm,
uintptr_t i;
for (i = 0; i < opr_sz / 4; ++i) {
- d[i] = inl_qrdmlah_s32(n[i], m[i], d[i], vq);
+ d[i] = do_sqrdmlah_s(n[i], m[i], d[i], false, true, vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
-/* Signed saturating rounding doubling multiply-subtract high half, 32-bit */
-static int32_t inl_qrdmlsh_s32(int32_t src1, int32_t src2,
- int32_t src3, uint32_t *sat)
-{
- /* Simplify similarly to int_qrdmlsh_s16 above. */
- int64_t ret = (int64_t)src1 * src2;
- ret = ((int64_t)src3 << 31) - ret + (1 << 30);
- ret >>= 31;
- if (ret != (int32_t)ret) {
- *sat = 1;
- ret = (ret < 0 ? INT32_MIN : INT32_MAX);
- }
- return ret;
-}
-
uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1,
int32_t src2, int32_t src3)
{
uint32_t *sat = &env->vfp.qc[0];
- return inl_qrdmlsh_s32(src1, src2, src3, sat);
+ return do_sqrdmlah_s(src1, src2, src3, true, true, sat);
}
void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm,
@@ -189,7 +191,31 @@ void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm,
uintptr_t i;
for (i = 0; i < opr_sz / 4; ++i) {
- d[i] = inl_qrdmlsh_s32(n[i], m[i], d[i], vq);
+ d[i] = do_sqrdmlah_s(n[i], m[i], d[i], true, true, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqdmulh_s)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int32_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, false, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqrdmulh_s)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int32_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, true, vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
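
With round=true and an accumulator of zero, do_sqrdmlah_h computes exactly SQRDMULH, and with round=false, SQDMULH; that is what the neon_sq[r]dmulh_[hs] helpers above pass in. A worked Q15 example of the rounding and saturation paths (a standalone copy of the 16-bit arithmetic, with `sat` standing in for vfp.qc):

    #include <stdint.h>
    #include <stdio.h>

    static int16_t demo_sqrdmulh_h(int16_t e1, int16_t e2, uint32_t *sat)
    {
        int32_t ret = (int32_t)e1 * e2;  /* doubling is folded into the >> 15 */
        ret += 1 << 14;                  /* round = true, accumulator = 0 */
        ret >>= 15;
        if (ret != (int16_t)ret) {       /* does not fit in 16 bits: saturate */
            *sat = 1;
            ret = (ret < 0 ? INT16_MIN : INT16_MAX);
        }
        return ret;
    }

    int main(void)
    {
        uint32_t sat = 0;
        /* 0.5 * 0.5 = 0.25 in Q15: prints 0x2000 */
        printf("%#x\n", (uint16_t)demo_sqrdmulh_h(0x4000, 0x4000, &sat));
        /* -1.0 * -1.0 would be +1.0, unrepresentable in Q15: prints 0x7fff sat=1 */
        printf("%#x sat=%u\n",
               (uint16_t)demo_sqrdmulh_h(INT16_MIN, INT16_MIN, &sat), sat);
        return 0;
    }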
@@ -733,6 +759,52 @@ DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
*/
#define DO_MUL_IDX(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \
+ intptr_t idx = simd_data(desc); \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
+ TYPE mm = m[H(i + idx)]; \
+ for (j = 0; j < segment; j++) { \
+ d[i + j] = n[i + j] * mm; \
+ } \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_MUL_IDX(gvec_mul_idx_h, uint16_t, H2)
+DO_MUL_IDX(gvec_mul_idx_s, uint32_t, H4)
+DO_MUL_IDX(gvec_mul_idx_d, uint64_t, )
+
+#undef DO_MUL_IDX
+
+#define DO_MLA_IDX(NAME, TYPE, OP, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
+{ \
+ intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \
+ intptr_t idx = simd_data(desc); \
+ TYPE *d = vd, *n = vn, *m = vm, *a = va; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
+ TYPE mm = m[H(i + idx)]; \
+ for (j = 0; j < segment; j++) { \
+ d[i + j] = a[i + j] OP n[i + j] * mm; \
+ } \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_MLA_IDX(gvec_mla_idx_h, uint16_t, +, H2)
+DO_MLA_IDX(gvec_mla_idx_s, uint32_t, +, H4)
+DO_MLA_IDX(gvec_mla_idx_d, uint64_t, +, )
+
+DO_MLA_IDX(gvec_mls_idx_h, uint16_t, -, H2)
+DO_MLA_IDX(gvec_mls_idx_s, uint32_t, -, H4)
+DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, )
+
+#undef DO_MLA_IDX
+
+#define DO_FMUL_IDX(NAME, TYPE, H) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
{ \
intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \
@@ -747,11 +819,11 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
clear_tail(d, oprsz, simd_maxsz(desc)); \
}
-DO_MUL_IDX(gvec_fmul_idx_h, float16, H2)
-DO_MUL_IDX(gvec_fmul_idx_s, float32, H4)
-DO_MUL_IDX(gvec_fmul_idx_d, float64, )
+DO_FMUL_IDX(gvec_fmul_idx_h, float16, H2)
+DO_FMUL_IDX(gvec_fmul_idx_s, float32, H4)
+DO_FMUL_IDX(gvec_fmul_idx_d, float64, )
-#undef DO_MUL_IDX
+#undef DO_FMUL_IDX
#define DO_FMLA_IDX(NAME, TYPE, H) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \