aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- target/arm/helper-sve.h | 5
-rw-r--r-- target/arm/sve_helper.c | 70
-rw-r--r-- target/arm/translate-sve.c | 49
3 files changed, 32 insertions, 92 deletions
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 63c4a087ca..4411c47120 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -269,11 +269,6 @@ DEF_HELPER_FLAGS_3(sve_uminv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_uminv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_uminv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
DEF_HELPER_FLAGS_4(sve_movz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_movz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_movz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 382fa82bc8..4758d46f34 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -956,85 +956,43 @@ uint32_t HELPER(sve_pnext)(void *vd, void *vg, uint32_t pred_desc)
return flags;
}
-/* Store zero into every active element of Zd. We will use this for two
- * and three-operand predicated instructions for which logic dictates a
- * zero result. In particular, logical shift by element size, which is
- * otherwise undefined on the host.
- *
- * For element sizes smaller than uint64_t, we use tables to expand
- * the N bits of the controlling predicate to a byte mask, and clear
- * those bytes.
+/*
+ * Copy Zn into Zd, and store zero into inactive elements.
+ * If inv, store zeros into the active elements.
*/
-void HELPER(sve_clr_b)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] &= ~expand_pred_b(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_clr_h)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] &= ~expand_pred_h(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_clr_s)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] &= ~expand_pred_s(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_clr_d)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- if (pg[H1(i)] & 1) {
- d[i] = 0;
- }
- }
-}
-
-/* Copy Zn into Zd, and store zero into inactive elements. */
void HELPER(sve_movz_b)(void *vd, void *vn, void *vg, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
uint64_t *d = vd, *n = vn;
uint8_t *pg = vg;
+
for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & expand_pred_b(pg[H1(i)]);
+ d[i] = n[i] & (expand_pred_b(pg[H1(i)]) ^ inv);
}
}
void HELPER(sve_movz_h)(void *vd, void *vn, void *vg, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
uint64_t *d = vd, *n = vn;
uint8_t *pg = vg;
+
for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & expand_pred_h(pg[H1(i)]);
+ d[i] = n[i] & (expand_pred_h(pg[H1(i)]) ^ inv);
}
}
void HELPER(sve_movz_s)(void *vd, void *vn, void *vg, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
uint64_t *d = vd, *n = vn;
uint8_t *pg = vg;
+
for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & expand_pred_s(pg[H1(i)]);
+ d[i] = n[i] & (expand_pred_s(pg[H1(i)]) ^ inv);
}
}
@@ -1043,8 +1001,10 @@ void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc)
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
uint64_t *d = vd, *n = vn;
uint8_t *pg = vg;
+ uint8_t inv = simd_data(desc);
+
for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & -(uint64_t)(pg[H1(i)] & 1);
+ d[i] = n[i] & -(uint64_t)((pg[H1(i)] ^ inv) & 1);
}
}
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 96bfd32bbe..aba156d5ab 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -590,39 +590,28 @@ static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
*** SVE Shift by Immediate - Predicated Group
*/
-/* Store zero into every active element of Zd. We will use this for two
- * and three-operand predicated instructions for which logic dictates a
- * zero result.
+/*
+ * Copy Zn into Zd, storing zeros into inactive elements.
+ * If invert, store zeros into the active elements.
*/
-static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
+static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
+ int esz, bool invert)
{
- static gen_helper_gvec_2 * const fns[4] = {
- gen_helper_sve_clr_b, gen_helper_sve_clr_h,
- gen_helper_sve_clr_s, gen_helper_sve_clr_d,
+ static gen_helper_gvec_3 * const fns[4] = {
+ gen_helper_sve_movz_b, gen_helper_sve_movz_h,
+ gen_helper_sve_movz_s, gen_helper_sve_movz_d,
};
+
if (sve_access_check(s)) {
unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
pred_full_reg_offset(s, pg),
- vsz, vsz, 0, fns[esz]);
+ vsz, vsz, invert, fns[esz]);
}
return true;
}
-/* Copy Zn into Zd, storing zeros into inactive elements. */
-static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- gen_helper_sve_movz_b, gen_helper_sve_movz_h,
- gen_helper_sve_movz_s, gen_helper_sve_movz_d,
- };
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- pred_full_reg_offset(s, pg),
- vsz, vsz, 0, fns[esz]);
-}
-
static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
gen_helper_gvec_3 *fn)
{
@@ -664,7 +653,7 @@ static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
/* Shift by element size is architecturally valid.
For logical shifts, it is a zeroing operation. */
if (a->imm >= (8 << a->esz)) {
- return do_clr_zp(s, a->rd, a->pg, a->esz);
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
} else {
return do_zpzi_ool(s, a, fns[a->esz]);
}
@@ -682,7 +671,7 @@ static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
/* Shift by element size is architecturally valid.
For logical shifts, it is a zeroing operation. */
if (a->imm >= (8 << a->esz)) {
- return do_clr_zp(s, a->rd, a->pg, a->esz);
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
} else {
return do_zpzi_ool(s, a, fns[a->esz]);
}
@@ -700,7 +689,7 @@ static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
/* Shift by element size is architecturally valid. For arithmetic
right shift for division, it is a zeroing operation. */
if (a->imm >= (8 << a->esz)) {
- return do_clr_zp(s, a->rd, a->pg, a->esz);
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
} else {
return do_zpzi_ool(s, a, fns[a->esz]);
}
@@ -5049,8 +5038,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
/* Zero the inactive elements. */
gen_set_label(over);
- do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
- return true;
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
@@ -5833,8 +5821,5 @@ static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
- if (sve_access_check(s)) {
- do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
- }
- return true;
+ return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
}