author     Peter Maydell <peter.maydell@linaro.org>    2014-02-20 10:35:49 +0000
committer  Peter Maydell <peter.maydell@linaro.org>    2014-02-20 10:35:49 +0000
commit     c44ad1fddcf5a3deea3fb5cc340935bb11ccfb8e (patch)
tree       f0a52e7cd2a3de10bf7f3258968fb502d319dd86 /target-arm
parent     f5e51e7f10d6dbbeac268a7defc89831c62eff12 (diff)
target-arm: A64: Implement long vector x indexed insns
Implement the 'long' operations in the vector x indexed element category.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
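
For orientation only (not part of the patch): a minimal scalar sketch of what one of these 'long' by-element operations, SMLAL Vd.4S, Vn.4H, Vm.H[idx], computes. Each 16-bit source lane is widened, multiplied by the single indexed 16-bit element, and accumulated into a 32-bit destination lane. The function and type names below are illustrative and are not QEMU's.

#include <stdint.h>

/* Scalar model of SMLAL (by element), lower-half form:
 * vd.s[i] += sign_extend(vn.h[i]) * sign_extend(vm.h[idx])
 * idx is the element index, 0..7 for H-sized elements.
 */
static void smlal_by_elem(int32_t vd[4], const int16_t vn[4],
                          const int16_t vm[8], int idx)
{
    for (int i = 0; i < 4; i++) {
        vd[i] += (int32_t)vn[i] * (int32_t)vm[idx];
    }
}
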
Diffstat (limited to 'target-arm')
-rw-r--r--  target-arm/translate-a64.c  |  144
1 file changed, 139 insertions(+), 5 deletions(-)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index a96ee4aa4b..f1cd08aa7e 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -7909,11 +7909,6 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
}
}
- if (is_long) {
- unsupported_encoding(s, insn);
- return;
- }
-
if (is_fp) {
fpst = get_fpstatus_ptr();
} else {
@@ -8055,6 +8050,145 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
}
} else {
/* long ops: 16x16->32 or 32x32->64 */
+ TCGv_i64 tcg_res[2];
+ int pass;
+ bool satop = extract32(opcode, 0, 1);
+ TCGMemOp memop = MO_32;
+
+ if (satop || !u) {
+ memop |= MO_SIGN;
+ }
+
+ if (size == 2) {
+ TCGv_i64 tcg_idx = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_idx, rm, index, memop);
+
+ for (pass = 0; pass < 2; pass++) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+ TCGv_i64 tcg_passres;
+
+ read_vec_element(s, tcg_op, rn, pass + (is_q * 2), memop);
+
+ tcg_res[pass] = tcg_temp_new_i64();
+
+ if (opcode == 0xa || opcode == 0xb) {
+ /* Non-accumulating ops */
+ tcg_passres = tcg_res[pass];
+ } else {
+ tcg_passres = tcg_temp_new_i64();
+ }
+
+ tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
+ tcg_temp_free_i64(tcg_op);
+
+ if (satop) {
+ /* saturating, doubling */
+ gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
+ tcg_passres, tcg_passres);
+ }
+
+ if (opcode == 0xa || opcode == 0xb) {
+ continue;
+ }
+
+ /* Accumulating op: handle accumulate step */
+ read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+
+ switch (opcode) {
+ case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+ tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
+ break;
+ case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+ tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
+ break;
+ case 0x7: /* SQDMLSL, SQDMLSL2 */
+ tcg_gen_neg_i64(tcg_passres, tcg_passres);
+ /* fall through */
+ case 0x3: /* SQDMLAL, SQDMLAL2 */
+ gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
+ tcg_res[pass],
+ tcg_passres);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_temp_free_i64(tcg_passres);
+ }
+ tcg_temp_free_i64(tcg_idx);
+ } else {
+ TCGv_i32 tcg_idx = tcg_temp_new_i32();
+
+ assert(size == 1);
+ read_vec_element_i32(s, tcg_idx, rm, index, size);
+
+ /* The simplest way to handle the 16x16 indexed ops is to duplicate
+ * the index into both halves of the 32 bit tcg_idx and then use
+ * the usual Neon helpers.
+ */
+ tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
+
+ for (pass = 0; pass < 2; pass++) {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+ TCGv_i64 tcg_passres;
+
+ read_vec_element_i32(s, tcg_op, rn, pass + (is_q * 2), MO_32);
+ tcg_res[pass] = tcg_temp_new_i64();
+
+ if (opcode == 0xa || opcode == 0xb) {
+ /* Non-accumulating ops */
+ tcg_passres = tcg_res[pass];
+ } else {
+ tcg_passres = tcg_temp_new_i64();
+ }
+
+ if (memop & MO_SIGN) {
+ gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
+ } else {
+ gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
+ }
+ if (satop) {
+ gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
+ tcg_passres, tcg_passres);
+ }
+ tcg_temp_free_i32(tcg_op);
+
+ if (opcode == 0xa || opcode == 0xb) {
+ continue;
+ }
+
+ /* Accumulating op: handle accumulate step */
+ read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+
+ switch (opcode) {
+ case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+ gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
+ tcg_passres);
+ break;
+ case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+ gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
+ tcg_passres);
+ break;
+ case 0x7: /* SQDMLSL, SQDMLSL2 */
+ gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
+ /* fall through */
+ case 0x3: /* SQDMLAL, SQDMLAL2 */
+ gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
+ tcg_res[pass],
+ tcg_passres);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_temp_free_i64(tcg_passres);
+ }
+ tcg_temp_free_i32(tcg_idx);
+ }
+
+ for (pass = 0; pass < 2; pass++) {
+ write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+ tcg_temp_free_i64(tcg_res[pass]);
+ }
}
if (!TCGV_IS_UNUSED_PTR(fpst)) {