author | Peter Maydell <peter.maydell@linaro.org> | 2019-06-11 16:39:49 +0100
committer | Peter Maydell <peter.maydell@linaro.org> | 2019-06-13 15:14:05 +0100
commit | d4893b01d23060845ee3855bc96626e16aad9ab5 (patch)
tree | a92fe3bbd8616fdf0febd1ad375cb0321ad71534 /target/arm/translate-vfp.inc.c
parent | 519ee7ae31e050eb0ff9ad35c213f0bd7ab1c03e (diff)
target/arm: Convert VFP fused multiply-add insns to decodetree
Convert the VFP fused multiply-add instructions (VFNMA, VFNMS,
VFMA, VFMS) to decodetree.
Note that in the old decode structure we were implementing
these to honour the VFP vector stride/length. These instructions
were introduced in VFPv4, and in the v7A architecture they
are UNPREDICTABLE if the vector stride or length are non-zero.
In v8A they must UNDEF if stride or length are non-zero, like
all VFP instructions; we choose to UNDEF always.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
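
As an aside, the four encodings differ only in which inputs are negated before the single fused operation; per the comment block in the patch, muladd(d, n, m) here means d + n*m computed with one rounding. Below is a minimal host-side C sketch of that mapping, not part of the patch: the C library's fmaf() stands in for gen_helper_vfp_muladds(), and the neg_d/neg_n flags and fma_variant() name are hypothetical stand-ins for the decoded o1/o2 bits and the trans function.

    /* Illustration only: mirrors the negation choices made in trans_VFM_sp(). */
    #include <math.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* neg_n: negate fn (VFMS/VFNMS); neg_d: negate fd (VFNMA/VFNMS) */
    static float fma_variant(float fd, float fn, float fm, bool neg_d, bool neg_n)
    {
        if (neg_n) {
            fn = -fn;
        }
        if (neg_d) {
            fd = -fd;
        }
        /* single rounding, like the fused vfp_muladds helper: fn * fm + fd */
        return fmaf(fn, fm, fd);
    }

    int main(void)
    {
        float fd = 1.0f, fn = 2.0f, fm = 3.0f;
        printf("VFMA : %f\n", fma_variant(fd, fn, fm, false, false)); /*  fd + fn*fm =  7 */
        printf("VFMS : %f\n", fma_variant(fd, fn, fm, false, true));  /*  fd - fn*fm = -5 */
        printf("VFNMA: %f\n", fma_variant(fd, fn, fm, true,  false)); /* -fd + fn*fm =  5 */
        printf("VFNMS: %f\n", fma_variant(fd, fn, fm, true,  true));  /* -fd - fn*fm = -7 */
        return 0;
    }

(Compile with -lm; doing the negations as separate steps before the fused operation matches the NaN-sign behaviour the patch comment describes.)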
Diffstat (limited to 'target/arm/translate-vfp.inc.c')
-rw-r--r-- | target/arm/translate-vfp.inc.c | 121
1 file changed, 121 insertions, 0 deletions
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index 6af99605d5..ba6506a378 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -1481,3 +1481,124 @@ static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_sp *a)
 {
     return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
 }
+
+static bool trans_VFM_sp(DisasContext *s, arg_VFM_sp *a)
+{
+    /*
+     * VFNMA : fd = muladd(-fd,  fn, fm)
+     * VFNMS : fd = muladd(-fd, -fn, fm)
+     * VFMA  : fd = muladd( fd,  fn, fm)
+     * VFMS  : fd = muladd( fd, -fn, fm)
+     *
+     * These are fused multiply-add, and must be done as one floating
+     * point operation with no rounding between the multiplication and
+     * addition steps. NB that doing the negations here as separate
+     * steps is correct : an input NaN should come out with its sign
+     * bit flipped if it is a negated-input.
+     */
+    TCGv_ptr fpst;
+    TCGv_i32 vn, vm, vd;
+
+    /*
+     * Present in VFPv4 only.
+     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
+     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
+     */
+    if (!arm_dc_feature(s, ARM_FEATURE_VFP4) ||
+        (s->vec_len != 0 || s->vec_stride != 0)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    vn = tcg_temp_new_i32();
+    vm = tcg_temp_new_i32();
+    vd = tcg_temp_new_i32();
+
+    neon_load_reg32(vn, a->vn);
+    neon_load_reg32(vm, a->vm);
+    if (a->o2) {
+        /* VFNMS, VFMS */
+        gen_helper_vfp_negs(vn, vn);
+    }
+    neon_load_reg32(vd, a->vd);
+    if (a->o1 & 1) {
+        /* VFNMA, VFNMS */
+        gen_helper_vfp_negs(vd, vd);
+    }
+    fpst = get_fpstatus_ptr(0);
+    gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
+    neon_store_reg32(vd, a->vd);
+
+    tcg_temp_free_ptr(fpst);
+    tcg_temp_free_i32(vn);
+    tcg_temp_free_i32(vm);
+    tcg_temp_free_i32(vd);
+
+    return true;
+}
+
+static bool trans_VFM_dp(DisasContext *s, arg_VFM_sp *a)
+{
+    /*
+     * VFNMA : fd = muladd(-fd,  fn, fm)
+     * VFNMS : fd = muladd(-fd, -fn, fm)
+     * VFMA  : fd = muladd( fd,  fn, fm)
+     * VFMS  : fd = muladd( fd, -fn, fm)
+     *
+     * These are fused multiply-add, and must be done as one floating
+     * point operation with no rounding between the multiplication and
+     * addition steps. NB that doing the negations here as separate
+     * steps is correct : an input NaN should come out with its sign
+     * bit flipped if it is a negated-input.
+     */
+    TCGv_ptr fpst;
+    TCGv_i64 vn, vm, vd;
+
+    /*
+     * Present in VFPv4 only.
+     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
+     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
+     */
+    if (!arm_dc_feature(s, ARM_FEATURE_VFP4) ||
+        (s->vec_len != 0 || s->vec_stride != 0)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vn | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    vn = tcg_temp_new_i64();
+    vm = tcg_temp_new_i64();
+    vd = tcg_temp_new_i64();
+
+    neon_load_reg64(vn, a->vn);
+    neon_load_reg64(vm, a->vm);
+    if (a->o2) {
+        /* VFNMS, VFMS */
+        gen_helper_vfp_negd(vn, vn);
+    }
+    neon_load_reg64(vd, a->vd);
+    if (a->o1 & 1) {
+        /* VFNMA, VFNMS */
+        gen_helper_vfp_negd(vd, vd);
+    }
+    fpst = get_fpstatus_ptr(0);
+    gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
+    neon_store_reg64(vd, a->vd);
+
+    tcg_temp_free_ptr(fpst);
+    tcg_temp_free_i64(vn);
+    tcg_temp_free_i64(vm);
+    tcg_temp_free_i64(vd);
+
+    return true;
+}