aboutsummaryrefslogtreecommitdiff
path: root/target-arm/translate.c
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2011-10-19 16:14:07 +0000
committerPeter Maydell <peter.maydell@linaro.org>2011-10-19 16:14:07 +0000
commitda97f52cb35ccf72a4a3d745926ee5e3c263ed07 (patch)
tree4a83c955b33a27bc620502e3470e7fbf8ebe44dc /target-arm/translate.c
parent369be8f618ac145d2421ea4bfff86ee774ad618c (diff)
target-arm: Implement VFPv4 fused multiply-accumulate insns
Implement the fused multiply-accumulate instructions (VFMA, VFMS, VFNMA, VFNMS) which are new in VFPv4. Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target-arm/translate.c')
-rw-r--r--target-arm/translate.c72
1 files changed, 72 insertions, 0 deletions
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 812a9e7be4..0f35b60946 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -3141,6 +3141,57 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
case 8: /* div: fn / fm */
gen_vfp_div(dp);
break;
+ case 10: /* VFNMA : fd = muladd(-fd, fn, fm) */
+ case 11: /* VFNMS : fd = muladd(-fd, -fn, fm) */
+ case 12: /* VFMA : fd = muladd( fd, fn, fm) */
+ case 13: /* VFMS : fd = muladd( fd, -fn, fm) */
+ /* These are fused multiply-add, and must be done as one
+ * floating point operation with no rounding between the
+ * multiplication and addition steps.
+ * NB that doing the negations here as separate steps is
+ * correct : an input NaN should come out with its sign bit
+ * flipped if it is a negated-input.
+ */
+ if (!arm_feature(env, ARM_FEATURE_VFP4)) {
+ return 1;
+ }
+ if (dp) {
+ TCGv_ptr fpst;
+ TCGv_i64 frd;
+ if (op & 1) {
+ /* VFNMS, VFMS */
+ gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
+ }
+ frd = tcg_temp_new_i64();
+ tcg_gen_ld_f64(frd, cpu_env, vfp_reg_offset(dp, rd));
+ if (op & 2) {
+ /* VFNMA, VFNMS */
+ gen_helper_vfp_negd(frd, frd);
+ }
+ fpst = get_fpstatus_ptr(0);
+ gen_helper_vfp_muladdd(cpu_F0d, cpu_F0d,
+ cpu_F1d, frd, fpst);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i64(frd);
+ } else {
+ TCGv_ptr fpst;
+ TCGv_i32 frd;
+ if (op & 1) {
+ /* VFNMS, VFMS */
+ gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
+ }
+ frd = tcg_temp_new_i32();
+ tcg_gen_ld_f32(frd, cpu_env, vfp_reg_offset(dp, rd));
+ if (op & 2) {
+ gen_helper_vfp_negs(frd, frd);
+ }
+ fpst = get_fpstatus_ptr(0);
+ gen_helper_vfp_muladds(cpu_F0s, cpu_F0s,
+ cpu_F1s, frd, fpst);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(frd);
+ }
+ break;
case 14: /* fconst */
if (!arm_feature(env, ARM_FEATURE_VFP3))
return 1;
@@ -4417,6 +4468,7 @@ static void gen_neon_narrow_op(int op, int u, int size, TCGv dest, TCGv_i64 src)
#define NEON_3R_VPMIN 21
#define NEON_3R_VQDMULH_VQRDMULH 22
#define NEON_3R_VPADD 23
+#define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
@@ -4449,6 +4501,7 @@ static const uint8_t neon_3r_sizes[] = {
[NEON_3R_VPMIN] = 0x7,
[NEON_3R_VQDMULH_VQRDMULH] = 0x6,
[NEON_3R_VPADD] = 0x7,
+ [NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
[NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
[NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
[NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
@@ -4726,6 +4779,11 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
return 1;
}
break;
+ case NEON_3R_VFM:
+ if (!arm_feature(env, ARM_FEATURE_VFP4) || u) {
+ return 1;
+ }
+ break;
default:
break;
}
@@ -5006,6 +5064,20 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
else
gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
break;
+ case NEON_3R_VFM:
+ {
+ /* VFMA, VFMS: fused multiply-add */
+ TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+ TCGv_i32 tmp3 = neon_load_reg(rd, pass);
+ if (size) {
+ /* VFMS */
+ gen_helper_vfp_negs(tmp, tmp);
+ }
+ gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
+ tcg_temp_free_i32(tmp3);
+ tcg_temp_free_ptr(fpstatus);
+ break;
+ }
default:
abort();
}