From 120a0eb3ea23a5b06fae2f3daebd46a4035864cf Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 28 Aug 2020 19:33:12 +0100 Subject: target/arm: Implement VFP fp16 for VFP_BINOP operations Implmeent VFP fp16 support for simple binary-operator VFP insns VADD, VSUB, VMUL, VDIV, VMINNM and VMAXNM: * make the VFP_BINOP() macro generate float16 helpers as well as float32 and float64 * implement a do_vfp_3op_hp() function similar to the existing do_vfp_3op_sp() * add decode for the half-precision insn patterns Note that the VFP_BINOP macro use creates a couple of unused helper functions vfp_maxh and vfp_minh, but they're small so it's not worth splitting the BINOP operations into "needs halfprec" and "no halfprec" groups. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20200828183354.27913-4-peter.maydell@linaro.org --- target/arm/helper.h | 8 ++++ target/arm/translate-vfp.c.inc | 86 ++++++++++++++++++++++++++++++++++++++++++ target/arm/vfp-uncond.decode | 3 ++ target/arm/vfp.decode | 4 ++ target/arm/vfp_helper.c | 5 +++ 5 files changed, 106 insertions(+) diff --git a/target/arm/helper.h b/target/arm/helper.h index 3ca73a1764..61e4e93886 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -101,20 +101,28 @@ DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, tl, i32, i32, i32) DEF_HELPER_1(vfp_get_fpscr, i32, env) DEF_HELPER_2(vfp_set_fpscr, void, env, i32) +DEF_HELPER_3(vfp_addh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_adds, f32, f32, f32, ptr) DEF_HELPER_3(vfp_addd, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_subh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_subs, f32, f32, f32, ptr) DEF_HELPER_3(vfp_subd, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_mulh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_muls, f32, f32, f32, ptr) DEF_HELPER_3(vfp_muld, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_divh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_divs, f32, f32, f32, ptr) DEF_HELPER_3(vfp_divd, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_maxh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_maxs, f32, f32, f32, ptr) DEF_HELPER_3(vfp_maxd, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_minh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_mins, f32, f32, f32, ptr) DEF_HELPER_3(vfp_mind, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_maxnumh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_maxnums, f32, f32, f32, ptr) DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr) DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr) DEF_HELPER_1(vfp_negs, f32, f32) diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc index 4eeafb494a..01a5fd6511 100644 --- a/target/arm/translate-vfp.c.inc +++ b/target/arm/translate-vfp.c.inc @@ -1266,6 +1266,54 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn, return true; } +static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn, + int vd, int vn, int vm, bool reads_vd) +{ + /* + * Do a half-precision operation. Functionally this is + * the same as do_vfp_3op_sp(), except: + * - it uses the FPST_FPCR_F16 + * - it doesn't need the VFP vector handling (fp16 is a + * v8 feature, and in v8 VFP vectors don't exist) + * - it does the aa32_fp16_arith feature test + */ + TCGv_i32 f0, f1, fd; + TCGv_ptr fpst; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + if (s->vec_len != 0 || s->vec_stride != 0) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + f0 = tcg_temp_new_i32(); + f1 = tcg_temp_new_i32(); + fd = tcg_temp_new_i32(); + fpst = fpstatus_ptr(FPST_FPCR_F16); + + neon_load_reg32(f0, vn); + neon_load_reg32(f1, vm); + + if (reads_vd) { + neon_load_reg32(fd, vd); + } + fn(fd, f0, f1, fpst); + neon_store_reg32(fd, vd); + + tcg_temp_free_i32(f0); + tcg_temp_free_i32(f1); + tcg_temp_free_i32(fd); + tcg_temp_free_ptr(fpst); + + return true; +} + static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn, int vd, int vn, int vm, bool reads_vd) { @@ -1643,6 +1691,11 @@ static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a) return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true); } +static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a) +{ + return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false); +} + static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a) { return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false); @@ -1677,6 +1730,11 @@ static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a) return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false); } +static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a) +{ + return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false); +} + static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a) { return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false); @@ -1687,6 +1745,11 @@ static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a) return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false); } +static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a) +{ + return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false); +} + static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a) { return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false); @@ -1697,6 +1760,11 @@ static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a) return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false); } +static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a) +{ + return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false); +} + static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a) { return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false); @@ -1707,6 +1775,24 @@ static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a) return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false); } +static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a) +{ + if (!dc_isar_feature(aa32_vminmaxnm, s)) { + return false; + } + return do_vfp_3op_hp(s, gen_helper_vfp_minnumh, + a->vd, a->vn, a->vm, false); +} + +static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a) +{ + if (!dc_isar_feature(aa32_vminmaxnm, s)) { + return false; + } + return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh, + a->vd, a->vn, a->vm, false); +} + static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a) { if (!dc_isar_feature(aa32_vminmaxnm, s)) { diff --git a/target/arm/vfp-uncond.decode b/target/arm/vfp-uncond.decode index 34ca164266..ee700e5197 100644 --- a/target/arm/vfp-uncond.decode +++ b/target/arm/vfp-uncond.decode @@ -49,6 +49,9 @@ VSEL 1111 1110 0. cc:2 .... .... 1010 .0.0 .... \ VSEL 1111 1110 0. cc:2 .... .... 1011 .0.0 .... \ vm=%vm_dp vn=%vn_dp vd=%vd_dp dp=1 +VMAXNM_hp 1111 1110 1.00 .... .... 1001 .0.0 .... @vfp_dnm_s +VMINNM_hp 1111 1110 1.00 .... .... 1001 .1.0 .... @vfp_dnm_s + VMAXNM_sp 1111 1110 1.00 .... .... 1010 .0.0 .... @vfp_dnm_s VMINNM_sp 1111 1110 1.00 .... .... 1010 .1.0 .... @vfp_dnm_s diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode index 2c793e3e87..1ecd5e28ca 100644 --- a/target/arm/vfp.decode +++ b/target/arm/vfp.decode @@ -115,18 +115,22 @@ VNMLS_dp ---- 1110 0.01 .... .... 1011 .0.0 .... @vfp_dnm_d VNMLA_sp ---- 1110 0.01 .... .... 1010 .1.0 .... @vfp_dnm_s VNMLA_dp ---- 1110 0.01 .... .... 1011 .1.0 .... @vfp_dnm_d +VMUL_hp ---- 1110 0.10 .... .... 1001 .0.0 .... @vfp_dnm_s VMUL_sp ---- 1110 0.10 .... .... 1010 .0.0 .... @vfp_dnm_s VMUL_dp ---- 1110 0.10 .... .... 1011 .0.0 .... @vfp_dnm_d VNMUL_sp ---- 1110 0.10 .... .... 1010 .1.0 .... @vfp_dnm_s VNMUL_dp ---- 1110 0.10 .... .... 1011 .1.0 .... @vfp_dnm_d +VADD_hp ---- 1110 0.11 .... .... 1001 .0.0 .... @vfp_dnm_s VADD_sp ---- 1110 0.11 .... .... 1010 .0.0 .... @vfp_dnm_s VADD_dp ---- 1110 0.11 .... .... 1011 .0.0 .... @vfp_dnm_d +VSUB_hp ---- 1110 0.11 .... .... 1001 .1.0 .... @vfp_dnm_s VSUB_sp ---- 1110 0.11 .... .... 1010 .1.0 .... @vfp_dnm_s VSUB_dp ---- 1110 0.11 .... .... 1011 .1.0 .... @vfp_dnm_d +VDIV_hp ---- 1110 1.00 .... .... 1001 .0.0 .... @vfp_dnm_s VDIV_sp ---- 1110 1.00 .... .... 1010 .0.0 .... @vfp_dnm_s VDIV_dp ---- 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c index 02ab8d7f2d..b8ca744bcc 100644 --- a/target/arm/vfp_helper.c +++ b/target/arm/vfp_helper.c @@ -236,6 +236,11 @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val) #define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p)) #define VFP_BINOP(name) \ +dh_ctype_f16 VFP_HELPER(name, h)(dh_ctype_f16 a, dh_ctype_f16 b, void *fpstp) \ +{ \ + float_status *fpst = fpstp; \ + return float16_ ## name(a, b, fpst); \ +} \ float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \ { \ float_status *fpst = fpstp; \ -- cgit v1.2.3