diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2018-05-15 15:07:34 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2018-05-15 15:07:34 +0100 |
commit | 0275e6b66c0947f83ad0d8dd687eadbcbf0c5ec5 (patch) | |
tree | c68f377430d1a11d50bf05d62c494c7cc0ff47a7 | |
parent | ad1b4ec39caa5b3f17cbd8160283a03a3dcfe2ae (diff) | |
parent | ae7651804748c6b479d5ae09aeac4edb9c44f76e (diff) |
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20180515' into staging
target-arm queue:
* Fix coverity nit in int_to_float code
* Don't set Invalid for float-to-int(MAXINT)
* Fix fp_status_f16 tininess before rounding
* Add various missing insns from the v8.2-FP16 extension
* Fix sqrt_f16 exception raising
* sdcard: Correct CRC16 offset in sd_function_switch()
* tcg: Optionally log FPU state in TCG -d cpu logging
# gpg: Signature made Tue 15 May 2018 15:06:09 BST
# gpg: using RSA key 3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg: aka "Peter Maydell <pmaydell@gmail.com>"
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE
* remotes/pmaydell/tags/pull-target-arm-20180515:
tcg: Optionally log FPU state in TCG -d cpu logging
sdcard: Correct CRC16 offset in sd_function_switch()
target/arm: Fix sqrt_f16 exception raising
target/arm: Implement FMOV (immediate) for fp16
target/arm: Implement FCSEL for fp16
target/arm: Implement FCMP for fp16
target/arm: Implement FP data-processing (3 source) for fp16
target/arm: Implement FP data-processing (2 source) for fp16
target/arm: Introduce and use read_fp_hreg
target/arm: Implement FCVT (scalar, fixed-point) for fp16
target/arm: Implement FCVT (scalar, integer) for fp16
target/arm: Early exit after unallocated_encoding in disas_fp_int_conv
target/arm: Implement FMOV (general) for fp16
target/arm: Fix fp_status_f16 tininess before rounding
fpu/softfloat: Don't set Invalid for float-to-int(MAXINT)
fpu/softfloat: int_to_float ensure r fully initialised
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | accel/tcg/cpu-exec.c | 9 | ||||
-rw-r--r-- | fpu/softfloat.c | 6 | ||||
-rw-r--r-- | hw/sd/sd.c | 2 | ||||
-rw-r--r-- | include/qemu/log.h | 1 | ||||
-rw-r--r-- | target/arm/cpu.c | 2 | ||||
-rw-r--r-- | target/arm/helper-a64.c | 10 | ||||
-rw-r--r-- | target/arm/helper-a64.h | 2 | ||||
-rw-r--r-- | target/arm/helper.c | 38 | ||||
-rw-r--r-- | target/arm/helper.h | 6 | ||||
-rw-r--r-- | target/arm/translate-a64.c | 421 | ||||
-rw-r--r-- | util/log.c | 2 |
11 files changed, 428 insertions, 71 deletions
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 81153e7a13..0b154cc678 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -156,11 +156,14 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb) if (qemu_loglevel_mask(CPU_LOG_TB_CPU) && qemu_log_in_addr_range(itb->pc)) { qemu_log_lock(); + int flags = 0; + if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) { + flags |= CPU_DUMP_FPU; + } #if defined(TARGET_I386) - log_cpu_state(cpu, CPU_DUMP_CCOP); -#else - log_cpu_state(cpu, 0); + flags |= CPU_DUMP_CCOP; #endif + log_cpu_state(cpu, flags); qemu_log_unlock(); } #endif /* DEBUG_DISAS */ diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 8401b37bd4..bc0f52fa54 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -1368,14 +1368,14 @@ static int64_t round_to_int_and_pack(FloatParts in, int rmode, r = UINT64_MAX; } if (p.sign) { - if (r < -(uint64_t) min) { + if (r <= -(uint64_t) min) { return -r; } else { s->float_exception_flags = orig_flags | float_flag_invalid; return min; } } else { - if (r < max) { + if (r <= max) { return r; } else { s->float_exception_flags = orig_flags | float_flag_invalid; @@ -1525,7 +1525,7 @@ FLOAT_TO_UINT(64, 64) static FloatParts int_to_float(int64_t a, float_status *status) { - FloatParts r; + FloatParts r = {}; if (a == 0) { r.cls = float_class_zero; r.sign = false; diff --git a/hw/sd/sd.c b/hw/sd/sd.c index 235e0518d6..7af19fa06c 100644 --- a/hw/sd/sd.c +++ b/hw/sd/sd.c @@ -787,7 +787,7 @@ static void sd_function_switch(SDState *sd, uint32_t arg) sd->data[14 + (i >> 1)] = new_func << ((i * 4) & 4); } memset(&sd->data[17], 0, 47); - stw_be_p(sd->data + 65, sd_crc16(sd->data, 64)); + stw_be_p(sd->data + 64, sd_crc16(sd->data, 64)); } static inline bool sd_wp_addr(SDState *sd, uint64_t addr) diff --git a/include/qemu/log.h b/include/qemu/log.h index ff92a8b86a..b097a6cae1 100644 --- a/include/qemu/log.h +++ b/include/qemu/log.h @@ -44,6 +44,7 @@ static inline bool qemu_log_separate(void) #define CPU_LOG_PAGE (1 << 14) /* LOG_TRACE (1 << 15) is defined in log-for-trace.h */ #define CPU_LOG_TB_OP_IND (1 << 16) +#define CPU_LOG_TB_FPU (1 << 17) /* Lock output for a series of related logs. Since this is not needed * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we diff --git a/target/arm/cpu.c b/target/arm/cpu.c index d175c5e94f..7939c6b8ae 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -324,6 +324,8 @@ static void arm_cpu_reset(CPUState *s) &env->vfp.fp_status); set_float_detect_tininess(float_tininess_before_rounding, &env->vfp.standard_fp_status); + set_float_detect_tininess(float_tininess_before_rounding, + &env->vfp.fp_status_f16); #ifndef CONFIG_USER_ONLY if (kvm_enabled()) { kvm_arm_reset_vcpu(cpu); diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c index 549ed3513e..4f8034c513 100644 --- a/target/arm/helper-a64.c +++ b/target/arm/helper-a64.c @@ -85,6 +85,16 @@ static inline uint32_t float_rel_to_flags(int res) return flags; } +uint64_t HELPER(vfp_cmph_a64)(float16 x, float16 y, void *fp_status) +{ + return float_rel_to_flags(float16_compare_quiet(x, y, fp_status)); +} + +uint64_t HELPER(vfp_cmpeh_a64)(float16 x, float16 y, void *fp_status) +{ + return float_rel_to_flags(float16_compare(x, y, fp_status)); +} + uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status) { return float_rel_to_flags(float32_compare_quiet(x, y, fp_status)); diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h index b8028ac98c..9d3a907049 100644 --- a/target/arm/helper-a64.h +++ b/target/arm/helper-a64.h @@ -19,6 +19,8 @@ DEF_HELPER_FLAGS_2(udiv64, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(sdiv64, TCG_CALL_NO_RWG_SE, s64, s64, s64) DEF_HELPER_FLAGS_1(rbit64, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_3(vfp_cmph_a64, i64, f16, f16, ptr) +DEF_HELPER_3(vfp_cmpeh_a64, i64, f16, f16, ptr) DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr) DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr) DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr) diff --git a/target/arm/helper.c b/target/arm/helper.c index 817f9d81a0..c6fd7f9479 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -11427,8 +11427,12 @@ VFP_CONV_FIX_A64(uq, s, 32, 64, uint64) #undef VFP_CONV_FIX_A64 /* Conversion to/from f16 can overflow to infinity before/after scaling. - * Therefore we convert to f64 (which does not round), scale, - * and then convert f64 to f16 (which may round). + * Therefore we convert to f64, scale, and then convert f64 to f16; or + * vice versa for conversion to integer. + * + * For 16- and 32-bit integers, the conversion to f64 never rounds. + * For 64-bit integers, any integer that would cause rounding will also + * overflow to f16 infinity, so there is no double rounding problem. */ static float16 do_postscale_fp16(float64 f, int shift, float_status *fpst) @@ -11446,6 +11450,16 @@ float16 HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst) return do_postscale_fp16(uint32_to_float64(x, fpst), shift, fpst); } +float16 HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst) +{ + return do_postscale_fp16(int64_to_float64(x, fpst), shift, fpst); +} + +float16 HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst) +{ + return do_postscale_fp16(uint64_to_float64(x, fpst), shift, fpst); +} + static float64 do_prescale_fp16(float16 f, int shift, float_status *fpst) { if (unlikely(float16_is_any_nan(f))) { @@ -11475,6 +11489,26 @@ uint32_t HELPER(vfp_touhh)(float16 x, uint32_t shift, void *fpst) return float64_to_uint16(do_prescale_fp16(x, shift, fpst), fpst); } +uint32_t HELPER(vfp_toslh)(float16 x, uint32_t shift, void *fpst) +{ + return float64_to_int32(do_prescale_fp16(x, shift, fpst), fpst); +} + +uint32_t HELPER(vfp_toulh)(float16 x, uint32_t shift, void *fpst) +{ + return float64_to_uint32(do_prescale_fp16(x, shift, fpst), fpst); +} + +uint64_t HELPER(vfp_tosqh)(float16 x, uint32_t shift, void *fpst) +{ + return float64_to_int64(do_prescale_fp16(x, shift, fpst), fpst); +} + +uint64_t HELPER(vfp_touqh)(float16 x, uint32_t shift, void *fpst) +{ + return float64_to_uint64(do_prescale_fp16(x, shift, fpst), fpst); +} + /* Set the current fp rounding mode and return the old one. * The argument is a softfloat float_round_ value. */ diff --git a/target/arm/helper.h b/target/arm/helper.h index 1969b37f2d..ce89968b2d 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -151,6 +151,10 @@ DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, ptr) DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, ptr) DEF_HELPER_3(vfp_touhh, i32, f16, i32, ptr) DEF_HELPER_3(vfp_toshh, i32, f16, i32, ptr) +DEF_HELPER_3(vfp_toulh, i32, f16, i32, ptr) +DEF_HELPER_3(vfp_toslh, i32, f16, i32, ptr) +DEF_HELPER_3(vfp_touqh, i64, f16, i32, ptr) +DEF_HELPER_3(vfp_tosqh, i64, f16, i32, ptr) DEF_HELPER_3(vfp_toshs, i32, f32, i32, ptr) DEF_HELPER_3(vfp_tosls, i32, f32, i32, ptr) DEF_HELPER_3(vfp_tosqs, i64, f32, i32, ptr) @@ -177,6 +181,8 @@ DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr) DEF_HELPER_3(vfp_uqtod, f64, i64, i32, ptr) DEF_HELPER_3(vfp_sltoh, f16, i32, i32, ptr) DEF_HELPER_3(vfp_ultoh, f16, i32, i32, ptr) +DEF_HELPER_3(vfp_sqtoh, f16, i64, i32, ptr) +DEF_HELPER_3(vfp_uqtoh, f16, i64, i32, ptr) DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, ptr) DEF_HELPER_FLAGS_2(set_neon_rmode, TCG_CALL_NO_RWG, i32, i32, env) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 4d1b220cc6..a0b0c43d12 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -615,6 +615,14 @@ static TCGv_i32 read_fp_sreg(DisasContext *s, int reg) return v; } +static TCGv_i32 read_fp_hreg(DisasContext *s, int reg) +{ + TCGv_i32 v = tcg_temp_new_i32(); + + tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16)); + return v; +} + /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64). * If SVE is not enabled, then there are only 128 bits in the vector. */ @@ -4704,14 +4712,14 @@ static void disas_data_proc_reg(DisasContext *s, uint32_t insn) } } -static void handle_fp_compare(DisasContext *s, bool is_double, +static void handle_fp_compare(DisasContext *s, int size, unsigned int rn, unsigned int rm, bool cmp_with_zero, bool signal_all_nans) { TCGv_i64 tcg_flags = tcg_temp_new_i64(); - TCGv_ptr fpst = get_fpstatus_ptr(false); + TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16); - if (is_double) { + if (size == MO_64) { TCGv_i64 tcg_vn, tcg_vm; tcg_vn = read_fp_dreg(s, rn); @@ -4728,19 +4736,35 @@ static void handle_fp_compare(DisasContext *s, bool is_double, tcg_temp_free_i64(tcg_vn); tcg_temp_free_i64(tcg_vm); } else { - TCGv_i32 tcg_vn, tcg_vm; + TCGv_i32 tcg_vn = tcg_temp_new_i32(); + TCGv_i32 tcg_vm = tcg_temp_new_i32(); - tcg_vn = read_fp_sreg(s, rn); + read_vec_element_i32(s, tcg_vn, rn, 0, size); if (cmp_with_zero) { - tcg_vm = tcg_const_i32(0); + tcg_gen_movi_i32(tcg_vm, 0); } else { - tcg_vm = read_fp_sreg(s, rm); + read_vec_element_i32(s, tcg_vm, rm, 0, size); } - if (signal_all_nans) { - gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); - } else { - gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); + + switch (size) { + case MO_32: + if (signal_all_nans) { + gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); + } else { + gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); + } + break; + case MO_16: + if (signal_all_nans) { + gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); + } else { + gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); + } + break; + default: + g_assert_not_reached(); } + tcg_temp_free_i32(tcg_vn); tcg_temp_free_i32(tcg_vm); } @@ -4761,16 +4785,35 @@ static void handle_fp_compare(DisasContext *s, bool is_double, static void disas_fp_compare(DisasContext *s, uint32_t insn) { unsigned int mos, type, rm, op, rn, opc, op2r; + int size; mos = extract32(insn, 29, 3); - type = extract32(insn, 22, 2); /* 0 = single, 1 = double */ + type = extract32(insn, 22, 2); rm = extract32(insn, 16, 5); op = extract32(insn, 14, 2); rn = extract32(insn, 5, 5); opc = extract32(insn, 3, 2); op2r = extract32(insn, 0, 3); - if (mos || op || op2r || type > 1) { + if (mos || op || op2r) { + unallocated_encoding(s); + return; + } + + switch (type) { + case 0: + size = MO_32; + break; + case 1: + size = MO_64; + break; + case 3: + size = MO_16; + if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + break; + } + /* fallthru */ + default: unallocated_encoding(s); return; } @@ -4779,7 +4822,7 @@ static void disas_fp_compare(DisasContext *s, uint32_t insn) return; } - handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2); + handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2); } /* Floating point conditional compare @@ -4793,16 +4836,35 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn) unsigned int mos, type, rm, cond, rn, op, nzcv; TCGv_i64 tcg_flags; TCGLabel *label_continue = NULL; + int size; mos = extract32(insn, 29, 3); - type = extract32(insn, 22, 2); /* 0 = single, 1 = double */ + type = extract32(insn, 22, 2); rm = extract32(insn, 16, 5); cond = extract32(insn, 12, 4); rn = extract32(insn, 5, 5); op = extract32(insn, 4, 1); nzcv = extract32(insn, 0, 4); - if (mos || type > 1) { + if (mos) { + unallocated_encoding(s); + return; + } + + switch (type) { + case 0: + size = MO_32; + break; + case 1: + size = MO_64; + break; + case 3: + size = MO_16; + if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + break; + } + /* fallthru */ + default: unallocated_encoding(s); return; } @@ -4823,7 +4885,7 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn) gen_set_label(label_match); } - handle_fp_compare(s, type, rn, rm, false, op); + handle_fp_compare(s, size, rn, rm, false, op); if (cond < 0x0e) { gen_set_label(label_continue); @@ -4841,15 +4903,34 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn) unsigned int mos, type, rm, cond, rn, rd; TCGv_i64 t_true, t_false, t_zero; DisasCompare64 c; + TCGMemOp sz; mos = extract32(insn, 29, 3); - type = extract32(insn, 22, 2); /* 0 = single, 1 = double */ + type = extract32(insn, 22, 2); rm = extract32(insn, 16, 5); cond = extract32(insn, 12, 4); rn = extract32(insn, 5, 5); rd = extract32(insn, 0, 5); - if (mos || type > 1) { + if (mos) { + unallocated_encoding(s); + return; + } + + switch (type) { + case 0: + sz = MO_32; + break; + case 1: + sz = MO_64; + break; + case 3: + sz = MO_16; + if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + break; + } + /* fallthru */ + default: unallocated_encoding(s); return; } @@ -4858,11 +4939,11 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn) return; } - /* Zero extend sreg inputs to 64 bits now. */ + /* Zero extend sreg & hreg inputs to 64 bits now. */ t_true = tcg_temp_new_i64(); t_false = tcg_temp_new_i64(); - read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32); - read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32); + read_vec_element(s, t_true, rn, 0, sz); + read_vec_element(s, t_false, rm, 0, sz); a64_test_cc(&c, cond); t_zero = tcg_const_i64(0); @@ -4871,7 +4952,7 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn) tcg_temp_free_i64(t_false); a64_free_cc(&c); - /* Note that sregs write back zeros to the high bits, + /* Note that sregs & hregs write back zeros to the high bits, and we've already done the zero-extension. */ write_fp_dreg(s, rd, t_true); tcg_temp_free_i64(t_true); @@ -4881,11 +4962,9 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn) static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) { TCGv_ptr fpst = NULL; - TCGv_i32 tcg_op = tcg_temp_new_i32(); + TCGv_i32 tcg_op = read_fp_hreg(s, rn); TCGv_i32 tcg_res = tcg_temp_new_i32(); - read_vec_element_i32(s, tcg_op, rn, 0, MO_16); - switch (opcode) { case 0x0: /* FMOV */ tcg_gen_mov_i32(tcg_res, tcg_op); @@ -4897,7 +4976,8 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); break; case 0x3: /* FSQRT */ - gen_helper_sqrt_f16(tcg_res, tcg_op, cpu_env); + fpst = get_fpstatus_ptr(true); + gen_helper_sqrt_f16(tcg_res, tcg_op, fpst); break; case 0x8: /* FRINTN */ case 0x9: /* FRINTP */ @@ -5293,6 +5373,61 @@ static void handle_fp_2src_double(DisasContext *s, int opcode, tcg_temp_free_i64(tcg_res); } +/* Floating-point data-processing (2 source) - half precision */ +static void handle_fp_2src_half(DisasContext *s, int opcode, + int rd, int rn, int rm) +{ + TCGv_i32 tcg_op1; + TCGv_i32 tcg_op2; + TCGv_i32 tcg_res; + TCGv_ptr fpst; + + tcg_res = tcg_temp_new_i32(); + fpst = get_fpstatus_ptr(true); + tcg_op1 = read_fp_hreg(s, rn); + tcg_op2 = read_fp_hreg(s, rm); + + switch (opcode) { + case 0x0: /* FMUL */ + gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1: /* FDIV */ + gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x2: /* FADD */ + gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3: /* FSUB */ + gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x4: /* FMAX */ + gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5: /* FMIN */ + gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x6: /* FMAXNM */ + gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x7: /* FMINNM */ + gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x8: /* FNMUL */ + gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); + tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000); + break; + default: + g_assert_not_reached(); + } + + write_fp_sreg(s, rd, tcg_res); + + tcg_temp_free_ptr(fpst); + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + tcg_temp_free_i32(tcg_res); +} + /* Floating point data-processing (2 source) * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ @@ -5325,6 +5460,16 @@ static void disas_fp_2src(DisasContext *s, uint32_t insn) } handle_fp_2src_double(s, opcode, rd, rn, rm); break; + case 3: + if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + handle_fp_2src_half(s, opcode, rd, rn, rm); + break; default: unallocated_encoding(s); } @@ -5406,6 +5551,44 @@ static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, tcg_temp_free_i64(tcg_res); } +/* Floating-point data-processing (3 source) - half precision */ +static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, + int rd, int rn, int rm, int ra) +{ + TCGv_i32 tcg_op1, tcg_op2, tcg_op3; + TCGv_i32 tcg_res = tcg_temp_new_i32(); + TCGv_ptr fpst = get_fpstatus_ptr(true); + + tcg_op1 = read_fp_hreg(s, rn); + tcg_op2 = read_fp_hreg(s, rm); + tcg_op3 = read_fp_hreg(s, ra); + + /* These are fused multiply-add, and must be done as one + * floating point operation with no rounding between the + * multiplication and addition steps. + * NB that doing the negations here as separate steps is + * correct : an input NaN should come out with its sign bit + * flipped if it is a negated-input. + */ + if (o1 == true) { + tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000); + } + + if (o0 != o1) { + tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); + } + + gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); + + write_fp_sreg(s, rd, tcg_res); + + tcg_temp_free_ptr(fpst); + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + tcg_temp_free_i32(tcg_op3); + tcg_temp_free_i32(tcg_res); +} + /* Floating point data-processing (3 source) * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0 * +---+---+---+-----------+------+----+------+----+------+------+------+ @@ -5435,6 +5618,16 @@ static void disas_fp_3src(DisasContext *s, uint32_t insn) } handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra); break; + case 3: + if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra); + break; default: unallocated_encoding(s); } @@ -5482,11 +5675,25 @@ static void disas_fp_imm(DisasContext *s, uint32_t insn) { int rd = extract32(insn, 0, 5); int imm8 = extract32(insn, 13, 8); - int is_double = extract32(insn, 22, 2); + int type = extract32(insn, 22, 2); uint64_t imm; TCGv_i64 tcg_res; + TCGMemOp sz; - if (is_double > 1) { + switch (type) { + case 0: + sz = MO_32; + break; + case 1: + sz = MO_64; + break; + case 3: + sz = MO_16; + if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + break; + } + /* fallthru */ + default: unallocated_encoding(s); return; } @@ -5495,7 +5702,7 @@ static void disas_fp_imm(DisasContext *s, uint32_t insn) return; } - imm = vfp_expand_imm(MO_32 + is_double, imm8); + imm = vfp_expand_imm(sz, imm8); tcg_res = tcg_const_i64(imm); write_fp_dreg(s, rd, tcg_res); @@ -5511,11 +5718,11 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, bool itof, int rmode, int scale, int sf, int type) { bool is_signed = !(opcode & 1); - bool is_double = type; TCGv_ptr tcg_fpstatus; - TCGv_i32 tcg_shift; + TCGv_i32 tcg_shift, tcg_single; + TCGv_i64 tcg_double; - tcg_fpstatus = get_fpstatus_ptr(false); + tcg_fpstatus = get_fpstatus_ptr(type == 3); tcg_shift = tcg_const_i32(64 - scale); @@ -5533,8 +5740,9 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, tcg_int = tcg_extend; } - if (is_double) { - TCGv_i64 tcg_double = tcg_temp_new_i64(); + switch (type) { + case 1: /* float64 */ + tcg_double = tcg_temp_new_i64(); if (is_signed) { gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); @@ -5544,8 +5752,10 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, } write_fp_dreg(s, rd, tcg_double); tcg_temp_free_i64(tcg_double); - } else { - TCGv_i32 tcg_single = tcg_temp_new_i32(); + break; + + case 0: /* float32 */ + tcg_single = tcg_temp_new_i32(); if (is_signed) { gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); @@ -5555,6 +5765,23 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, } write_fp_sreg(s, rd, tcg_single); tcg_temp_free_i32(tcg_single); + break; + + case 3: /* float16 */ + tcg_single = tcg_temp_new_i32(); + if (is_signed) { + gen_helper_vfp_sqtoh(tcg_single, tcg_int, + tcg_shift, tcg_fpstatus); + } else { + gen_helper_vfp_uqtoh(tcg_single, tcg_int, + tcg_shift, tcg_fpstatus); + } + write_fp_sreg(s, rd, tcg_single); + tcg_temp_free_i32(tcg_single); + break; + + default: + g_assert_not_reached(); } } else { TCGv_i64 tcg_int = cpu_reg(s, rd); @@ -5571,8 +5798,9 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); - if (is_double) { - TCGv_i64 tcg_double = read_fp_dreg(s, rn); + switch (type) { + case 1: /* float64 */ + tcg_double = read_fp_dreg(s, rn); if (is_signed) { if (!sf) { gen_helper_vfp_tosld(tcg_int, tcg_double, @@ -5590,9 +5818,14 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, tcg_shift, tcg_fpstatus); } } + if (!sf) { + tcg_gen_ext32u_i64(tcg_int, tcg_int); + } tcg_temp_free_i64(tcg_double); - } else { - TCGv_i32 tcg_single = read_fp_sreg(s, rn); + break; + + case 0: /* float32 */ + tcg_single = read_fp_sreg(s, rn); if (sf) { if (is_signed) { gen_helper_vfp_tosqs(tcg_int, tcg_single, @@ -5614,14 +5847,39 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, tcg_temp_free_i32(tcg_dest); } tcg_temp_free_i32(tcg_single); + break; + + case 3: /* float16 */ + tcg_single = read_fp_sreg(s, rn); + if (sf) { + if (is_signed) { + gen_helper_vfp_tosqh(tcg_int, tcg_single, + tcg_shift, tcg_fpstatus); + } else { + gen_helper_vfp_touqh(tcg_int, tcg_single, + tcg_shift, tcg_fpstatus); + } + } else { + TCGv_i32 tcg_dest = tcg_temp_new_i32(); + if (is_signed) { + gen_helper_vfp_toslh(tcg_dest, tcg_single, + tcg_shift, tcg_fpstatus); + } else { + gen_helper_vfp_toulh(tcg_dest, tcg_single, + tcg_shift, tcg_fpstatus); + } + tcg_gen_extu_i32_i64(tcg_int, tcg_dest); + tcg_temp_free_i32(tcg_dest); + } + tcg_temp_free_i32(tcg_single); + break; + + default: + g_assert_not_reached(); } gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); tcg_temp_free_i32(tcg_rmode); - - if (!sf) { - tcg_gen_ext32u_i64(tcg_int, tcg_int); - } } tcg_temp_free_ptr(tcg_fpstatus); @@ -5646,8 +5904,21 @@ static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn) bool sf = extract32(insn, 31, 1); bool itof; - if (sbit || (type > 1) - || (!sf && scale < 32)) { + if (sbit || (!sf && scale < 32)) { + unallocated_encoding(s); + return; + } + + switch (type) { + case 0: /* float32 */ + case 1: /* float64 */ + break; + case 3: /* float16 */ + if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + break; + } + /* fallthru */ + default: unallocated_encoding(s); return; } @@ -5700,6 +5971,15 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd)); clear_vec_high(s, true, rd); break; + case 3: + /* 16 bit */ + tmp = tcg_temp_new_i64(); + tcg_gen_ext16u_i64(tmp, tcg_rn); + write_fp_dreg(s, rd, tmp); + tcg_temp_free_i64(tmp); + break; + default: + g_assert_not_reached(); } } else { TCGv_i64 tcg_rd = cpu_reg(s, rd); @@ -5717,6 +5997,12 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) /* 64 bits from top half */ tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn)); break; + case 3: + /* 16 bit */ + tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16)); + break; + default: + g_assert_not_reached(); } } } @@ -5756,10 +6042,16 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn) case 0xa: /* 64 bit */ case 0xd: /* 64 bit to top half of quad */ break; + case 0x6: /* 16-bit float, 32-bit int */ + case 0xe: /* 16-bit float, 64-bit int */ + if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + break; + } + /* fallthru */ default: /* all other sf/type/rmode combinations are invalid */ unallocated_encoding(s); - break; + return; } if (!fp_access_check(s)) { @@ -5770,7 +6062,20 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn) /* actual FP conversions */ bool itof = extract32(opcode, 1, 1); - if (type > 1 || (rmode != 0 && opcode > 1)) { + if (rmode != 0 && opcode > 1) { + unallocated_encoding(s); + return; + } + switch (type) { + case 0: /* float32 */ + case 1: /* float64 */ + break; + case 3: /* float16 */ + if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + break; + } + /* fallthru */ + default: unallocated_encoding(s); return; } @@ -7686,13 +7991,10 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) tcg_temp_free_i64(tcg_op2); tcg_temp_free_i64(tcg_res); } else { - TCGv_i32 tcg_op1 = tcg_temp_new_i32(); - TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i32 tcg_op1 = read_fp_hreg(s, rn); + TCGv_i32 tcg_op2 = read_fp_hreg(s, rm); TCGv_i64 tcg_res = tcg_temp_new_i64(); - read_vec_element_i32(s, tcg_op1, rn, 0, MO_16); - read_vec_element_i32(s, tcg_op2, rm, 0, MO_16); - gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2); gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res); @@ -8233,13 +8535,10 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, fpst = get_fpstatus_ptr(true); - tcg_op1 = tcg_temp_new_i32(); - tcg_op2 = tcg_temp_new_i32(); + tcg_op1 = read_fp_hreg(s, rn); + tcg_op2 = read_fp_hreg(s, rm); tcg_res = tcg_temp_new_i32(); - read_vec_element_i32(s, tcg_op1, rn, 0, MO_16); - read_vec_element_i32(s, tcg_op2, rm, 0, MO_16); - switch (fpopcode) { case 0x03: /* FMULX */ gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst); @@ -12137,11 +12436,9 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) } if (is_scalar) { - TCGv_i32 tcg_op = tcg_temp_new_i32(); + TCGv_i32 tcg_op = read_fp_hreg(s, rn); TCGv_i32 tcg_res = tcg_temp_new_i32(); - read_vec_element_i32(s, tcg_op, rn, 0, MO_16); - switch (fpop) { case 0x1a: /* FCVTNS */ case 0x1b: /* FCVTMS */ diff --git a/util/log.c b/util/log.c index 96f30dd21a..c0dbbd4700 100644 --- a/util/log.c +++ b/util/log.c @@ -256,6 +256,8 @@ const QEMULogItem qemu_log_items[] = { "show trace before each executed TB (lots of logs)" }, { CPU_LOG_TB_CPU, "cpu", "show CPU registers before entering a TB (lots of logs)" }, + { CPU_LOG_TB_FPU, "fpu", + "include FPU registers in the 'cpu' logging" }, { CPU_LOG_MMU, "mmu", "log MMU-related activities" }, { CPU_LOG_PCALL, "pcall", |