Diffstat (limited to 'target')
50 files changed, 4378 insertions, 3166 deletions
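
The headline change in this series is wiring up ARMv8.2-FP16 for AArch32: the '-cpu max' models (cpu.c and cpu64.c below) now advertise MVFR1.FPHP = 3 and MVFR1.SIMDHP = 2, and isar_feature_aa32_fp16_arith() (cpu.h) tests MVFR1.FPHP directly instead of the old AArch64 placeholder. A rough standalone sketch of that deposit/extract pattern — not the QEMU FIELD_* macros themselves, with field positions taken from the Arm ARM (FPHP at bits [27:24], SIMDHP at [23:20]):

#include <assert.h>
#include <stdint.h>

/* FIELD_DP32 analogue: deposit a len-bit value at bit position pos. */
static uint32_t dp32(uint32_t reg, int pos, int len, uint32_t val)
{
    uint32_t mask = ((1u << len) - 1) << pos;
    return (reg & ~mask) | ((val << pos) & mask);
}

/* FIELD_EX32 analogue: extract a len-bit field at bit position pos. */
static uint32_t ex32(uint32_t reg, int pos, int len)
{
    return (reg >> pos) & ((1u << len) - 1);
}

int main(void)
{
    uint32_t mvfr1 = 0;
    mvfr1 = dp32(mvfr1, 24, 4, 3);   /* FPHP = 3: v8.2 FP16 arithmetic */
    mvfr1 = dp32(mvfr1, 20, 4, 2);   /* SIMDHP = 2: FP16 SIMD arithmetic */
    /* the new aa32 feature test: full FP16 arithmetic iff FPHP >= 3 */
    assert(ex32(mvfr1, 24, 4) >= 3);
    return 0;
}
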
diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h index 56395b87f6..fdef05cacf 100644 --- a/target/arm/cpu-qom.h +++ b/target/arm/cpu-qom.h @@ -67,7 +67,7 @@ typedef struct ARMCPU ARMCPU; #define AARCH64_CPU_CLASS(klass) \ OBJECT_CLASS_CHECK(AArch64CPUClass, (klass), TYPE_AARCH64_CPU) #define AARCH64_CPU_GET_CLASS(obj) \ - OBJECT_GET_CLASS(AArch64CPUClass, (obj), TYPE_AArch64_CPU) + OBJECT_GET_CLASS(AArch64CPUClass, (obj), TYPE_AARCH64_CPU) typedef struct AArch64CPUClass { /*< private >*/ diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 6b382fcd60..c179e0752d 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -2143,7 +2143,8 @@ static void arm_max_initfn(Object *obj) cpu->isar.id_isar6 = t; t = cpu->isar.mvfr1; - t = FIELD_DP32(t, MVFR1, FPHP, 2); /* v8.0 FP support */ + t = FIELD_DP32(t, MVFR1, FPHP, 3); /* v8.2-FP16 */ + t = FIELD_DP32(t, MVFR1, SIMDHP, 2); /* v8.2-FP16 */ cpu->isar.mvfr1 = t; t = cpu->isar.mvfr2; diff --git a/target/arm/cpu.h b/target/arm/cpu.h index ac857bdc2c..a1c7d8ebae 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3519,12 +3519,7 @@ static inline bool isar_feature_aa32_predinv(const ARMISARegisters *id) static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id) { - /* - * This is a placeholder for use by VCMA until the rest of - * the ARMv8.2-FP16 extension is implemented for aa32 mode. - * At which point we can properly set and check MVFR1.FPHP. - */ - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1; + return FIELD_EX32(id->mvfr1, MVFR1, FPHP) >= 3; } static inline bool isar_feature_aa32_vfp_simd(const ARMISARegisters *id) diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index dd696183df..3c2b3d9599 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -704,12 +704,10 @@ static void aarch64_max_initfn(Object *obj) u = FIELD_DP32(u, ID_DFR0, PERFMON, 5); /* v8.4-PMU */ cpu->isar.id_dfr0 = u; - /* - * FIXME: We do not yet support ARMv8.2-fp16 for AArch32 yet, - * so do not set MVFR1.FPHP. Strictly speaking this is not legal, - * but it is also not legal to enable SVE without support for FP16, - * and enabling SVE in system mode is more useful in the short term. - */ + u = cpu->isar.mvfr1; + u = FIELD_DP32(u, MVFR1, FPHP, 3); /* v8.2-FP16 */ + u = FIELD_DP32(u, MVFR1, SIMDHP, 2); /* v8.2-FP16 */ + cpu->isar.mvfr1 = u; #ifdef CONFIG_USER_ONLY /* For usermode -cpu max we can use a larger and more efficient DCZ diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c index 8682630ff6..030821489b 100644 --- a/target/arm/helper-a64.c +++ b/target/arm/helper-a64.c @@ -234,17 +234,6 @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp) * versions, these do a fully fused multiply-add or * multiply-add-and-halve. 
*/ -#define float16_two make_float16(0x4000) -#define float16_three make_float16(0x4200) -#define float16_one_point_five make_float16(0x3e00) - -#define float32_two make_float32(0x40000000) -#define float32_three make_float32(0x40400000) -#define float32_one_point_five make_float32(0x3fc00000) - -#define float64_two make_float64(0x4000000000000000ULL) -#define float64_three make_float64(0x4008000000000000ULL) -#define float64_one_point_five make_float64(0x3FF8000000000000ULL) uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp) { diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h index 63c4a087ca..4411c47120 100644 --- a/target/arm/helper-sve.h +++ b/target/arm/helper-sve.h @@ -269,11 +269,6 @@ DEF_HELPER_FLAGS_3(sve_uminv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32) DEF_HELPER_FLAGS_3(sve_uminv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32) DEF_HELPER_FLAGS_3(sve_uminv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_clr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_clr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_clr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_clr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - DEF_HELPER_FLAGS_4(sve_movz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_movz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_movz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/target/arm/helper.c b/target/arm/helper.c index 6b4f0eb533..44d666627a 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -5105,7 +5105,6 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { .access = PL2_RW, .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore }, { .name = "HCR_EL2", .state = ARM_CP_STATE_BOTH, - .type = ARM_CP_NO_RAW, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, diff --git a/target/arm/helper.h b/target/arm/helper.h index 759639a63a..8defd7c801 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -101,30 +101,43 @@ DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, tl, i32, i32, i32) DEF_HELPER_1(vfp_get_fpscr, i32, env) DEF_HELPER_2(vfp_set_fpscr, void, env, i32) +DEF_HELPER_3(vfp_addh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_adds, f32, f32, f32, ptr) DEF_HELPER_3(vfp_addd, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_subh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_subs, f32, f32, f32, ptr) DEF_HELPER_3(vfp_subd, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_mulh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_muls, f32, f32, f32, ptr) DEF_HELPER_3(vfp_muld, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_divh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_divs, f32, f32, f32, ptr) DEF_HELPER_3(vfp_divd, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_maxh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_maxs, f32, f32, f32, ptr) DEF_HELPER_3(vfp_maxd, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_minh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_mins, f32, f32, f32, ptr) DEF_HELPER_3(vfp_mind, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_maxnumh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_maxnums, f32, f32, f32, ptr) DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr) DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr) +DEF_HELPER_1(vfp_negh, f16, f16) DEF_HELPER_1(vfp_negs, f32, f32) DEF_HELPER_1(vfp_negd, f64, f64) +DEF_HELPER_1(vfp_absh, f16, f16) DEF_HELPER_1(vfp_abss, f32, f32) DEF_HELPER_1(vfp_absd, f64, f64) +DEF_HELPER_2(vfp_sqrth, f16, f16, env) 
DEF_HELPER_2(vfp_sqrts, f32, f32, env) DEF_HELPER_2(vfp_sqrtd, f64, f64, env) +DEF_HELPER_3(vfp_cmph, void, f16, f16, env) DEF_HELPER_3(vfp_cmps, void, f32, f32, env) DEF_HELPER_3(vfp_cmpd, void, f64, f64, env) +DEF_HELPER_3(vfp_cmpeh, void, f16, f16, env) DEF_HELPER_3(vfp_cmpes, void, f32, f32, env) DEF_HELPER_3(vfp_cmped, void, f64, f64, env) @@ -151,6 +164,10 @@ DEF_HELPER_2(vfp_tosizh, s32, f16, ptr) DEF_HELPER_2(vfp_tosizs, s32, f32, ptr) DEF_HELPER_2(vfp_tosizd, s32, f64, ptr) +DEF_HELPER_3(vfp_toshh_round_to_zero, i32, f16, i32, ptr) +DEF_HELPER_3(vfp_toslh_round_to_zero, i32, f16, i32, ptr) +DEF_HELPER_3(vfp_touhh_round_to_zero, i32, f16, i32, ptr) +DEF_HELPER_3(vfp_toulh_round_to_zero, i32, f16, i32, ptr) DEF_HELPER_3(vfp_toshs_round_to_zero, i32, f32, i32, ptr) DEF_HELPER_3(vfp_tosls_round_to_zero, i32, f32, i32, ptr) DEF_HELPER_3(vfp_touhs_round_to_zero, i32, f32, i32, ptr) @@ -189,13 +206,14 @@ DEF_HELPER_3(vfp_sqtod, f64, i64, i32, ptr) DEF_HELPER_3(vfp_uhtod, f64, i64, i32, ptr) DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr) DEF_HELPER_3(vfp_uqtod, f64, i64, i32, ptr) +DEF_HELPER_3(vfp_shtoh, f16, i32, i32, ptr) +DEF_HELPER_3(vfp_uhtoh, f16, i32, i32, ptr) DEF_HELPER_3(vfp_sltoh, f16, i32, i32, ptr) DEF_HELPER_3(vfp_ultoh, f16, i32, i32, ptr) DEF_HELPER_3(vfp_sqtoh, f16, i64, i32, ptr) DEF_HELPER_3(vfp_uqtoh, f16, i64, i32, ptr) DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, ptr) -DEF_HELPER_FLAGS_2(set_neon_rmode, TCG_CALL_NO_RWG, i32, i32, env) DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f32, TCG_CALL_NO_RWG, f32, f16, ptr, i32) DEF_HELPER_FLAGS_3(vfp_fcvt_f32_to_f16, TCG_CALL_NO_RWG, f16, f32, ptr, i32) @@ -204,9 +222,8 @@ DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, ptr, i32) DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr) DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr) +DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, ptr) -DEF_HELPER_3(recps_f32, f32, env, f32, f32) -DEF_HELPER_3(rsqrts_f32, f32, env, f32, f32) DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, ptr) DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr) @@ -222,8 +239,10 @@ DEF_HELPER_3(shr_cc, i32, env, i32, i32) DEF_HELPER_3(sar_cc, i32, env, i32, i32) DEF_HELPER_3(ror_cc, i32, env, i32, i32) +DEF_HELPER_FLAGS_2(rinth_exact, TCG_CALL_NO_RWG, f16, f16, ptr) DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, ptr) +DEF_HELPER_FLAGS_2(rinth, TCG_CALL_NO_RWG, f16, f16, ptr) DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr) @@ -587,6 +606,43 @@ DEF_HELPER_FLAGS_5(gvec_fcmlas_idx, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(gvec_fcmlad, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_paddh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_pmaxh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_pminh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_padds, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_pmaxs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_pmins, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_sstoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sitos, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_ustoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) 
+DEF_HELPER_FLAGS_4(gvec_uitos, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_tosszh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_tosizs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_touszh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_touizs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_vcvt_sf, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_uf, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_fs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_fu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_vcvt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_uh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_hs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_vcvt_rm_ss, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rm_us, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rm_sh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rm_uh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_vrint_rm_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_vrint_rm_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_vrintx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -595,6 +651,21 @@ DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_fcgt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_fcge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_fcge0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_fceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_fceq0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_fcle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_fcle0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_fclt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_fclt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) @@ -607,8 +678,54 @@ DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fcge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fcge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fcgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fcgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_facge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_facge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_facgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_facgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmaxnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fminnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fminnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_recps_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmla_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmls_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmls_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_vfma_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_vfma_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG, @@ -623,6 +740,16 @@ DEF_HELPER_FLAGS_5(gvec_fmul_idx_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(gvec_fmul_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_6(gvec_fmla_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG, @@ -758,6 +885,34 @@ DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, 
i32) +DEF_HELPER_FLAGS_4(gvec_mul_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_mul_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_mul_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_mla_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mla_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mla_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_mls_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mls_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mls_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqdmulh_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqdmulh_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #include "helper-sve.h" diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index 1169237905..ef1e960285 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -330,7 +330,6 @@ int kvm_arch_remove_hw_breakpoint(target_ulong addr, switch (type) { case GDB_BREAKPOINT_HW: return delete_hw_breakpoint(addr); - break; case GDB_WATCHPOINT_READ: case GDB_WATCHPOINT_WRITE: case GDB_WATCHPOINT_ACCESS: diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 104752041f..891306f5b0 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -514,11 +514,12 @@ void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val) } /* Record a tag check failure. */ -static void mte_check_fail(CPUARMState *env, int mmu_idx, +static void mte_check_fail(CPUARMState *env, uint32_t desc, uint64_t dirty_ptr, uintptr_t ra) { + int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx); - int el, reg_el, tcf, select; + int el, reg_el, tcf, select, is_write, syn; uint64_t sctlr; reg_el = regime_el(env, arm_mmu_idx); @@ -546,9 +547,10 @@ static void mte_check_fail(CPUARMState *env, int mmu_idx, */ cpu_restore_state(env_cpu(env), ra, true); env->exception.vaddress = dirty_ptr; - raise_exception(env, EXCP_DATA_ABORT, - syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, 0, 0x11), - exception_target_el(env)); + + is_write = FIELD_EX32(desc, MTEDESC, WRITE); + syn = syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, is_write, 0x11); + raise_exception(env, EXCP_DATA_ABORT, syn, exception_target_el(env)); /* noreturn, but fall through to the assert anyway */ case 0: @@ -639,8 +641,7 @@ uint64_t mte_check1(CPUARMState *env, uint32_t desc, } if (unlikely(!mte_probe1_int(env, desc, ptr, ra, bit55))) { - int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); - mte_check_fail(env, mmu_idx, ptr, ra); + mte_check_fail(env, desc, ptr, ra); } return useronly_clean_ptr(ptr); @@ -810,7 +811,7 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, fail_ofs = tag_first + n * TAG_GRANULE - ptr; fail_ofs = ROUND_UP(fail_ofs, esize); - mte_check_fail(env, mmu_idx, ptr + fail_ofs, ra); + mte_check_fail(env, desc, ptr + fail_ofs, ra); } done: @@ -922,7 +923,7 @@ uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr) fail: /* Locate the first nibble that differs. 
*/ i = ctz64(mem_tag ^ ptr_tag) >> 4; - mte_check_fail(env, mmu_idx, align_ptr + i * TAG_GRANULE, ra); + mte_check_fail(env, desc, align_ptr + i * TAG_GRANULE, ra); done: return useronly_clean_ptr(ptr); diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index 686f9fbf46..1e9e859291 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -254,6 +254,8 @@ VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp # We use size=0 for fp32 and size=1 for fp16 to match the 3-same encodings. @2reg_vcvt .... ... . . . 1 ..... .... .... . q:1 . . .... \ &2reg_shift vm=%vm_dp vd=%vd_dp size=0 shift=%neon_rshift_i5 +@2reg_vcvt_f16 .... ... . . . 11 .... .... .... . q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=1 shift=%neon_rshift_i4 VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s @@ -370,7 +372,11 @@ VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_h VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_b # VCVT fixed<->float conversions -# TODO: FP16 fixed<->float conversions are opc==0b1100 and 0b1101 +VCVT_SH_2sh 1111 001 0 1 . ...... .... 1100 0 . . 1 .... @2reg_vcvt_f16 +VCVT_UH_2sh 1111 001 1 1 . ...... .... 1100 0 . . 1 .... @2reg_vcvt_f16 +VCVT_HS_2sh 1111 001 0 1 . ...... .... 1101 0 . . 1 .... @2reg_vcvt_f16 +VCVT_HU_2sh 1111 001 1 1 . ...... .... 1101 0 . . 1 .... @2reg_vcvt_f16 + VCVT_SF_2sh 1111 001 0 1 . ...... .... 1110 0 . . 1 .... @2reg_vcvt VCVT_UF_2sh 1111 001 1 1 . ...... .... 1110 0 . . 1 .... @2reg_vcvt VCVT_FS_2sh 1111 001 0 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt diff --git a/target/arm/sve.decode b/target/arm/sve.decode index 4f580a25e7..6425396ac1 100644 --- a/target/arm/sve.decode +++ b/target/arm/sve.decode @@ -150,13 +150,17 @@ @rd_rn_i6 ........ ... rn:5 ..... imm:s6 rd:5 &rri # Two register operand, one immediate operand, with predicate, -# element size encoded as TSZHL. User must fill in imm. -@rdn_pg_tszimm ........ .. ... ... ... pg:3 ..... rd:5 \ - &rpri_esz rn=%reg_movprfx esz=%tszimm_esz +# element size encoded as TSZHL. +@rdn_pg_tszimm_shl ........ .. ... ... ... pg:3 ..... rd:5 \ + &rpri_esz rn=%reg_movprfx esz=%tszimm_esz imm=%tszimm_shl +@rdn_pg_tszimm_shr ........ .. ... ... ... pg:3 ..... rd:5 \ + &rpri_esz rn=%reg_movprfx esz=%tszimm_esz imm=%tszimm_shr # Similarly without predicate. -@rd_rn_tszimm ........ .. ... ... ...... rn:5 rd:5 \ - &rri_esz esz=%tszimm16_esz +@rd_rn_tszimm_shl ........ .. ... ... ...... rn:5 rd:5 \ + &rri_esz esz=%tszimm16_esz imm=%tszimm16_shl +@rd_rn_tszimm_shr ........ .. ... ... ...... rn:5 rd:5 \ + &rri_esz esz=%tszimm16_esz imm=%tszimm16_shr # Two register operand, one immediate operand, with 4-bit predicate. # User must fill in imm. @@ -289,14 +293,10 @@ UMINV 00000100 .. 001 011 001 ... ..... ..... @rd_pg_rn ### SVE Shift by Immediate - Predicated Group # SVE bitwise shift by immediate (predicated) -ASR_zpzi 00000100 .. 000 000 100 ... .. ... ..... \ - @rdn_pg_tszimm imm=%tszimm_shr -LSR_zpzi 00000100 .. 000 001 100 ... .. ... ..... \ - @rdn_pg_tszimm imm=%tszimm_shr -LSL_zpzi 00000100 .. 000 011 100 ... .. ... ..... \ - @rdn_pg_tszimm imm=%tszimm_shl -ASRD 00000100 .. 000 100 100 ... .. ... ..... \ - @rdn_pg_tszimm imm=%tszimm_shr +ASR_zpzi 00000100 .. 000 000 100 ... .. ... ..... @rdn_pg_tszimm_shr +LSR_zpzi 00000100 .. 000 001 100 ... .. ... ..... @rdn_pg_tszimm_shr +LSL_zpzi 00000100 .. 000 011 100 ... .. ... ..... 
@rdn_pg_tszimm_shl +ASRD 00000100 .. 000 100 100 ... .. ... ..... @rdn_pg_tszimm_shr # SVE bitwise shift by vector (predicated) ASR_zpzz 00000100 .. 010 000 100 ... ..... ..... @rdn_pg_rm @@ -400,12 +400,9 @@ RDVL 00000100 101 11111 01010 imm:s6 rd:5 ### SVE Bitwise Shift - Unpredicated Group # SVE bitwise shift by immediate (unpredicated) -ASR_zzi 00000100 .. 1 ..... 1001 00 ..... ..... \ - @rd_rn_tszimm imm=%tszimm16_shr -LSR_zzi 00000100 .. 1 ..... 1001 01 ..... ..... \ - @rd_rn_tszimm imm=%tszimm16_shr -LSL_zzi 00000100 .. 1 ..... 1001 11 ..... ..... \ - @rd_rn_tszimm imm=%tszimm16_shl +ASR_zzi 00000100 .. 1 ..... 1001 00 ..... ..... @rd_rn_tszimm_shr +LSR_zzi 00000100 .. 1 ..... 1001 01 ..... ..... @rd_rn_tszimm_shr +LSL_zzi 00000100 .. 1 ..... 1001 11 ..... ..... @rd_rn_tszimm_shl # SVE bitwise shift by wide elements (unpredicated) # Note esz != 3 diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 382fa82bc8..4758d46f34 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -956,85 +956,43 @@ uint32_t HELPER(sve_pnext)(void *vd, void *vg, uint32_t pred_desc) return flags; } -/* Store zero into every active element of Zd. We will use this for two - * and three-operand predicated instructions for which logic dictates a - * zero result. In particular, logical shift by element size, which is - * otherwise undefined on the host. - * - * For element sizes smaller than uint64_t, we use tables to expand - * the N bits of the controlling predicate to a byte mask, and clear - * those bytes. +/* + * Copy Zn into Zd, and store zero into inactive elements. + * If inv, store zeros into the active elements. */ -void HELPER(sve_clr_b)(void *vd, void *vg, uint32_t desc) -{ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; - uint64_t *d = vd; - uint8_t *pg = vg; - for (i = 0; i < opr_sz; i += 1) { - d[i] &= ~expand_pred_b(pg[H1(i)]); - } -} - -void HELPER(sve_clr_h)(void *vd, void *vg, uint32_t desc) -{ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; - uint64_t *d = vd; - uint8_t *pg = vg; - for (i = 0; i < opr_sz; i += 1) { - d[i] &= ~expand_pred_h(pg[H1(i)]); - } -} - -void HELPER(sve_clr_s)(void *vd, void *vg, uint32_t desc) -{ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; - uint64_t *d = vd; - uint8_t *pg = vg; - for (i = 0; i < opr_sz; i += 1) { - d[i] &= ~expand_pred_s(pg[H1(i)]); - } -} - -void HELPER(sve_clr_d)(void *vd, void *vg, uint32_t desc) -{ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; - uint64_t *d = vd; - uint8_t *pg = vg; - for (i = 0; i < opr_sz; i += 1) { - if (pg[H1(i)] & 1) { - d[i] = 0; - } - } -} - -/* Copy Zn into Zd, and store zero into inactive elements. 
*/ void HELPER(sve_movz_b)(void *vd, void *vn, void *vg, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t inv = -(uint64_t)(simd_data(desc) & 1); uint64_t *d = vd, *n = vn; uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { - d[i] = n[i] & expand_pred_b(pg[H1(i)]); + d[i] = n[i] & (expand_pred_b(pg[H1(i)]) ^ inv); } } void HELPER(sve_movz_h)(void *vd, void *vn, void *vg, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t inv = -(uint64_t)(simd_data(desc) & 1); uint64_t *d = vd, *n = vn; uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { - d[i] = n[i] & expand_pred_h(pg[H1(i)]); + d[i] = n[i] & (expand_pred_h(pg[H1(i)]) ^ inv); } } void HELPER(sve_movz_s)(void *vd, void *vn, void *vg, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t inv = -(uint64_t)(simd_data(desc) & 1); uint64_t *d = vd, *n = vn; uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { - d[i] = n[i] & expand_pred_s(pg[H1(i)]); + d[i] = n[i] & (expand_pred_s(pg[H1(i)]) ^ inv); } } @@ -1043,8 +1001,10 @@ void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc) intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn; uint8_t *pg = vg; + uint8_t inv = simd_data(desc); + for (i = 0; i < opr_sz; i += 1) { - d[i] = n[i] & -(uint64_t)(pg[H1(i)] & 1); + d[i] = n[i] & -(uint64_t)((pg[H1(i)] ^ inv) & 1); } } diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 0fc5e12fab..7188808341 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -678,6 +678,20 @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, tcg_temp_free_ptr(fpst); } +/* Expand a 3-operand + qc + operation using an out-of-line helper. */ +static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn, + int rm, gen_helper_gvec_3_ptr *fn) +{ + TCGv_ptr qc_ptr = tcg_temp_new_ptr(); + + tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc)); + tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), qc_ptr, + is_q ? 16 : 8, vec_full_reg_size(s), 0, fn); + tcg_temp_free_ptr(qc_ptr); +} + /* Set ZF and NF based on a 64 bit result. This is alas fiddlier * than the 32 bit equivalent. */ @@ -1156,18 +1170,18 @@ static void do_vec_ld(DisasContext *s, int destidx, int element, * unallocated-encoding checks (otherwise the syndrome information * for the resulting exception will be incorrect). */ -static inline bool fp_access_check(DisasContext *s) +static bool fp_access_check(DisasContext *s) { - assert(!s->fp_access_checked); - s->fp_access_checked = true; + if (s->fp_excp_el) { + assert(!s->fp_access_checked); + s->fp_access_checked = true; - if (!s->fp_excp_el) { - return true; + gen_exception_insn(s, s->pc_curr, EXCP_UDEF, + syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + return false; } - - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); - return false; + s->fp_access_checked = true; + return true; } /* Check that SVE access is enabled. If it is, return true. 
@@ -1176,10 +1190,14 @@ static inline bool fp_access_check(DisasContext *s) bool sve_access_check(DisasContext *s) { if (s->sve_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_sve_access_trap(), - s->sve_excp_el); + assert(!s->sve_access_checked); + s->sve_access_checked = true; + + gen_exception_insn(s, s->pc_curr, EXCP_UDEF, + syn_sve_access_trap(), s->sve_excp_el); return false; } + s->sve_access_checked = true; return fp_access_check(s); } @@ -8613,8 +8631,8 @@ static void handle_scalar_simd_shli(DisasContext *s, bool insert, int size = 32 - clz32(immh) - 1; int immhb = immh << 3 | immb; int shift = immhb - (8 << size); - TCGv_i64 tcg_rn = new_tmp_a64(s); - TCGv_i64 tcg_rd = new_tmp_a64(s); + TCGv_i64 tcg_rn; + TCGv_i64 tcg_rd; if (!extract32(immh, 3, 1)) { unallocated_encoding(s); @@ -11730,6 +11748,15 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size); } return; + case 0x16: /* SQDMULH, SQRDMULH */ + { + static gen_helper_gvec_3_ptr * const fns[2][2] = { + { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h }, + { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s }, + }; + gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]); + } + return; case 0x11: if (!u) { /* CMTST */ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size); @@ -11841,16 +11868,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) genenvfn = fns[size][u]; break; } - case 0x16: /* SQDMULH, SQRDMULH */ - { - static NeonGenTwoOpEnvFn * const fns[2][2] = { - { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, - { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, - }; - assert(size == 1 || size == 2); - genenvfn = fns[size - 1][u]; - break; - } default: g_assert_not_reached(); } @@ -12997,9 +13014,6 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) fpop = deposit32(opcode, 5, 1, a); fpop = deposit32(fpop, 6, 1, u); - rd = extract32(insn, 0, 5); - rn = extract32(insn, 5, 5); - switch (fpop) { case 0x1d: /* SCVTF */ case 0x5d: /* UCVTF */ @@ -13484,6 +13498,56 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) data, gen_helper_gvec_fmlal_idx_a64); } return; + + case 0x08: /* MUL */ + if (!is_long && !is_scalar) { + static gen_helper_gvec_3 * const fns[3] = { + gen_helper_gvec_mul_idx_h, + gen_helper_gvec_mul_idx_s, + gen_helper_gvec_mul_idx_d, + }; + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + is_q ? 16 : 8, vec_full_reg_size(s), + index, fns[size - 1]); + return; + } + break; + + case 0x10: /* MLA */ + if (!is_long && !is_scalar) { + static gen_helper_gvec_4 * const fns[3] = { + gen_helper_gvec_mla_idx_h, + gen_helper_gvec_mla_idx_s, + gen_helper_gvec_mla_idx_d, + }; + tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, rd), + is_q ? 16 : 8, vec_full_reg_size(s), + index, fns[size - 1]); + return; + } + break; + + case 0x14: /* MLS */ + if (!is_long && !is_scalar) { + static gen_helper_gvec_4 * const fns[3] = { + gen_helper_gvec_mls_idx_h, + gen_helper_gvec_mls_idx_s, + gen_helper_gvec_mls_idx_d, + }; + tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, rd), + is_q ? 
16 : 8, vec_full_reg_size(s), + index, fns[size - 1]); + return; + } + break; } if (size == 3) { @@ -14529,6 +14593,7 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s) s->base.pc_next += 4; s->fp_access_checked = false; + s->sve_access_checked = false; if (dc_isar_feature(aa64_bti, s)) { if (s->base.num_insns == 1) { diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc index 9879731a52..2d4926316a 100644 --- a/target/arm/translate-neon.c.inc +++ b/target/arm/translate-neon.c.inc @@ -1033,122 +1033,54 @@ DO_3SAME_PAIR(VPADD, padd_u) DO_3SAME_VQDMULH(VQDMULH, qdmulh) DO_3SAME_VQDMULH(VQRDMULH, qrdmulh) -static bool do_3same_fp(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn, - bool reads_vd) -{ - /* - * FP operations handled elementwise 32 bits at a time. - * If reads_vd is true then the old value of Vd will be - * loaded before calling the callback function. This is - * used for multiply-accumulate type operations. - */ - TCGv_i32 tmp, tmp2; - int pass; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vn | a->vm) & 0x10)) { - return false; - } - - if ((a->vn | a->vm | a->vd) & a->q) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - TCGv_ptr fpstatus = fpstatus_ptr(FPST_STD); - for (pass = 0; pass < (a->q ? 4 : 2); pass++) { - tmp = neon_load_reg(a->vn, pass); - tmp2 = neon_load_reg(a->vm, pass); - if (reads_vd) { - TCGv_i32 tmp_rd = neon_load_reg(a->vd, pass); - fn(tmp_rd, tmp, tmp2, fpstatus); - neon_store_reg(a->vd, pass, tmp_rd); - tcg_temp_free_i32(tmp); - } else { - fn(tmp, tmp, tmp2, fpstatus); - neon_store_reg(a->vd, pass, tmp); - } - tcg_temp_free_i32(tmp2); - } - tcg_temp_free_ptr(fpstatus); - return true; -} - -/* - * For all the functions using this macro, size == 1 means fp16, - * which is an architecture extension we don't implement yet. - */ -#define DO_3S_FP_GVEC(INSN,FUNC) \ - static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ - uint32_t rn_ofs, uint32_t rm_ofs, \ - uint32_t oprsz, uint32_t maxsz) \ +#define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC) \ + static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ { \ - TCGv_ptr fpst = fpstatus_ptr(FPST_STD); \ + TCGv_ptr fpst = fpstatus_ptr(FPST); \ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \ oprsz, maxsz, 0, FUNC); \ tcg_temp_free_ptr(fpst); \ - } \ + } + +#define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC) \ + WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC) \ + WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC) \ static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ { \ if (a->size != 0) { \ - /* TODO fp16 support */ \ - return false; \ + if (!dc_isar_feature(aa32_fp16_arith, s)) { \ + return false; \ + } \ + return do_3same(s, a, gen_##INSN##_fp16_3s); \ } \ - return do_3same(s, a, gen_##INSN##_3s); \ - } - - -DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s) -DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s) -DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s) -DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s) - -/* - * For all the functions using this macro, size == 1 means fp16, - * which is an architecture extension we don't implement yet. 
- */ -#define DO_3S_FP(INSN,FUNC,READS_VD) \ - static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ - { \ - if (a->size != 0) { \ - /* TODO fp16 support */ \ - return false; \ - } \ - return do_3same_fp(s, a, FUNC, READS_VD); \ - } - -DO_3S_FP(VCEQ, gen_helper_neon_ceq_f32, false) -DO_3S_FP(VCGE, gen_helper_neon_cge_f32, false) -DO_3S_FP(VCGT, gen_helper_neon_cgt_f32, false) -DO_3S_FP(VACGE, gen_helper_neon_acge_f32, false) -DO_3S_FP(VACGT, gen_helper_neon_acgt_f32, false) -DO_3S_FP(VMAX, gen_helper_vfp_maxs, false) -DO_3S_FP(VMIN, gen_helper_vfp_mins, false) - -static void gen_VMLA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, - TCGv_ptr fpstatus) -{ - gen_helper_vfp_muls(vn, vn, vm, fpstatus); - gen_helper_vfp_adds(vd, vd, vn, fpstatus); -} - -static void gen_VMLS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, - TCGv_ptr fpstatus) -{ - gen_helper_vfp_muls(vn, vn, vm, fpstatus); - gen_helper_vfp_subs(vd, vd, vn, fpstatus); -} - -DO_3S_FP(VMLA, gen_VMLA_fp_3s, true) -DO_3S_FP(VMLS, gen_VMLS_fp_3s, true) + return do_3same(s, a, gen_##INSN##_fp32_3s); \ + } + + +DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h) +DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h) +DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h) +DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h) +DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h) +DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h) +DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h) +DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h) +DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h) +DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h) +DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h) +DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h) +DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h) +DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h) +DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h) +DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h) +DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h) + +WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s) +WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h) +WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s) +WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h) static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a) { @@ -1157,11 +1089,12 @@ static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a) } if (a->size != 0) { - /* TODO fp16 support */ - return false; + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + return do_3same(s, a, gen_VMAXNM_fp16_3s); } - - return do_3same_fp(s, a, gen_helper_vfp_maxnums, false); + return do_3same(s, a, gen_VMAXNM_fp32_3s); } static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a) @@ -1171,98 +1104,18 @@ static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a) } if (a->size != 0) { - /* TODO fp16 support */ - return false; - } - - return do_3same_fp(s, a, gen_helper_vfp_minnums, false); -} - -WRAP_ENV_FN(gen_VRECPS_tramp, gen_helper_recps_f32) - -static void gen_VRECPS_fp_3s(unsigned vece, uint32_t rd_ofs, - uint32_t rn_ofs, uint32_t rm_ofs, - uint32_t oprsz, uint32_t maxsz) -{ - static const GVecGen3 ops = { .fni4 = gen_VRECPS_tramp }; - 
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops); -} - -static bool trans_VRECPS_fp_3s(DisasContext *s, arg_3same *a) -{ - if (a->size != 0) { - /* TODO fp16 support */ - return false; - } - - return do_3same(s, a, gen_VRECPS_fp_3s); -} - -WRAP_ENV_FN(gen_VRSQRTS_tramp, gen_helper_rsqrts_f32) - -static void gen_VRSQRTS_fp_3s(unsigned vece, uint32_t rd_ofs, - uint32_t rn_ofs, uint32_t rm_ofs, - uint32_t oprsz, uint32_t maxsz) -{ - static const GVecGen3 ops = { .fni4 = gen_VRSQRTS_tramp }; - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops); -} - -static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a) -{ - if (a->size != 0) { - /* TODO fp16 support */ - return false; - } - - return do_3same(s, a, gen_VRSQRTS_fp_3s); -} - -static void gen_VFMA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, - TCGv_ptr fpstatus) -{ - gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus); -} - -static bool trans_VFMA_fp_3s(DisasContext *s, arg_3same *a) -{ - if (!dc_isar_feature(aa32_simdfmac, s)) { - return false; - } - - if (a->size != 0) { - /* TODO fp16 support */ - return false; + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + return do_3same(s, a, gen_VMINNM_fp16_3s); } - - return do_3same_fp(s, a, gen_VFMA_fp_3s, true); + return do_3same(s, a, gen_VMINNM_fp32_3s); } -static void gen_VFMS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, - TCGv_ptr fpstatus) +static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, + gen_helper_gvec_3_ptr *fn) { - gen_helper_vfp_negs(vn, vn); - gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus); -} - -static bool trans_VFMS_fp_3s(DisasContext *s, arg_3same *a) -{ - if (!dc_isar_feature(aa32_simdfmac, s)) { - return false; - } - - if (a->size != 0) { - /* TODO fp16 support */ - return false; - } - - return do_3same_fp(s, a, gen_VFMS_fp_3s, true); -} - -static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn) -{ - /* FP operations handled pairwise 32 bits at a time */ - TCGv_i32 tmp, tmp2, tmp3; + /* FP pairwise operations */ TCGv_ptr fpstatus; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { @@ -1281,26 +1134,14 @@ static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn) assert(a->q == 0); /* enforced by decode patterns */ - /* - * Note that we have to be careful not to clobber the source operands - * in the "vm == vd" case by storing the result of the first pass too - * early. Since Q is 0 there are always just two passes, so instead - * of a complicated loop over each pass we just unroll. - */ - fpstatus = fpstatus_ptr(FPST_STD); - tmp = neon_load_reg(a->vn, 0); - tmp2 = neon_load_reg(a->vn, 1); - fn(tmp, tmp, tmp2, fpstatus); - tcg_temp_free_i32(tmp2); - tmp3 = neon_load_reg(a->vm, 0); - tmp2 = neon_load_reg(a->vm, 1); - fn(tmp3, tmp3, tmp2, fpstatus); - tcg_temp_free_i32(tmp2); + fpstatus = fpstatus_ptr(a->size != 0 ? 
FPST_STD_F16 : FPST_STD); + tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->vm), + fpstatus, 8, 8, 0, fn); tcg_temp_free_ptr(fpstatus); - neon_store_reg(a->vd, 0, tmp); - neon_store_reg(a->vd, 1, tmp3); return true; } @@ -1312,15 +1153,17 @@ static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn) static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ { \ if (a->size != 0) { \ - /* TODO fp16 support */ \ - return false; \ + if (!dc_isar_feature(aa32_fp16_arith, s)) { \ + return false; \ + } \ + return do_3same_fp_pair(s, a, FUNC##h); \ } \ - return do_3same_fp_pair(s, a, FUNC); \ + return do_3same_fp_pair(s, a, FUNC##s); \ } -DO_3S_FP_PAIR(VPADD, gen_helper_vfp_adds) -DO_3S_FP_PAIR(VPMAX, gen_helper_vfp_maxs) -DO_3S_FP_PAIR(VPMIN, gen_helper_vfp_mins) +DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd) +DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax) +DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin) static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn) { @@ -1765,17 +1608,24 @@ static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a) } static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a, - NeonGenTwoSingleOpFn *fn) + gen_helper_gvec_2_ptr *fn) { /* FP operations in 2-reg-and-shift group */ - TCGv_i32 tmp, shiftv; - TCGv_ptr fpstatus; - int pass; + int vec_size = a->q ? 16 : 8; + int rd_ofs = neon_reg_offset(a->vd, 0); + int rm_ofs = neon_reg_offset(a->vm, 0); + TCGv_ptr fpst; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { return false; } + if (a->size != 0) { + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + } + /* UNDEF accesses to D16-D31 if they don't exist. */ if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) { @@ -1790,15 +1640,9 @@ static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a, return true; } - fpstatus = fpstatus_ptr(FPST_STD); - shiftv = tcg_const_i32(a->shift); - for (pass = 0; pass < (a->q ? 4 : 2); pass++) { - tmp = neon_load_reg(a->vm, pass); - fn(tmp, tmp, shiftv, fpstatus); - neon_store_reg(a->vd, pass, tmp); - } - tcg_temp_free_ptr(fpstatus); - tcg_temp_free_i32(shiftv); + fpst = fpstatus_ptr(a->size ? FPST_STD_F16 : FPST_STD); + tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn); + tcg_temp_free_ptr(fpst); return true; } @@ -1808,10 +1652,15 @@ static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a, return do_fp_2sh(s, a, FUNC); \ } -DO_FP_2SH(VCVT_SF, gen_helper_vfp_sltos) -DO_FP_2SH(VCVT_UF, gen_helper_vfp_ultos) -DO_FP_2SH(VCVT_FS, gen_helper_vfp_tosls_round_to_zero) -DO_FP_2SH(VCVT_FU, gen_helper_vfp_touls_round_to_zero) +DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf) +DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf) +DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs) +DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu) + +DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh) +DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh) +DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs) +DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu) static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op) { @@ -2583,70 +2432,70 @@ static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a) return do_2scalar(s, a, opfn[a->size], accfn[a->size]); } -/* - * Rather than have a float-specific version of do_2scalar just for - * three insns, we wrap a NeonGenTwoSingleOpFn to turn it into - * a NeonGenTwoOpFn. 
- */ -#define WRAP_FP_FN(WRAPNAME, FUNC) \ - static void WRAPNAME(TCGv_i32 rd, TCGv_i32 rn, TCGv_i32 rm) \ - { \ - TCGv_ptr fpstatus = fpstatus_ptr(FPST_STD); \ - FUNC(rd, rn, rm, fpstatus); \ - tcg_temp_free_ptr(fpstatus); \ +static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a, + gen_helper_gvec_3_ptr *fn) +{ + /* Two registers and a scalar, using gvec */ + int vec_size = a->q ? 16 : 8; + int rd_ofs = neon_reg_offset(a->vd, 0); + int rn_ofs = neon_reg_offset(a->vn, 0); + int rm_ofs; + int idx; + TCGv_ptr fpstatus; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; } -WRAP_FP_FN(gen_VMUL_F_mul, gen_helper_vfp_muls) -WRAP_FP_FN(gen_VMUL_F_add, gen_helper_vfp_adds) -WRAP_FP_FN(gen_VMUL_F_sub, gen_helper_vfp_subs) + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } -static bool trans_VMUL_F_2sc(DisasContext *s, arg_2scalar *a) -{ - static NeonGenTwoOpFn * const opfn[] = { - NULL, - NULL, /* TODO: fp16 support */ - gen_VMUL_F_mul, - NULL, - }; + if (!fn) { + /* Bad size (including size == 3, which is a different insn group) */ + return false; + } - return do_2scalar(s, a, opfn[a->size], NULL); -} + if (a->q && ((a->vd | a->vn) & 1)) { + return false; + } -static bool trans_VMLA_F_2sc(DisasContext *s, arg_2scalar *a) -{ - static NeonGenTwoOpFn * const opfn[] = { - NULL, - NULL, /* TODO: fp16 support */ - gen_VMUL_F_mul, - NULL, - }; - static NeonGenTwoOpFn * const accfn[] = { - NULL, - NULL, /* TODO: fp16 support */ - gen_VMUL_F_add, - NULL, - }; + if (!vfp_access_check(s)) { + return true; + } - return do_2scalar(s, a, opfn[a->size], accfn[a->size]); + /* a->vm is M:Vm, which encodes both register and index */ + idx = extract32(a->vm, a->size + 2, 2); + a->vm = extract32(a->vm, 0, a->size + 2); + rm_ofs = neon_reg_offset(a->vm, 0); + + fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD); + tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus, + vec_size, vec_size, idx, fn); + tcg_temp_free_ptr(fpstatus); + return true; } -static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a) -{ - static NeonGenTwoOpFn * const opfn[] = { - NULL, - NULL, /* TODO: fp16 support */ - gen_VMUL_F_mul, - NULL, - }; - static NeonGenTwoOpFn * const accfn[] = { - NULL, - NULL, /* TODO: fp16 support */ - gen_VMUL_F_sub, - NULL, - }; +#define DO_VMUL_F_2sc(NAME, FUNC) \ + static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a) \ + { \ + static gen_helper_gvec_3_ptr * const opfn[] = { \ + NULL, \ + gen_helper_##FUNC##_h, \ + gen_helper_##FUNC##_s, \ + NULL, \ + }; \ + if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \ + return false; \ + } \ + return do_2scalar_fp_vec(s, a, opfn[a->size]); \ + } - return do_2scalar(s, a, opfn[a->size], accfn[a->size]); -} +DO_VMUL_F_2sc(VMUL, gvec_fmul_idx) +DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx) +DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx) WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16) WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32) @@ -3739,22 +3588,44 @@ static bool trans_VCNT(DisasContext *s, arg_2misc *a) return do_2misc(s, a, gen_helper_neon_cnt_u8); } +static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + uint32_t oprsz, uint32_t maxsz) +{ + tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs, + vece == MO_16 ? 
0x7fff : 0x7fffffff, + oprsz, maxsz); +} + static bool trans_VABS_F(DisasContext *s, arg_2misc *a) { - if (a->size != 2) { + if (a->size == MO_16) { + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + } else if (a->size != MO_32) { return false; } - /* TODO: FP16 : size == 1 */ - return do_2misc(s, a, gen_helper_vfp_abss); + return do_2misc_vec(s, a, gen_VABS_F); +} + +static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + uint32_t oprsz, uint32_t maxsz) +{ + tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs, + vece == MO_16 ? 0x8000 : 0x80000000, + oprsz, maxsz); } static bool trans_VNEG_F(DisasContext *s, arg_2misc *a) { - if (a->size != 2) { + if (a->size == MO_16) { + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + } else if (a->size != MO_32) { return false; } - /* TODO: FP16 : size == 1 */ - return do_2misc(s, a, gen_helper_vfp_negs); + return do_2misc_vec(s, a, gen_VNEG_F); } static bool trans_VRECPE(DisasContext *s, arg_2misc *a) @@ -3808,226 +3679,100 @@ static bool trans_VQNEG(DisasContext *s, arg_2misc *a) return do_2misc(s, a, fn[a->size]); } -static bool do_2misc_fp(DisasContext *s, arg_2misc *a, - NeonGenOneSingleOpFn *fn) -{ - int pass; - TCGv_ptr fpst; - - /* Handle a 2-reg-misc operation by iterating 32 bits at a time */ - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vm) & 0x10)) { - return false; - } - - if (a->size != 2) { - /* TODO: FP16 will be the size == 1 case */ - return false; - } - - if ((a->vd | a->vm) & a->q) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - fpst = fpstatus_ptr(FPST_STD); - for (pass = 0; pass < (a->q ? 4 : 2); pass++) { - TCGv_i32 tmp = neon_load_reg(a->vm, pass); - fn(tmp, tmp, fpst); - neon_store_reg(a->vd, pass, tmp); +#define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC) \ + static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \ + uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + static gen_helper_gvec_2_ptr * const fns[4] = { \ + NULL, HFUNC, SFUNC, NULL, \ + }; \ + TCGv_ptr fpst; \ + fpst = fpstatus_ptr(vece == MO_16 ? 
FPST_STD_F16 : FPST_STD); \ + tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0, \ + fns[vece]); \ + tcg_temp_free_ptr(fpst); \ + } \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + if (a->size == MO_16) { \ + if (!dc_isar_feature(aa32_fp16_arith, s)) { \ + return false; \ + } \ + } else if (a->size != MO_32) { \ + return false; \ + } \ + return do_2misc_vec(s, a, gen_##INSN); \ } - tcg_temp_free_ptr(fpst); - return true; -} - -#define DO_2MISC_FP(INSN, FUNC) \ - static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ - { \ - return do_2misc_fp(s, a, FUNC); \ - } +DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s) +DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s) +DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s) +DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s) +DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s) +DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s) +DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s) +DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos) +DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos) +DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs) +DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs) -DO_2MISC_FP(VRECPE_F, gen_helper_recpe_f32) -DO_2MISC_FP(VRSQRTE_F, gen_helper_rsqrte_f32) -DO_2MISC_FP(VCVT_FS, gen_helper_vfp_sitos) -DO_2MISC_FP(VCVT_FU, gen_helper_vfp_uitos) -DO_2MISC_FP(VCVT_SF, gen_helper_vfp_tosizs) -DO_2MISC_FP(VCVT_UF, gen_helper_vfp_touizs) +DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s) static bool trans_VRINTX(DisasContext *s, arg_2misc *a) { if (!arm_dc_feature(s, ARM_FEATURE_V8)) { return false; } - return do_2misc_fp(s, a, gen_helper_rints_exact); + return trans_VRINTX_impl(s, a); } -#define WRAP_FP_CMP0_FWD(WRAPNAME, FUNC) \ - static void WRAPNAME(TCGv_i32 d, TCGv_i32 m, TCGv_ptr fpst) \ - { \ - TCGv_i32 zero = tcg_const_i32(0); \ - FUNC(d, m, zero, fpst); \ - tcg_temp_free_i32(zero); \ - } -#define WRAP_FP_CMP0_REV(WRAPNAME, FUNC) \ - static void WRAPNAME(TCGv_i32 d, TCGv_i32 m, TCGv_ptr fpst) \ - { \ - TCGv_i32 zero = tcg_const_i32(0); \ - FUNC(d, zero, m, fpst); \ - tcg_temp_free_i32(zero); \ - } - -#define DO_FP_CMP0(INSN, FUNC, REV) \ - WRAP_FP_CMP0_##REV(gen_##INSN, FUNC) \ - static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ - { \ - return do_2misc_fp(s, a, gen_##INSN); \ - } - -DO_FP_CMP0(VCGT0_F, gen_helper_neon_cgt_f32, FWD) -DO_FP_CMP0(VCGE0_F, gen_helper_neon_cge_f32, FWD) -DO_FP_CMP0(VCEQ0_F, gen_helper_neon_ceq_f32, FWD) -DO_FP_CMP0(VCLE0_F, gen_helper_neon_cge_f32, REV) -DO_FP_CMP0(VCLT0_F, gen_helper_neon_cgt_f32, REV) - -static bool do_vrint(DisasContext *s, arg_2misc *a, int rmode) -{ - /* - * Handle a VRINT* operation by iterating 32 bits at a time, - * with a specified rounding mode in operation. - */ - int pass; - TCGv_ptr fpst; - TCGv_i32 tcg_rmode; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON) || - !arm_dc_feature(s, ARM_FEATURE_V8)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. 
*/ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vm) & 0x10)) { - return false; - } - - if (a->size != 2) { - /* TODO: FP16 will be the size == 1 case */ - return false; - } - - if ((a->vd | a->vm) & a->q) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - fpst = fpstatus_ptr(FPST_STD); - tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); - gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env); - for (pass = 0; pass < (a->q ? 4 : 2); pass++) { - TCGv_i32 tmp = neon_load_reg(a->vm, pass); - gen_helper_rints(tmp, tmp, fpst); - neon_store_reg(a->vd, pass, tmp); - } - gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env); - tcg_temp_free_i32(tcg_rmode); - tcg_temp_free_ptr(fpst); - - return true; -} - -#define DO_VRINT(INSN, RMODE) \ - static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ - { \ - return do_vrint(s, a, RMODE); \ - } - -DO_VRINT(VRINTN, FPROUNDING_TIEEVEN) -DO_VRINT(VRINTA, FPROUNDING_TIEAWAY) -DO_VRINT(VRINTZ, FPROUNDING_ZERO) -DO_VRINT(VRINTM, FPROUNDING_NEGINF) -DO_VRINT(VRINTP, FPROUNDING_POSINF) - -static bool do_vcvt(DisasContext *s, arg_2misc *a, int rmode, bool is_signed) -{ - /* - * Handle a VCVT* operation by iterating 32 bits at a time, - * with a specified rounding mode in operation. - */ - int pass; - TCGv_ptr fpst; - TCGv_i32 tcg_rmode, tcg_shift; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON) || - !arm_dc_feature(s, ARM_FEATURE_V8)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vm) & 0x10)) { - return false; - } - - if (a->size != 2) { - /* TODO: FP16 will be the size == 1 case */ - return false; - } - - if ((a->vd | a->vm) & a->q) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - fpst = fpstatus_ptr(FPST_STD); - tcg_shift = tcg_const_i32(0); - tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); - gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env); - for (pass = 0; pass < (a->q ? 4 : 2); pass++) { - TCGv_i32 tmp = neon_load_reg(a->vm, pass); - if (is_signed) { - gen_helper_vfp_tosls(tmp, tmp, tcg_shift, fpst); - } else { - gen_helper_vfp_touls(tmp, tmp, tcg_shift, fpst); - } - neon_store_reg(a->vd, pass, tmp); - } - gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env); - tcg_temp_free_i32(tcg_rmode); - tcg_temp_free_i32(tcg_shift); - tcg_temp_free_ptr(fpst); - - return true; -} - -#define DO_VCVT(INSN, RMODE, SIGNED) \ - static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ - { \ - return do_vcvt(s, a, RMODE, SIGNED); \ - } - -DO_VCVT(VCVTAU, FPROUNDING_TIEAWAY, false) -DO_VCVT(VCVTAS, FPROUNDING_TIEAWAY, true) -DO_VCVT(VCVTNU, FPROUNDING_TIEEVEN, false) -DO_VCVT(VCVTNS, FPROUNDING_TIEEVEN, true) -DO_VCVT(VCVTPU, FPROUNDING_POSINF, false) -DO_VCVT(VCVTPS, FPROUNDING_POSINF, true) -DO_VCVT(VCVTMU, FPROUNDING_NEGINF, false) -DO_VCVT(VCVTMS, FPROUNDING_NEGINF, true) +#define DO_VEC_RMODE(INSN, RMODE, OP) \ + static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \ + uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + static gen_helper_gvec_2_ptr * const fns[4] = { \ + NULL, \ + gen_helper_gvec_##OP##h, \ + gen_helper_gvec_##OP##s, \ + NULL, \ + }; \ + TCGv_ptr fpst; \ + fpst = fpstatus_ptr(vece == 1 ? 
FPST_STD_F16 : FPST_STD); \ + tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, \ + arm_rmode_to_sf(RMODE), fns[vece]); \ + tcg_temp_free_ptr(fpst); \ + } \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + if (!arm_dc_feature(s, ARM_FEATURE_V8)) { \ + return false; \ + } \ + if (a->size == MO_16) { \ + if (!dc_isar_feature(aa32_fp16_arith, s)) { \ + return false; \ + } \ + } else if (a->size != MO_32) { \ + return false; \ + } \ + return do_2misc_vec(s, a, gen_##INSN); \ + } + +DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u) +DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s) +DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u) +DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s) +DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u) +DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s) +DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u) +DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s) + +DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_) +DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_) +DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_) +DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_) +DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_) static bool trans_VSWP(DisasContext *s, arg_2misc *a) { diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index d97cb37d83..e4cd6b6251 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -142,35 +142,76 @@ static int pred_gvec_reg_size(DisasContext *s) return size_for_gvec(pred_full_reg_size(s)); } +/* Invoke an out-of-line helper on 2 Zregs. */ +static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn, + int rd, int rn, int data) +{ + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vsz, vsz, data, fn); +} + +/* Invoke an out-of-line helper on 3 Zregs. */ +static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn, + int rd, int rn, int rm, int data) +{ + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + vsz, vsz, data, fn); +} + +/* Invoke an out-of-line helper on 2 Zregs and a predicate. */ +static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn, + int rd, int rn, int pg, int data) +{ + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + pred_full_reg_offset(s, pg), + vsz, vsz, data, fn); +} + +/* Invoke an out-of-line helper on 3 Zregs and a predicate. */ +static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn, + int rd, int rn, int rm, int pg, int data) +{ + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + pred_full_reg_offset(s, pg), + vsz, vsz, data, fn); +} + /* Invoke a vector expander on two Zregs. */ -static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn, - int esz, int rd, int rn) +static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn, + int esz, int rd, int rn) { - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - gvec_fn(esz, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), vsz, vsz); - } - return true; + unsigned vsz = vec_full_reg_size(s); + gvec_fn(esz, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), vsz, vsz); } /* Invoke a vector expander on three Zregs. 
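*/

/*
 * Usage sketch: with the gen_gvec_ool_* wrappers above, a typical SVE
 * trans_ function reduces to an access check plus a single call. A
 * hypothetical example, modelled on the trans_TBL conversion below:
 */
static bool trans_EXAMPLE_ool(DisasContext *s, arg_rrr_esz *a)
{
    /* one out-of-line helper per element size */
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d,
    };
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
    }
    return true;
}

/*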
*/ -static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn, - int esz, int rd, int rn, int rm) +static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn, + int esz, int rd, int rn, int rm) { - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - gvec_fn(esz, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), vsz, vsz); - } - return true; + unsigned vsz = vec_full_reg_size(s); + gvec_fn(esz, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), vsz, vsz); } /* Invoke a vector move on two Zregs. */ static bool do_mov_z(DisasContext *s, int rd, int rn) { - return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn); + if (sve_access_check(s)) { + gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn); + } + return true; } /* Initialize a Zreg with replications of a 64-bit immediate. */ @@ -180,52 +221,27 @@ static void do_dupi_z(DisasContext *s, int rd, uint64_t word) tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word); } -/* Invoke a vector expander on two Pregs. */ -static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn, - int esz, int rd, int rn) -{ - if (sve_access_check(s)) { - unsigned psz = pred_gvec_reg_size(s); - gvec_fn(esz, pred_full_reg_offset(s, rd), - pred_full_reg_offset(s, rn), psz, psz); - } - return true; -} - /* Invoke a vector expander on three Pregs. */ -static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn, - int esz, int rd, int rn, int rm) +static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn, + int rd, int rn, int rm) { - if (sve_access_check(s)) { - unsigned psz = pred_gvec_reg_size(s); - gvec_fn(esz, pred_full_reg_offset(s, rd), - pred_full_reg_offset(s, rn), - pred_full_reg_offset(s, rm), psz, psz); - } - return true; + unsigned psz = pred_gvec_reg_size(s); + gvec_fn(MO_64, pred_full_reg_offset(s, rd), + pred_full_reg_offset(s, rn), + pred_full_reg_offset(s, rm), psz, psz); } -/* Invoke a vector operation on four Pregs. */ -static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op, - int rd, int rn, int rm, int rg) +/* Invoke a vector move on two Pregs. */ +static bool do_mov_p(DisasContext *s, int rd, int rn) { if (sve_access_check(s)) { unsigned psz = pred_gvec_reg_size(s); - tcg_gen_gvec_4(pred_full_reg_offset(s, rd), - pred_full_reg_offset(s, rn), - pred_full_reg_offset(s, rm), - pred_full_reg_offset(s, rg), - psz, psz, gvec_op); + tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd), + pred_full_reg_offset(s, rn), psz, psz); } return true; } -/* Invoke a vector move on two Pregs. */ -static bool do_mov_p(DisasContext *s, int rd, int rn) -{ - return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn); -} - /* Set the cpu flags as per a return from an SVE helper. 
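*/

/*
 * Note that the predicate-register wrappers above deliberately hard-code
 * the element size: predicate AND/OR/moves are pure bitwise operations,
 * so the esz argument the old do_vector*_p helpers carried never affected
 * the result. MO_64 (or MO_8 for the plain move) is used purely as a
 * convenient granule for the generic vector expander.
 */

/*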
*/ static void do_pred_flags(TCGv_i32 t) { @@ -273,24 +289,32 @@ const uint64_t pred_esz_masks[4] = { *** SVE Logical - Unpredicated Group */ +static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn) +{ + if (sve_access_check(s)) { + gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm); + } + return true; +} + static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_and); } static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_or); } static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_xor); } static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_andc); } /* @@ -299,32 +323,32 @@ static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a) static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_add); } static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_sub); } static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_ssadd); } static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_sssub); } static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_usadd); } static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_ussub); } /* @@ -333,16 +357,11 @@ static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a) static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn) { - unsigned vsz = vec_full_reg_size(s); if (fn == NULL) { return false; } if (sve_access_check(s)) { - tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - pred_full_reg_offset(s, a->pg), - vsz, vsz, 0, fn); + gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0); } return true; } @@ -356,12 +375,7 @@ static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz) gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h, gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d }; - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - pred_full_reg_offset(s, pg), - vsz, vsz, 0, fns[esz]); + gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0); } #define DO_ZPZZ(NAME, name) \ @@ -433,11 +447,7 @@ static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn) return false; } if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - pred_full_reg_offset(s, a->pg), - vsz, vsz, 
0, fn); + gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0); } return true; } @@ -608,48 +618,29 @@ static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a) *** SVE Shift by Immediate - Predicated Group */ -/* Store zero into every active element of Zd. We will use this for two - * and three-operand predicated instructions for which logic dictates a - * zero result. +/* + * Copy Zn into Zd, storing zeros into inactive elements. + * If invert, store zeros into the active elements. */ -static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz) -{ - static gen_helper_gvec_2 * const fns[4] = { - gen_helper_sve_clr_b, gen_helper_sve_clr_h, - gen_helper_sve_clr_s, gen_helper_sve_clr_d, - }; - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), - pred_full_reg_offset(s, pg), - vsz, vsz, 0, fns[esz]); - } - return true; -} - -/* Copy Zn into Zd, storing zeros into inactive elements. */ -static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz) +static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg, + int esz, bool invert) { static gen_helper_gvec_3 * const fns[4] = { gen_helper_sve_movz_b, gen_helper_sve_movz_h, gen_helper_sve_movz_s, gen_helper_sve_movz_d, }; - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - pred_full_reg_offset(s, pg), - vsz, vsz, 0, fns[esz]); + + if (sve_access_check(s)) { + gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert); + } + return true; } static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a, gen_helper_gvec_3 *fn) { if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - pred_full_reg_offset(s, a->pg), - vsz, vsz, a->imm, fn); + gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm); } return true; } @@ -682,7 +673,7 @@ static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a) /* Shift by element size is architecturally valid. For logical shifts, it is a zeroing operation. */ if (a->imm >= (8 << a->esz)) { - return do_clr_zp(s, a->rd, a->pg, a->esz); + return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); } else { return do_zpzi_ool(s, a, fns[a->esz]); } @@ -700,7 +691,7 @@ static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a) /* Shift by element size is architecturally valid. For logical shifts, it is a zeroing operation. */ if (a->imm >= (8 << a->esz)) { - return do_clr_zp(s, a->rd, a->pg, a->esz); + return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); } else { return do_zpzi_ool(s, a, fns[a->esz]); } @@ -718,7 +709,7 @@ static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a) /* Shift by element size is architecturally valid. For arithmetic right shift for division, it is a zeroing operation. 
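*/

/*
 * Per-lane model of the boundary case handled above and below: a
 * predicated shift by exactly the element width zeroes every active
 * lane, which is why it can be implemented as do_movz_zpz() with
 * invert=true. A hypothetical scalar illustration for 8-bit lanes:
 */
static uint8_t zpzi_shift_lane(uint8_t zdn, bool active, int shift)
{
    if (!active) {
        return zdn;        /* merging: inactive lanes keep Zdn */
    }
    if (shift >= 8) {
        return 0;          /* shift == element size: zeroing */
    }
    return zdn >> shift;   /* ordinary logical shift otherwise */
}

/*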
*/ if (a->imm >= (8 << a->esz)) { - return do_clr_zp(s, a->rd, a->pg, a->esz); + return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); } else { return do_zpzi_ool(s, a, fns[a->esz]); } @@ -799,11 +790,7 @@ static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn) return false; } if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, 0, fn); + gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0); } return true; } @@ -977,11 +964,7 @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a) static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn) { if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, a->imm, fn); + gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm); } return true; } @@ -1022,10 +1005,7 @@ static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a) return false; } if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vsz, vsz, 0, fns[a->esz]); + gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0); } return true; } @@ -1042,11 +1022,7 @@ static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a) return false; } if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, 0, fns[a->esz]); + gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0); } return true; } @@ -1068,6 +1044,11 @@ static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a, int mofs = pred_full_reg_offset(s, a->rm); int gofs = pred_full_reg_offset(s, a->pg); + if (!a->s) { + tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); + return true; + } + if (psz == 8) { /* Do the operation and the flags generation in temps. 
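*/

/*
 * For reference, the flags computed on this path follow the SVE PREDTEST
 * convention: N = the first active result bit, Z = no active result bit
 * set, C = NOT(the last active result bit), V = 0. A hedged scalar
 * sketch over one 64-bit predicate word (clz64 from qemu/host-utils.h):
 */
static uint32_t predtest_word_sketch(uint64_t result, uint64_t pg)
{
    uint32_t flags = 0;

    if (pg) {
        uint64_t first = pg & -pg;                 /* lowest active bit */
        uint64_t last = 1ull << (63 - clz64(pg));  /* highest active bit */

        flags |= (result & first) ? CPSR_N : 0;
        flags |= (result & pg) ? 0 : CPSR_Z;
        flags |= (result & last) ? 0 : CPSR_C;
    } else {
        flags |= CPSR_Z | CPSR_C;                  /* no active elements */
    }
    return flags;
}

/*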
*/ TCGv_i64 pd = tcg_temp_new_i64(); @@ -1127,19 +1108,24 @@ static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_and_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else if (a->rn == a->rm) { - if (a->pg == a->rn) { - return do_mov_p(s, a->rd, a->rn); - } else { - return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg); + + if (!a->s) { + if (!sve_access_check(s)) { + return true; + } + if (a->rn == a->rm) { + if (a->pg == a->rn) { + do_mov_p(s, a->rd, a->rn); + } else { + gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg); + } + return true; + } else if (a->pg == a->rn || a->pg == a->rm) { + gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm); + return true; } - } else if (a->pg == a->rn || a->pg == a->rm) { - return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); } + return do_pppp_flags(s, a, &op); } static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1163,13 +1149,14 @@ static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_bic_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else if (a->pg == a->rn) { - return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); + + if (!a->s && a->pg == a->rn) { + if (sve_access_check(s)) { + gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm); + } + return true; } + return do_pppp_flags(s, a, &op); } static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1193,41 +1180,22 @@ static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_eor_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); - } -} - -static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) -{ - tcg_gen_and_i64(pn, pn, pg); - tcg_gen_andc_i64(pm, pm, pg); - tcg_gen_or_i64(pd, pn, pm); -} - -static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, - TCGv_vec pm, TCGv_vec pg) -{ - tcg_gen_and_vec(vece, pn, pn, pg); - tcg_gen_andc_vec(vece, pm, pm, pg); - tcg_gen_or_vec(vece, pd, pn, pm); + return do_pppp_flags(s, a, &op); } static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a) { - static const GVecGen4 op = { - .fni8 = gen_sel_pg_i64, - .fniv = gen_sel_pg_vec, - .fno = gen_helper_sve_sel_pppp, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - }; if (a->s) { return false; - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); } + if (sve_access_check(s)) { + unsigned psz = pred_gvec_reg_size(s); + tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd), + pred_full_reg_offset(s, a->pg), + pred_full_reg_offset(s, a->rn), + pred_full_reg_offset(s, a->rm), psz, psz); + } + return true; } static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1251,13 +1219,11 @@ static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_orr_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else if (a->pg == a->rn && a->rn == a->rm) { + + if (!a->s && a->pg == a->rn && a->rn == a->rm) { return do_mov_p(s, a->rd, a->rn); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); } + return do_pppp_flags(s, a, &op); } 
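/*
 * Why tcg_gen_gvec_bitsel suffices for SEL_pppp above: predicate SEL is
 * a pure bitwise select with no flag-setting form, so per 64-bit
 * predicate word the operation is exactly
 *
 *     pd = (pn & pg) | (pm & ~pg);
 *
 * which is the definition of bitsel(pg, pn, pm) and matches the
 * and/andc/or sequence of the removed gen_sel_pg_* expanders.
 */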
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1281,11 +1247,7 @@ static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_orn_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); - } + return do_pppp_flags(s, a, &op); } static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1309,11 +1271,7 @@ static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_nor_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); - } + return do_pppp_flags(s, a, &op); } static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1337,11 +1295,7 @@ static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_nand_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); - } + return do_pppp_flags(s, a, &op); } /* @@ -2103,10 +2057,7 @@ static bool trans_REV_v(DisasContext *s, arg_rr_esz *a) }; if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vsz, vsz, 0, fns[a->esz]); + gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0); } return true; } @@ -2119,11 +2070,7 @@ static bool trans_TBL(DisasContext *s, arg_rrr_esz *a) }; if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, 0, fns[a->esz]); + gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0); } return true; } @@ -2296,11 +2243,7 @@ static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data, gen_helper_gvec_3 *fn) { if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, data, fn); + gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data); } return true; } @@ -2745,12 +2688,8 @@ static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a) static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a) { if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - pred_full_reg_offset(s, a->pg), - vsz, vsz, a->esz, gen_helper_sve_splice); + gen_gvec_ool_zzzp(s, gen_helper_sve_splice, + a->rd, a->rn, a->rm, a->pg, 0); } return true; } @@ -3429,11 +3368,7 @@ static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a) }; if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, 0, fns[a->u][a->sz]); + gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, 0); } return true; } @@ -3446,11 +3381,7 @@ static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a) }; if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, a->index, fns[a->u][a->sz]); + gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, 
a->index); } return true; } @@ -3872,10 +3803,6 @@ static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \ return true; \ } -#define float16_two make_float16(0x4000) -#define float32_two make_float32(0x40000000) -#define float64_two make_float64(0x4000000000000000ULL) - DO_FP_IMM(FADD, fadds, half, one) DO_FP_IMM(FSUB, fsubs, half, one) DO_FP_IMM(FMUL, fmuls, half, two) @@ -5093,8 +5020,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) /* Zero the inactive elements. */ gen_set_label(over); - do_movz_zpz(s, a->rd, a->rd, a->pg, esz); - return true; + return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false); } static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, @@ -5877,8 +5803,5 @@ static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a) static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a) { - if (sve_access_check(s)) { - do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz); - } - return true; + return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false); } diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc index 4eeafb494a..28e0dba5f1 100644 --- a/target/arm/translate-vfp.c.inc +++ b/target/arm/translate-vfp.c.inc @@ -190,18 +190,22 @@ static bool vfp_access_check(DisasContext *s) static bool trans_VSEL(DisasContext *s, arg_VSEL *a) { uint32_t rd, rn, rm; - bool dp = a->dp; + int sz = a->sz; if (!dc_isar_feature(aa32_vsel, s)) { return false; } - if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) { + if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) { + return false; + } + + if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) { return false; } /* UNDEF accesses to D16-D31 if they don't exist */ - if (dp && !dc_isar_feature(aa32_simd_r32, s) && + if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && ((a->vm | a->vn | a->vd) & 0x10)) { return false; } @@ -214,7 +218,7 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a) return true; } - if (dp) { + if (sz == 3) { TCGv_i64 frn, frm, dest; TCGv_i64 tmp, zero, zf, nf, vf; @@ -307,6 +311,10 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a) tcg_temp_free_i32(tmp); break; } + /* For fp16 the top half is always zeroes */ + if (sz == 1) { + tcg_gen_andi_i32(dest, dest, 0xffff); + } neon_store_reg32(dest, rd); tcg_temp_free_i32(frn); tcg_temp_free_i32(frm); @@ -333,7 +341,7 @@ static const uint8_t fp_decode_rm[] = { static bool trans_VRINT(DisasContext *s, arg_VRINT *a) { uint32_t rd, rm; - bool dp = a->dp; + int sz = a->sz; TCGv_ptr fpst; TCGv_i32 tcg_rmode; int rounding = fp_decode_rm[a->rm]; @@ -342,12 +350,16 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) return false; } - if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) { + if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) { + return false; + } + + if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) { return false; } /* UNDEF accesses to D16-D31 if they don't exist */ - if (dp && !dc_isar_feature(aa32_simd_r32, s) && + if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && ((a->vm | a->vd) & 0x10)) { return false; } @@ -359,12 +371,16 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) return true; } - fpst = fpstatus_ptr(FPST_FPCR); + if (sz == 1) { + fpst = fpstatus_ptr(FPST_FPCR_F16); + } else { + fpst = fpstatus_ptr(FPST_FPCR); + } tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding)); gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); - if (dp) { + if (sz == 3) { TCGv_i64 tcg_op; TCGv_i64 tcg_res; tcg_op = tcg_temp_new_i64(); @@ -380,7 +396,11 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) 
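/*
 * In the single-register path below, the fp16 case (sz == 1) shares the
 * TCGv_i32 plumbing with fp32 and differs only in the helper called
 * (gen_helper_rinth vs gen_helper_rints), both running against the
 * rounding mode installed via gen_helper_set_rmode and the
 * FPST_FPCR_F16 status selected earlier.
 */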
tcg_op = tcg_temp_new_i32(); tcg_res = tcg_temp_new_i32(); neon_load_reg32(tcg_op, rm); - gen_helper_rints(tcg_res, tcg_op, fpst); + if (sz == 1) { + gen_helper_rinth(tcg_res, tcg_op, fpst); + } else { + gen_helper_rints(tcg_res, tcg_op, fpst); + } neon_store_reg32(tcg_res, rd); tcg_temp_free_i32(tcg_op); tcg_temp_free_i32(tcg_res); @@ -396,7 +416,7 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) static bool trans_VCVT(DisasContext *s, arg_VCVT *a) { uint32_t rd, rm; - bool dp = a->dp; + int sz = a->sz; TCGv_ptr fpst; TCGv_i32 tcg_rmode, tcg_shift; int rounding = fp_decode_rm[a->rm]; @@ -406,12 +426,16 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a) return false; } - if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) { + if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) { + return false; + } + + if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) { return false; } /* UNDEF accesses to D16-D31 if they don't exist */ - if (dp && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { + if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { return false; } @@ -422,14 +446,18 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a) return true; } - fpst = fpstatus_ptr(FPST_FPCR); + if (sz == 1) { + fpst = fpstatus_ptr(FPST_FPCR_F16); + } else { + fpst = fpstatus_ptr(FPST_FPCR); + } tcg_shift = tcg_const_i32(0); tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding)); gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); - if (dp) { + if (sz == 3) { TCGv_i64 tcg_double, tcg_res; TCGv_i32 tcg_tmp; tcg_double = tcg_temp_new_i64(); @@ -451,10 +479,18 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a) tcg_single = tcg_temp_new_i32(); tcg_res = tcg_temp_new_i32(); neon_load_reg32(tcg_single, rm); - if (is_signed) { - gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst); + if (sz == 1) { + if (is_signed) { + gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst); + } else { + gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst); + } } else { - gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst); + if (is_signed) { + gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst); + } else { + gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst); + } } neon_store_reg32(tcg_res, rd); tcg_temp_free_i32(tcg_res); @@ -773,6 +809,40 @@ static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a) return true; } +static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a) +{ + TCGv_i32 tmp; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + if (a->rt == 15) { + /* UNPREDICTABLE; we choose to UNDEF */ + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + if (a->l) { + /* VFP to general purpose register */ + tmp = tcg_temp_new_i32(); + neon_load_reg32(tmp, a->vn); + tcg_gen_andi_i32(tmp, tmp, 0xffff); + store_reg(s, a->rt, tmp); + } else { + /* general purpose register to VFP */ + tmp = load_reg(s, a->rt); + tcg_gen_andi_i32(tmp, tmp, 0xffff); + neon_store_reg32(tmp, a->vn); + tcg_temp_free_i32(tmp); + } + + return true; +} + static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a) { TCGv_i32 tmp; @@ -886,6 +956,41 @@ static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a) return true; } +static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a) +{ + uint32_t offset; + TCGv_i32 addr, tmp; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */ + offset = 
a->imm << 1; + if (!a->u) { + offset = -offset; + } + + /* For thumb, use of PC is UNPREDICTABLE. */ + addr = add_reg_for_lit(s, a->rn, offset); + tmp = tcg_temp_new_i32(); + if (a->l) { + gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); + neon_store_reg32(tmp, a->vd); + } else { + neon_load_reg32(tmp, a->vd); + gen_aa32_st16(s, tmp, addr, get_mem_index(s)); + } + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(addr); + + return true; +} + static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a) { uint32_t offset; @@ -1266,6 +1371,54 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn, return true; } +static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn, + int vd, int vn, int vm, bool reads_vd) +{ + /* + * Do a half-precision operation. Functionally this is + * the same as do_vfp_3op_sp(), except: + * - it uses the FPST_FPCR_F16 + * - it doesn't need the VFP vector handling (fp16 is a + * v8 feature, and in v8 VFP vectors don't exist) + * - it does the aa32_fp16_arith feature test + */ + TCGv_i32 f0, f1, fd; + TCGv_ptr fpst; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + if (s->vec_len != 0 || s->vec_stride != 0) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + f0 = tcg_temp_new_i32(); + f1 = tcg_temp_new_i32(); + fd = tcg_temp_new_i32(); + fpst = fpstatus_ptr(FPST_FPCR_F16); + + neon_load_reg32(f0, vn); + neon_load_reg32(f1, vm); + + if (reads_vd) { + neon_load_reg32(fd, vd); + } + fn(fd, f0, f1, fpst); + neon_store_reg32(fd, vd); + + tcg_temp_free_i32(f0); + tcg_temp_free_i32(f1); + tcg_temp_free_i32(fd); + tcg_temp_free_ptr(fpst); + + return true; +} + static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn, int vd, int vn, int vm, bool reads_vd) { @@ -1421,6 +1574,38 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) return true; } +static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) +{ + /* + * Do a half-precision operation. 
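*/

/*
 * Offset handling for the fp16 VLDR/VSTR above: the imm8 field is scaled
 * by 2, where the fp32 and fp64 forms scale by 4, and then signed by the
 * U bit. A hypothetical helper, for illustration only:
 */
static int vldr_vstr_hp_offset(int imm8, bool u)
{
    int offset = imm8 << 1;        /* fp16: units of 2 bytes */
    return u ? offset : -offset;   /* U == 0 means subtract */
}

/*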
Functionally this is + * the same as do_vfp_2op_sp(), except: + * - it doesn't need the VFP vector handling (fp16 is a + * v8 feature, and in v8 VFP vectors don't exist) + * - it does the aa32_fp16_arith feature test + */ + TCGv_i32 f0; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + if (s->vec_len != 0 || s->vec_stride != 0) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + f0 = tcg_temp_new_i32(); + neon_load_reg32(f0, vm); + fn(f0, f0); + neon_store_reg32(f0, vd); + tcg_temp_free_i32(f0); + + return true; +} + static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm) { uint32_t delta_m = 0; @@ -1499,6 +1684,21 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm) return true; } +static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) +{ + /* Note that order of inputs to the add matters for NaNs */ + TCGv_i32 tmp = tcg_temp_new_i32(); + + gen_helper_vfp_mulh(tmp, vn, vm, fpst); + gen_helper_vfp_addh(vd, vd, tmp, fpst); + tcg_temp_free_i32(tmp); +} + +static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a) +{ + return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true); +} + static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) { /* Note that order of inputs to the add matters for NaNs */ @@ -1529,6 +1729,25 @@ static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a) return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true); } +static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) +{ + /* + * VMLS: vd = vd + -(vn * vm) + * Note that order of inputs to the add matters for NaNs. + */ + TCGv_i32 tmp = tcg_temp_new_i32(); + + gen_helper_vfp_mulh(tmp, vn, vm, fpst); + gen_helper_vfp_negh(tmp, tmp); + gen_helper_vfp_addh(vd, vd, tmp, fpst); + tcg_temp_free_i32(tmp); +} + +static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a) +{ + return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true); +} + static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) { /* @@ -1567,6 +1786,27 @@ static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a) return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true); } +static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) +{ + /* + * VNMLS: -fd + (fn * fm) + * Note that it isn't valid to replace (-A + B) with (B - A) or similar + * plausible looking simplifications because this will give wrong results + * for NaNs. 
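*/

/*
 * A scalar model of the step ordering the comments above insist on,
 * hypothetical and using host floats (it ignores FPSCR rounding and
 * flush-to-zero modes): the multiply rounds on its own, and each
 * negation is a separate sign-bit flip on the relevant operand, so a
 * NaN propagates with its sign bit inverted rather than being absorbed
 * into a fused operation.
 */
static float vmls_model(float vd, float vn, float vm)
{
    float prod = vn * vm;    /* rounds separately (not fused) */
    return vd + (-prod);     /* VMLS: negate the product, then add */
}

static float vnmls_model(float vd, float vn, float vm)
{
    float prod = vn * vm;
    return (-vd) + prod;     /* VNMLS: negate the accumulator instead */
}

/*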
+ */ + TCGv_i32 tmp = tcg_temp_new_i32(); + + gen_helper_vfp_mulh(tmp, vn, vm, fpst); + gen_helper_vfp_negh(vd, vd); + gen_helper_vfp_addh(vd, vd, tmp, fpst); + tcg_temp_free_i32(tmp); +} + +static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a) +{ + return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true); +} + static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) { /* @@ -1609,6 +1849,23 @@ static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a) return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true); } +static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) +{ + /* VNMLA: -fd + -(fn * fm) */ + TCGv_i32 tmp = tcg_temp_new_i32(); + + gen_helper_vfp_mulh(tmp, vn, vm, fpst); + gen_helper_vfp_negh(tmp, tmp); + gen_helper_vfp_negh(vd, vd); + gen_helper_vfp_addh(vd, vd, tmp, fpst); + tcg_temp_free_i32(tmp); +} + +static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a) +{ + return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true); +} + static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) { /* VNMLA: -fd + -(fn * fm) */ @@ -1643,6 +1900,11 @@ static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a) return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true); } +static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a) +{ + return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false); +} + static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a) { return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false); @@ -1653,6 +1915,18 @@ static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a) return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false); } +static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) +{ + /* VNMUL: -(fn * fm) */ + gen_helper_vfp_mulh(vd, vn, vm, fpst); + gen_helper_vfp_negh(vd, vd); +} + +static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a) +{ + return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false); +} + static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) { /* VNMUL: -(fn * fm) */ @@ -1677,6 +1951,11 @@ static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a) return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false); } +static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a) +{ + return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false); +} + static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a) { return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false); @@ -1687,6 +1966,11 @@ static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a) return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false); } +static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a) +{ + return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false); +} + static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a) { return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false); @@ -1697,6 +1981,11 @@ static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a) return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false); } +static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a) +{ + return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false); +} + static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a) { return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false); @@ -1707,6 +1996,24 @@ static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a) 
return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false); } +static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a) +{ + if (!dc_isar_feature(aa32_vminmaxnm, s)) { + return false; + } + return do_vfp_3op_hp(s, gen_helper_vfp_minnumh, + a->vd, a->vn, a->vm, false); +} + +static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a) +{ + if (!dc_isar_feature(aa32_vminmaxnm, s)) { + return false; + } + return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh, + a->vd, a->vn, a->vm, false); +} + static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a) { if (!dc_isar_feature(aa32_vminmaxnm, s)) { @@ -1743,6 +2050,69 @@ static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a) a->vd, a->vn, a->vm, false); } +static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) +{ + /* + * VFNMA : fd = muladd(-fd, fn, fm) + * VFNMS : fd = muladd(-fd, -fn, fm) + * VFMA : fd = muladd( fd, fn, fm) + * VFMS : fd = muladd( fd, -fn, fm) + * + * These are fused multiply-add, and must be done as one floating + * point operation with no rounding between the multiplication and + * addition steps. NB that doing the negations here as separate + * steps is correct : an input NaN should come out with its sign + * bit flipped if it is a negated-input. + */ + TCGv_ptr fpst; + TCGv_i32 vn, vm, vd; + + /* + * Present in VFPv4 only, and only with the FP16 extension. + * Note that we can't rely on the SIMDFMAC check alone, because + * in a Neon-no-VFP core that ID register field will be non-zero. + */ + if (!dc_isar_feature(aa32_fp16_arith, s) || + !dc_isar_feature(aa32_simdfmac, s) || + !dc_isar_feature(aa32_fpsp_v2, s)) { + return false; + } + + if (s->vec_len != 0 || s->vec_stride != 0) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + vn = tcg_temp_new_i32(); + vm = tcg_temp_new_i32(); + vd = tcg_temp_new_i32(); + + neon_load_reg32(vn, a->vn); + neon_load_reg32(vm, a->vm); + if (neg_n) { + /* VFNMS, VFMS */ + gen_helper_vfp_negh(vn, vn); + } + neon_load_reg32(vd, a->vd); + if (neg_d) { + /* VFNMA, VFNMS */ + gen_helper_vfp_negh(vd, vd); + } + fpst = fpstatus_ptr(FPST_FPCR_F16); + gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst); + neon_store_reg32(vd, a->vd); + + tcg_temp_free_ptr(fpst); + tcg_temp_free_i32(vn); + tcg_temp_free_i32(vm); + tcg_temp_free_i32(vd); + + return true; +} + static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) { /* @@ -1808,26 +2178,6 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) return true; } -static bool trans_VFMA_sp(DisasContext *s, arg_VFMA_sp *a) -{ - return do_vfm_sp(s, a, false, false); -} - -static bool trans_VFMS_sp(DisasContext *s, arg_VFMS_sp *a) -{ - return do_vfm_sp(s, a, true, false); -} - -static bool trans_VFNMA_sp(DisasContext *s, arg_VFNMA_sp *a) -{ - return do_vfm_sp(s, a, false, true); -} - -static bool trans_VFNMS_sp(DisasContext *s, arg_VFNMS_sp *a) -{ - return do_vfm_sp(s, a, true, true); -} - static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) { /* @@ -1899,24 +2249,43 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) return true; } -static bool trans_VFMA_dp(DisasContext *s, arg_VFMA_dp *a) -{ - return do_vfm_dp(s, a, false, false); -} +#define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD) \ + static bool trans_##INSN##_##PREC(DisasContext *s, \ + arg_##INSN##_##PREC *a) \ + { \ + return do_vfm_##PREC(s, a, NEGN, NEGD); \ + } -static bool trans_VFMS_dp(DisasContext *s, arg_VFMS_dp *a) 
-{ - return do_vfm_dp(s, a, true, false); -} +#define MAKE_VFM_TRANS_FNS(PREC) \ + MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \ + MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \ + MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \ + MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true) -static bool trans_VFNMA_dp(DisasContext *s, arg_VFNMA_dp *a) -{ - return do_vfm_dp(s, a, false, true); -} +MAKE_VFM_TRANS_FNS(hp) +MAKE_VFM_TRANS_FNS(sp) +MAKE_VFM_TRANS_FNS(dp) -static bool trans_VFNMS_dp(DisasContext *s, arg_VFNMS_dp *a) +static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a) { - return do_vfm_dp(s, a, true, true); + TCGv_i32 fd; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + if (s->vec_len != 0 || s->vec_stride != 0) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm)); + neon_store_reg32(fd, a->vd); + tcg_temp_free_i32(fd); + return true; } static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a) @@ -2024,34 +2393,27 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a) return true; } -static bool trans_VMOV_reg_sp(DisasContext *s, arg_VMOV_reg_sp *a) -{ - return do_vfp_2op_sp(s, tcg_gen_mov_i32, a->vd, a->vm); -} +#define DO_VFP_2OP(INSN, PREC, FN) \ + static bool trans_##INSN##_##PREC(DisasContext *s, \ + arg_##INSN##_##PREC *a) \ + { \ + return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \ + } -static bool trans_VMOV_reg_dp(DisasContext *s, arg_VMOV_reg_dp *a) -{ - return do_vfp_2op_dp(s, tcg_gen_mov_i64, a->vd, a->vm); -} +DO_VFP_2OP(VMOV_reg, sp, tcg_gen_mov_i32) +DO_VFP_2OP(VMOV_reg, dp, tcg_gen_mov_i64) -static bool trans_VABS_sp(DisasContext *s, arg_VABS_sp *a) -{ - return do_vfp_2op_sp(s, gen_helper_vfp_abss, a->vd, a->vm); -} +DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh) +DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss) +DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd) -static bool trans_VABS_dp(DisasContext *s, arg_VABS_dp *a) -{ - return do_vfp_2op_dp(s, gen_helper_vfp_absd, a->vd, a->vm); -} +DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh) +DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs) +DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd) -static bool trans_VNEG_sp(DisasContext *s, arg_VNEG_sp *a) +static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm) { - return do_vfp_2op_sp(s, gen_helper_vfp_negs, a->vd, a->vm); -} - -static bool trans_VNEG_dp(DisasContext *s, arg_VNEG_dp *a) -{ - return do_vfp_2op_dp(s, gen_helper_vfp_negd, a->vd, a->vm); + gen_helper_vfp_sqrth(vd, vm, cpu_env); } static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm) @@ -2059,19 +2421,52 @@ static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm) gen_helper_vfp_sqrts(vd, vm, cpu_env); } -static bool trans_VSQRT_sp(DisasContext *s, arg_VSQRT_sp *a) -{ - return do_vfp_2op_sp(s, gen_VSQRT_sp, a->vd, a->vm); -} - static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm) { gen_helper_vfp_sqrtd(vd, vm, cpu_env); } -static bool trans_VSQRT_dp(DisasContext *s, arg_VSQRT_dp *a) +DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp) +DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp) +DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp) + +static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a) { - return do_vfp_2op_dp(s, gen_VSQRT_dp, a->vd, a->vm); + TCGv_i32 vd, vm; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + /* Vm/M bits must be zero for the Z variant */ + if (a->z && a->vm != 0) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + vd = tcg_temp_new_i32(); + vm = tcg_temp_new_i32(); + + neon_load_reg32(vd, a->vd); + if (a->z) { + 
tcg_gen_movi_i32(vm, 0); + } else { + neon_load_reg32(vm, a->vm); + } + + if (a->e) { + gen_helper_vfp_cmpeh(vd, vm, cpu_env); + } else { + gen_helper_vfp_cmph(vd, vm, cpu_env); + } + + tcg_temp_free_i32(vd); + tcg_temp_free_i32(vm); + + return true; } static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a) @@ -2289,6 +2684,29 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a) return true; } +static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a) +{ + TCGv_ptr fpst; + TCGv_i32 tmp; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tmp = tcg_temp_new_i32(); + neon_load_reg32(tmp, a->vm); + fpst = fpstatus_ptr(FPST_FPCR_F16); + gen_helper_rinth(tmp, tmp, fpst); + neon_store_reg32(tmp, a->vd); + tcg_temp_free_ptr(fpst); + tcg_temp_free_i32(tmp); + return true; +} + static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a) { TCGv_ptr fpst; @@ -2344,6 +2762,34 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a) return true; } +static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a) +{ + TCGv_ptr fpst; + TCGv_i32 tmp; + TCGv_i32 tcg_rmode; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tmp = tcg_temp_new_i32(); + neon_load_reg32(tmp, a->vm); + fpst = fpstatus_ptr(FPST_FPCR_F16); + tcg_rmode = tcg_const_i32(float_round_to_zero); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); + gen_helper_rinth(tmp, tmp, fpst); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); + neon_store_reg32(tmp, a->vd); + tcg_temp_free_ptr(fpst); + tcg_temp_free_i32(tcg_rmode); + tcg_temp_free_i32(tmp); + return true; +} + static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a) { TCGv_ptr fpst; @@ -2409,6 +2855,29 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a) return true; } +static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a) +{ + TCGv_ptr fpst; + TCGv_i32 tmp; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tmp = tcg_temp_new_i32(); + neon_load_reg32(tmp, a->vm); + fpst = fpstatus_ptr(FPST_FPCR_F16); + gen_helper_rinth_exact(tmp, tmp, fpst); + neon_store_reg32(tmp, a->vd); + tcg_temp_free_ptr(fpst); + tcg_temp_free_i32(tmp); + return true; +} + static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a) { TCGv_ptr fpst; @@ -2520,6 +2989,35 @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a) return true; } +static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a) +{ + TCGv_i32 vm; + TCGv_ptr fpst; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + vm = tcg_temp_new_i32(); + neon_load_reg32(vm, a->vm); + fpst = fpstatus_ptr(FPST_FPCR_F16); + if (a->s) { + /* i32 -> f16 */ + gen_helper_vfp_sitoh(vm, vm, fpst); + } else { + /* u32 -> f16 */ + gen_helper_vfp_uitoh(vm, vm, fpst); + } + neon_store_reg32(vm, a->vd); + tcg_temp_free_i32(vm); + tcg_temp_free_ptr(fpst); + return true; +} + static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a) { TCGv_i32 vm; @@ -2618,6 +3116,65 @@ static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a) return true; } +static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a) +{ + TCGv_i32 vd, shift; + TCGv_ptr fpst; + int frac_bits; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + frac_bits = 
(a->opc & 1) ? (32 - a->imm) : (16 - a->imm); + + vd = tcg_temp_new_i32(); + neon_load_reg32(vd, a->vd); + + fpst = fpstatus_ptr(FPST_FPCR_F16); + shift = tcg_const_i32(frac_bits); + + /* Switch on op:U:sx bits */ + switch (a->opc) { + case 0: + gen_helper_vfp_shtoh(vd, vd, shift, fpst); + break; + case 1: + gen_helper_vfp_sltoh(vd, vd, shift, fpst); + break; + case 2: + gen_helper_vfp_uhtoh(vd, vd, shift, fpst); + break; + case 3: + gen_helper_vfp_ultoh(vd, vd, shift, fpst); + break; + case 4: + gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst); + break; + case 5: + gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst); + break; + case 6: + gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst); + break; + case 7: + gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst); + break; + default: + g_assert_not_reached(); + } + + neon_store_reg32(vd, a->vd); + tcg_temp_free_i32(vd); + tcg_temp_free_i32(shift); + tcg_temp_free_ptr(fpst); + return true; +} + static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a) { TCGv_i32 vd, shift; @@ -2742,6 +3299,42 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a) return true; } +static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a) +{ + TCGv_i32 vm; + TCGv_ptr fpst; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = fpstatus_ptr(FPST_FPCR_F16); + vm = tcg_temp_new_i32(); + neon_load_reg32(vm, a->vm); + + if (a->s) { + if (a->rz) { + gen_helper_vfp_tosizh(vm, vm, fpst); + } else { + gen_helper_vfp_tosih(vm, vm, fpst); + } + } else { + if (a->rz) { + gen_helper_vfp_touizh(vm, vm, fpst); + } else { + gen_helper_vfp_touih(vm, vm, fpst); + } + } + neon_store_reg32(vm, a->vd); + tcg_temp_free_i32(vm); + tcg_temp_free_ptr(fpst); + return true; +} + static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a) { TCGv_i32 vm; @@ -2895,3 +3488,56 @@ static bool trans_NOCP(DisasContext *s, arg_NOCP *a) return false; } + +static bool trans_VINS(DisasContext *s, arg_VINS *a) +{ + TCGv_i32 rd, rm; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + if (s->vec_len != 0 || s->vec_stride != 0) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* Insert low half of Vm into high half of Vd */ + rm = tcg_temp_new_i32(); + rd = tcg_temp_new_i32(); + neon_load_reg32(rm, a->vm); + neon_load_reg32(rd, a->vd); + tcg_gen_deposit_i32(rd, rd, rm, 16, 16); + neon_store_reg32(rd, a->vd); + tcg_temp_free_i32(rm); + tcg_temp_free_i32(rd); + return true; +} + +static bool trans_VMOVX(DisasContext *s, arg_VINS *a) +{ + TCGv_i32 rm; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + if (s->vec_len != 0 || s->vec_stride != 0) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* Set Vd to high half of Vm */ + rm = tcg_temp_new_i32(); + neon_load_reg32(rm, a->vm); + tcg_gen_shri_i32(rm, rm, 16); + neon_store_reg32(rm, a->vd); + tcg_temp_free_i32(rm); + return true; +} diff --git a/target/arm/translate.h b/target/arm/translate.h index 6d6d4c0f42..423b0e08df 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -64,6 +64,7 @@ typedef struct DisasContext { * that it is set at the point where we actually touch the FP regs. */ bool fp_access_checked; + bool sve_access_checked; /* ARMv8 single-step state (this is distinct from the QEMU gdbstub * single-step support). 
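*/

/*
 * Bit-level model of the two fp16 register moves added above in
 * translate-vfp.c.inc, expressed on the 32-bit S-register image.
 * Hypothetical helpers, for illustration only:
 */
static uint32_t vins_model(uint32_t vd, uint32_t vm)
{
    /* VINS: low half of Vm into the high half of Vd */
    return (vd & 0xffff) | (vm << 16);
}

static uint32_t vmovx_model(uint32_t vm)
{
    /* VMOVX: high half of Vm into the low half of Vd, top zeroed */
    return vm >> 16;
}

/*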
*/ diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 7d76412ee0..a973454e4f 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -37,19 +37,24 @@ #endif /* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */ -static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2, - int16_t src3, uint32_t *sat) +static int16_t do_sqrdmlah_h(int16_t src1, int16_t src2, int16_t src3, + bool neg, bool round, uint32_t *sat) { - /* Simplify: + /* + * Simplify: * = ((a3 << 16) + ((e1 * e2) << 1) + (1 << 15)) >> 16 * = ((a3 << 15) + (e1 * e2) + (1 << 14)) >> 15 */ int32_t ret = (int32_t)src1 * src2; - ret = ((int32_t)src3 << 15) + ret + (1 << 14); + if (neg) { + ret = -ret; + } + ret += ((int32_t)src3 << 15) + (round << 14); ret >>= 15; + if (ret != (int16_t)ret) { *sat = 1; - ret = (ret < 0 ? -0x8000 : 0x7fff); + ret = (ret < 0 ? INT16_MIN : INT16_MAX); } return ret; } @@ -58,8 +63,9 @@ uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1, uint32_t src2, uint32_t src3) { uint32_t *sat = &env->vfp.qc[0]; - uint16_t e1 = inl_qrdmlah_s16(src1, src2, src3, sat); - uint16_t e2 = inl_qrdmlah_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat); + uint16_t e1 = do_sqrdmlah_h(src1, src2, src3, false, true, sat); + uint16_t e2 = do_sqrdmlah_h(src1 >> 16, src2 >> 16, src3 >> 16, + false, true, sat); return deposit32(e1, 16, 16, e2); } @@ -73,35 +79,18 @@ void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm, uintptr_t i; for (i = 0; i < opr_sz / 2; ++i) { - d[i] = inl_qrdmlah_s16(n[i], m[i], d[i], vq); + d[i] = do_sqrdmlah_h(n[i], m[i], d[i], false, true, vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } -/* Signed saturating rounding doubling multiply-subtract high half, 16-bit */ -static int16_t inl_qrdmlsh_s16(int16_t src1, int16_t src2, - int16_t src3, uint32_t *sat) -{ - /* Similarly, using subtraction: - * = ((a3 << 16) - ((e1 * e2) << 1) + (1 << 15)) >> 16 - * = ((a3 << 15) - (e1 * e2) + (1 << 14)) >> 15 - */ - int32_t ret = (int32_t)src1 * src2; - ret = ((int32_t)src3 << 15) - ret + (1 << 14); - ret >>= 15; - if (ret != (int16_t)ret) { - *sat = 1; - ret = (ret < 0 ? 
-0x8000 : 0x7fff); - } - return ret; -} - uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1, uint32_t src2, uint32_t src3) { uint32_t *sat = &env->vfp.qc[0]; - uint16_t e1 = inl_qrdmlsh_s16(src1, src2, src3, sat); - uint16_t e2 = inl_qrdmlsh_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat); + uint16_t e1 = do_sqrdmlah_h(src1, src2, src3, true, true, sat); + uint16_t e2 = do_sqrdmlah_h(src1 >> 16, src2 >> 16, src3 >> 16, + true, true, sat); return deposit32(e1, 16, 16, e2); } @@ -115,19 +104,47 @@ void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm, uintptr_t i; for (i = 0; i < opr_sz / 2; ++i) { - d[i] = inl_qrdmlsh_s16(n[i], m[i], d[i], vq); + d[i] = do_sqrdmlah_h(n[i], m[i], d[i], true, true, vq); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqdmulh_h)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int16_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 2; ++i) { + d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, false, vq); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqrdmulh_h)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int16_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 2; ++i) { + d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, true, vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } /* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */ -static int32_t inl_qrdmlah_s32(int32_t src1, int32_t src2, - int32_t src3, uint32_t *sat) +static int32_t do_sqrdmlah_s(int32_t src1, int32_t src2, int32_t src3, + bool neg, bool round, uint32_t *sat) { /* Simplify similarly to int_qrdmlah_s16 above. */ int64_t ret = (int64_t)src1 * src2; - ret = ((int64_t)src3 << 31) + ret + (1 << 30); + if (neg) { + ret = -ret; + } + ret += ((int64_t)src3 << 31) + (round << 30); ret >>= 31; + if (ret != (int32_t)ret) { *sat = 1; ret = (ret < 0 ? INT32_MIN : INT32_MAX); @@ -139,7 +156,7 @@ uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1, int32_t src2, int32_t src3) { uint32_t *sat = &env->vfp.qc[0]; - return inl_qrdmlah_s32(src1, src2, src3, sat); + return do_sqrdmlah_s(src1, src2, src3, false, true, sat); } void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm, @@ -152,31 +169,16 @@ void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm, uintptr_t i; for (i = 0; i < opr_sz / 4; ++i) { - d[i] = inl_qrdmlah_s32(n[i], m[i], d[i], vq); + d[i] = do_sqrdmlah_s(n[i], m[i], d[i], false, true, vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } -/* Signed saturating rounding doubling multiply-subtract high half, 32-bit */ -static int32_t inl_qrdmlsh_s32(int32_t src1, int32_t src2, - int32_t src3, uint32_t *sat) -{ - /* Simplify similarly to int_qrdmlsh_s16 above. */ - int64_t ret = (int64_t)src1 * src2; - ret = ((int64_t)src3 << 31) - ret + (1 << 30); - ret >>= 31; - if (ret != (int32_t)ret) { - *sat = 1; - ret = (ret < 0 ? 
INT32_MIN : INT32_MAX); - } - return ret; -} - uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1, int32_t src2, int32_t src3) { uint32_t *sat = &env->vfp.qc[0]; - return inl_qrdmlsh_s32(src1, src2, src3, sat); + return do_sqrdmlah_s(src1, src2, src3, true, true, sat); } void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm, @@ -189,7 +191,31 @@ void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm, uintptr_t i; for (i = 0; i < opr_sz / 4; ++i) { - d[i] = inl_qrdmlsh_s32(n[i], m[i], d[i], vq); + d[i] = do_sqrdmlah_s(n[i], m[i], d[i], true, true, vq); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqdmulh_s)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int32_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 4; ++i) { + d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, false, vq); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqrdmulh_s)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int32_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 4; ++i) { + d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, true, vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } @@ -630,6 +656,81 @@ void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm, clear_tail(d, opr_sz, simd_maxsz(desc)); } +/* + * Floating point comparisons producing an integer result (all 1s or all 0s). + * Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do. + * Softfloat routines return 0/1, which we convert to the 0/-1 Neon requires. + */ +static uint16_t float16_ceq(float16 op1, float16 op2, float_status *stat) +{ + return -float16_eq_quiet(op1, op2, stat); +} + +static uint32_t float32_ceq(float32 op1, float32 op2, float_status *stat) +{ + return -float32_eq_quiet(op1, op2, stat); +} + +static uint16_t float16_cge(float16 op1, float16 op2, float_status *stat) +{ + return -float16_le(op2, op1, stat); +} + +static uint32_t float32_cge(float32 op1, float32 op2, float_status *stat) +{ + return -float32_le(op2, op1, stat); +} + +static uint16_t float16_cgt(float16 op1, float16 op2, float_status *stat) +{ + return -float16_lt(op2, op1, stat); +} + +static uint32_t float32_cgt(float32 op1, float32 op2, float_status *stat) +{ + return -float32_lt(op2, op1, stat); +} + +static uint16_t float16_acge(float16 op1, float16 op2, float_status *stat) +{ + return -float16_le(float16_abs(op2), float16_abs(op1), stat); +} + +static uint32_t float32_acge(float32 op1, float32 op2, float_status *stat) +{ + return -float32_le(float32_abs(op2), float32_abs(op1), stat); +} + +static uint16_t float16_acgt(float16 op1, float16 op2, float_status *stat) +{ + return -float16_lt(float16_abs(op2), float16_abs(op1), stat); +} + +static uint32_t float32_acgt(float32 op1, float32 op2, float_status *stat) +{ + return -float32_lt(float32_abs(op2), float32_abs(op1), stat); +} + +static int16_t vfp_tosszh(float16 x, void *fpstp) +{ + float_status *fpst = fpstp; + if (float16_is_any_nan(x)) { + float_raise(float_flag_invalid, fpst); + return 0; + } + return float16_to_int16_round_to_zero(x, fpst); +} + +static uint16_t vfp_touszh(float16 x, void *fpstp) +{ + float_status *fpst = fpstp; + if (float16_is_any_nan(x)) { + float_raise(float_flag_invalid, fpst); + return 0; + } + return float16_to_uint16_round_to_zero(x, fpst); +} + #define DO_2OP(NAME, FUNC, TYPE) \ void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \ { \ @@ -649,7 +750,44 @@ DO_2OP(gvec_frsqrte_h, 
helper_rsqrte_f16, float16) DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32) DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64) +DO_2OP(gvec_vrintx_h, float16_round_to_int, float16) +DO_2OP(gvec_vrintx_s, float32_round_to_int, float32) + +DO_2OP(gvec_sitos, helper_vfp_sitos, int32_t) +DO_2OP(gvec_uitos, helper_vfp_uitos, uint32_t) +DO_2OP(gvec_tosizs, helper_vfp_tosizs, float32) +DO_2OP(gvec_touizs, helper_vfp_touizs, float32) +DO_2OP(gvec_sstoh, int16_to_float16, int16_t) +DO_2OP(gvec_ustoh, uint16_to_float16, uint16_t) +DO_2OP(gvec_tosszh, vfp_tosszh, float16) +DO_2OP(gvec_touszh, vfp_touszh, float16) + +#define WRAP_CMP0_FWD(FN, CMPOP, TYPE) \ + static TYPE TYPE##_##FN##0(TYPE op, float_status *stat) \ + { \ + return TYPE##_##CMPOP(op, TYPE##_zero, stat); \ + } + +#define WRAP_CMP0_REV(FN, CMPOP, TYPE) \ + static TYPE TYPE##_##FN##0(TYPE op, float_status *stat) \ + { \ + return TYPE##_##CMPOP(TYPE##_zero, op, stat); \ + } + +#define DO_2OP_CMP0(FN, CMPOP, DIRN) \ + WRAP_CMP0_##DIRN(FN, CMPOP, float16) \ + WRAP_CMP0_##DIRN(FN, CMPOP, float32) \ + DO_2OP(gvec_f##FN##0_h, float16_##FN##0, float16) \ + DO_2OP(gvec_f##FN##0_s, float32_##FN##0, float32) + +DO_2OP_CMP0(cgt, cgt, FWD) +DO_2OP_CMP0(cge, cge, FWD) +DO_2OP_CMP0(ceq, ceq, FWD) +DO_2OP_CMP0(clt, cgt, REV) +DO_2OP_CMP0(cle, cge, REV) + #undef DO_2OP +#undef DO_2OP_CMP0 /* Floating-point trigonometric starting value. * See the ARM ARM pseudocode function FPTrigSMul. @@ -681,11 +819,71 @@ static float64 float64_ftsmul(float64 op1, uint64_t op2, float_status *stat) return result; } +static float16 float16_abd(float16 op1, float16 op2, float_status *stat) +{ + return float16_abs(float16_sub(op1, op2, stat)); +} + static float32 float32_abd(float32 op1, float32 op2, float_status *stat) { return float32_abs(float32_sub(op1, op2, stat)); } +/* + * Reciprocal step. These are the AArch32 version which uses a + * non-fused multiply-and-subtract. + */ +static float16 float16_recps_nf(float16 op1, float16 op2, float_status *stat) +{ + op1 = float16_squash_input_denormal(op1, stat); + op2 = float16_squash_input_denormal(op2, stat); + + if ((float16_is_infinity(op1) && float16_is_zero(op2)) || + (float16_is_infinity(op2) && float16_is_zero(op1))) { + return float16_two; + } + return float16_sub(float16_two, float16_mul(op1, op2, stat), stat); +} + +static float32 float32_recps_nf(float32 op1, float32 op2, float_status *stat) +{ + op1 = float32_squash_input_denormal(op1, stat); + op2 = float32_squash_input_denormal(op2, stat); + + if ((float32_is_infinity(op1) && float32_is_zero(op2)) || + (float32_is_infinity(op2) && float32_is_zero(op1))) { + return float32_two; + } + return float32_sub(float32_two, float32_mul(op1, op2, stat), stat); +} + +/* Reciprocal square-root step. AArch32 non-fused semantics. 
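
float16_recps_nf and float32_recps_nf above compute the 2 - a*b step of the Newton-Raphson iteration x' = x * (2 - a*x) for 1/a, with the infinity-times-zero input pair pinned to 2.0 as the architecture requires. A quick double-precision sketch of why iterating the step converges (illustrative numerics only, not the AArch32 denormal-squashing semantics):

#include <assert.h>
#include <math.h>
#include <stdio.h>

int main(void)
{
    double a = 3.0, x = 0.3;         /* rough initial estimate of 1/3 */
    for (int i = 0; i < 4; i++) {
        x = x * (2.0 - a * x);       /* the "recps" refinement step */
    }
    assert(fabs(x - 1.0 / 3.0) < 1e-15);
    printf("1/%g ~= %.17g\n", a, x);
    return 0;
}
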
*/ +static float16 float16_rsqrts_nf(float16 op1, float16 op2, float_status *stat) +{ + op1 = float16_squash_input_denormal(op1, stat); + op2 = float16_squash_input_denormal(op2, stat); + + if ((float16_is_infinity(op1) && float16_is_zero(op2)) || + (float16_is_infinity(op2) && float16_is_zero(op1))) { + return float16_one_point_five; + } + op1 = float16_sub(float16_three, float16_mul(op1, op2, stat), stat); + return float16_div(op1, float16_two, stat); +} + +static float32 float32_rsqrts_nf(float32 op1, float32 op2, float_status *stat) +{ + op1 = float32_squash_input_denormal(op1, stat); + op2 = float32_squash_input_denormal(op2, stat); + + if ((float32_is_infinity(op1) && float32_is_zero(op2)) || + (float32_is_infinity(op2) && float32_is_zero(op1))) { + return float32_one_point_five; + } + op1 = float32_sub(float32_three, float32_mul(op1, op2, stat), stat); + return float32_div(op1, float32_two, stat); +} + #define DO_3OP(NAME, FUNC, TYPE) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ { \ @@ -713,8 +911,42 @@ DO_3OP(gvec_ftsmul_h, float16_ftsmul, float16) DO_3OP(gvec_ftsmul_s, float32_ftsmul, float32) DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64) +DO_3OP(gvec_fabd_h, float16_abd, float16) DO_3OP(gvec_fabd_s, float32_abd, float32) +DO_3OP(gvec_fceq_h, float16_ceq, float16) +DO_3OP(gvec_fceq_s, float32_ceq, float32) + +DO_3OP(gvec_fcge_h, float16_cge, float16) +DO_3OP(gvec_fcge_s, float32_cge, float32) + +DO_3OP(gvec_fcgt_h, float16_cgt, float16) +DO_3OP(gvec_fcgt_s, float32_cgt, float32) + +DO_3OP(gvec_facge_h, float16_acge, float16) +DO_3OP(gvec_facge_s, float32_acge, float32) + +DO_3OP(gvec_facgt_h, float16_acgt, float16) +DO_3OP(gvec_facgt_s, float32_acgt, float32) + +DO_3OP(gvec_fmax_h, float16_max, float16) +DO_3OP(gvec_fmax_s, float32_max, float32) + +DO_3OP(gvec_fmin_h, float16_min, float16) +DO_3OP(gvec_fmin_s, float32_min, float32) + +DO_3OP(gvec_fmaxnum_h, float16_maxnum, float16) +DO_3OP(gvec_fmaxnum_s, float32_maxnum, float32) + +DO_3OP(gvec_fminnum_h, float16_minnum, float16) +DO_3OP(gvec_fminnum_s, float32_minnum, float32) + +DO_3OP(gvec_recps_nf_h, float16_recps_nf, float16) +DO_3OP(gvec_recps_nf_s, float32_recps_nf, float32) + +DO_3OP(gvec_rsqrts_nf_h, float16_rsqrts_nf, float16) +DO_3OP(gvec_rsqrts_nf_s, float32_rsqrts_nf, float32) + #ifdef TARGET_AARCH64 DO_3OP(gvec_recps_h, helper_recpsf_f16, float16) @@ -728,36 +960,176 @@ DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64) #endif #undef DO_3OP +/* Non-fused multiply-add (unlike float16_muladd etc, which are fused) */ +static float16 float16_muladd_nf(float16 dest, float16 op1, float16 op2, + float_status *stat) +{ + return float16_add(dest, float16_mul(op1, op2, stat), stat); +} + +static float32 float32_muladd_nf(float32 dest, float32 op1, float32 op2, + float_status *stat) +{ + return float32_add(dest, float32_mul(op1, op2, stat), stat); +} + +static float16 float16_mulsub_nf(float16 dest, float16 op1, float16 op2, + float_status *stat) +{ + return float16_sub(dest, float16_mul(op1, op2, stat), stat); +} + +static float32 float32_mulsub_nf(float32 dest, float32 op1, float32 op2, + float_status *stat) +{ + return float32_sub(dest, float32_mul(op1, op2, stat), stat); +} + +/* Fused versions; these have the semantics Neon VFMA/VFMS want */ +static float16 float16_muladd_f(float16 dest, float16 op1, float16 op2, + float_status *stat) +{ + return float16_muladd(op1, op2, dest, 0, stat); +} + +static float32 float32_muladd_f(float32 dest, float32 op1, float32 op2, + float_status *stat) +{ 
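
The _nf ("non-fused") versions round after the multiply and again after the add, which is what the AArch32 VMLA/VMLS family wants; the fused versions round once, as VFMA/VFMS require. The difference is observable. A hedged sketch using C99 fma() from <math.h>, with values chosen so the separately rounded product loses the low term:

#include <assert.h>
#include <math.h>

int main(void)
{
    double a = 1.0 + 0x1p-27, b = 1.0 - 0x1p-27;
    /* True product is 1 - 2^-54: the separately rounded multiply
       rounds it away (ties-to-even lands on 1.0), while the
       single-rounding fma preserves it. */
    assert(a * b - 1.0 == 0.0);
    assert(fma(a, b, -1.0) == -0x1p-54);
    return 0;
}
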
+ return float32_muladd(op1, op2, dest, 0, stat); +} + +static float16 float16_mulsub_f(float16 dest, float16 op1, float16 op2, + float_status *stat) +{ + return float16_muladd(float16_chs(op1), op2, dest, 0, stat); +} + +static float32 float32_mulsub_f(float32 dest, float32 op1, float32 op2, + float_status *stat) +{ + return float32_muladd(float32_chs(op1), op2, dest, 0, stat); +} + +#define DO_MULADD(NAME, FUNC, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + TYPE *d = vd, *n = vn, *m = vm; \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + d[i] = FUNC(d[i], n[i], m[i], stat); \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_MULADD(gvec_fmla_h, float16_muladd_nf, float16) +DO_MULADD(gvec_fmla_s, float32_muladd_nf, float32) + +DO_MULADD(gvec_fmls_h, float16_mulsub_nf, float16) +DO_MULADD(gvec_fmls_s, float32_mulsub_nf, float32) + +DO_MULADD(gvec_vfma_h, float16_muladd_f, float16) +DO_MULADD(gvec_vfma_s, float32_muladd_f, float32) + +DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16) +DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32) + /* For the indexed ops, SVE applies the index per 128-bit vector segment. * For AdvSIMD, there is of course only one such vector segment. */ #define DO_MUL_IDX(NAME, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ { \ - intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \ + intptr_t i, j, oprsz = simd_oprsz(desc); \ + intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \ intptr_t idx = simd_data(desc); \ TYPE *d = vd, *n = vn, *m = vm; \ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ TYPE mm = m[H(i + idx)]; \ for (j = 0; j < segment; j++) { \ - d[i + j] = TYPE##_mul(n[i + j], mm, stat); \ + d[i + j] = n[i + j] * mm; \ } \ } \ clear_tail(d, oprsz, simd_maxsz(desc)); \ } -DO_MUL_IDX(gvec_fmul_idx_h, float16, H2) -DO_MUL_IDX(gvec_fmul_idx_s, float32, H4) -DO_MUL_IDX(gvec_fmul_idx_d, float64, ) +DO_MUL_IDX(gvec_mul_idx_h, uint16_t, H2) +DO_MUL_IDX(gvec_mul_idx_s, uint32_t, H4) +DO_MUL_IDX(gvec_mul_idx_d, uint64_t, ) #undef DO_MUL_IDX +#define DO_MLA_IDX(NAME, TYPE, OP, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ +{ \ + intptr_t i, j, oprsz = simd_oprsz(desc); \ + intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \ + intptr_t idx = simd_data(desc); \ + TYPE *d = vd, *n = vn, *m = vm, *a = va; \ + for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ + TYPE mm = m[H(i + idx)]; \ + for (j = 0; j < segment; j++) { \ + d[i + j] = a[i + j] OP n[i + j] * mm; \ + } \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_MLA_IDX(gvec_mla_idx_h, uint16_t, +, H2) +DO_MLA_IDX(gvec_mla_idx_s, uint32_t, +, H4) +DO_MLA_IDX(gvec_mla_idx_d, uint64_t, +, ) + +DO_MLA_IDX(gvec_mls_idx_h, uint16_t, -, H2) +DO_MLA_IDX(gvec_mls_idx_s, uint32_t, -, H4) +DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, ) + +#undef DO_MLA_IDX + +#define DO_FMUL_IDX(NAME, ADD, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ +{ \ + intptr_t i, j, oprsz = simd_oprsz(desc); \ + intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \ + intptr_t idx = simd_data(desc); \ + TYPE *d = vd, *n = vn, *m = vm; \ + for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ + TYPE mm = m[H(i + idx)]; \ + for (j = 0; j < segment; j++) { \ + d[i + j] = TYPE##_##ADD(d[i + j], \ + TYPE##_mul(n[i + j], mm, stat), stat); \ + } 
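
In DO_MUL_IDX and DO_MLA_IDX the indexed element is re-read once per 128-bit segment, so SVE picks up a fresh m[idx] in each segment while AdvSIMD (oprsz <= 16) sees exactly one; the MIN(16, oprsz) term keeps an 8-byte Neon vector from being walked as a full 16-byte segment. A scalar sketch of the traversal, assuming little-endian so the H2-style element swizzle is the identity:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Illustrative only: 16-bit integer MUL (indexed) over oprsz bytes. */
static void mul_idx_h(uint16_t *d, const uint16_t *n, const uint16_t *m,
                      size_t oprsz, size_t idx)
{
    size_t segment = MIN((size_t)16, oprsz) / sizeof(uint16_t);

    for (size_t i = 0; i < oprsz / sizeof(uint16_t); i += segment) {
        uint16_t mm = m[i + idx];        /* re-fetched per segment */
        for (size_t j = 0; j < segment; j++) {
            d[i + j] = n[i + j] * mm;
        }
    }
}

int main(void)
{
    uint16_t n[8] = {1, 2, 3, 4, 5, 6, 7, 8}, m[8] = {0}, d[8];
    m[2] = 10;                           /* the indexed element */
    mul_idx_h(d, n, m, sizeof(n), 2);    /* one 16-byte segment */
    assert(d[0] == 10 && d[7] == 80);
    return 0;
}
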
\ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +#define float16_nop(N, M, S) (M) +#define float32_nop(N, M, S) (M) +#define float64_nop(N, M, S) (M) + +DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16, H2) +DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32, H4) +DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64, ) + +/* + * Non-fused multiply-accumulate operations, for Neon. NB that unlike + * the fused ops below they assume accumulate both from and into Vd. + */ +DO_FMUL_IDX(gvec_fmla_nf_idx_h, add, float16, H2) +DO_FMUL_IDX(gvec_fmla_nf_idx_s, add, float32, H4) +DO_FMUL_IDX(gvec_fmls_nf_idx_h, sub, float16, H2) +DO_FMUL_IDX(gvec_fmls_nf_idx_s, sub, float32, H4) + +#undef float16_nop +#undef float32_nop +#undef float64_nop +#undef DO_FMUL_IDX + #define DO_FMLA_IDX(NAME, TYPE, H) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \ void *stat, uint32_t desc) \ { \ - intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \ + intptr_t i, j, oprsz = simd_oprsz(desc); \ + intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \ TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \ intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \ TYPE *d = vd, *n = vn, *m = vm, *a = va; \ @@ -1452,3 +1824,116 @@ DO_ABA(gvec_uaba_s, uint32_t) DO_ABA(gvec_uaba_d, uint64_t) #undef DO_ABA + +#define DO_NEON_PAIRWISE(NAME, OP) \ + void HELPER(NAME##s)(void *vd, void *vn, void *vm, \ + void *stat, uint32_t oprsz) \ + { \ + float_status *fpst = stat; \ + float32 *d = vd; \ + float32 *n = vn; \ + float32 *m = vm; \ + float32 r0, r1; \ + \ + /* Read all inputs before writing outputs in case vm == vd */ \ + r0 = float32_##OP(n[H4(0)], n[H4(1)], fpst); \ + r1 = float32_##OP(m[H4(0)], m[H4(1)], fpst); \ + \ + d[H4(0)] = r0; \ + d[H4(1)] = r1; \ + } \ + \ + void HELPER(NAME##h)(void *vd, void *vn, void *vm, \ + void *stat, uint32_t oprsz) \ + { \ + float_status *fpst = stat; \ + float16 *d = vd; \ + float16 *n = vn; \ + float16 *m = vm; \ + float16 r0, r1, r2, r3; \ + \ + /* Read all inputs before writing outputs in case vm == vd */ \ + r0 = float16_##OP(n[H2(0)], n[H2(1)], fpst); \ + r1 = float16_##OP(n[H2(2)], n[H2(3)], fpst); \ + r2 = float16_##OP(m[H2(0)], m[H2(1)], fpst); \ + r3 = float16_##OP(m[H2(2)], m[H2(3)], fpst); \ + \ + d[H4(0)] = r0; \ + d[H4(1)] = r1; \ + d[H4(2)] = r2; \ + d[H4(3)] = r3; \ + } + +DO_NEON_PAIRWISE(neon_padd, add) +DO_NEON_PAIRWISE(neon_pmax, max) +DO_NEON_PAIRWISE(neon_pmin, min) + +#undef DO_NEON_PAIRWISE + +#define DO_VCVT_FIXED(NAME, FUNC, TYPE) \ + void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \ + { \ + intptr_t i, oprsz = simd_oprsz(desc); \ + int shift = simd_data(desc); \ + TYPE *d = vd, *n = vn; \ + float_status *fpst = stat; \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + d[i] = FUNC(n[i], shift, fpst); \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ + } + +DO_VCVT_FIXED(gvec_vcvt_sf, helper_vfp_sltos, uint32_t) +DO_VCVT_FIXED(gvec_vcvt_uf, helper_vfp_ultos, uint32_t) +DO_VCVT_FIXED(gvec_vcvt_fs, helper_vfp_tosls_round_to_zero, uint32_t) +DO_VCVT_FIXED(gvec_vcvt_fu, helper_vfp_touls_round_to_zero, uint32_t) +DO_VCVT_FIXED(gvec_vcvt_sh, helper_vfp_shtoh, uint16_t) +DO_VCVT_FIXED(gvec_vcvt_uh, helper_vfp_uhtoh, uint16_t) +DO_VCVT_FIXED(gvec_vcvt_hs, helper_vfp_toshh_round_to_zero, uint16_t) +DO_VCVT_FIXED(gvec_vcvt_hu, helper_vfp_touhh_round_to_zero, uint16_t) + +#undef DO_VCVT_FIXED + +#define DO_VCVT_RMODE(NAME, FUNC, TYPE) \ + void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \ + { \ + float_status *fpst = stat; \ + intptr_t i, oprsz = 
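
The DO_VCVT_RMODE/DO_VRINT_RMODE expansions here follow the standard save/force/restore discipline for the rounding mode encoded in the instruction. The same shape on the host FPU with C99 <fenv.h>, purely as an illustration of the pattern (fenv stands in for softfloat's get/set_float_rounding_mode):

#include <assert.h>
#include <fenv.h>
#include <math.h>

#pragma STDC FENV_ACCESS ON

int main(void)
{
    int prev = fegetround();
    fesetround(FE_UPWARD);          /* force the requested mode... */
    assert(rint(0.25) == 1.0);      /* ...do the conversion under it... */
    fesetround(prev);               /* ...and always restore */
    assert(fegetround() == prev);
    return 0;
}
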
simd_oprsz(desc); \ + uint32_t rmode = simd_data(desc); \ + uint32_t prev_rmode = get_float_rounding_mode(fpst); \ + TYPE *d = vd, *n = vn; \ + set_float_rounding_mode(rmode, fpst); \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + d[i] = FUNC(n[i], 0, fpst); \ + } \ + set_float_rounding_mode(prev_rmode, fpst); \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ + } + +DO_VCVT_RMODE(gvec_vcvt_rm_ss, helper_vfp_tosls, uint32_t) +DO_VCVT_RMODE(gvec_vcvt_rm_us, helper_vfp_touls, uint32_t) +DO_VCVT_RMODE(gvec_vcvt_rm_sh, helper_vfp_toshh, uint16_t) +DO_VCVT_RMODE(gvec_vcvt_rm_uh, helper_vfp_touhh, uint16_t) + +#undef DO_VCVT_RMODE + +#define DO_VRINT_RMODE(NAME, FUNC, TYPE) \ + void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \ + { \ + float_status *fpst = stat; \ + intptr_t i, oprsz = simd_oprsz(desc); \ + uint32_t rmode = simd_data(desc); \ + uint32_t prev_rmode = get_float_rounding_mode(fpst); \ + TYPE *d = vd, *n = vn; \ + set_float_rounding_mode(rmode, fpst); \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + d[i] = FUNC(n[i], fpst); \ + } \ + set_float_rounding_mode(prev_rmode, fpst); \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ + } + +DO_VRINT_RMODE(gvec_vrint_rm_h, helper_rinth, uint16_t) +DO_VRINT_RMODE(gvec_vrint_rm_s, helper_rints, uint32_t) + +#undef DO_VRINT_RMODE diff --git a/target/arm/vfp-uncond.decode b/target/arm/vfp-uncond.decode index 34ca164266..8891ab3d54 100644 --- a/target/arm/vfp-uncond.decode +++ b/target/arm/vfp-uncond.decode @@ -44,10 +44,15 @@ @vfp_dnm_s ................................ vm=%vm_sp vn=%vn_sp vd=%vd_sp @vfp_dnm_d ................................ vm=%vm_dp vn=%vn_dp vd=%vd_dp +VSEL 1111 1110 0. cc:2 .... .... 1001 .0.0 .... \ + vm=%vm_sp vn=%vn_sp vd=%vd_sp sz=1 VSEL 1111 1110 0. cc:2 .... .... 1010 .0.0 .... \ - vm=%vm_sp vn=%vn_sp vd=%vd_sp dp=0 + vm=%vm_sp vn=%vn_sp vd=%vd_sp sz=2 VSEL 1111 1110 0. cc:2 .... .... 1011 .0.0 .... \ - vm=%vm_dp vn=%vn_dp vd=%vd_dp dp=1 + vm=%vm_dp vn=%vn_dp vd=%vd_dp sz=3 + +VMAXNM_hp 1111 1110 1.00 .... .... 1001 .0.0 .... @vfp_dnm_s +VMINNM_hp 1111 1110 1.00 .... .... 1001 .1.0 .... @vfp_dnm_s VMAXNM_sp 1111 1110 1.00 .... .... 1010 .0.0 .... @vfp_dnm_s VMINNM_sp 1111 1110 1.00 .... .... 1010 .1.0 .... @vfp_dnm_s @@ -55,13 +60,23 @@ VMINNM_sp 1111 1110 1.00 .... .... 1010 .1.0 .... @vfp_dnm_s VMAXNM_dp 1111 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d VMINNM_dp 1111 1110 1.00 .... .... 1011 .1.0 .... @vfp_dnm_d +VRINT 1111 1110 1.11 10 rm:2 .... 1001 01.0 .... \ + vm=%vm_sp vd=%vd_sp sz=1 VRINT 1111 1110 1.11 10 rm:2 .... 1010 01.0 .... \ - vm=%vm_sp vd=%vd_sp dp=0 + vm=%vm_sp vd=%vd_sp sz=2 VRINT 1111 1110 1.11 10 rm:2 .... 1011 01.0 .... \ - vm=%vm_dp vd=%vd_dp dp=1 + vm=%vm_dp vd=%vd_dp sz=3 # VCVT float to int with specified rounding mode; Vd is always single-precision +VCVT 1111 1110 1.11 11 rm:2 .... 1001 op:1 1.0 .... \ + vm=%vm_sp vd=%vd_sp sz=1 VCVT 1111 1110 1.11 11 rm:2 .... 1010 op:1 1.0 .... \ - vm=%vm_sp vd=%vd_sp dp=0 + vm=%vm_sp vd=%vd_sp sz=2 VCVT 1111 1110 1.11 11 rm:2 .... 1011 op:1 1.0 .... \ - vm=%vm_dp vd=%vd_sp dp=1 + vm=%vm_dp vd=%vd_sp sz=3 + +VMOVX 1111 1110 1.11 0000 .... 1010 01 . 0 .... \ + vd=%vd_sp vm=%vm_sp + +VINS 1111 1110 1.11 0000 .... 1010 11 . 0 .... \ + vd=%vd_sp vm=%vm_sp diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode index 2c793e3e87..51f143b4a5 100644 --- a/target/arm/vfp.decode +++ b/target/arm/vfp.decode @@ -74,13 +74,13 @@ VDUP ---- 1110 1 b:1 q:1 0 .... rt:4 1011 . 
0 e:1 1 0000 \ vn=%vn_dp VMSR_VMRS ---- 1110 111 l:1 reg:4 rt:4 1010 0001 0000 +VMOV_half ---- 1110 000 l:1 .... rt:4 1001 . 001 0000 vn=%vn_sp VMOV_single ---- 1110 000 l:1 .... rt:4 1010 . 001 0000 vn=%vn_sp VMOV_64_sp ---- 1100 010 op:1 rt2:4 rt:4 1010 00.1 .... vm=%vm_sp VMOV_64_dp ---- 1100 010 op:1 rt2:4 rt:4 1011 00.1 .... vm=%vm_dp -# Note that the half-precision variants of VLDR and VSTR are -# not part of this decodetree at all because they have bits [9:8] == 0b01 +VLDR_VSTR_hp ---- 1101 u:1 .0 l:1 rn:4 .... 1001 imm:8 vd=%vd_sp VLDR_VSTR_sp ---- 1101 u:1 .0 l:1 rn:4 .... 1010 imm:8 vd=%vd_sp VLDR_VSTR_dp ---- 1101 u:1 .0 l:1 rn:4 .... 1011 imm:8 vd=%vd_dp @@ -103,33 +103,47 @@ VLDM_VSTM_dp ---- 1101 0.1 l:1 rn:4 .... 1011 imm:8 \ vd=%vd_dp p=1 u=0 w=1 # 3-register VFP data-processing; bits [23,21:20,6] identify the operation. +VMLA_hp ---- 1110 0.00 .... .... 1001 .0.0 .... @vfp_dnm_s VMLA_sp ---- 1110 0.00 .... .... 1010 .0.0 .... @vfp_dnm_s VMLA_dp ---- 1110 0.00 .... .... 1011 .0.0 .... @vfp_dnm_d +VMLS_hp ---- 1110 0.00 .... .... 1001 .1.0 .... @vfp_dnm_s VMLS_sp ---- 1110 0.00 .... .... 1010 .1.0 .... @vfp_dnm_s VMLS_dp ---- 1110 0.00 .... .... 1011 .1.0 .... @vfp_dnm_d +VNMLS_hp ---- 1110 0.01 .... .... 1001 .0.0 .... @vfp_dnm_s VNMLS_sp ---- 1110 0.01 .... .... 1010 .0.0 .... @vfp_dnm_s VNMLS_dp ---- 1110 0.01 .... .... 1011 .0.0 .... @vfp_dnm_d +VNMLA_hp ---- 1110 0.01 .... .... 1001 .1.0 .... @vfp_dnm_s VNMLA_sp ---- 1110 0.01 .... .... 1010 .1.0 .... @vfp_dnm_s VNMLA_dp ---- 1110 0.01 .... .... 1011 .1.0 .... @vfp_dnm_d +VMUL_hp ---- 1110 0.10 .... .... 1001 .0.0 .... @vfp_dnm_s VMUL_sp ---- 1110 0.10 .... .... 1010 .0.0 .... @vfp_dnm_s VMUL_dp ---- 1110 0.10 .... .... 1011 .0.0 .... @vfp_dnm_d +VNMUL_hp ---- 1110 0.10 .... .... 1001 .1.0 .... @vfp_dnm_s VNMUL_sp ---- 1110 0.10 .... .... 1010 .1.0 .... @vfp_dnm_s VNMUL_dp ---- 1110 0.10 .... .... 1011 .1.0 .... @vfp_dnm_d +VADD_hp ---- 1110 0.11 .... .... 1001 .0.0 .... @vfp_dnm_s VADD_sp ---- 1110 0.11 .... .... 1010 .0.0 .... @vfp_dnm_s VADD_dp ---- 1110 0.11 .... .... 1011 .0.0 .... @vfp_dnm_d +VSUB_hp ---- 1110 0.11 .... .... 1001 .1.0 .... @vfp_dnm_s VSUB_sp ---- 1110 0.11 .... .... 1010 .1.0 .... @vfp_dnm_s VSUB_dp ---- 1110 0.11 .... .... 1011 .1.0 .... @vfp_dnm_d +VDIV_hp ---- 1110 1.00 .... .... 1001 .0.0 .... @vfp_dnm_s VDIV_sp ---- 1110 1.00 .... .... 1010 .0.0 .... @vfp_dnm_s VDIV_dp ---- 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d +VFMA_hp ---- 1110 1.10 .... .... 1001 .0. 0 .... @vfp_dnm_s +VFMS_hp ---- 1110 1.10 .... .... 1001 .1. 0 .... @vfp_dnm_s +VFNMA_hp ---- 1110 1.01 .... .... 1001 .0. 0 .... @vfp_dnm_s +VFNMS_hp ---- 1110 1.01 .... .... 1001 .1. 0 .... @vfp_dnm_s + VFMA_sp ---- 1110 1.10 .... .... 1010 .0. 0 .... @vfp_dnm_s VFMS_sp ---- 1110 1.10 .... .... 1010 .1. 0 .... @vfp_dnm_s VFNMA_sp ---- 1110 1.01 .... .... 1010 .0. 0 .... @vfp_dnm_s @@ -140,6 +154,8 @@ VFMS_dp ---- 1110 1.10 .... .... 1011 .1.0 .... @vfp_dnm_d VFNMA_dp ---- 1110 1.01 .... .... 1011 .0.0 .... @vfp_dnm_d VFNMS_dp ---- 1110 1.01 .... .... 1011 .1.0 .... @vfp_dnm_d +VMOV_imm_hp ---- 1110 1.11 .... .... 1001 0000 .... \ + vd=%vd_sp imm=%vmov_imm VMOV_imm_sp ---- 1110 1.11 .... .... 1010 0000 .... \ vd=%vd_sp imm=%vmov_imm VMOV_imm_dp ---- 1110 1.11 .... .... 1011 0000 .... \ @@ -148,15 +164,20 @@ VMOV_imm_dp ---- 1110 1.11 .... .... 1011 0000 .... \ VMOV_reg_sp ---- 1110 1.11 0000 .... 1010 01.0 .... @vfp_dm_ss VMOV_reg_dp ---- 1110 1.11 0000 .... 1011 01.0 .... @vfp_dm_dd +VABS_hp ---- 1110 1.11 0000 .... 1001 11.0 .... 
@vfp_dm_ss VABS_sp ---- 1110 1.11 0000 .... 1010 11.0 .... @vfp_dm_ss VABS_dp ---- 1110 1.11 0000 .... 1011 11.0 .... @vfp_dm_dd +VNEG_hp ---- 1110 1.11 0001 .... 1001 01.0 .... @vfp_dm_ss VNEG_sp ---- 1110 1.11 0001 .... 1010 01.0 .... @vfp_dm_ss VNEG_dp ---- 1110 1.11 0001 .... 1011 01.0 .... @vfp_dm_dd +VSQRT_hp ---- 1110 1.11 0001 .... 1001 11.0 .... @vfp_dm_ss VSQRT_sp ---- 1110 1.11 0001 .... 1010 11.0 .... @vfp_dm_ss VSQRT_dp ---- 1110 1.11 0001 .... 1011 11.0 .... @vfp_dm_dd +VCMP_hp ---- 1110 1.11 010 z:1 .... 1001 e:1 1.0 .... \ + vd=%vd_sp vm=%vm_sp VCMP_sp ---- 1110 1.11 010 z:1 .... 1010 e:1 1.0 .... \ vd=%vd_sp vm=%vm_sp VCMP_dp ---- 1110 1.11 010 z:1 .... 1011 e:1 1.0 .... \ @@ -175,12 +196,15 @@ VCVT_f16_f32 ---- 1110 1.11 0011 .... 1010 t:1 1.0 .... \ VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \ vd=%vd_sp vm=%vm_dp +VRINTR_hp ---- 1110 1.11 0110 .... 1001 01.0 .... @vfp_dm_ss VRINTR_sp ---- 1110 1.11 0110 .... 1010 01.0 .... @vfp_dm_ss VRINTR_dp ---- 1110 1.11 0110 .... 1011 01.0 .... @vfp_dm_dd +VRINTZ_hp ---- 1110 1.11 0110 .... 1001 11.0 .... @vfp_dm_ss VRINTZ_sp ---- 1110 1.11 0110 .... 1010 11.0 .... @vfp_dm_ss VRINTZ_dp ---- 1110 1.11 0110 .... 1011 11.0 .... @vfp_dm_dd +VRINTX_hp ---- 1110 1.11 0111 .... 1001 01.0 .... @vfp_dm_ss VRINTX_sp ---- 1110 1.11 0111 .... 1010 01.0 .... @vfp_dm_ss VRINTX_dp ---- 1110 1.11 0111 .... 1011 01.0 .... @vfp_dm_dd @@ -190,6 +214,8 @@ VCVT_sp ---- 1110 1.11 0111 .... 1010 11.0 .... @vfp_dm_ds VCVT_dp ---- 1110 1.11 0111 .... 1011 11.0 .... @vfp_dm_sd # VCVT from integer to floating point: Vm always single; Vd depends on size +VCVT_int_hp ---- 1110 1.11 1000 .... 1001 s:1 1.0 .... \ + vd=%vd_sp vm=%vm_sp VCVT_int_sp ---- 1110 1.11 1000 .... 1010 s:1 1.0 .... \ vd=%vd_sp vm=%vm_sp VCVT_int_dp ---- 1110 1.11 1000 .... 1011 s:1 1.0 .... \ @@ -203,12 +229,16 @@ VJCVT ---- 1110 1.11 1001 .... 1011 11.0 .... @vfp_dm_sd # We assemble bits 18 (op), 16 (u) and 7 (sx) into a single opc field # for the convenience of the trans_VCVT_fix functions. %vcvt_fix_op 18:1 16:1 7:1 +VCVT_fix_hp ---- 1110 1.11 1.1. .... 1001 .1.0 .... \ + vd=%vd_sp imm=%vm_sp opc=%vcvt_fix_op VCVT_fix_sp ---- 1110 1.11 1.1. .... 1010 .1.0 .... \ vd=%vd_sp imm=%vm_sp opc=%vcvt_fix_op VCVT_fix_dp ---- 1110 1.11 1.1. .... 1011 .1.0 .... \ vd=%vd_dp imm=%vm_sp opc=%vcvt_fix_op # VCVT float to integer (VCVT and VCVTR): Vd always single; Vd depends on size +VCVT_hp_int ---- 1110 1.11 110 s:1 .... 1001 rz:1 1.0 .... \ + vd=%vd_sp vm=%vm_sp VCVT_sp_int ---- 1110 1.11 110 s:1 .... 1010 rz:1 1.0 .... \ vd=%vd_sp vm=%vm_sp VCVT_dp_int ---- 1110 1.11 110 s:1 .... 1011 rz:1 1.0 .... 
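
A pattern worth calling out across these decode additions: bits [11:8] of the insn select the precision column, 1001 for the new half-precision rows, 1010 for single, 1011 for double, which is why the old note about the VLDR/VSTR half-precision forms having bits [9:8] == 0b01 could be dropped. As a small orientation sketch (hypothetical helper, not part of the patch):

/* Element width selected by insn bits [11:8] in these VFP patterns. */
int vfp_esize(uint32_t insn)
{
    switch ((insn >> 8) & 0xf) {
    case 0x9: return 16;    /* 1001: half precision */
    case 0xa: return 32;    /* 1010: single precision */
    case 0xb: return 64;    /* 1011: double precision */
    default:  return 0;     /* not a VFP data-processing pattern */
    }
}
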
\ diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c index 64266ece62..5666393ef7 100644 --- a/target/arm/vfp_helper.c +++ b/target/arm/vfp_helper.c @@ -236,6 +236,11 @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val) #define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p)) #define VFP_BINOP(name) \ +dh_ctype_f16 VFP_HELPER(name, h)(dh_ctype_f16 a, dh_ctype_f16 b, void *fpstp) \ +{ \ + float_status *fpst = fpstp; \ + return float16_ ## name(a, b, fpst); \ +} \ float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \ { \ float_status *fpst = fpstp; \ @@ -256,6 +261,11 @@ VFP_BINOP(minnum) VFP_BINOP(maxnum) #undef VFP_BINOP +dh_ctype_f16 VFP_HELPER(neg, h)(dh_ctype_f16 a) +{ + return float16_chs(a); +} + float32 VFP_HELPER(neg, s)(float32 a) { return float32_chs(a); @@ -266,6 +276,11 @@ float64 VFP_HELPER(neg, d)(float64 a) return float64_chs(a); } +dh_ctype_f16 VFP_HELPER(abs, h)(dh_ctype_f16 a) +{ + return float16_abs(a); +} + float32 VFP_HELPER(abs, s)(float32 a) { return float32_abs(a); @@ -276,6 +291,11 @@ float64 VFP_HELPER(abs, d)(float64 a) return float64_abs(a); } +dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, CPUARMState *env) +{ + return float16_sqrt(a, &env->vfp.fp_status_f16); +} + float32 VFP_HELPER(sqrt, s)(float32 a, CPUARMState *env) { return float32_sqrt(a, &env->vfp.fp_status); @@ -310,19 +330,20 @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp) } /* XXX: check quiet/signaling case */ -#define DO_VFP_cmp(p, type) \ -void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \ +#define DO_VFP_cmp(P, FLOATTYPE, ARGTYPE, FPST) \ +void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ { \ softfloat_to_vfp_compare(env, \ - type ## _compare_quiet(a, b, &env->vfp.fp_status)); \ + FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \ } \ -void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \ +void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ { \ softfloat_to_vfp_compare(env, \ - type ## _compare(a, b, &env->vfp.fp_status)); \ + FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ } -DO_VFP_cmp(s, float32) -DO_VFP_cmp(d, float64) +DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16) +DO_VFP_cmp(s, float32, float32, fp_status) +DO_VFP_cmp(d, float64, float64, fp_status) #undef DO_VFP_cmp /* Integer to float and float to integer conversions */ @@ -373,13 +394,13 @@ float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env) } /* VFP3 fixed point conversion. 
*/ -#define VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \ -float##fsz HELPER(vfp_##name##to##p)(uint##isz##_t x, uint32_t shift, \ +#define VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \ +ftype HELPER(vfp_##name##to##p)(uint##isz##_t x, uint32_t shift, \ void *fpstp) \ { return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); } -#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, ROUND, suff) \ -uint##isz##_t HELPER(vfp_to##name##p##suff)(float##fsz x, uint32_t shift, \ +#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, ROUND, suff) \ +uint##isz##_t HELPER(vfp_to##name##p##suff)(ftype x, uint32_t shift, \ void *fpst) \ { \ if (unlikely(float##fsz##_is_any_nan(x))) { \ @@ -389,116 +410,42 @@ uint##isz##_t HELPER(vfp_to##name##p##suff)(float##fsz x, uint32_t shift, \ return float##fsz##_to_##itype##_scalbn(x, ROUND, shift, fpst); \ } -#define VFP_CONV_FIX(name, p, fsz, isz, itype) \ -VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \ -VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, \ +#define VFP_CONV_FIX(name, p, fsz, ftype, isz, itype) \ +VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \ +VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \ float_round_to_zero, _round_to_zero) \ -VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, \ +VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \ get_float_rounding_mode(fpst), ) -#define VFP_CONV_FIX_A64(name, p, fsz, isz, itype) \ -VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \ -VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, \ +#define VFP_CONV_FIX_A64(name, p, fsz, ftype, isz, itype) \ +VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \ +VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \ get_float_rounding_mode(fpst), ) -VFP_CONV_FIX(sh, d, 64, 64, int16) -VFP_CONV_FIX(sl, d, 64, 64, int32) -VFP_CONV_FIX_A64(sq, d, 64, 64, int64) -VFP_CONV_FIX(uh, d, 64, 64, uint16) -VFP_CONV_FIX(ul, d, 64, 64, uint32) -VFP_CONV_FIX_A64(uq, d, 64, 64, uint64) -VFP_CONV_FIX(sh, s, 32, 32, int16) -VFP_CONV_FIX(sl, s, 32, 32, int32) -VFP_CONV_FIX_A64(sq, s, 32, 64, int64) -VFP_CONV_FIX(uh, s, 32, 32, uint16) -VFP_CONV_FIX(ul, s, 32, 32, uint32) -VFP_CONV_FIX_A64(uq, s, 32, 64, uint64) +VFP_CONV_FIX(sh, d, 64, float64, 64, int16) +VFP_CONV_FIX(sl, d, 64, float64, 64, int32) +VFP_CONV_FIX_A64(sq, d, 64, float64, 64, int64) +VFP_CONV_FIX(uh, d, 64, float64, 64, uint16) +VFP_CONV_FIX(ul, d, 64, float64, 64, uint32) +VFP_CONV_FIX_A64(uq, d, 64, float64, 64, uint64) +VFP_CONV_FIX(sh, s, 32, float32, 32, int16) +VFP_CONV_FIX(sl, s, 32, float32, 32, int32) +VFP_CONV_FIX_A64(sq, s, 32, float32, 64, int64) +VFP_CONV_FIX(uh, s, 32, float32, 32, uint16) +VFP_CONV_FIX(ul, s, 32, float32, 32, uint32) +VFP_CONV_FIX_A64(uq, s, 32, float32, 64, uint64) +VFP_CONV_FIX(sh, h, 16, dh_ctype_f16, 32, int16) +VFP_CONV_FIX(sl, h, 16, dh_ctype_f16, 32, int32) +VFP_CONV_FIX_A64(sq, h, 16, dh_ctype_f16, 64, int64) +VFP_CONV_FIX(uh, h, 16, dh_ctype_f16, 32, uint16) +VFP_CONV_FIX(ul, h, 16, dh_ctype_f16, 32, uint32) +VFP_CONV_FIX_A64(uq, h, 16, dh_ctype_f16, 64, uint64) #undef VFP_CONV_FIX #undef VFP_CONV_FIX_FLOAT #undef VFP_CONV_FLOAT_FIX_ROUND #undef VFP_CONV_FIX_A64 -uint32_t HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst) -{ - return int32_to_float16_scalbn(x, -shift, fpst); -} - -uint32_t HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst) -{ - return uint32_to_float16_scalbn(x, -shift, fpst); -} - -uint32_t HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst) -{ - return int64_to_float16_scalbn(x, -shift, 
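
The reworked VFP_CONV_FIX_FLOAT expansion treats a fixed-point value with shift fraction bits as integer * 2^-shift, hence the -shift handed to the _scalbn routines. The plain-libm equivalent, as a sketch:

#include <assert.h>
#include <math.h>
#include <stdint.h>

int main(void)
{
    /* Q16.16 fixed point: 0x00018000 encodes 1.5 (shift = 16). */
    int32_t fix = 0x18000;
    uint32_t shift = 16;
    double f = scalbn((double)fix, -(int)shift);     /* fix * 2^-16 */
    assert(f == 1.5);
    /* ...and back again, round-to-zero flavour: */
    assert((int32_t)scalbn(f, (int)shift) == fix);
    return 0;
}
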
fpst); -} - -uint32_t HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst) -{ - return uint64_to_float16_scalbn(x, -shift, fpst); -} - -uint32_t HELPER(vfp_toshh)(uint32_t x, uint32_t shift, void *fpst) -{ - if (unlikely(float16_is_any_nan(x))) { - float_raise(float_flag_invalid, fpst); - return 0; - } - return float16_to_int16_scalbn(x, get_float_rounding_mode(fpst), - shift, fpst); -} - -uint32_t HELPER(vfp_touhh)(uint32_t x, uint32_t shift, void *fpst) -{ - if (unlikely(float16_is_any_nan(x))) { - float_raise(float_flag_invalid, fpst); - return 0; - } - return float16_to_uint16_scalbn(x, get_float_rounding_mode(fpst), - shift, fpst); -} - -uint32_t HELPER(vfp_toslh)(uint32_t x, uint32_t shift, void *fpst) -{ - if (unlikely(float16_is_any_nan(x))) { - float_raise(float_flag_invalid, fpst); - return 0; - } - return float16_to_int32_scalbn(x, get_float_rounding_mode(fpst), - shift, fpst); -} - -uint32_t HELPER(vfp_toulh)(uint32_t x, uint32_t shift, void *fpst) -{ - if (unlikely(float16_is_any_nan(x))) { - float_raise(float_flag_invalid, fpst); - return 0; - } - return float16_to_uint32_scalbn(x, get_float_rounding_mode(fpst), - shift, fpst); -} - -uint64_t HELPER(vfp_tosqh)(uint32_t x, uint32_t shift, void *fpst) -{ - if (unlikely(float16_is_any_nan(x))) { - float_raise(float_flag_invalid, fpst); - return 0; - } - return float16_to_int64_scalbn(x, get_float_rounding_mode(fpst), - shift, fpst); -} - -uint64_t HELPER(vfp_touqh)(uint32_t x, uint32_t shift, void *fpst) -{ - if (unlikely(float16_is_any_nan(x))) { - float_raise(float_flag_invalid, fpst); - return 0; - } - return float16_to_uint64_scalbn(x, get_float_rounding_mode(fpst), - shift, fpst); -} - /* Set the current fp rounding mode and return the old one. * The argument is a softfloat float_round_ value. */ @@ -512,23 +459,6 @@ uint32_t HELPER(set_rmode)(uint32_t rmode, void *fpstp) return prev_rmode; } -/* Set the current fp rounding mode in the standard fp status and return - * the old one. This is for NEON instructions that need to change the - * rounding mode but wish to use the standard FPSCR values for everything - * else. Always set the rounding mode back to the correct value after - * modifying it. - * The argument is a softfloat float_round_ value. - */ -uint32_t HELPER(set_neon_rmode)(uint32_t rmode, CPUARMState *env) -{ - float_status *fp_status = &env->vfp.standard_fp_status; - - uint32_t prev_rmode = get_float_rounding_mode(fp_status); - set_float_rounding_mode(rmode, fp_status); - - return prev_rmode; -} - /* Half precision conversions. 
*/ float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode) { @@ -582,38 +512,6 @@ uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode) return r; } -#define float32_two make_float32(0x40000000) -#define float32_three make_float32(0x40400000) -#define float32_one_point_five make_float32(0x3fc00000) - -float32 HELPER(recps_f32)(CPUARMState *env, float32 a, float32 b) -{ - float_status *s = &env->vfp.standard_fp_status; - if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) || - (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) { - if (!(float32_is_zero(a) || float32_is_zero(b))) { - float_raise(float_flag_input_denormal, s); - } - return float32_two; - } - return float32_sub(float32_two, float32_mul(a, b, s), s); -} - -float32 HELPER(rsqrts_f32)(CPUARMState *env, float32 a, float32 b) -{ - float_status *s = &env->vfp.standard_fp_status; - float32 product; - if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) || - (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) { - if (!(float32_is_zero(a) || float32_is_zero(b))) { - float_raise(float_flag_input_denormal, s); - } - return float32_one_point_five; - } - product = float32_mul(a, b, s); - return float32_div(float32_sub(float32_three, product, s), float32_two, s); -} - /* NEON helpers. */ /* Constants 256 and 512 are used in some helpers; we avoid relying on @@ -1056,6 +954,13 @@ uint32_t HELPER(rsqrte_u32)(uint32_t a) } /* VFPv4 fused multiply-accumulate */ +dh_ctype_f16 VFP_HELPER(muladd, h)(dh_ctype_f16 a, dh_ctype_f16 b, + dh_ctype_f16 c, void *fpstp) +{ + float_status *fpst = fpstp; + return float16_muladd(a, b, c, 0, fpst); +} + float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp) { float_status *fpst = fpstp; @@ -1069,6 +974,11 @@ float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp) } /* ARMv8 round to integral */ +dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, void *fp_status) +{ + return float16_round_to_int(x, fp_status); +} + float32 HELPER(rints_exact)(float32 x, void *fp_status) { return float32_round_to_int(x, fp_status); @@ -1079,6 +989,22 @@ float64 HELPER(rintd_exact)(float64 x, void *fp_status) return float64_round_to_int(x, fp_status); } +dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, void *fp_status) +{ + int old_flags = get_float_exception_flags(fp_status), new_flags; + float16 ret; + + ret = float16_round_to_int(x, fp_status); + + /* Suppress any inexact exceptions the conversion produced */ + if (!(old_flags & float_flag_inexact)) { + new_flags = get_float_exception_flags(fp_status); + set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status); + } + + return ret; +} + float32 HELPER(rints)(float32 x, void *fp_status) { int old_flags = get_float_exception_flags(fp_status), new_flags; diff --git a/target/cris/translate.c b/target/cris/translate.c index ee5e359c77..c312e6f8a6 100644 --- a/target/cris/translate.c +++ b/target/cris/translate.c @@ -1178,12 +1178,11 @@ static inline void t_gen_zext(TCGv d, TCGv s, int size) static char memsize_char(int size) { switch (size) { - case 1: return 'b'; break; - case 2: return 'w'; break; - case 4: return 'd'; break; + case 1: return 'b'; + case 2: return 'w'; + case 4: return 'd'; default: return 'x'; - break; } } #endif diff --git a/target/cris/translate_v10.c.inc b/target/cris/translate_v10.c.inc index ae34a0d1a3..7f38fd215e 100644 --- a/target/cris/translate_v10.c.inc +++ b/target/cris/translate_v10.c.inc @@ -1026,10 +1026,8 @@ static 
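
HELPER(rinth) above mirrors the existing rints/rintd: the non-exact VRINT variants must not leave the inexact flag set merely because the rounding discarded fraction bits, so the helper snapshots the flags and clears inexact afterwards unless it was already pending. The same flag-masking pattern on the host FPU, with fenv standing in for softfloat's exception-flag accessors:

#include <assert.h>
#include <fenv.h>
#include <math.h>

#pragma STDC FENV_ACCESS ON

int main(void)
{
    feclearexcept(FE_ALL_EXCEPT);
    int old = fetestexcept(FE_INEXACT);      /* snapshot: clear */

    double r = rint(1.5);                    /* raises FE_INEXACT */

    /* Suppress the inexact the rounding itself produced. */
    if (!old) {
        feclearexcept(FE_INEXACT);
    }
    assert(r == 2.0 && !fetestexcept(FE_INEXACT));
    return 0;
}
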
unsigned int dec10_ind(CPUCRISState *env, DisasContext *dc) switch (dc->opcode) { case CRISV10_IND_MOVE_M_R: return dec10_ind_move_m_r(env, dc, size); - break; case CRISV10_IND_MOVE_R_M: return dec10_ind_move_r_m(dc, size); - break; case CRISV10_IND_CMP: LOG_DIS("cmp size=%d op=%d %d\n", size, dc->src, dc->dst); cris_cc_mask(dc, CC_MASK_NZVC); diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 588f32e136..49d8958528 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -338,15 +338,68 @@ static void encode_cache_cpuid80000006(CPUCacheInfo *l2, } } +/* + * Definitions used for building CPUID Leaf 0x8000001D and 0x8000001E + * Please refer to the AMD64 Architecture Programmer’s Manual Volume 3. + * Define the constants to build the cpu topology. Right now, TOPOEXT + * feature is enabled only on EPYC. So, these constants are based on + * EPYC supported configurations. We may need to handle the cases if + * these values change in future. + */ +/* Maximum core complexes in a node */ +#define MAX_CCX 2 +/* Maximum cores in a core complex */ +#define MAX_CORES_IN_CCX 4 +/* Maximum cores in a node */ +#define MAX_CORES_IN_NODE 8 +/* Maximum nodes in a socket */ +#define MAX_NODES_PER_SOCKET 4 + +/* + * Figure out the number of nodes required to build this config. + * Max cores in a node is 8 + */ +static int nodes_in_socket(int nr_cores) +{ + int nodes; + + nodes = DIV_ROUND_UP(nr_cores, MAX_CORES_IN_NODE); + + /* Hardware does not support config with 3 nodes, return 4 in that case */ + return (nodes == 3) ? 4 : nodes; +} + +/* + * Decide the number of cores in a core complex with the given nr_cores using + * following set constants MAX_CCX, MAX_CORES_IN_CCX, MAX_CORES_IN_NODE and + * MAX_NODES_PER_SOCKET. Maintain symmetry as much as possible + * L3 cache is shared across all cores in a core complex. So, this will also + * tell us how many cores are sharing the L3 cache. + */ +static int cores_in_core_complex(int nr_cores) +{ + int nodes; + + /* Check if we can fit all the cores in one core complex */ + if (nr_cores <= MAX_CORES_IN_CCX) { + return nr_cores; + } + /* Get the number of nodes required to build this config */ + nodes = nodes_in_socket(nr_cores); + + /* + * Divide the cores accros all the core complexes + * Return rounded up value + */ + return DIV_ROUND_UP(nr_cores, nodes * MAX_CCX); +} + /* Encode cache info for CPUID[8000001D] */ -static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, - X86CPUTopoInfo *topo_info, - uint32_t *eax, uint32_t *ebx, - uint32_t *ecx, uint32_t *edx) +static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, CPUState *cs, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) { uint32_t l3_cores; - unsigned nodes = MAX(topo_info->nodes_per_pkg, 1); - assert(cache->size == cache->line_size * cache->associativity * cache->partitions * cache->sets); @@ -355,13 +408,10 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, /* L3 is shared among multiple cores */ if (cache->level == 3) { - l3_cores = DIV_ROUND_UP((topo_info->dies_per_pkg * - topo_info->cores_per_die * - topo_info->threads_per_core), - nodes); - *eax |= (l3_cores - 1) << 14; + l3_cores = cores_in_core_complex(cs->nr_cores); + *eax |= ((l3_cores * cs->nr_threads) - 1) << 14; } else { - *eax |= ((topo_info->threads_per_core - 1) << 14); + *eax |= ((cs->nr_threads - 1) << 14); } assert(cache->line_size > 0); @@ -381,17 +431,55 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, (cache->complex_indexing ? 
CACHE_COMPLEX_IDX : 0); } +/* Data structure to hold the configuration info for a given core index */ +struct core_topology { + /* core complex id of the current core index */ + int ccx_id; + /* + * Adjusted core index for this core in the topology + * This can be 0,1,2,3 with max 4 cores in a core complex + */ + int core_id; + /* Node id for this core index */ + int node_id; + /* Number of nodes in this config */ + int num_nodes; +}; + +/* + * Build the configuration closely match the EPYC hardware. Using the EPYC + * hardware configuration values (MAX_CCX, MAX_CORES_IN_CCX, MAX_CORES_IN_NODE) + * right now. This could change in future. + * nr_cores : Total number of cores in the config + * core_id : Core index of the current CPU + * topo : Data structure to hold all the config info for this core index + */ +static void build_core_topology(int nr_cores, int core_id, + struct core_topology *topo) +{ + int nodes, cores_in_ccx; + + /* First get the number of nodes required */ + nodes = nodes_in_socket(nr_cores); + + cores_in_ccx = cores_in_core_complex(nr_cores); + + topo->node_id = core_id / (cores_in_ccx * MAX_CCX); + topo->ccx_id = (core_id % (cores_in_ccx * MAX_CCX)) / cores_in_ccx; + topo->core_id = core_id % cores_in_ccx; + topo->num_nodes = nodes; +} + /* Encode cache info for CPUID[8000001E] */ -static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu, +static void encode_topo_cpuid8000001e(CPUState *cs, X86CPU *cpu, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { - X86CPUTopoIDs topo_ids = {0}; - unsigned long nodes = MAX(topo_info->nodes_per_pkg, 1); + struct core_topology topo = {0}; + unsigned long nodes; int shift; - x86_topo_ids_from_apicid_epyc(cpu->apic_id, topo_info, &topo_ids); - + build_core_topology(cs->nr_cores, cpu->core_id, &topo); *eax = cpu->apic_id; /* * CPUID_Fn8000001E_EBX @@ -408,8 +496,12 @@ static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu, * 3 Core complex id * 1:0 Core id */ - *ebx = ((topo_info->threads_per_core - 1) << 8) | (topo_ids.node_id << 3) | - (topo_ids.core_id); + if (cs->nr_threads - 1) { + *ebx = ((cs->nr_threads - 1) << 8) | (topo.node_id << 3) | + (topo.ccx_id << 2) | topo.core_id; + } else { + *ebx = (topo.node_id << 4) | (topo.ccx_id << 3) | topo.core_id; + } /* * CPUID_Fn8000001E_ECX * 31:11 Reserved @@ -418,8 +510,9 @@ static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu, * 2 Socket id * 1:0 Node id */ - if (nodes <= 4) { - *ecx = ((nodes - 1) << 8) | (topo_ids.pkg_id << 2) | topo_ids.node_id; + if (topo.num_nodes <= 4) { + *ecx = ((topo.num_nodes - 1) << 8) | (cpu->socket_id << 2) | + topo.node_id; } else { /* * Node id fix up. Actual hardware supports up to 4 nodes. But with @@ -434,10 +527,10 @@ static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu, * number of nodes. find_last_bit returns last set bit(0 based). Left * shift(+1) the socket id to represent all the nodes. */ - nodes -= 1; + nodes = topo.num_nodes - 1; shift = find_last_bit(&nodes, 8); - *ecx = (nodes << 8) | (topo_ids.pkg_id << (shift + 1)) | - topo_ids.node_id; + *ecx = ((topo.num_nodes - 1) << 8) | (cpu->socket_id << (shift + 1)) | + topo.node_id; } *edx = 0; } @@ -1638,10 +1731,6 @@ typedef struct X86CPUDefinition { FeatureWordArray features; const char *model_id; CPUCaches *cache_info; - - /* Use AMD EPYC encoding for apic id */ - bool use_epyc_apic_id_encoding; - /* * Definitions for alternative versions of CPU model. 
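
build_core_topology is pure arithmetic over the EPYC constants, so a worked case is useful. With nr_cores = 12: nodes = DIV_ROUND_UP(12, 8) = 2 (the 3-rounds-to-4 quirk does not trigger), cores per CCX = DIV_ROUND_UP(12, 2 * 2) = 3, and core index 10 decomposes to node 1, CCX 1, local core 1. A standalone restatement:

#include <assert.h>

#define MAX_CCX            2
#define MAX_CORES_IN_NODE  8
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
    int nr_cores = 12, core_id = 10;

    int nodes = DIV_ROUND_UP(nr_cores, MAX_CORES_IN_NODE);       /* 2 */
    int cores_in_ccx = DIV_ROUND_UP(nr_cores, nodes * MAX_CCX);  /* 3 */

    assert(core_id / (cores_in_ccx * MAX_CCX) == 1);             /* node_id */
    assert(core_id % (cores_in_ccx * MAX_CCX) / cores_in_ccx == 1); /* ccx_id */
    assert(core_id % cores_in_ccx == 1);          /* core within the CCX */
    return 0;
}
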
* List is terminated by item with version == 0. @@ -1683,18 +1772,6 @@ static const X86CPUVersionDefinition *x86_cpu_def_get_versions(X86CPUDefinition return def->versions ?: default_version_list; } -bool cpu_x86_use_epyc_apic_id_encoding(const char *cpu_type) -{ - X86CPUClass *xcc = X86_CPU_CLASS(object_class_by_name(cpu_type)); - - assert(xcc); - if (xcc->model && xcc->model->cpudef) { - return xcc->model->cpudef->use_epyc_apic_id_encoding; - } else { - return false; - } -} - static CPUCaches epyc_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -3995,7 +4072,6 @@ static X86CPUDefinition builtin_x86_defs[] = { .xlevel = 0x8000001E, .model_id = "AMD EPYC Processor", .cache_info = &epyc_cache_info, - .use_epyc_apic_id_encoding = 1, .versions = (X86CPUVersionDefinition[]) { { .version = 1 }, { @@ -4123,7 +4199,6 @@ static X86CPUDefinition builtin_x86_defs[] = { .xlevel = 0x8000001E, .model_id = "AMD EPYC-Rome Processor", .cache_info = &epyc_rome_cache_info, - .use_epyc_apic_id_encoding = 1, }, }; @@ -4872,6 +4947,7 @@ static void x86_cpu_class_check_missing_features(X86CPUClass *xcc, new->value = g_strdup("type"); *next = new; next = &new->next; + error_free(err); } x86_cpu_filter_features(xc, false); @@ -5489,7 +5565,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, uint32_t signature[3]; X86CPUTopoInfo topo_info; - topo_info.nodes_per_pkg = env->nr_nodes; topo_info.dies_per_pkg = env->nr_dies; topo_info.cores_per_die = cs->nr_cores; topo_info.threads_per_core = cs->nr_threads; @@ -5678,7 +5753,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx |= CPUID_TOPOLOGY_LEVEL_SMT; break; case 1: - *eax = env->pkg_offset; + *eax = apicid_pkg_offset(&topo_info); *ebx = cs->nr_cores * cs->nr_threads; *ecx |= CPUID_TOPOLOGY_LEVEL_CORE; break; @@ -5712,7 +5787,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx |= CPUID_TOPOLOGY_LEVEL_CORE; break; case 2: - *eax = env->pkg_offset; + *eax = apicid_pkg_offset(&topo_info); *ebx = env->nr_dies * cs->nr_cores * cs->nr_threads; *ecx |= CPUID_TOPOLOGY_LEVEL_DIE; break; @@ -5889,11 +5964,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, /* * Bits 15:12 is "The number of bits in the initial * Core::X86::Apic::ApicId[ApicId] value that indicate - * thread ID within a package". This is already stored at - * CPUX86State::pkg_offset. + * thread ID within a package". 
* Bits 7:0 is "The number of threads in the package is NC+1" */ - *ecx = (env->pkg_offset << 12) | + *ecx = (apicid_pkg_offset(&topo_info) << 12) | ((cs->nr_cores * cs->nr_threads) - 1); } else { *ecx = 0; @@ -5921,20 +5995,20 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } switch (count) { case 0: /* L1 dcache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, - &topo_info, eax, ebx, ecx, edx); + encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, cs, + eax, ebx, ecx, edx); break; case 1: /* L1 icache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache, - &topo_info, eax, ebx, ecx, edx); + encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache, cs, + eax, ebx, ecx, edx); break; case 2: /* L2 cache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache, - &topo_info, eax, ebx, ecx, edx); + encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache, cs, + eax, ebx, ecx, edx); break; case 3: /* L3 cache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache, - &topo_info, eax, ebx, ecx, edx); + encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache, cs, + eax, ebx, ecx, edx); break; default: /* end of info */ *eax = *ebx = *ecx = *edx = 0; @@ -5943,7 +6017,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0x8000001E: assert(cpu->core_id <= 255); - encode_topo_cpuid8000001e(&topo_info, cpu, eax, ebx, ecx, edx); + encode_topo_cpuid8000001e(cs, cpu, + eax, ebx, ecx, edx); break; case 0xC0000000: *eax = env->cpuid_xlevel2; @@ -6949,7 +7024,6 @@ static void x86_cpu_initfn(Object *obj) FeatureWord w; env->nr_dies = 1; - env->nr_nodes = 1; cpu_set_cpustate_pointers(cpu); object_property_add(obj, "family", "int", diff --git a/target/i386/cpu.h b/target/i386/cpu.h index e1a5c174dc..d3097be6a5 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1629,8 +1629,6 @@ typedef struct CPUX86State { TPRAccess tpr_access_type; unsigned nr_dies; - unsigned nr_nodes; - unsigned pkg_offset; } CPUX86State; struct kvm_msrs; @@ -1919,7 +1917,6 @@ void cpu_clear_apic_feature(CPUX86State *env); void host_cpuid(uint32_t function, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); void host_vendor_fms(char *vendor, int *family, int *model, int *stepping); -bool cpu_x86_use_epyc_apic_id_encoding(const char *cpu_type); /* helper.c */ bool x86_cpu_tlb_fill(CPUState *cs, vaddr address, int size, diff --git a/target/i386/hvf/hvf-i386.h b/target/i386/hvf/hvf-i386.h index ef20c73eca..e0edffd077 100644 --- a/target/i386/hvf/hvf-i386.h +++ b/target/i386/hvf/hvf-i386.h @@ -57,13 +57,13 @@ typedef struct hvf_vcpu_caps { uint64_t vmx_cap_preemption_timer; } hvf_vcpu_caps; -typedef struct HVFState { +struct HVFState { AccelState parent; hvf_slot slots[32]; int num_slots; hvf_vcpu_caps *hvf_caps; -} HVFState; +}; extern HVFState *hvf_state; void hvf_set_phys_mem(MemoryRegionSection *, bool); diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 6f18d940a5..205b68bc0c 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -4607,7 +4607,7 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, if (iommu) { int ret; MSIMessage src, dst; - X86IOMMUClass *class = X86_IOMMU_GET_CLASS(iommu); + X86IOMMUClass *class = X86_IOMMU_DEVICE_GET_CLASS(iommu); if (!class->int_remap) { return 0; diff --git a/target/i386/sev.c b/target/i386/sev.c index c3ecf86704..de4818da6d 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -500,6 +500,7 @@ sev_read_file_base64(const char 
*filename, guchar **data, gsize *len) if (!g_file_get_contents(filename, &base64, &sz, &error)) { error_report("failed to read '%s' (%s)", filename, error->message); + g_error_free(error); return -1; } diff --git a/target/microblaze/cpu-param.h b/target/microblaze/cpu-param.h index 4abbc62d50..4d8297fa94 100644 --- a/target/microblaze/cpu-param.h +++ b/target/microblaze/cpu-param.h @@ -8,9 +8,24 @@ #ifndef MICROBLAZE_CPU_PARAM_H #define MICROBLAZE_CPU_PARAM_H 1 +/* + * While system mode can address up to 64 bits of address space, + * this is done via the lea/sea instructions, which are system-only + * (as they also bypass the mmu). + * + * We can improve the user-only experience by only exposing 32 bits + * of address space. + */ +#ifdef CONFIG_USER_ONLY +#define TARGET_LONG_BITS 32 +#define TARGET_PHYS_ADDR_SPACE_BITS 32 +#define TARGET_VIRT_ADDR_SPACE_BITS 32 +#else #define TARGET_LONG_BITS 64 #define TARGET_PHYS_ADDR_SPACE_BITS 64 #define TARGET_VIRT_ADDR_SPACE_BITS 64 +#endif + /* FIXME: MB uses variable pages down to 1K but linux only uses 4k. */ #define TARGET_PAGE_BITS 12 #define NB_MMU_MODES 3 diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c index 51e5c85b10..6392524135 100644 --- a/target/microblaze/cpu.c +++ b/target/microblaze/cpu.c @@ -79,7 +79,17 @@ static void mb_cpu_set_pc(CPUState *cs, vaddr value) { MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs); - cpu->env.sregs[SR_PC] = value; + cpu->env.pc = value; + /* Ensure D_FLAG and IMM_FLAG are clear for the new PC */ + cpu->env.iflags = 0; +} + +static void mb_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) +{ + MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs); + + cpu->env.pc = tb->pc; + cpu->env.iflags = tb->flags & IFLAGS_TB_MASK; } static bool mb_cpu_has_work(CPUState *cs) @@ -117,13 +127,13 @@ static void mb_cpu_reset(DeviceState *dev) /* Disable stack protector. */ env->shr = ~0; - env->sregs[SR_PC] = cpu->cfg.base_vectors; + env->pc = cpu->cfg.base_vectors; #if defined(CONFIG_USER_ONLY) /* start in user mode with interrupts enabled. 
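
The sev.c hunk above plugs a leak: once g_file_get_contents() sets the GError out-parameter, the caller owns it and must g_error_free() it after reporting. The canonical shape, sketched with a hypothetical wrapper:

#include <glib.h>
#include <stdio.h>

static int read_whole_file(const char *filename, gchar **data, gsize *len)
{
    GError *error = NULL;

    if (!g_file_get_contents(filename, data, len, &error)) {
        fprintf(stderr, "failed to read '%s' (%s)\n",
                filename, error->message);
        g_error_free(error);         /* caller owns the GError once set */
        return -1;
    }
    return 0;
}
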
*/ - env->sregs[SR_MSR] = MSR_EE | MSR_IE | MSR_VM | MSR_UM; + mb_cpu_write_msr(env, MSR_EE | MSR_IE | MSR_VM | MSR_UM); #else - env->sregs[SR_MSR] = 0; + mb_cpu_write_msr(env, 0); mmu_init(&env->mmu); env->mmu.c_mmu = 3; env->mmu.c_mmu_tlb_access = 3; @@ -317,9 +327,11 @@ static void mb_cpu_class_init(ObjectClass *oc, void *data) cc->class_by_name = mb_cpu_class_by_name; cc->has_work = mb_cpu_has_work; cc->do_interrupt = mb_cpu_do_interrupt; + cc->do_unaligned_access = mb_cpu_do_unaligned_access; cc->cpu_exec_interrupt = mb_cpu_exec_interrupt; cc->dump_state = mb_cpu_dump_state; cc->set_pc = mb_cpu_set_pc; + cc->synchronize_from_tb = mb_cpu_synchronize_from_tb; cc->gdb_read_register = mb_cpu_gdb_read_register; cc->gdb_write_register = mb_cpu_gdb_write_register; cc->tlb_fill = mb_cpu_tlb_fill; diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h index a31134b65c..a25a2b427f 100644 --- a/target/microblaze/cpu.h +++ b/target/microblaze/cpu.h @@ -31,7 +31,7 @@ typedef struct CPUMBState CPUMBState; #define EXCP_MMU 1 #define EXCP_IRQ 2 -#define EXCP_BREAK 3 +#define EXCP_SYSCALL 3 /* user-only */ #define EXCP_HW_BREAK 4 #define EXCP_HW_EXCP 5 @@ -79,10 +79,13 @@ typedef struct CPUMBState CPUMBState; /* Exception State Register (ESR) Fields */ #define ESR_DIZ (1<<11) /* Zone Protection */ +#define ESR_W (1<<11) /* Unaligned word access */ #define ESR_S (1<<10) /* Store instruction */ #define ESR_ESS_FSL_OFFSET 5 +#define ESR_ESS_MASK (0x7f << 5) + #define ESR_EC_FSL 0 #define ESR_EC_UNALIGNED_DATA 1 #define ESR_EC_ILLEGAL_OP 2 @@ -228,15 +231,22 @@ typedef struct CPUMBState CPUMBState; #define STREAM_CONTROL (1 << 3) #define STREAM_NONBLOCK (1 << 4) +#define TARGET_INSN_START_EXTRA_WORDS 1 + struct CPUMBState { - uint32_t debug; - uint32_t btaken; - uint64_t btarget; - uint32_t bimm; + uint32_t bvalue; /* TCG temporary, only valid during a TB */ + uint32_t btarget; /* Full resolved branch destination */ uint32_t imm; uint32_t regs[32]; - uint64_t sregs[14]; + uint32_t pc; + uint32_t msr; /* All bits of MSR except MSR[C] and MSR[CC] */ + uint32_t msr_c; /* MSR[C], in low bit; other bits must be 0 */ + target_ulong ear; + uint32_t esr; + uint32_t fsr; + uint32_t btr; + uint32_t edr; float_status fp_status; /* Stack protectors. Yes, it's a hw feature. */ uint32_t slr, shr; @@ -247,14 +257,23 @@ struct CPUMBState { uint32_t res_val; /* Internal flags. */ -#define IMM_FLAG 4 -#define MSR_EE_FLAG (1 << 8) +#define IMM_FLAG (1 << 0) +#define BIMM_FLAG (1 << 1) +#define ESR_ESS_FLAG (1 << 2) /* indicates ESR_ESS_MASK is present */ +/* MSR_EE (1 << 8) -- these 3 are not in iflags but tb_flags */ +/* MSR_UM (1 << 11) */ +/* MSR_VM (1 << 13) */ +/* ESR_ESS_MASK [11:5] -- unwind into iflags for unaligned excp */ #define DRTI_FLAG (1 << 16) #define DRTE_FLAG (1 << 17) #define DRTB_FLAG (1 << 18) #define D_FLAG (1 << 19) /* Bit in ESR. */ + /* TB dependent CPUMBState. 
*/ -#define IFLAGS_TB_MASK (D_FLAG | IMM_FLAG | DRTI_FLAG | DRTE_FLAG | DRTB_FLAG) +#define IFLAGS_TB_MASK (D_FLAG | BIMM_FLAG | IMM_FLAG | \ + DRTI_FLAG | DRTE_FLAG | DRTB_FLAG) +#define MSR_TB_MASK (MSR_UM | MSR_VM | MSR_EE) + uint32_t iflags; #if !defined(CONFIG_USER_ONLY) @@ -317,11 +336,30 @@ struct MicroBlazeCPU { void mb_cpu_do_interrupt(CPUState *cs); bool mb_cpu_exec_interrupt(CPUState *cs, int int_req); +void mb_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, + MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr); void mb_cpu_dump_state(CPUState *cpu, FILE *f, int flags); hwaddr mb_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); int mb_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int mb_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); +static inline uint32_t mb_cpu_read_msr(const CPUMBState *env) +{ + /* Replicate MSR[C] to MSR[CC]. */ + return env->msr | (env->msr_c * (MSR_C | MSR_CC)); +} + +static inline void mb_cpu_write_msr(CPUMBState *env, uint32_t val) +{ + env->msr_c = (val >> 2) & 1; + /* + * Clear both MSR[C] and MSR[CC] from the saved copy. + * MSR_PVR is not writable and is always clear. + */ + env->msr = val & ~(MSR_C | MSR_CC | MSR_PVR); +} + void mb_tcg_init(void); /* you can call this signal handler from your SIGBUS and SIGSEGV signal handlers to inform the virtual CPU of exceptions. non zero @@ -348,13 +386,15 @@ typedef MicroBlazeCPU ArchCPU; #include "exec/cpu-all.h" +/* Ensure there is no overlap between the two masks. */ +QEMU_BUILD_BUG_ON(MSR_TB_MASK & IFLAGS_TB_MASK); + static inline void cpu_get_tb_cpu_state(CPUMBState *env, target_ulong *pc, target_ulong *cs_base, uint32_t *flags) { - *pc = env->sregs[SR_PC]; - *cs_base = 0; - *flags = (env->iflags & IFLAGS_TB_MASK) | - (env->sregs[SR_MSR] & (MSR_UM | MSR_VM | MSR_EE)); + *pc = env->pc; + *flags = (env->iflags & IFLAGS_TB_MASK) | (env->msr & MSR_TB_MASK); + *cs_base = (*flags & IMM_FLAG ? env->imm : 0); } #if !defined(CONFIG_USER_ONLY) @@ -369,11 +409,11 @@ static inline int cpu_mmu_index(CPUMBState *env, bool ifetch) MicroBlazeCPU *cpu = env_archcpu(env); /* Are we in nommu mode?. */ - if (!(env->sregs[SR_MSR] & MSR_VM) || !cpu->cfg.use_mmu) { + if (!(env->msr & MSR_VM) || !cpu->cfg.use_mmu) { return MMU_NOMMU_IDX; } - if (env->sregs[SR_MSR] & MSR_UM) { + if (env->msr & MSR_UM) { return MMU_USER_IDX; } return MMU_KERNEL_IDX; diff --git a/target/microblaze/gdbstub.c b/target/microblaze/gdbstub.c index 73e8973597..08d6a0e807 100644 --- a/target/microblaze/gdbstub.c +++ b/target/microblaze/gdbstub.c @@ -21,58 +21,80 @@ #include "cpu.h" #include "exec/gdbstub.h" +/* + * GDB expects SREGs in the following order: + * PC, MSR, EAR, ESR, FSR, BTR, EDR, PID, ZPR, TLBX, TLBSX, TLBLO, TLBHI. + * + * PID, ZPR, TLBx, TLBsx, TLBLO, and TLBHI aren't modeled, so we don't + * map them to anything and return a value of 0 instead. + */ + +enum { + GDB_PC = 32 + 0, + GDB_MSR = 32 + 1, + GDB_EAR = 32 + 2, + GDB_ESR = 32 + 3, + GDB_FSR = 32 + 4, + GDB_BTR = 32 + 5, + GDB_PVR0 = 32 + 6, + GDB_PVR11 = 32 + 17, + GDB_EDR = 32 + 18, + GDB_SLR = 32 + 25, + GDB_SHR = 32 + 26, +}; + int mb_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) { MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs); + CPUClass *cc = CPU_GET_CLASS(cs); CPUMBState *env = &cpu->env; - /* - * GDB expects SREGs in the following order: - * PC, MSR, EAR, ESR, FSR, BTR, EDR, PID, ZPR, TLBX, TLBSX, TLBLO, TLBHI. - * They aren't stored in this order, so make a map. 
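
Splitting MSR[C] out into msr_c gives the hot carry bit its own word; mb_cpu_read_msr then re-materialises both MSR[C] and its MSR[CC] shadow from it. A round-trip sketch with the bit positions assumed from the patch ((val >> 2) & 1 implies C is bit 2; CC is taken to be bit 31; the MSR_PVR masking of the real helper is omitted):

#include <assert.h>
#include <stdint.h>

#define MSR_C  (1u << 2)    /* carry; position assumed from (val >> 2) & 1 */
#define MSR_CC (1u << 31)   /* carry copy; position assumed */

struct msr_state { uint32_t msr, msr_c; };

static void write_msr(struct msr_state *s, uint32_t val)
{
    s->msr_c = (val >> 2) & 1;
    s->msr = val & ~(MSR_C | MSR_CC);
}

static uint32_t read_msr(const struct msr_state *s)
{
    return s->msr | (s->msr_c * (MSR_C | MSR_CC));
}

int main(void)
{
    struct msr_state s;
    write_msr(&s, MSR_C);                        /* set carry only */
    assert(read_msr(&s) == (MSR_C | MSR_CC));    /* CC tracks C */
    return 0;
}
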
- * PID, ZPR, TLBx, TLBsx, TLBLO, and TLBHI aren't modeled, so we don't - * map them to anything and return a value of 0 instead. - */ - static const uint8_t sreg_map[6] = { - SR_PC, - SR_MSR, - SR_EAR, - SR_ESR, - SR_FSR, - SR_BTR - }; + uint32_t val; - /* - * GDB expects registers to be reported in this order: - * R0-R31 - * PC-BTR - * PVR0-PVR11 - * EDR-TLBHI - * SLR-SHR - */ - if (n < 32) { - return gdb_get_reg32(mem_buf, env->regs[n]); - } else { - n -= 32; - switch (n) { - case 0 ... 5: - return gdb_get_reg32(mem_buf, env->sregs[sreg_map[n]]); + if (n > cc->gdb_num_core_regs) { + return 0; + } + + switch (n) { + case 1 ... 31: + val = env->regs[n]; + break; + case GDB_PC: + val = env->pc; + break; + case GDB_MSR: + val = mb_cpu_read_msr(env); + break; + case GDB_EAR: + val = env->ear; + break; + case GDB_ESR: + val = env->esr; + break; + case GDB_FSR: + val = env->fsr; + break; + case GDB_BTR: + val = env->btr; + break; + case GDB_PVR0 ... GDB_PVR11: /* PVR12 is intentionally skipped */ - case 6 ... 17: - n -= 6; - return gdb_get_reg32(mem_buf, env->pvr.regs[n]); - case 18: - return gdb_get_reg32(mem_buf, env->sregs[SR_EDR]); + val = env->pvr.regs[n - GDB_PVR0]; + break; + case GDB_EDR: + val = env->edr; + break; + case GDB_SLR: + val = env->slr; + break; + case GDB_SHR: + val = env->shr; + break; + default: /* Other SRegs aren't modeled, so report a value of 0 */ - case 19 ... 24: - return gdb_get_reg32(mem_buf, 0); - case 25: - return gdb_get_reg32(mem_buf, env->slr); - case 26: - return gdb_get_reg32(mem_buf, env->shr); - default: - return 0; - } + val = 0; + break; } + return gdb_get_reg32(mem_buf, val); } int mb_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) @@ -82,60 +104,47 @@ int mb_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) CPUMBState *env = &cpu->env; uint32_t tmp; - /* - * GDB expects SREGs in the following order: - * PC, MSR, EAR, ESR, FSR, BTR, EDR, PID, ZPR, TLBX, TLBSX, TLBLO, TLBHI. - * They aren't stored in this order, so make a map. - * PID, ZPR, TLBx, TLBsx, TLBLO, and TLBHI aren't modeled, so we don't - * map them to anything. - */ - static const uint8_t sreg_map[6] = { - SR_PC, - SR_MSR, - SR_EAR, - SR_ESR, - SR_FSR, - SR_BTR - }; - if (n > cc->gdb_num_core_regs) { return 0; } tmp = ldl_p(mem_buf); - /* - * GDB expects registers to be reported in this order: - * R0-R31 - * PC-BTR - * PVR0-PVR11 - * EDR-TLBHI - * SLR-SHR - */ - if (n < 32) { + switch (n) { + case 1 ... 31: env->regs[n] = tmp; - } else { - n -= 32; - switch (n) { - case 0 ... 5: - env->sregs[sreg_map[n]] = tmp; - break; + break; + case GDB_PC: + env->pc = tmp; + break; + case GDB_MSR: + mb_cpu_write_msr(env, tmp); + break; + case GDB_EAR: + env->ear = tmp; + break; + case GDB_ESR: + env->esr = tmp; + break; + case GDB_FSR: + env->fsr = tmp; + break; + case GDB_BTR: + env->btr = tmp; + break; + case GDB_PVR0 ... GDB_PVR11: /* PVR12 is intentionally skipped */ - case 6 ... 
17: - n -= 6; - env->pvr.regs[n] = tmp; - break; - /* Only EDR is modeled in these indeces, so ignore the rest */ - case 18: - env->sregs[SR_EDR] = tmp; - break; - case 25: - env->slr = tmp; - break; - case 26: - env->shr = tmp; - break; - } + env->pvr.regs[n - GDB_PVR0] = tmp; + break; + case GDB_EDR: + env->edr = tmp; + break; + case GDB_SLR: + env->slr = tmp; + break; + case GDB_SHR: + env->shr = tmp; + break; } return 4; } diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c index ab2ceeb055..00090526da 100644 --- a/target/microblaze/helper.c +++ b/target/microblaze/helper.c @@ -24,8 +24,6 @@ #include "qemu/host-utils.h" #include "exec/log.h" -#define D(x) - #if defined(CONFIG_USER_ONLY) void mb_cpu_do_interrupt(CPUState *cs) @@ -35,7 +33,7 @@ void mb_cpu_do_interrupt(CPUState *cs) cs->exception_index = -1; env->res_addr = RES_ADDR_NONE; - env->regs[14] = env->sregs[SR_PC]; + env->regs[14] = env->pc; } bool mb_cpu_tlb_fill(CPUState *cs, vaddr address, int size, @@ -85,15 +83,15 @@ bool mb_cpu_tlb_fill(CPUState *cs, vaddr address, int size, qemu_log_mask(CPU_LOG_MMU, "mmu=%d miss v=%" VADDR_PRIx "\n", mmu_idx, address); - env->sregs[SR_EAR] = address; + env->ear = address; switch (lu.err) { case ERR_PROT: - env->sregs[SR_ESR] = access_type == MMU_INST_FETCH ? 17 : 16; - env->sregs[SR_ESR] |= (access_type == MMU_DATA_STORE) << 10; + env->esr = access_type == MMU_INST_FETCH ? 17 : 16; + env->esr |= (access_type == MMU_DATA_STORE) << 10; break; case ERR_MISS: - env->sregs[SR_ESR] = access_type == MMU_INST_FETCH ? 19 : 18; - env->sregs[SR_ESR] |= (access_type == MMU_DATA_STORE) << 10; + env->esr = access_type == MMU_INST_FETCH ? 19 : 18; + env->esr |= (access_type == MMU_DATA_STORE) << 10; break; default: abort(); @@ -112,12 +110,14 @@ void mb_cpu_do_interrupt(CPUState *cs) { MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs); CPUMBState *env = &cpu->env; - uint32_t t; + uint32_t t, msr = mb_cpu_read_msr(env); /* IMM flag cannot propagate across a branch and into the dslot. */ - assert(!((env->iflags & D_FLAG) && (env->iflags & IMM_FLAG))); + assert((env->iflags & (D_FLAG | IMM_FLAG)) != (D_FLAG | IMM_FLAG)); + /* BIMM flag cannot be set without D_FLAG. */ + assert((env->iflags & (D_FLAG | BIMM_FLAG)) != BIMM_FLAG); + /* RTI flags are private to translate. */ assert(!(env->iflags & (DRTI_FLAG | DRTE_FLAG | DRTB_FLAG))); -/* assert(env->sregs[SR_MSR] & (MSR_EE)); Only for HW exceptions. */ env->res_addr = RES_ADDR_NONE; switch (cs->exception_index) { case EXCP_HW_EXCP: @@ -126,80 +126,79 @@ void mb_cpu_do_interrupt(CPUState *cs) return; } - env->regs[17] = env->sregs[SR_PC] + 4; - env->sregs[SR_ESR] &= ~(1 << 12); + env->regs[17] = env->pc + 4; + env->esr &= ~(1 << 12); /* Exception breaks branch + dslot sequence? */ if (env->iflags & D_FLAG) { - env->sregs[SR_ESR] |= 1 << 12 ; - env->sregs[SR_BTR] = env->btarget; + env->esr |= 1 << 12 ; + env->btr = env->btarget; } /* Disable the MMU. */ - t = (env->sregs[SR_MSR] & (MSR_VM | MSR_UM)) << 1; - env->sregs[SR_MSR] &= ~(MSR_VMS | MSR_UMS | MSR_VM | MSR_UM); - env->sregs[SR_MSR] |= t; + t = (msr & (MSR_VM | MSR_UM)) << 1; + msr &= ~(MSR_VMS | MSR_UMS | MSR_VM | MSR_UM); + msr |= t; /* Exception in progress. 
*/ - env->sregs[SR_MSR] |= MSR_EIP; + msr |= MSR_EIP; + mb_cpu_write_msr(env, msr); qemu_log_mask(CPU_LOG_INT, - "hw exception at pc=%" PRIx64 " ear=%" PRIx64 " " - "esr=%" PRIx64 " iflags=%x\n", - env->sregs[SR_PC], env->sregs[SR_EAR], - env->sregs[SR_ESR], env->iflags); + "hw exception at pc=%x ear=%" PRIx64 " " + "esr=%x iflags=%x\n", + env->pc, env->ear, + env->esr, env->iflags); log_cpu_state_mask(CPU_LOG_INT, cs, 0); - env->iflags &= ~(IMM_FLAG | D_FLAG); - env->sregs[SR_PC] = cpu->cfg.base_vectors + 0x20; + env->iflags = 0; + env->pc = cpu->cfg.base_vectors + 0x20; break; case EXCP_MMU: - env->regs[17] = env->sregs[SR_PC]; + env->regs[17] = env->pc; + + qemu_log_mask(CPU_LOG_INT, + "MMU exception at pc=%x iflags=%x ear=%" PRIx64 "\n", + env->pc, env->iflags, env->ear); - env->sregs[SR_ESR] &= ~(1 << 12); + env->esr &= ~(1 << 12); /* Exception breaks branch + dslot sequence? */ if (env->iflags & D_FLAG) { - D(qemu_log("D_FLAG set at exception bimm=%d\n", env->bimm)); - env->sregs[SR_ESR] |= 1 << 12 ; - env->sregs[SR_BTR] = env->btarget; + env->esr |= 1 << 12 ; + env->btr = env->btarget; /* Reexecute the branch. */ env->regs[17] -= 4; /* was the branch immprefixed?. */ - if (env->bimm) { - qemu_log_mask(CPU_LOG_INT, - "bimm exception at pc=%" PRIx64 " " - "iflags=%x\n", - env->sregs[SR_PC], env->iflags); + if (env->iflags & BIMM_FLAG) { env->regs[17] -= 4; log_cpu_state_mask(CPU_LOG_INT, cs, 0); } } else if (env->iflags & IMM_FLAG) { - D(qemu_log("IMM_FLAG set at exception\n")); env->regs[17] -= 4; } /* Disable the MMU. */ - t = (env->sregs[SR_MSR] & (MSR_VM | MSR_UM)) << 1; - env->sregs[SR_MSR] &= ~(MSR_VMS | MSR_UMS | MSR_VM | MSR_UM); - env->sregs[SR_MSR] |= t; + t = (msr & (MSR_VM | MSR_UM)) << 1; + msr &= ~(MSR_VMS | MSR_UMS | MSR_VM | MSR_UM); + msr |= t; /* Exception in progress. 
*/ - env->sregs[SR_MSR] |= MSR_EIP; + msr |= MSR_EIP; + mb_cpu_write_msr(env, msr); qemu_log_mask(CPU_LOG_INT, - "exception at pc=%" PRIx64 " ear=%" PRIx64 " " - "iflags=%x\n", - env->sregs[SR_PC], env->sregs[SR_EAR], env->iflags); + "exception at pc=%x ear=%" PRIx64 " iflags=%x\n", + env->pc, env->ear, env->iflags); log_cpu_state_mask(CPU_LOG_INT, cs, 0); - env->iflags &= ~(IMM_FLAG | D_FLAG); - env->sregs[SR_PC] = cpu->cfg.base_vectors + 0x20; + env->iflags = 0; + env->pc = cpu->cfg.base_vectors + 0x20; break; case EXCP_IRQ: - assert(!(env->sregs[SR_MSR] & (MSR_EIP | MSR_BIP))); - assert(env->sregs[SR_MSR] & MSR_IE); - assert(!(env->iflags & D_FLAG)); + assert(!(msr & (MSR_EIP | MSR_BIP))); + assert(msr & MSR_IE); + assert(!(env->iflags & (D_FLAG | IMM_FLAG))); - t = (env->sregs[SR_MSR] & (MSR_VM | MSR_UM)) << 1; + t = (msr & (MSR_VM | MSR_UM)) << 1; #if 0 #include "disas/disas.h" @@ -209,53 +208,47 @@ void mb_cpu_do_interrupt(CPUState *cs) { const char *sym; - sym = lookup_symbol(env->sregs[SR_PC]); + sym = lookup_symbol(env->pc); if (sym && (!strcmp("netif_rx", sym) || !strcmp("process_backlog", sym))) { - qemu_log( - "interrupt at pc=%x msr=%x %x iflags=%x sym=%s\n", - env->sregs[SR_PC], env->sregs[SR_MSR], t, env->iflags, - sym); + qemu_log("interrupt at pc=%x msr=%x %x iflags=%x sym=%s\n", + env->pc, msr, t, env->iflags, sym); log_cpu_state(cs, 0); } } #endif qemu_log_mask(CPU_LOG_INT, - "interrupt at pc=%" PRIx64 " msr=%" PRIx64 " %x " - "iflags=%x\n", - env->sregs[SR_PC], env->sregs[SR_MSR], t, env->iflags); + "interrupt at pc=%x msr=%x %x iflags=%x\n", + env->pc, msr, t, env->iflags); - env->sregs[SR_MSR] &= ~(MSR_VMS | MSR_UMS | MSR_VM \ - | MSR_UM | MSR_IE); - env->sregs[SR_MSR] |= t; + msr &= ~(MSR_VMS | MSR_UMS | MSR_VM | MSR_UM | MSR_IE); + msr |= t; + mb_cpu_write_msr(env, msr); - env->regs[14] = env->sregs[SR_PC]; - env->sregs[SR_PC] = cpu->cfg.base_vectors + 0x10; + env->regs[14] = env->pc; + env->iflags = 0; + env->pc = cpu->cfg.base_vectors + 0x10; //log_cpu_state_mask(CPU_LOG_INT, cs, 0); break; - case EXCP_BREAK: case EXCP_HW_BREAK: - assert(!(env->iflags & IMM_FLAG)); - assert(!(env->iflags & D_FLAG)); - t = (env->sregs[SR_MSR] & (MSR_VM | MSR_UM)) << 1; + assert(!(env->iflags & (D_FLAG | IMM_FLAG))); + + t = (msr & (MSR_VM | MSR_UM)) << 1; qemu_log_mask(CPU_LOG_INT, - "break at pc=%" PRIx64 " msr=%" PRIx64 " %x " - "iflags=%x\n", - env->sregs[SR_PC], env->sregs[SR_MSR], t, env->iflags); + "break at pc=%x msr=%x %x iflags=%x\n", + env->pc, msr, t, env->iflags); log_cpu_state_mask(CPU_LOG_INT, cs, 0); - env->sregs[SR_MSR] &= ~(MSR_VMS | MSR_UMS | MSR_VM | MSR_UM); - env->sregs[SR_MSR] |= t; - env->sregs[SR_MSR] |= MSR_BIP; - if (cs->exception_index == EXCP_HW_BREAK) { - env->regs[16] = env->sregs[SR_PC]; - env->sregs[SR_MSR] |= MSR_BIP; - env->sregs[SR_PC] = cpu->cfg.base_vectors + 0x18; - } else - env->sregs[SR_PC] = env->btarget; + msr &= ~(MSR_VMS | MSR_UMS | MSR_VM | MSR_UM); + msr |= t; + msr |= MSR_BIP; + env->regs[16] = env->pc; + env->iflags = 0; + env->pc = cpu->cfg.base_vectors + 0x18; + mb_cpu_write_msr(env, msr); break; default: cpu_abort(cs, "unhandled exception type=%d\n", @@ -293,8 +286,8 @@ bool mb_cpu_exec_interrupt(CPUState *cs, int interrupt_request) CPUMBState *env = &cpu->env; if ((interrupt_request & CPU_INTERRUPT_HARD) - && (env->sregs[SR_MSR] & MSR_IE) - && !(env->sregs[SR_MSR] & (MSR_EIP | MSR_BIP)) + && (env->msr & MSR_IE) + && !(env->msr & (MSR_EIP | MSR_BIP)) && !(env->iflags & (D_FLAG | IMM_FLAG))) { cs->exception_index = EXCP_IRQ; 
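        /*
         * Delivery is safe at this point: the condition above checked
         * that MSR_IE is set, that no exception or break is already in
         * flight (MSR_EIP and MSR_BIP clear), and that we are neither
         * between an imm prefix and its consumer nor in a branch delay
         * slot (IMM_FLAG and D_FLAG clear).  The EXCP_IRQ case in
         * mb_cpu_do_interrupt() asserts these same invariants.
         */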
mb_cpu_do_interrupt(cs); @@ -302,3 +295,31 @@ bool mb_cpu_exec_interrupt(CPUState *cs, int interrupt_request) } return false; } + +void mb_cpu_do_unaligned_access(CPUState *cs, vaddr addr, + MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr) +{ + MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs); + uint32_t esr, iflags; + + /* Recover the pc and iflags from the corresponding insn_start. */ + cpu_restore_state(cs, retaddr, true); + iflags = cpu->env.iflags; + + qemu_log_mask(CPU_LOG_INT, + "Unaligned access addr=" TARGET_FMT_lx " pc=%x iflags=%x\n", + (target_ulong)addr, cpu->env.pc, iflags); + + esr = ESR_EC_UNALIGNED_DATA; + if (likely(iflags & ESR_ESS_FLAG)) { + esr |= iflags & ESR_ESS_MASK; + } else { + qemu_log_mask(LOG_UNIMP, "Unaligned access without ESR_ESS_FLAG\n"); + } + + cpu->env.ear = addr; + cpu->env.esr = esr; + cs->exception_index = EXCP_HW_EXCP; + cpu_loop_exit(cs); +} diff --git a/target/microblaze/helper.h b/target/microblaze/helper.h index 2f8bdea22b..f740835fcb 100644 --- a/target/microblaze/helper.h +++ b/target/microblaze/helper.h @@ -1,36 +1,31 @@ -DEF_HELPER_2(raise_exception, void, env, i32) -DEF_HELPER_1(debug, void, env) -DEF_HELPER_FLAGS_3(carry, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) -DEF_HELPER_2(cmp, i32, i32, i32) -DEF_HELPER_2(cmpu, i32, i32, i32) +DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, i32) -DEF_HELPER_3(divs, i32, env, i32, i32) -DEF_HELPER_3(divu, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(divs, TCG_CALL_NO_WG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(divu, TCG_CALL_NO_WG, i32, env, i32, i32) -DEF_HELPER_3(fadd, i32, env, i32, i32) -DEF_HELPER_3(frsub, i32, env, i32, i32) -DEF_HELPER_3(fmul, i32, env, i32, i32) -DEF_HELPER_3(fdiv, i32, env, i32, i32) -DEF_HELPER_2(flt, i32, env, i32) -DEF_HELPER_2(fint, i32, env, i32) -DEF_HELPER_2(fsqrt, i32, env, i32) +DEF_HELPER_FLAGS_3(fadd, TCG_CALL_NO_WG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(frsub, TCG_CALL_NO_WG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fmul, TCG_CALL_NO_WG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fdiv, TCG_CALL_NO_WG, i32, env, i32, i32) +DEF_HELPER_FLAGS_2(flt, TCG_CALL_NO_WG, i32, env, i32) +DEF_HELPER_FLAGS_2(fint, TCG_CALL_NO_WG, i32, env, i32) +DEF_HELPER_FLAGS_2(fsqrt, TCG_CALL_NO_WG, i32, env, i32) -DEF_HELPER_3(fcmp_un, i32, env, i32, i32) -DEF_HELPER_3(fcmp_lt, i32, env, i32, i32) -DEF_HELPER_3(fcmp_eq, i32, env, i32, i32) -DEF_HELPER_3(fcmp_le, i32, env, i32, i32) -DEF_HELPER_3(fcmp_gt, i32, env, i32, i32) -DEF_HELPER_3(fcmp_ne, i32, env, i32, i32) -DEF_HELPER_3(fcmp_ge, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fcmp_un, TCG_CALL_NO_WG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fcmp_lt, TCG_CALL_NO_WG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fcmp_eq, TCG_CALL_NO_WG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fcmp_le, TCG_CALL_NO_WG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fcmp_gt, TCG_CALL_NO_WG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fcmp_ne, TCG_CALL_NO_WG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fcmp_ge, TCG_CALL_NO_WG, i32, env, i32, i32) DEF_HELPER_FLAGS_2(pcmpbf, TCG_CALL_NO_RWG_SE, i32, i32, i32) #if !defined(CONFIG_USER_ONLY) -DEF_HELPER_3(mmu_read, i32, env, i32, i32) -DEF_HELPER_4(mmu_write, void, env, i32, i32, i32) +DEF_HELPER_FLAGS_3(mmu_read, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_4(mmu_write, TCG_CALL_NO_RWG, void, env, i32, i32, i32) #endif -DEF_HELPER_5(memalign, void, env, tl, i32, i32, i32) -DEF_HELPER_2(stackprot, void, env, tl) +DEF_HELPER_FLAGS_2(stackprot, TCG_CALL_NO_WG, void, env, tl) -DEF_HELPER_2(get, i32, i32, i32) 
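
A note on the DEF_HELPER_FLAGS_n conversions running through this hunk, summarizing the usual tcg.h conventions rather than anything specific to this patch: TCG_CALL_NO_WG promises that the helper writes no TCG globals, TCG_CALL_NO_RWG that it neither reads nor writes them, and a _SE suffix additionally marks the call side-effect free, so TCG may delete it when its result is unused. A hypothetical declaration in the same style, for a pure two-input helper:

/* Hypothetical, for illustration only: a pure two-input helper. */
DEF_HELPER_FLAGS_2(toy_xnor, TCG_CALL_NO_RWG_SE, i32, i32, i32)

Helpers that can trap (divs/divu, the FPU operations) stay at TCG_CALL_NO_WG instead: their call sites must survive even when the result is dead, since the exception path is itself the point.
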
-DEF_HELPER_3(put, void, i32, i32, i32) +DEF_HELPER_FLAGS_2(get, TCG_CALL_NO_RWG, i32, i32, i32) +DEF_HELPER_FLAGS_3(put, TCG_CALL_NO_RWG, void, i32, i32, i32) diff --git a/target/microblaze/insns.decode b/target/microblaze/insns.decode new file mode 100644 index 0000000000..fb0f0e6838 --- /dev/null +++ b/target/microblaze/insns.decode @@ -0,0 +1,256 @@ +# +# MicroBlaze instruction decode definitions. +# +# Copyright (c) 2020 Richard Henderson <rth@twiddle.net> +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, see <http://www.gnu.org/licenses/>. +# + +&typea0 rd ra +&typea rd ra rb +&typea_br rd rb +&typea_bc ra rb +&typeb rd ra imm +&typeb_br rd imm +&typeb_bc ra imm +&type_msr rd imm + +# Include any IMM prefix in the value reported. +%extimm 0:s16 !function=typeb_imm + +@typea ...... rd:5 ra:5 rb:5 ... .... .... &typea +@typeb ...... rd:5 ra:5 ................ &typeb imm=%extimm + +# Officially typea, but with rb==0, which is not used. +@typea0 ...... rd:5 ra:5 ................ &typea0 + +# Officially typea, but with ra as opcode. +@typea_br ...... rd:5 ..... rb:5 ........... &typea_br + +# Officially typea, but with rd as opcode. +@typea_bc ...... ..... ra:5 rb:5 ........... &typea_bc + +# Officially typeb, but any immediate extension is unused. +@typeb_bs ...... rd:5 ra:5 ..... ...... imm:5 &typeb + +# Officially typeb, but with ra as opcode. +@typeb_br ...... rd:5 ..... ................ &typeb_br imm=%extimm + +# Officially typeb, but with rd as opcode. +@typeb_bc ...... ..... ra:5 ................ &typeb_bc imm=%extimm + +# For convenience, extract the two imm_w/imm_s fields, then pack +# them back together as "imm". Doing this makes it easiest to +# match the required zero at bit 5. +%ieimm 6:5 0:5 +@typeb_ie ...... rd:5 ra:5 ..... ..... . ..... &typeb imm=%ieimm + +@type_msr ...... rd:5 ...... imm:15 &type_msr + +### + +{ + zero 000000 00000 00000 00000 000 0000 0000 + add 000000 ..... ..... ..... 000 0000 0000 @typea +} +addc 000010 ..... ..... ..... 000 0000 0000 @typea +addk 000100 ..... ..... ..... 000 0000 0000 @typea +addkc 000110 ..... ..... ..... 000 0000 0000 @typea + +addi 001000 ..... ..... ................ @typeb +addic 001010 ..... ..... ................ @typeb +addik 001100 ..... ..... ................ @typeb +addikc 001110 ..... ..... ................ @typeb + +and 100001 ..... ..... ..... 000 0000 0000 @typea +andi 101001 ..... ..... ................ @typeb + +andn 100011 ..... ..... ..... 000 0000 0000 @typea +andni 101011 ..... ..... ................ @typeb + +beq 100111 00000 ..... ..... 000 0000 0000 @typea_bc +bge 100111 00101 ..... ..... 000 0000 0000 @typea_bc +bgt 100111 00100 ..... ..... 000 0000 0000 @typea_bc +ble 100111 00011 ..... ..... 000 0000 0000 @typea_bc +blt 100111 00010 ..... ..... 000 0000 0000 @typea_bc +bne 100111 00001 ..... ..... 000 0000 0000 @typea_bc + +beqd 100111 10000 ..... ..... 000 0000 0000 @typea_bc +bged 100111 10101 ..... ..... 
000 0000 0000 @typea_bc +bgtd 100111 10100 ..... ..... 000 0000 0000 @typea_bc +bled 100111 10011 ..... ..... 000 0000 0000 @typea_bc +bltd 100111 10010 ..... ..... 000 0000 0000 @typea_bc +bned 100111 10001 ..... ..... 000 0000 0000 @typea_bc + +beqi 101111 00000 ..... ................ @typeb_bc +bgei 101111 00101 ..... ................ @typeb_bc +bgti 101111 00100 ..... ................ @typeb_bc +blei 101111 00011 ..... ................ @typeb_bc +blti 101111 00010 ..... ................ @typeb_bc +bnei 101111 00001 ..... ................ @typeb_bc + +beqid 101111 10000 ..... ................ @typeb_bc +bgeid 101111 10101 ..... ................ @typeb_bc +bgtid 101111 10100 ..... ................ @typeb_bc +bleid 101111 10011 ..... ................ @typeb_bc +bltid 101111 10010 ..... ................ @typeb_bc +bneid 101111 10001 ..... ................ @typeb_bc + +br 100110 ..... 00000 ..... 000 0000 0000 @typea_br +bra 100110 ..... 01000 ..... 000 0000 0000 @typea_br +brd 100110 ..... 10000 ..... 000 0000 0000 @typea_br +brad 100110 ..... 11000 ..... 000 0000 0000 @typea_br +brld 100110 ..... 10100 ..... 000 0000 0000 @typea_br +brald 100110 ..... 11100 ..... 000 0000 0000 @typea_br + +bri 101110 ..... 00000 ................ @typeb_br +brai 101110 ..... 01000 ................ @typeb_br +brid 101110 ..... 10000 ................ @typeb_br +braid 101110 ..... 11000 ................ @typeb_br +brlid 101110 ..... 10100 ................ @typeb_br +bralid 101110 ..... 11100 ................ @typeb_br + +brk 100110 ..... 01100 ..... 000 0000 0000 @typea_br +brki 101110 ..... 01100 ................ @typeb_br + +bsrl 010001 ..... ..... ..... 000 0000 0000 @typea +bsra 010001 ..... ..... ..... 010 0000 0000 @typea +bsll 010001 ..... ..... ..... 100 0000 0000 @typea + +bsrli 011001 ..... ..... 00000 000000 ..... @typeb_bs +bsrai 011001 ..... ..... 00000 010000 ..... @typeb_bs +bslli 011001 ..... ..... 00000 100000 ..... @typeb_bs + +bsefi 011001 ..... ..... 01000 .....0 ..... @typeb_ie +bsifi 011001 ..... ..... 10000 .....0 ..... @typeb_ie + +clz 100100 ..... ..... 00000 000 1110 0000 @typea0 + +cmp 000101 ..... ..... ..... 000 0000 0001 @typea +cmpu 000101 ..... ..... ..... 000 0000 0011 @typea + +fadd 010110 ..... ..... ..... 0000 000 0000 @typea +frsub 010110 ..... ..... ..... 0001 000 0000 @typea +fmul 010110 ..... ..... ..... 0010 000 0000 @typea +fdiv 010110 ..... ..... ..... 0011 000 0000 @typea +fcmp_un 010110 ..... ..... ..... 0100 000 0000 @typea +fcmp_lt 010110 ..... ..... ..... 0100 001 0000 @typea +fcmp_eq 010110 ..... ..... ..... 0100 010 0000 @typea +fcmp_le 010110 ..... ..... ..... 0100 011 0000 @typea +fcmp_gt 010110 ..... ..... ..... 0100 100 0000 @typea +fcmp_ne 010110 ..... ..... ..... 0100 101 0000 @typea +fcmp_ge 010110 ..... ..... ..... 0100 110 0000 @typea + +# Note that flt and fint, unlike fsqrt, are documented as having the RB +# operand which is unused. So allow the field to be non-zero but discard +# the value and treat as 2-operand insns. +flt 010110 ..... ..... ----- 0101 000 0000 @typea0 +fint 010110 ..... ..... ----- 0110 000 0000 @typea0 +fsqrt 010110 ..... ..... 00000 0111 000 0000 @typea0 + +get 011011 rd:5 00000 0 ctrl:5 000000 imm:4 +getd 010011 rd:5 00000 rb:5 0 ctrl:5 00000 + +idiv 010010 ..... ..... ..... 000 0000 0000 @typea +idivu 010010 ..... ..... ..... 000 0000 0010 @typea + +imm 101100 00000 00000 imm:16 + +lbu 110000 ..... ..... ..... 0000 000 0000 @typea +lbur 110000 ..... ..... ..... 0100 000 0000 @typea +lbuea 110000 ..... ..... ..... 
0001 000 0000 @typea +lbui 111000 ..... ..... ................ @typeb + +lhu 110001 ..... ..... ..... 0000 000 0000 @typea +lhur 110001 ..... ..... ..... 0100 000 0000 @typea +lhuea 110001 ..... ..... ..... 0001 000 0000 @typea +lhui 111001 ..... ..... ................ @typeb + +lw 110010 ..... ..... ..... 0000 000 0000 @typea +lwr 110010 ..... ..... ..... 0100 000 0000 @typea +lwea 110010 ..... ..... ..... 0001 000 0000 @typea +lwx 110010 ..... ..... ..... 1000 000 0000 @typea +lwi 111010 ..... ..... ................ @typeb + +mbar 101110 imm:5 00010 0000 0000 0000 0100 + +mfs 100101 rd:5 0 e:1 000 10 rs:14 +mts 100101 0 e:1 000 ra:5 11 rs:14 + +msrclr 100101 ..... 100010 ............... @type_msr +msrset 100101 ..... 100000 ............... @type_msr + +mul 010000 ..... ..... ..... 000 0000 0000 @typea +mulh 010000 ..... ..... ..... 000 0000 0001 @typea +mulhu 010000 ..... ..... ..... 000 0000 0011 @typea +mulhsu 010000 ..... ..... ..... 000 0000 0010 @typea +muli 011000 ..... ..... ................ @typeb + +or 100000 ..... ..... ..... 000 0000 0000 @typea +ori 101000 ..... ..... ................ @typeb + +pcmpbf 100000 ..... ..... ..... 100 0000 0000 @typea +pcmpeq 100010 ..... ..... ..... 100 0000 0000 @typea +pcmpne 100011 ..... ..... ..... 100 0000 0000 @typea + +put 011011 00000 ra:5 1 ctrl:5 000000 imm:4 +putd 010011 00000 ra:5 rb:5 1 ctrl:5 00000 + +rsub 000001 ..... ..... ..... 000 0000 0000 @typea +rsubc 000011 ..... ..... ..... 000 0000 0000 @typea +rsubk 000101 ..... ..... ..... 000 0000 0000 @typea +rsubkc 000111 ..... ..... ..... 000 0000 0000 @typea + +rsubi 001001 ..... ..... ................ @typeb +rsubic 001011 ..... ..... ................ @typeb +rsubik 001101 ..... ..... ................ @typeb +rsubikc 001111 ..... ..... ................ @typeb + +rtbd 101101 10010 ..... ................ @typeb_bc +rtid 101101 10001 ..... ................ @typeb_bc +rted 101101 10100 ..... ................ @typeb_bc +rtsd 101101 10000 ..... ................ @typeb_bc + +sb 110100 ..... ..... ..... 0000 000 0000 @typea +sbr 110100 ..... ..... ..... 0100 000 0000 @typea +sbea 110100 ..... ..... ..... 0001 000 0000 @typea +sbi 111100 ..... ..... ................ @typeb + +sh 110101 ..... ..... ..... 0000 000 0000 @typea +shr 110101 ..... ..... ..... 0100 000 0000 @typea +shea 110101 ..... ..... ..... 0001 000 0000 @typea +shi 111101 ..... ..... ................ @typeb + +sw 110110 ..... ..... ..... 0000 000 0000 @typea +swr 110110 ..... ..... ..... 0100 000 0000 @typea +swea 110110 ..... ..... ..... 0001 000 0000 @typea +swx 110110 ..... ..... ..... 1000 000 0000 @typea +swi 111110 ..... ..... ................ @typeb + +sext8 100100 ..... ..... 00000 000 0110 0000 @typea0 +sext16 100100 ..... ..... 00000 000 0110 0001 @typea0 + +sra 100100 ..... ..... 00000 000 0000 0001 @typea0 +src 100100 ..... ..... 00000 000 0010 0001 @typea0 +srl 100100 ..... ..... 00000 000 0100 0001 @typea0 + +swapb 100100 ..... ..... 00000 001 1110 0000 @typea0 +swaph 100100 ..... ..... 00000 001 1110 0010 @typea0 + +# Cache operations have no effect in qemu: discard the arguments. +wdic 100100 00000 ----- ----- -00 -11- 01-0 # wdc +wdic 100100 00000 ----- ----- 000 0110 1000 # wic + +xor 100010 ..... ..... ..... 000 0000 0000 @typea +xori 101010 ..... ..... ................ 
@typeb diff --git a/target/microblaze/meson.build b/target/microblaze/meson.build index b8fe4afe61..639c3f73a8 100644 --- a/target/microblaze/meson.build +++ b/target/microblaze/meson.build @@ -1,4 +1,7 @@ +gen = decodetree.process('insns.decode') + microblaze_ss = ss.source_set() +microblaze_ss.add(gen) microblaze_ss.add(files( 'cpu.c', 'gdbstub.c', diff --git a/target/microblaze/microblaze-decode.h b/target/microblaze/microblaze-decode.h deleted file mode 100644 index 17b2f29fff..0000000000 --- a/target/microblaze/microblaze-decode.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * MicroBlaze insn decoding macros. - * - * Copyright (c) 2009 Edgar E. Iglesias <edgar.iglesias@gmail.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef TARGET_MICROBLAZE_MICROBLAZE_DECODE_H -#define TARGET_MICROBLAZE_MICROBLAZE_DECODE_H - -/* Convenient binary macros. */ -#define HEX__(n) 0x##n##LU -#define B8__(x) ((x&0x0000000FLU)?1:0) \ - + ((x&0x000000F0LU)?2:0) \ - + ((x&0x00000F00LU)?4:0) \ - + ((x&0x0000F000LU)?8:0) \ - + ((x&0x000F0000LU)?16:0) \ - + ((x&0x00F00000LU)?32:0) \ - + ((x&0x0F000000LU)?64:0) \ - + ((x&0xF0000000LU)?128:0) -#define B8(d) ((unsigned char)B8__(HEX__(d))) - -/* Decode logic, value and mask. 
*/ -#define DEC_ADD {B8(00000000), B8(00110001)} -#define DEC_SUB {B8(00000001), B8(00110001)} -#define DEC_AND {B8(00100001), B8(00110101)} -#define DEC_XOR {B8(00100010), B8(00110111)} -#define DEC_OR {B8(00100000), B8(00110111)} -#define DEC_BIT {B8(00100100), B8(00111111)} -#define DEC_MSR {B8(00100101), B8(00111111)} - -#define DEC_BARREL {B8(00010001), B8(00110111)} -#define DEC_MUL {B8(00010000), B8(00110111)} -#define DEC_DIV {B8(00010010), B8(00110111)} -#define DEC_FPU {B8(00010110), B8(00111111)} - -#define DEC_LD {B8(00110000), B8(00110100)} -#define DEC_ST {B8(00110100), B8(00110100)} -#define DEC_IMM {B8(00101100), B8(00111111)} - -#define DEC_BR {B8(00100110), B8(00110111)} -#define DEC_BCC {B8(00100111), B8(00110111)} -#define DEC_RTS {B8(00101101), B8(00111111)} - -#define DEC_STREAM {B8(00010011), B8(00110111)} - -#endif diff --git a/target/microblaze/mmu.c b/target/microblaze/mmu.c index 6763421ba2..6e583d78d9 100644 --- a/target/microblaze/mmu.c +++ b/target/microblaze/mmu.c @@ -250,8 +250,8 @@ void mmu_write(CPUMBState *env, bool ext, uint32_t rn, uint32_t v) if (rn == MMU_R_TLBHI) { if (i < 3 && !(v & TLB_VALID) && qemu_loglevel_mask(~0)) qemu_log_mask(LOG_GUEST_ERROR, - "invalidating index %x at pc=%" PRIx64 "\n", - i, env->sregs[SR_PC]); + "invalidating index %x at pc=%x\n", + i, env->pc); env->mmu.tids[i] = env->mmu.regs[MMU_R_PID] & 0xff; mmu_flush_idx(env, i); } diff --git a/target/microblaze/op_helper.c b/target/microblaze/op_helper.c index f3b17a95b3..4614e99db3 100644 --- a/target/microblaze/op_helper.c +++ b/target/microblaze/op_helper.c @@ -26,8 +26,6 @@ #include "exec/cpu_ldst.h" #include "fpu/softfloat.h" -#define D(x) - void helper_put(uint32_t id, uint32_t ctrl, uint32_t data) { int test = ctrl & STREAM_TEST; @@ -71,85 +69,27 @@ void helper_raise_exception(CPUMBState *env, uint32_t index) cpu_loop_exit(cs); } -void helper_debug(CPUMBState *env) -{ - int i; - - qemu_log("PC=%" PRIx64 "\n", env->sregs[SR_PC]); - qemu_log("rmsr=%" PRIx64 " resr=%" PRIx64 " rear=%" PRIx64 " " - "debug[%x] imm=%x iflags=%x\n", - env->sregs[SR_MSR], env->sregs[SR_ESR], env->sregs[SR_EAR], - env->debug, env->imm, env->iflags); - qemu_log("btaken=%d btarget=%" PRIx64 " mode=%s(saved=%s) eip=%d ie=%d\n", - env->btaken, env->btarget, - (env->sregs[SR_MSR] & MSR_UM) ? "user" : "kernel", - (env->sregs[SR_MSR] & MSR_UMS) ? 
"user" : "kernel", - (bool)(env->sregs[SR_MSR] & MSR_EIP), - (bool)(env->sregs[SR_MSR] & MSR_IE)); - for (i = 0; i < 32; i++) { - qemu_log("r%2.2d=%8.8x ", i, env->regs[i]); - if ((i + 1) % 4 == 0) - qemu_log("\n"); - } - qemu_log("\n\n"); -} - -static inline uint32_t compute_carry(uint32_t a, uint32_t b, uint32_t cin) -{ - uint32_t cout = 0; - - if ((b == ~0) && cin) - cout = 1; - else if ((~0 - a) < (b + cin)) - cout = 1; - return cout; -} - -uint32_t helper_cmp(uint32_t a, uint32_t b) -{ - uint32_t t; - - t = b + ~a + 1; - if ((b & 0x80000000) ^ (a & 0x80000000)) - t = (t & 0x7fffffff) | (b & 0x80000000); - return t; -} - -uint32_t helper_cmpu(uint32_t a, uint32_t b) +static bool check_divz(CPUMBState *env, uint32_t a, uint32_t b, uintptr_t ra) { - uint32_t t; + if (unlikely(b == 0)) { + env->msr |= MSR_DZ; - t = b + ~a + 1; - if ((b & 0x80000000) ^ (a & 0x80000000)) - t = (t & 0x7fffffff) | (a & 0x80000000); - return t; -} + if ((env->msr & MSR_EE) && + env_archcpu(env)->cfg.div_zero_exception) { + CPUState *cs = env_cpu(env); -uint32_t helper_carry(uint32_t a, uint32_t b, uint32_t cf) -{ - return compute_carry(a, b, cf); -} - -static inline int div_prepare(CPUMBState *env, uint32_t a, uint32_t b) -{ - MicroBlazeCPU *cpu = env_archcpu(env); - - if (b == 0) { - env->sregs[SR_MSR] |= MSR_DZ; - - if ((env->sregs[SR_MSR] & MSR_EE) && cpu->cfg.div_zero_exception) { - env->sregs[SR_ESR] = ESR_EC_DIVZERO; - helper_raise_exception(env, EXCP_HW_EXCP); + env->esr = ESR_EC_DIVZERO; + cs->exception_index = EXCP_HW_EXCP; + cpu_loop_exit_restore(cs, ra); } - return 0; + return false; } - env->sregs[SR_MSR] &= ~MSR_DZ; - return 1; + return true; } uint32_t helper_divs(CPUMBState *env, uint32_t a, uint32_t b) { - if (!div_prepare(env, a, b)) { + if (!check_divz(env, a, b, GETPC())) { return 0; } return (int32_t)a / (int32_t)b; @@ -157,43 +97,46 @@ uint32_t helper_divs(CPUMBState *env, uint32_t a, uint32_t b) uint32_t helper_divu(CPUMBState *env, uint32_t a, uint32_t b) { - if (!div_prepare(env, a, b)) { + if (!check_divz(env, a, b, GETPC())) { return 0; } return a / b; } /* raise FPU exception. 
*/ -static void raise_fpu_exception(CPUMBState *env) +static void raise_fpu_exception(CPUMBState *env, uintptr_t ra) { - env->sregs[SR_ESR] = ESR_EC_FPU; - helper_raise_exception(env, EXCP_HW_EXCP); + CPUState *cs = env_cpu(env); + + env->esr = ESR_EC_FPU; + cs->exception_index = EXCP_HW_EXCP; + cpu_loop_exit_restore(cs, ra); } -static void update_fpu_flags(CPUMBState *env, int flags) +static void update_fpu_flags(CPUMBState *env, int flags, uintptr_t ra) { int raise = 0; if (flags & float_flag_invalid) { - env->sregs[SR_FSR] |= FSR_IO; + env->fsr |= FSR_IO; raise = 1; } if (flags & float_flag_divbyzero) { - env->sregs[SR_FSR] |= FSR_DZ; + env->fsr |= FSR_DZ; raise = 1; } if (flags & float_flag_overflow) { - env->sregs[SR_FSR] |= FSR_OF; + env->fsr |= FSR_OF; raise = 1; } if (flags & float_flag_underflow) { - env->sregs[SR_FSR] |= FSR_UF; + env->fsr |= FSR_UF; raise = 1; } if (raise && (env->pvr.regs[2] & PVR2_FPU_EXC_MASK) - && (env->sregs[SR_MSR] & MSR_EE)) { - raise_fpu_exception(env); + && (env->msr & MSR_EE)) { + raise_fpu_exception(env, ra); } } @@ -208,7 +151,7 @@ uint32_t helper_fadd(CPUMBState *env, uint32_t a, uint32_t b) fd.f = float32_add(fa.f, fb.f, &env->fp_status); flags = get_float_exception_flags(&env->fp_status); - update_fpu_flags(env, flags); + update_fpu_flags(env, flags, GETPC()); return fd.l; } @@ -222,7 +165,7 @@ uint32_t helper_frsub(CPUMBState *env, uint32_t a, uint32_t b) fb.l = b; fd.f = float32_sub(fb.f, fa.f, &env->fp_status); flags = get_float_exception_flags(&env->fp_status); - update_fpu_flags(env, flags); + update_fpu_flags(env, flags, GETPC()); return fd.l; } @@ -236,7 +179,7 @@ uint32_t helper_fmul(CPUMBState *env, uint32_t a, uint32_t b) fb.l = b; fd.f = float32_mul(fa.f, fb.f, &env->fp_status); flags = get_float_exception_flags(&env->fp_status); - update_fpu_flags(env, flags); + update_fpu_flags(env, flags, GETPC()); return fd.l; } @@ -251,7 +194,7 @@ uint32_t helper_fdiv(CPUMBState *env, uint32_t a, uint32_t b) fb.l = b; fd.f = float32_div(fb.f, fa.f, &env->fp_status); flags = get_float_exception_flags(&env->fp_status); - update_fpu_flags(env, flags); + update_fpu_flags(env, flags, GETPC()); return fd.l; } @@ -266,7 +209,7 @@ uint32_t helper_fcmp_un(CPUMBState *env, uint32_t a, uint32_t b) if (float32_is_signaling_nan(fa.f, &env->fp_status) || float32_is_signaling_nan(fb.f, &env->fp_status)) { - update_fpu_flags(env, float_flag_invalid); + update_fpu_flags(env, float_flag_invalid, GETPC()); r = 1; } @@ -289,7 +232,7 @@ uint32_t helper_fcmp_lt(CPUMBState *env, uint32_t a, uint32_t b) fb.l = b; r = float32_lt(fb.f, fa.f, &env->fp_status); flags = get_float_exception_flags(&env->fp_status); - update_fpu_flags(env, flags & float_flag_invalid); + update_fpu_flags(env, flags & float_flag_invalid, GETPC()); return r; } @@ -305,7 +248,7 @@ uint32_t helper_fcmp_eq(CPUMBState *env, uint32_t a, uint32_t b) fb.l = b; r = float32_eq_quiet(fa.f, fb.f, &env->fp_status); flags = get_float_exception_flags(&env->fp_status); - update_fpu_flags(env, flags & float_flag_invalid); + update_fpu_flags(env, flags & float_flag_invalid, GETPC()); return r; } @@ -321,7 +264,7 @@ uint32_t helper_fcmp_le(CPUMBState *env, uint32_t a, uint32_t b) set_float_exception_flags(0, &env->fp_status); r = float32_le(fa.f, fb.f, &env->fp_status); flags = get_float_exception_flags(&env->fp_status); - update_fpu_flags(env, flags & float_flag_invalid); + update_fpu_flags(env, flags & float_flag_invalid, GETPC()); return r; @@ -337,7 +280,7 @@ uint32_t helper_fcmp_gt(CPUMBState *env, uint32_t a, 
uint32_t b) set_float_exception_flags(0, &env->fp_status); r = float32_lt(fa.f, fb.f, &env->fp_status); flags = get_float_exception_flags(&env->fp_status); - update_fpu_flags(env, flags & float_flag_invalid); + update_fpu_flags(env, flags & float_flag_invalid, GETPC()); return r; } @@ -351,7 +294,7 @@ uint32_t helper_fcmp_ne(CPUMBState *env, uint32_t a, uint32_t b) set_float_exception_flags(0, &env->fp_status); r = !float32_eq_quiet(fa.f, fb.f, &env->fp_status); flags = get_float_exception_flags(&env->fp_status); - update_fpu_flags(env, flags & float_flag_invalid); + update_fpu_flags(env, flags & float_flag_invalid, GETPC()); return r; } @@ -366,7 +309,7 @@ uint32_t helper_fcmp_ge(CPUMBState *env, uint32_t a, uint32_t b) set_float_exception_flags(0, &env->fp_status); r = !float32_lt(fa.f, fb.f, &env->fp_status); flags = get_float_exception_flags(&env->fp_status); - update_fpu_flags(env, flags & float_flag_invalid); + update_fpu_flags(env, flags & float_flag_invalid, GETPC()); return r; } @@ -390,7 +333,7 @@ uint32_t helper_fint(CPUMBState *env, uint32_t a) fa.l = a; r = float32_to_int32(fa.f, &env->fp_status); flags = get_float_exception_flags(&env->fp_status); - update_fpu_flags(env, flags); + update_fpu_flags(env, flags, GETPC()); return r; } @@ -404,7 +347,7 @@ uint32_t helper_fsqrt(CPUMBState *env, uint32_t a) fa.l = a; fd.l = float32_sqrt(fa.f, &env->fp_status); flags = get_float_exception_flags(&env->fp_status); - update_fpu_flags(env, flags); + update_fpu_flags(env, flags, GETPC()); return fd.l; } @@ -422,37 +365,19 @@ uint32_t helper_pcmpbf(uint32_t a, uint32_t b) return 0; } -void helper_memalign(CPUMBState *env, target_ulong addr, - uint32_t dr, uint32_t wr, - uint32_t mask) -{ - if (addr & mask) { - qemu_log_mask(CPU_LOG_INT, - "unaligned access addr=" TARGET_FMT_lx - " mask=%x, wr=%d dr=r%d\n", - addr, mask, wr, dr); - env->sregs[SR_EAR] = addr; - env->sregs[SR_ESR] = ESR_EC_UNALIGNED_DATA | (wr << 10) \ - | (dr & 31) << 5; - if (mask == 3) { - env->sregs[SR_ESR] |= 1 << 11; - } - if (!(env->sregs[SR_MSR] & MSR_EE)) { - return; - } - helper_raise_exception(env, EXCP_HW_EXCP); - } -} - void helper_stackprot(CPUMBState *env, target_ulong addr) { if (addr < env->slr || addr > env->shr) { + CPUState *cs = env_cpu(env); + qemu_log_mask(CPU_LOG_INT, "Stack protector violation at " TARGET_FMT_lx " %x %x\n", addr, env->slr, env->shr); - env->sregs[SR_EAR] = addr; - env->sregs[SR_ESR] = ESR_EC_STACKPROT; - helper_raise_exception(env, EXCP_HW_EXCP); + + env->ear = addr; + env->esr = ESR_EC_STACKPROT; + cs->exception_index = EXCP_HW_EXCP; + cpu_loop_exit_restore(cs, GETPC()); } } @@ -473,32 +398,33 @@ void mb_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr, int mmu_idx, MemTxAttrs attrs, MemTxResult response, uintptr_t retaddr) { - MicroBlazeCPU *cpu; - CPUMBState *env; + MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs); + CPUMBState *env = &cpu->env; + qemu_log_mask(CPU_LOG_INT, "Transaction failed: vaddr 0x%" VADDR_PRIx " physaddr 0x" TARGET_FMT_plx " size %d access type %s\n", addr, physaddr, size, access_type == MMU_INST_FETCH ? "INST_FETCH" : (access_type == MMU_DATA_LOAD ? 
"DATA_LOAD" : "DATA_STORE")); - cpu = MICROBLAZE_CPU(cs); - env = &cpu->env; - cpu_restore_state(cs, retaddr, true); - if (!(env->sregs[SR_MSR] & MSR_EE)) { + if (!(env->msr & MSR_EE)) { return; } - env->sregs[SR_EAR] = addr; if (access_type == MMU_INST_FETCH) { - if ((env->pvr.regs[2] & PVR2_IOPB_BUS_EXC_MASK)) { - env->sregs[SR_ESR] = ESR_EC_INSN_BUS; - helper_raise_exception(env, EXCP_HW_EXCP); + if (!cpu->cfg.iopb_bus_exception) { + return; } + env->esr = ESR_EC_INSN_BUS; } else { - if ((env->pvr.regs[2] & PVR2_DOPB_BUS_EXC_MASK)) { - env->sregs[SR_ESR] = ESR_EC_DATA_BUS; - helper_raise_exception(env, EXCP_HW_EXCP); + if (!cpu->cfg.dopb_bus_exception) { + return; } + env->esr = ESR_EC_DATA_BUS; } + + env->ear = addr; + cs->exception_index = EXCP_HW_EXCP; + cpu_loop_exit_restore(cs, retaddr); } #endif diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c index a96cb21d96..a8a3249185 100644 --- a/target/microblaze/translate.c +++ b/target/microblaze/translate.c @@ -24,7 +24,6 @@ #include "exec/exec-all.h" #include "tcg/tcg-op.h" #include "exec/helper-proto.h" -#include "microblaze-decode.h" #include "exec/cpu_ldst.h" #include "exec/helper-gen.h" #include "exec/translator.h" @@ -33,106 +32,99 @@ #include "trace-tcg.h" #include "exec/log.h" - -#define SIM_COMPAT 0 -#define DISAS_GNU 1 -#define DISAS_MB 1 -#if DISAS_MB && !SIM_COMPAT -# define LOG_DIS(...) qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__) -#else -# define LOG_DIS(...) do { } while (0) -#endif - -#define D(x) - #define EXTRACT_FIELD(src, start, end) \ (((src) >> start) & ((1 << (end - start + 1)) - 1)) /* is_jmp field values */ #define DISAS_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */ #define DISAS_UPDATE DISAS_TARGET_1 /* cpu state was modified dynamically */ -#define DISAS_TB_JUMP DISAS_TARGET_2 /* only pc was modified statically */ -static TCGv_i32 env_debug; static TCGv_i32 cpu_R[32]; -static TCGv_i64 cpu_SR[14]; -static TCGv_i32 env_imm; -static TCGv_i32 env_btaken; -static TCGv_i64 env_btarget; -static TCGv_i32 env_iflags; -static TCGv env_res_addr; -static TCGv_i32 env_res_val; +static TCGv_i32 cpu_pc; +static TCGv_i32 cpu_msr; +static TCGv_i32 cpu_msr_c; +static TCGv_i32 cpu_imm; +static TCGv_i32 cpu_bvalue; +static TCGv_i32 cpu_btarget; +static TCGv_i32 cpu_iflags; +static TCGv cpu_res_addr; +static TCGv_i32 cpu_res_val; #include "exec/gen-icount.h" /* This is the state at translation time. */ typedef struct DisasContext { + DisasContextBase base; MicroBlazeCPU *cpu; - uint32_t pc; - /* Decoder. */ - int type_b; - uint32_t ir; - uint8_t opcode; - uint8_t rd, ra, rb; - uint16_t imm; + /* TCG op of the current insn_start. */ + TCGOp *insn_start; + + TCGv_i32 r0; + bool r0_set; + /* Decoder. */ + uint32_t ext_imm; unsigned int cpustate_changed; - unsigned int delayed_branch; - unsigned int tb_flags, synced_flags; /* tb dependent flags. */ - unsigned int clear_imm; - int is_jmp; - -#define JMP_NOJMP 0 -#define JMP_DIRECT 1 -#define JMP_DIRECT_CC 2 -#define JMP_INDIRECT 3 - unsigned int jmp; - uint32_t jmp_pc; - - int abort_at_next_insn; - struct TranslationBlock *tb; - int singlestep_enabled; + unsigned int tb_flags; + unsigned int tb_flags_to_set; + int mem_index; + + /* Condition under which to jump, including NEVER and ALWAYS. */ + TCGCond jmp_cond; + + /* Immediate branch-taken destination, or -1 for indirect. 
*/ + uint32_t jmp_dest; } DisasContext; -static const char *regnames[] = +static int typeb_imm(DisasContext *dc, int x) { - "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", - "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", - "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", - "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", -}; + if (dc->tb_flags & IMM_FLAG) { + return deposit32(dc->ext_imm, 0, 16, x); + } + return x; +} -static const char *special_regnames[] = -{ - "rpc", "rmsr", "sr2", "rear", "sr4", "resr", "sr6", "rfsr", - "sr8", "sr9", "sr10", "rbtr", "sr12", "redr" -}; +/* Include the auto-generated decoder. */ +#include "decode-insns.c.inc" -static inline void t_sync_flags(DisasContext *dc) +static void t_sync_flags(DisasContext *dc) { /* Synch the tb dependent flags between translator and runtime. */ - if (dc->tb_flags != dc->synced_flags) { - tcg_gen_movi_i32(env_iflags, dc->tb_flags); - dc->synced_flags = dc->tb_flags; + if ((dc->tb_flags ^ dc->base.tb->flags) & IFLAGS_TB_MASK) { + tcg_gen_movi_i32(cpu_iflags, dc->tb_flags & IFLAGS_TB_MASK); } } -static inline void t_gen_raise_exception(DisasContext *dc, uint32_t index) +static void gen_raise_exception(DisasContext *dc, uint32_t index) { TCGv_i32 tmp = tcg_const_i32(index); - t_sync_flags(dc); - tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc); gen_helper_raise_exception(cpu_env, tmp); tcg_temp_free_i32(tmp); - dc->is_jmp = DISAS_UPDATE; + dc->base.is_jmp = DISAS_NORETURN; +} + +static void gen_raise_exception_sync(DisasContext *dc, uint32_t index) +{ + t_sync_flags(dc); + tcg_gen_movi_i32(cpu_pc, dc->base.pc_next); + gen_raise_exception(dc, index); +} + +static void gen_raise_hw_excp(DisasContext *dc, uint32_t esr_ec) +{ + TCGv_i32 tmp = tcg_const_i32(esr_ec); + tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUMBState, esr)); + tcg_temp_free_i32(tmp); + + gen_raise_exception_sync(dc, EXCP_HW_EXCP); } static inline bool use_goto_tb(DisasContext *dc, target_ulong dest) { #ifndef CONFIG_USER_ONLY - return (dc->tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK); + return (dc->base.pc_first & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK); #else return true; #endif @@ -140,42 +132,20 @@ static inline bool use_goto_tb(DisasContext *dc, target_ulong dest) static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest) { - if (use_goto_tb(dc, dest)) { + if (dc->base.singlestep_enabled) { + TCGv_i32 tmp = tcg_const_i32(EXCP_DEBUG); + tcg_gen_movi_i32(cpu_pc, dest); + gen_helper_raise_exception(cpu_env, tmp); + tcg_temp_free_i32(tmp); + } else if (use_goto_tb(dc, dest)) { tcg_gen_goto_tb(n); - tcg_gen_movi_i64(cpu_SR[SR_PC], dest); - tcg_gen_exit_tb(dc->tb, n); + tcg_gen_movi_i32(cpu_pc, dest); + tcg_gen_exit_tb(dc->base.tb, n); } else { - tcg_gen_movi_i64(cpu_SR[SR_PC], dest); + tcg_gen_movi_i32(cpu_pc, dest); tcg_gen_exit_tb(NULL, 0); } -} - -static void read_carry(DisasContext *dc, TCGv_i32 d) -{ - tcg_gen_extrl_i64_i32(d, cpu_SR[SR_MSR]); - tcg_gen_shri_i32(d, d, 31); -} - -/* - * write_carry sets the carry bits in MSR based on bit 0 of v. - * v[31:1] are ignored. - */ -static void write_carry(DisasContext *dc, TCGv_i32 v) -{ - TCGv_i64 t0 = tcg_temp_new_i64(); - tcg_gen_extu_i32_i64(t0, v); - /* Deposit bit 0 into MSR_C and the alias MSR_CC. 
*/ - tcg_gen_deposit_i64(cpu_SR[SR_MSR], cpu_SR[SR_MSR], t0, 2, 1); - tcg_gen_deposit_i64(cpu_SR[SR_MSR], cpu_SR[SR_MSR], t0, 31, 1); - tcg_temp_free_i64(t0); -} - -static void write_carryi(DisasContext *dc, bool carry) -{ - TCGv_i32 t0 = tcg_temp_new_i32(); - tcg_gen_movi_i32(t0, carry); - write_carry(dc, t0); - tcg_temp_free_i32(t0); + dc->base.is_jmp = DISAS_NORETURN; } /* @@ -184,10 +154,9 @@ static void write_carryi(DisasContext *dc, bool carry) */ static bool trap_illegal(DisasContext *dc, bool cond) { - if (cond && (dc->tb_flags & MSR_EE_FLAG) + if (cond && (dc->tb_flags & MSR_EE) && dc->cpu->cfg.illegal_opcode_exception) { - tcg_gen_movi_i64(cpu_SR[SR_ESR], ESR_EC_ILLEGAL_OP); - t_gen_raise_exception(dc, EXCP_HW_EXCP); + gen_raise_hw_excp(dc, ESR_EC_ILLEGAL_OP); } return cond; } @@ -198,759 +167,717 @@ static bool trap_illegal(DisasContext *dc, bool cond) */ static bool trap_userspace(DisasContext *dc, bool cond) { - int mem_index = cpu_mmu_index(&dc->cpu->env, false); - bool cond_user = cond && mem_index == MMU_USER_IDX; + bool cond_user = cond && dc->mem_index == MMU_USER_IDX; - if (cond_user && (dc->tb_flags & MSR_EE_FLAG)) { - tcg_gen_movi_i64(cpu_SR[SR_ESR], ESR_EC_PRIVINSN); - t_gen_raise_exception(dc, EXCP_HW_EXCP); + if (cond_user && (dc->tb_flags & MSR_EE)) { + gen_raise_hw_excp(dc, ESR_EC_PRIVINSN); } return cond_user; } -/* True if ALU operand b is a small immediate that may deserve - faster treatment. */ -static inline int dec_alu_op_b_is_small_imm(DisasContext *dc) +static TCGv_i32 reg_for_read(DisasContext *dc, int reg) { - /* Immediate insn without the imm prefix ? */ - return dc->type_b && !(dc->tb_flags & IMM_FLAG); + if (likely(reg != 0)) { + return cpu_R[reg]; + } + if (!dc->r0_set) { + if (dc->r0 == NULL) { + dc->r0 = tcg_temp_new_i32(); + } + tcg_gen_movi_i32(dc->r0, 0); + dc->r0_set = true; + } + return dc->r0; } -static inline TCGv_i32 *dec_alu_op_b(DisasContext *dc) +static TCGv_i32 reg_for_write(DisasContext *dc, int reg) { - if (dc->type_b) { - if (dc->tb_flags & IMM_FLAG) - tcg_gen_ori_i32(env_imm, env_imm, dc->imm); - else - tcg_gen_movi_i32(env_imm, (int32_t)((int16_t)dc->imm)); - return &env_imm; - } else - return &cpu_R[dc->rb]; + if (likely(reg != 0)) { + return cpu_R[reg]; + } + if (dc->r0 == NULL) { + dc->r0 = tcg_temp_new_i32(); + } + return dc->r0; } -static void dec_add(DisasContext *dc) +static bool do_typea(DisasContext *dc, arg_typea *arg, bool side_effects, + void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32)) { - unsigned int k, c; - TCGv_i32 cf; + TCGv_i32 rd, ra, rb; - k = dc->opcode & 4; - c = dc->opcode & 2; - - LOG_DIS("add%s%s%s r%d r%d r%d\n", - dc->type_b ? "i" : "", k ? "k" : "", c ? "c" : "", - dc->rd, dc->ra, dc->rb); + if (arg->rd == 0 && !side_effects) { + return true; + } - /* Take care of the easy cases first. */ - if (k) { - /* k - keep carry, no need to update MSR. */ - /* If rd == r0, it's a nop. */ - if (dc->rd) { - tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc))); + rd = reg_for_write(dc, arg->rd); + ra = reg_for_read(dc, arg->ra); + rb = reg_for_read(dc, arg->rb); + fn(rd, ra, rb); + return true; +} - if (c) { - /* c - Add carry into the result. 
*/ - cf = tcg_temp_new_i32(); +static bool do_typea0(DisasContext *dc, arg_typea0 *arg, bool side_effects, + void (*fn)(TCGv_i32, TCGv_i32)) +{ + TCGv_i32 rd, ra; - read_carry(dc, cf); - tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->rd], cf); - tcg_temp_free_i32(cf); - } - } - return; + if (arg->rd == 0 && !side_effects) { + return true; } - /* From now on, we can assume k is zero. So we need to update MSR. */ - /* Extract carry. */ - cf = tcg_temp_new_i32(); - if (c) { - read_carry(dc, cf); - } else { - tcg_gen_movi_i32(cf, 0); + rd = reg_for_write(dc, arg->rd); + ra = reg_for_read(dc, arg->ra); + fn(rd, ra); + return true; +} + +static bool do_typeb_imm(DisasContext *dc, arg_typeb *arg, bool side_effects, + void (*fni)(TCGv_i32, TCGv_i32, int32_t)) +{ + TCGv_i32 rd, ra; + + if (arg->rd == 0 && !side_effects) { + return true; } - if (dc->rd) { - TCGv_i32 ncf = tcg_temp_new_i32(); - gen_helper_carry(ncf, cpu_R[dc->ra], *(dec_alu_op_b(dc)), cf); - tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc))); - tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->rd], cf); - write_carry(dc, ncf); - tcg_temp_free_i32(ncf); - } else { - gen_helper_carry(cf, cpu_R[dc->ra], *(dec_alu_op_b(dc)), cf); - write_carry(dc, cf); + rd = reg_for_write(dc, arg->rd); + ra = reg_for_read(dc, arg->ra); + fni(rd, ra, arg->imm); + return true; +} + +static bool do_typeb_val(DisasContext *dc, arg_typeb *arg, bool side_effects, + void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32)) +{ + TCGv_i32 rd, ra, imm; + + if (arg->rd == 0 && !side_effects) { + return true; } - tcg_temp_free_i32(cf); + + rd = reg_for_write(dc, arg->rd); + ra = reg_for_read(dc, arg->ra); + imm = tcg_const_i32(arg->imm); + + fn(rd, ra, imm); + + tcg_temp_free_i32(imm); + return true; } -static void dec_sub(DisasContext *dc) +#define DO_TYPEA(NAME, SE, FN) \ + static bool trans_##NAME(DisasContext *dc, arg_typea *a) \ + { return do_typea(dc, a, SE, FN); } + +#define DO_TYPEA_CFG(NAME, CFG, SE, FN) \ + static bool trans_##NAME(DisasContext *dc, arg_typea *a) \ + { return dc->cpu->cfg.CFG && do_typea(dc, a, SE, FN); } + +#define DO_TYPEA0(NAME, SE, FN) \ + static bool trans_##NAME(DisasContext *dc, arg_typea0 *a) \ + { return do_typea0(dc, a, SE, FN); } + +#define DO_TYPEA0_CFG(NAME, CFG, SE, FN) \ + static bool trans_##NAME(DisasContext *dc, arg_typea0 *a) \ + { return dc->cpu->cfg.CFG && do_typea0(dc, a, SE, FN); } + +#define DO_TYPEBI(NAME, SE, FNI) \ + static bool trans_##NAME(DisasContext *dc, arg_typeb *a) \ + { return do_typeb_imm(dc, a, SE, FNI); } + +#define DO_TYPEBI_CFG(NAME, CFG, SE, FNI) \ + static bool trans_##NAME(DisasContext *dc, arg_typeb *a) \ + { return dc->cpu->cfg.CFG && do_typeb_imm(dc, a, SE, FNI); } + +#define DO_TYPEBV(NAME, SE, FN) \ + static bool trans_##NAME(DisasContext *dc, arg_typeb *a) \ + { return do_typeb_val(dc, a, SE, FN); } + +#define ENV_WRAPPER2(NAME, HELPER) \ + static void NAME(TCGv_i32 out, TCGv_i32 ina) \ + { HELPER(out, cpu_env, ina); } + +#define ENV_WRAPPER3(NAME, HELPER) \ + static void NAME(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) \ + { HELPER(out, cpu_env, ina, inb); } + +/* No input carry, but output carry. */ +static void gen_add(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) { - unsigned int u, cmp, k, c; - TCGv_i32 cf, na; + TCGv_i32 zero = tcg_const_i32(0); - u = dc->imm & 2; - k = dc->opcode & 4; - c = dc->opcode & 2; - cmp = (dc->imm & 1) && (!dc->type_b) && k; + tcg_gen_add2_i32(out, cpu_msr_c, ina, zero, inb, zero); - if (cmp) { - LOG_DIS("cmp%s r%d, r%d ir=%x\n", u ? 
"u" : "", dc->rd, dc->ra, dc->ir); - if (dc->rd) { - if (u) - gen_helper_cmpu(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]); - else - gen_helper_cmp(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]); - } - return; - } + tcg_temp_free_i32(zero); +} - LOG_DIS("sub%s%s r%d, r%d r%d\n", - k ? "k" : "", c ? "c" : "", dc->rd, dc->ra, dc->rb); +/* Input and output carry. */ +static void gen_addc(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) +{ + TCGv_i32 zero = tcg_const_i32(0); + TCGv_i32 tmp = tcg_temp_new_i32(); - /* Take care of the easy cases first. */ - if (k) { - /* k - keep carry, no need to update MSR. */ - /* If rd == r0, it's a nop. */ - if (dc->rd) { - tcg_gen_sub_i32(cpu_R[dc->rd], *(dec_alu_op_b(dc)), cpu_R[dc->ra]); + tcg_gen_add2_i32(tmp, cpu_msr_c, ina, zero, cpu_msr_c, zero); + tcg_gen_add2_i32(out, cpu_msr_c, tmp, cpu_msr_c, inb, zero); - if (c) { - /* c - Add carry into the result. */ - cf = tcg_temp_new_i32(); + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(zero); +} - read_carry(dc, cf); - tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->rd], cf); - tcg_temp_free_i32(cf); - } - } - return; - } +/* Input carry, but no output carry. */ +static void gen_addkc(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) +{ + tcg_gen_add_i32(out, ina, inb); + tcg_gen_add_i32(out, out, cpu_msr_c); +} - /* From now on, we can assume k is zero. So we need to update MSR. */ - /* Extract carry. And complement a into na. */ - cf = tcg_temp_new_i32(); - na = tcg_temp_new_i32(); - if (c) { - read_carry(dc, cf); - } else { - tcg_gen_movi_i32(cf, 1); - } +DO_TYPEA(add, true, gen_add) +DO_TYPEA(addc, true, gen_addc) +DO_TYPEA(addk, false, tcg_gen_add_i32) +DO_TYPEA(addkc, true, gen_addkc) - /* d = b + ~a + c. carry defaults to 1. */ - tcg_gen_not_i32(na, cpu_R[dc->ra]); +DO_TYPEBV(addi, true, gen_add) +DO_TYPEBV(addic, true, gen_addc) +DO_TYPEBI(addik, false, tcg_gen_addi_i32) +DO_TYPEBV(addikc, true, gen_addkc) - if (dc->rd) { - TCGv_i32 ncf = tcg_temp_new_i32(); - gen_helper_carry(ncf, na, *(dec_alu_op_b(dc)), cf); - tcg_gen_add_i32(cpu_R[dc->rd], na, *(dec_alu_op_b(dc))); - tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->rd], cf); - write_carry(dc, ncf); - tcg_temp_free_i32(ncf); - } else { - gen_helper_carry(cf, na, *(dec_alu_op_b(dc)), cf); - write_carry(dc, cf); - } - tcg_temp_free_i32(cf); - tcg_temp_free_i32(na); +static void gen_andni(TCGv_i32 out, TCGv_i32 ina, int32_t imm) +{ + tcg_gen_andi_i32(out, ina, ~imm); } -static void dec_pattern(DisasContext *dc) +DO_TYPEA(and, false, tcg_gen_and_i32) +DO_TYPEBI(andi, false, tcg_gen_andi_i32) +DO_TYPEA(andn, false, tcg_gen_andc_i32) +DO_TYPEBI(andni, false, gen_andni) + +static void gen_bsra(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) { - unsigned int mode; + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_andi_i32(tmp, inb, 31); + tcg_gen_sar_i32(out, ina, tmp); + tcg_temp_free_i32(tmp); +} - if (trap_illegal(dc, !dc->cpu->cfg.use_pcmp_instr)) { - return; - } +static void gen_bsrl(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_andi_i32(tmp, inb, 31); + tcg_gen_shr_i32(out, ina, tmp); + tcg_temp_free_i32(tmp); +} - mode = dc->opcode & 3; - switch (mode) { - case 0: - /* pcmpbf. 
*/ - LOG_DIS("pcmpbf r%d r%d r%d\n", dc->rd, dc->ra, dc->rb); - if (dc->rd) - gen_helper_pcmpbf(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]); - break; - case 2: - LOG_DIS("pcmpeq r%d r%d r%d\n", dc->rd, dc->ra, dc->rb); - if (dc->rd) { - tcg_gen_setcond_i32(TCG_COND_EQ, cpu_R[dc->rd], - cpu_R[dc->ra], cpu_R[dc->rb]); - } - break; - case 3: - LOG_DIS("pcmpne r%d r%d r%d\n", dc->rd, dc->ra, dc->rb); - if (dc->rd) { - tcg_gen_setcond_i32(TCG_COND_NE, cpu_R[dc->rd], - cpu_R[dc->ra], cpu_R[dc->rb]); - } - break; - default: - cpu_abort(CPU(dc->cpu), - "unsupported pattern insn opcode=%x\n", dc->opcode); - break; - } +static void gen_bsll(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_andi_i32(tmp, inb, 31); + tcg_gen_shl_i32(out, ina, tmp); + tcg_temp_free_i32(tmp); } -static void dec_and(DisasContext *dc) +static void gen_bsefi(TCGv_i32 out, TCGv_i32 ina, int32_t imm) { - unsigned int not; + /* Note that decodetree has extracted and reassembled imm_w/imm_s. */ + int imm_w = extract32(imm, 5, 5); + int imm_s = extract32(imm, 0, 5); + + if (imm_w + imm_s > 32 || imm_w == 0) { + /* These inputs have an undefined behavior. */ + qemu_log_mask(LOG_GUEST_ERROR, "bsefi: Bad input w=%d s=%d\n", + imm_w, imm_s); + } else { + tcg_gen_extract_i32(out, ina, imm_s, imm_w); + } +} - if (!dc->type_b && (dc->imm & (1 << 10))) { - dec_pattern(dc); - return; +static void gen_bsifi(TCGv_i32 out, TCGv_i32 ina, int32_t imm) +{ + /* Note that decodetree has extracted and reassembled imm_w/imm_s. */ + int imm_w = extract32(imm, 5, 5); + int imm_s = extract32(imm, 0, 5); + int width = imm_w - imm_s + 1; + + if (imm_w < imm_s) { + /* These inputs have an undefined behavior. */ + qemu_log_mask(LOG_GUEST_ERROR, "bsifi: Bad input w=%d s=%d\n", + imm_w, imm_s); + } else { + tcg_gen_deposit_i32(out, out, ina, imm_s, width); } +} - not = dc->opcode & (1 << 1); - LOG_DIS("and%s\n", not ? 
"n" : ""); +DO_TYPEA_CFG(bsra, use_barrel, false, gen_bsra) +DO_TYPEA_CFG(bsrl, use_barrel, false, gen_bsrl) +DO_TYPEA_CFG(bsll, use_barrel, false, gen_bsll) - if (!dc->rd) - return; +DO_TYPEBI_CFG(bsrai, use_barrel, false, tcg_gen_sari_i32) +DO_TYPEBI_CFG(bsrli, use_barrel, false, tcg_gen_shri_i32) +DO_TYPEBI_CFG(bslli, use_barrel, false, tcg_gen_shli_i32) + +DO_TYPEBI_CFG(bsefi, use_barrel, false, gen_bsefi) +DO_TYPEBI_CFG(bsifi, use_barrel, false, gen_bsifi) - if (not) { - tcg_gen_andc_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc))); - } else - tcg_gen_and_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc))); +static void gen_clz(TCGv_i32 out, TCGv_i32 ina) +{ + tcg_gen_clzi_i32(out, ina, 32); } -static void dec_or(DisasContext *dc) +DO_TYPEA0_CFG(clz, use_pcmp_instr, false, gen_clz) + +static void gen_cmp(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) { - if (!dc->type_b && (dc->imm & (1 << 10))) { - dec_pattern(dc); - return; - } + TCGv_i32 lt = tcg_temp_new_i32(); - LOG_DIS("or r%d r%d r%d imm=%x\n", dc->rd, dc->ra, dc->rb, dc->imm); - if (dc->rd) - tcg_gen_or_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc))); + tcg_gen_setcond_i32(TCG_COND_LT, lt, inb, ina); + tcg_gen_sub_i32(out, inb, ina); + tcg_gen_deposit_i32(out, out, lt, 31, 1); + tcg_temp_free_i32(lt); } -static void dec_xor(DisasContext *dc) +static void gen_cmpu(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) { - if (!dc->type_b && (dc->imm & (1 << 10))) { - dec_pattern(dc); - return; - } + TCGv_i32 lt = tcg_temp_new_i32(); - LOG_DIS("xor r%d\n", dc->rd); - if (dc->rd) - tcg_gen_xor_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc))); + tcg_gen_setcond_i32(TCG_COND_LTU, lt, inb, ina); + tcg_gen_sub_i32(out, inb, ina); + tcg_gen_deposit_i32(out, out, lt, 31, 1); + tcg_temp_free_i32(lt); } -static inline void msr_read(DisasContext *dc, TCGv_i32 d) +DO_TYPEA(cmp, false, gen_cmp) +DO_TYPEA(cmpu, false, gen_cmpu) + +ENV_WRAPPER3(gen_fadd, gen_helper_fadd) +ENV_WRAPPER3(gen_frsub, gen_helper_frsub) +ENV_WRAPPER3(gen_fmul, gen_helper_fmul) +ENV_WRAPPER3(gen_fdiv, gen_helper_fdiv) +ENV_WRAPPER3(gen_fcmp_un, gen_helper_fcmp_un) +ENV_WRAPPER3(gen_fcmp_lt, gen_helper_fcmp_lt) +ENV_WRAPPER3(gen_fcmp_eq, gen_helper_fcmp_eq) +ENV_WRAPPER3(gen_fcmp_le, gen_helper_fcmp_le) +ENV_WRAPPER3(gen_fcmp_gt, gen_helper_fcmp_gt) +ENV_WRAPPER3(gen_fcmp_ne, gen_helper_fcmp_ne) +ENV_WRAPPER3(gen_fcmp_ge, gen_helper_fcmp_ge) + +DO_TYPEA_CFG(fadd, use_fpu, true, gen_fadd) +DO_TYPEA_CFG(frsub, use_fpu, true, gen_frsub) +DO_TYPEA_CFG(fmul, use_fpu, true, gen_fmul) +DO_TYPEA_CFG(fdiv, use_fpu, true, gen_fdiv) +DO_TYPEA_CFG(fcmp_un, use_fpu, true, gen_fcmp_un) +DO_TYPEA_CFG(fcmp_lt, use_fpu, true, gen_fcmp_lt) +DO_TYPEA_CFG(fcmp_eq, use_fpu, true, gen_fcmp_eq) +DO_TYPEA_CFG(fcmp_le, use_fpu, true, gen_fcmp_le) +DO_TYPEA_CFG(fcmp_gt, use_fpu, true, gen_fcmp_gt) +DO_TYPEA_CFG(fcmp_ne, use_fpu, true, gen_fcmp_ne) +DO_TYPEA_CFG(fcmp_ge, use_fpu, true, gen_fcmp_ge) + +ENV_WRAPPER2(gen_flt, gen_helper_flt) +ENV_WRAPPER2(gen_fint, gen_helper_fint) +ENV_WRAPPER2(gen_fsqrt, gen_helper_fsqrt) + +DO_TYPEA0_CFG(flt, use_fpu >= 2, true, gen_flt) +DO_TYPEA0_CFG(fint, use_fpu >= 2, true, gen_fint) +DO_TYPEA0_CFG(fsqrt, use_fpu >= 2, true, gen_fsqrt) + +/* Does not use ENV_WRAPPER3, because arguments are swapped as well. 
*/ +static void gen_idiv(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) { - tcg_gen_extrl_i64_i32(d, cpu_SR[SR_MSR]); + gen_helper_divs(out, cpu_env, inb, ina); } -static inline void msr_write(DisasContext *dc, TCGv_i32 v) +static void gen_idivu(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) { - TCGv_i64 t; + gen_helper_divu(out, cpu_env, inb, ina); +} - t = tcg_temp_new_i64(); - dc->cpustate_changed = 1; - /* PVR bit is not writable. */ - tcg_gen_extu_i32_i64(t, v); - tcg_gen_andi_i64(t, t, ~MSR_PVR); - tcg_gen_andi_i64(cpu_SR[SR_MSR], cpu_SR[SR_MSR], MSR_PVR); - tcg_gen_or_i64(cpu_SR[SR_MSR], cpu_SR[SR_MSR], t); - tcg_temp_free_i64(t); -} - -static void dec_msr(DisasContext *dc) -{ - CPUState *cs = CPU(dc->cpu); - TCGv_i32 t0, t1; - unsigned int sr, rn; - bool to, clrset, extended = false; - - sr = extract32(dc->imm, 0, 14); - to = extract32(dc->imm, 14, 1); - clrset = extract32(dc->imm, 15, 1) == 0; - dc->type_b = 1; - if (to) { - dc->cpustate_changed = 1; - } +DO_TYPEA_CFG(idiv, use_div, true, gen_idiv) +DO_TYPEA_CFG(idivu, use_div, true, gen_idivu) - /* Extended MSRs are only available if addr_size > 32. */ - if (dc->cpu->cfg.addr_size > 32) { - /* The E-bit is encoded differently for To/From MSR. */ - static const unsigned int e_bit[] = { 19, 24 }; +static bool trans_imm(DisasContext *dc, arg_imm *arg) +{ + dc->ext_imm = arg->imm << 16; + tcg_gen_movi_i32(cpu_imm, dc->ext_imm); + dc->tb_flags_to_set = IMM_FLAG; + return true; +} - extended = extract32(dc->imm, e_bit[to], 1); - } +static void gen_mulh(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_muls2_i32(tmp, out, ina, inb); + tcg_temp_free_i32(tmp); +} - /* msrclr and msrset. */ - if (clrset) { - bool clr = extract32(dc->ir, 16, 1); +static void gen_mulhu(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_mulu2_i32(tmp, out, ina, inb); + tcg_temp_free_i32(tmp); +} - LOG_DIS("msr%s r%d imm=%x\n", clr ? "clr" : "set", - dc->rd, dc->imm); +static void gen_mulhsu(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_mulsu2_i32(tmp, out, ina, inb); + tcg_temp_free_i32(tmp); +} - if (!dc->cpu->cfg.use_msr_instr) { - /* nop??? */ - return; - } +DO_TYPEA_CFG(mul, use_hw_mul, false, tcg_gen_mul_i32) +DO_TYPEA_CFG(mulh, use_hw_mul >= 2, false, gen_mulh) +DO_TYPEA_CFG(mulhu, use_hw_mul >= 2, false, gen_mulhu) +DO_TYPEA_CFG(mulhsu, use_hw_mul >= 2, false, gen_mulhsu) +DO_TYPEBI_CFG(muli, use_hw_mul, false, tcg_gen_muli_i32) - if (trap_userspace(dc, dc->imm != 4 && dc->imm != 0)) { - return; - } +DO_TYPEA(or, false, tcg_gen_or_i32) +DO_TYPEBI(ori, false, tcg_gen_ori_i32) - if (dc->rd) - msr_read(dc, cpu_R[dc->rd]); - - t0 = tcg_temp_new_i32(); - t1 = tcg_temp_new_i32(); - msr_read(dc, t0); - tcg_gen_mov_i32(t1, *(dec_alu_op_b(dc))); - - if (clr) { - tcg_gen_not_i32(t1, t1); - tcg_gen_and_i32(t0, t0, t1); - } else - tcg_gen_or_i32(t0, t0, t1); - msr_write(dc, t0); - tcg_temp_free_i32(t0); - tcg_temp_free_i32(t1); - tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc + 4); - dc->is_jmp = DISAS_UPDATE; - return; - } +static void gen_pcmpeq(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) +{ + tcg_gen_setcond_i32(TCG_COND_EQ, out, ina, inb); +} - if (trap_userspace(dc, to)) { - return; - } +static void gen_pcmpne(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) +{ + tcg_gen_setcond_i32(TCG_COND_NE, out, ina, inb); +} -#if !defined(CONFIG_USER_ONLY) - /* Catch read/writes to the mmu block. 
*/ - if ((sr & ~0xff) == 0x1000) { - TCGv_i32 tmp_ext = tcg_const_i32(extended); - TCGv_i32 tmp_sr; +DO_TYPEA_CFG(pcmpbf, use_pcmp_instr, false, gen_helper_pcmpbf) +DO_TYPEA_CFG(pcmpeq, use_pcmp_instr, false, gen_pcmpeq) +DO_TYPEA_CFG(pcmpne, use_pcmp_instr, false, gen_pcmpne) - sr &= 7; - tmp_sr = tcg_const_i32(sr); - LOG_DIS("m%ss sr%d r%d imm=%x\n", to ? "t" : "f", sr, dc->ra, dc->imm); - if (to) { - gen_helper_mmu_write(cpu_env, tmp_ext, tmp_sr, cpu_R[dc->ra]); - } else { - gen_helper_mmu_read(cpu_R[dc->rd], cpu_env, tmp_ext, tmp_sr); - } - tcg_temp_free_i32(tmp_sr); - tcg_temp_free_i32(tmp_ext); - return; - } -#endif +/* No input carry, but output carry. */ +static void gen_rsub(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) +{ + tcg_gen_setcond_i32(TCG_COND_GEU, cpu_msr_c, inb, ina); + tcg_gen_sub_i32(out, inb, ina); +} - if (to) { - LOG_DIS("m%ss sr%x r%d imm=%x\n", to ? "t" : "f", sr, dc->ra, dc->imm); - switch (sr) { - case 0: - break; - case 1: - msr_write(dc, cpu_R[dc->ra]); - break; - case SR_EAR: - case SR_ESR: - case SR_FSR: - tcg_gen_extu_i32_i64(cpu_SR[sr], cpu_R[dc->ra]); - break; - case 0x800: - tcg_gen_st_i32(cpu_R[dc->ra], - cpu_env, offsetof(CPUMBState, slr)); - break; - case 0x802: - tcg_gen_st_i32(cpu_R[dc->ra], - cpu_env, offsetof(CPUMBState, shr)); - break; - default: - cpu_abort(CPU(dc->cpu), "unknown mts reg %x\n", sr); - break; - } - } else { - LOG_DIS("m%ss r%d sr%x imm=%x\n", to ? "t" : "f", dc->rd, sr, dc->imm); - - switch (sr) { - case 0: - tcg_gen_movi_i32(cpu_R[dc->rd], dc->pc); - break; - case 1: - msr_read(dc, cpu_R[dc->rd]); - break; - case SR_EAR: - if (extended) { - tcg_gen_extrh_i64_i32(cpu_R[dc->rd], cpu_SR[sr]); - break; - } - case SR_ESR: - case SR_FSR: - case SR_BTR: - case SR_EDR: - tcg_gen_extrl_i64_i32(cpu_R[dc->rd], cpu_SR[sr]); - break; - case 0x800: - tcg_gen_ld_i32(cpu_R[dc->rd], - cpu_env, offsetof(CPUMBState, slr)); - break; - case 0x802: - tcg_gen_ld_i32(cpu_R[dc->rd], - cpu_env, offsetof(CPUMBState, shr)); - break; - case 0x2000 ... 0x200c: - rn = sr & 0xf; - tcg_gen_ld_i32(cpu_R[dc->rd], - cpu_env, offsetof(CPUMBState, pvr.regs[rn])); - break; - default: - cpu_abort(cs, "unknown mfs reg %x\n", sr); - break; - } - } +/* Input and output carry. */ +static void gen_rsubc(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) +{ + TCGv_i32 zero = tcg_const_i32(0); + TCGv_i32 tmp = tcg_temp_new_i32(); - if (dc->rd == 0) { - tcg_gen_movi_i32(cpu_R[0], 0); - } + tcg_gen_not_i32(tmp, ina); + tcg_gen_add2_i32(tmp, cpu_msr_c, tmp, zero, cpu_msr_c, zero); + tcg_gen_add2_i32(out, cpu_msr_c, tmp, cpu_msr_c, inb, zero); + + tcg_temp_free_i32(zero); + tcg_temp_free_i32(tmp); } -/* Multiplier unit. */ -static void dec_mul(DisasContext *dc) +/* No input or output carry. */ +static void gen_rsubk(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) { - TCGv_i32 tmp; - unsigned int subcode; + tcg_gen_sub_i32(out, inb, ina); +} - if (trap_illegal(dc, !dc->cpu->cfg.use_hw_mul)) { - return; - } +/* Input carry, no output carry. */ +static void gen_rsubkc(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) +{ + TCGv_i32 nota = tcg_temp_new_i32(); - subcode = dc->imm & 3; + tcg_gen_not_i32(nota, ina); + tcg_gen_add_i32(out, inb, nota); + tcg_gen_add_i32(out, out, cpu_msr_c); - if (dc->type_b) { - LOG_DIS("muli r%d r%d %x\n", dc->rd, dc->ra, dc->imm); - tcg_gen_mul_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc))); - return; - } + tcg_temp_free_i32(nota); +} - /* mulh, mulhsu and mulhu are not available if C_USE_HW_MUL is < 2. 
*/ - if (subcode >= 1 && subcode <= 3 && dc->cpu->cfg.use_hw_mul < 2) { - /* nop??? */ - } +DO_TYPEA(rsub, true, gen_rsub) +DO_TYPEA(rsubc, true, gen_rsubc) +DO_TYPEA(rsubk, false, gen_rsubk) +DO_TYPEA(rsubkc, true, gen_rsubkc) + +DO_TYPEBV(rsubi, true, gen_rsub) +DO_TYPEBV(rsubic, true, gen_rsubc) +DO_TYPEBV(rsubik, false, gen_rsubk) +DO_TYPEBV(rsubikc, true, gen_rsubkc) + +DO_TYPEA0(sext8, false, tcg_gen_ext8s_i32) +DO_TYPEA0(sext16, false, tcg_gen_ext16s_i32) + +static void gen_sra(TCGv_i32 out, TCGv_i32 ina) +{ + tcg_gen_andi_i32(cpu_msr_c, ina, 1); + tcg_gen_sari_i32(out, ina, 1); +} + +static void gen_src(TCGv_i32 out, TCGv_i32 ina) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_mov_i32(tmp, cpu_msr_c); + tcg_gen_andi_i32(cpu_msr_c, ina, 1); + tcg_gen_extract2_i32(out, ina, tmp, 1); - tmp = tcg_temp_new_i32(); - switch (subcode) { - case 0: - LOG_DIS("mul r%d r%d r%d\n", dc->rd, dc->ra, dc->rb); - tcg_gen_mul_i32(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]); - break; - case 1: - LOG_DIS("mulh r%d r%d r%d\n", dc->rd, dc->ra, dc->rb); - tcg_gen_muls2_i32(tmp, cpu_R[dc->rd], - cpu_R[dc->ra], cpu_R[dc->rb]); - break; - case 2: - LOG_DIS("mulhsu r%d r%d r%d\n", dc->rd, dc->ra, dc->rb); - tcg_gen_mulsu2_i32(tmp, cpu_R[dc->rd], - cpu_R[dc->ra], cpu_R[dc->rb]); - break; - case 3: - LOG_DIS("mulhu r%d r%d r%d\n", dc->rd, dc->ra, dc->rb); - tcg_gen_mulu2_i32(tmp, cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]); - break; - default: - cpu_abort(CPU(dc->cpu), "unknown MUL insn %x\n", subcode); - break; - } tcg_temp_free_i32(tmp); } -/* Div unit. */ -static void dec_div(DisasContext *dc) +static void gen_srl(TCGv_i32 out, TCGv_i32 ina) { - unsigned int u; + tcg_gen_andi_i32(cpu_msr_c, ina, 1); + tcg_gen_shri_i32(out, ina, 1); +} - u = dc->imm & 2; - LOG_DIS("div\n"); +DO_TYPEA0(sra, false, gen_sra) +DO_TYPEA0(src, false, gen_src) +DO_TYPEA0(srl, false, gen_srl) - if (trap_illegal(dc, !dc->cpu->cfg.use_div)) { - return; - } +static void gen_swaph(TCGv_i32 out, TCGv_i32 ina) +{ + tcg_gen_rotri_i32(out, ina, 16); +} + +DO_TYPEA0(swapb, false, tcg_gen_bswap32_i32) +DO_TYPEA0(swaph, false, gen_swaph) - if (u) - gen_helper_divu(cpu_R[dc->rd], cpu_env, *(dec_alu_op_b(dc)), - cpu_R[dc->ra]); - else - gen_helper_divs(cpu_R[dc->rd], cpu_env, *(dec_alu_op_b(dc)), - cpu_R[dc->ra]); - if (!dc->rd) - tcg_gen_movi_i32(cpu_R[dc->rd], 0); +static bool trans_wdic(DisasContext *dc, arg_wdic *a) +{ + /* Cache operations are nops: only check for supervisor mode. */ + trap_userspace(dc, true); + return true; } -static void dec_barrel(DisasContext *dc) +DO_TYPEA(xor, false, tcg_gen_xor_i32) +DO_TYPEBI(xori, false, tcg_gen_xori_i32) + +static TCGv compute_ldst_addr_typea(DisasContext *dc, int ra, int rb) { - TCGv_i32 t0; - unsigned int imm_w, imm_s; - bool s, t, e = false, i = false; + TCGv ret = tcg_temp_new(); - if (trap_illegal(dc, !dc->cpu->cfg.use_barrel)) { - return; + /* If any of the regs is r0, set t to the value of the other reg. */ + if (ra && rb) { + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_add_i32(tmp, cpu_R[ra], cpu_R[rb]); + tcg_gen_extu_i32_tl(ret, tmp); + tcg_temp_free_i32(tmp); + } else if (ra) { + tcg_gen_extu_i32_tl(ret, cpu_R[ra]); + } else if (rb) { + tcg_gen_extu_i32_tl(ret, cpu_R[rb]); + } else { + tcg_gen_movi_tl(ret, 0); } - if (dc->type_b) { - /* Insert and extract are only available in immediate mode. 
*/ - i = extract32(dc->imm, 15, 1); - e = extract32(dc->imm, 14, 1); + if ((ra == 1 || rb == 1) && dc->cpu->cfg.stackprot) { + gen_helper_stackprot(cpu_env, ret); } - s = extract32(dc->imm, 10, 1); - t = extract32(dc->imm, 9, 1); - imm_w = extract32(dc->imm, 6, 5); - imm_s = extract32(dc->imm, 0, 5); + return ret; +} - LOG_DIS("bs%s%s%s r%d r%d r%d\n", - e ? "e" : "", - s ? "l" : "r", t ? "a" : "l", dc->rd, dc->ra, dc->rb); +static TCGv compute_ldst_addr_typeb(DisasContext *dc, int ra, int imm) +{ + TCGv ret = tcg_temp_new(); - if (e) { - if (imm_w + imm_s > 32 || imm_w == 0) { - /* These inputs have an undefined behavior. */ - qemu_log_mask(LOG_GUEST_ERROR, "bsefi: Bad input w=%d s=%d\n", - imm_w, imm_s); - } else { - tcg_gen_extract_i32(cpu_R[dc->rd], cpu_R[dc->ra], imm_s, imm_w); - } - } else if (i) { - int width = imm_w - imm_s + 1; + /* If any of the regs is r0, set t to the value of the other reg. */ + if (ra) { + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_addi_i32(tmp, cpu_R[ra], imm); + tcg_gen_extu_i32_tl(ret, tmp); + tcg_temp_free_i32(tmp); + } else { + tcg_gen_movi_tl(ret, (uint32_t)imm); + } - if (imm_w < imm_s) { - /* These inputs have an undefined behavior. */ - qemu_log_mask(LOG_GUEST_ERROR, "bsifi: Bad input w=%d s=%d\n", - imm_w, imm_s); + if (ra == 1 && dc->cpu->cfg.stackprot) { + gen_helper_stackprot(cpu_env, ret); + } + return ret; +} + +#ifndef CONFIG_USER_ONLY +static TCGv compute_ldst_addr_ea(DisasContext *dc, int ra, int rb) +{ + int addr_size = dc->cpu->cfg.addr_size; + TCGv ret = tcg_temp_new(); + + if (addr_size == 32 || ra == 0) { + if (rb) { + tcg_gen_extu_i32_tl(ret, cpu_R[rb]); } else { - tcg_gen_deposit_i32(cpu_R[dc->rd], cpu_R[dc->rd], cpu_R[dc->ra], - imm_s, width); + tcg_gen_movi_tl(ret, 0); } } else { - t0 = tcg_temp_new_i32(); - - tcg_gen_mov_i32(t0, *(dec_alu_op_b(dc))); - tcg_gen_andi_i32(t0, t0, 31); - - if (s) { - tcg_gen_shl_i32(cpu_R[dc->rd], cpu_R[dc->ra], t0); + if (rb) { + tcg_gen_concat_i32_i64(ret, cpu_R[rb], cpu_R[ra]); } else { - if (t) { - tcg_gen_sar_i32(cpu_R[dc->rd], cpu_R[dc->ra], t0); - } else { - tcg_gen_shr_i32(cpu_R[dc->rd], cpu_R[dc->ra], t0); - } + tcg_gen_extu_i32_tl(ret, cpu_R[ra]); + tcg_gen_shli_tl(ret, ret, 32); + } + if (addr_size < 64) { + /* Mask off out of range bits. */ + tcg_gen_andi_i64(ret, ret, MAKE_64BIT_MASK(0, addr_size)); } - tcg_temp_free_i32(t0); } + return ret; } +#endif -static void dec_bit(DisasContext *dc) +static void record_unaligned_ess(DisasContext *dc, int rd, + MemOp size, bool store) { - CPUState *cs = CPU(dc->cpu); - TCGv_i32 t0; - unsigned int op; + uint32_t iflags = tcg_get_insn_start_param(dc->insn_start, 1); - op = dc->ir & ((1 << 9) - 1); - switch (op) { - case 0x21: - /* src. */ - t0 = tcg_temp_new_i32(); + iflags |= ESR_ESS_FLAG; + iflags |= rd << 5; + iflags |= store * ESR_S; + iflags |= (size == MO_32) * ESR_W; - LOG_DIS("src r%d r%d\n", dc->rd, dc->ra); - tcg_gen_extrl_i64_i32(t0, cpu_SR[SR_MSR]); - tcg_gen_andi_i32(t0, t0, MSR_CC); - write_carry(dc, cpu_R[dc->ra]); - if (dc->rd) { - tcg_gen_shri_i32(cpu_R[dc->rd], cpu_R[dc->ra], 1); - tcg_gen_or_i32(cpu_R[dc->rd], cpu_R[dc->rd], t0); - } - tcg_temp_free_i32(t0); - break; - - case 0x1: - case 0x41: - /* srl. */ - LOG_DIS("srl r%d r%d\n", dc->rd, dc->ra); - - /* Update carry. Note that write carry only looks at the LSB. 
*/ - write_carry(dc, cpu_R[dc->ra]); - if (dc->rd) { - if (op == 0x41) - tcg_gen_shri_i32(cpu_R[dc->rd], cpu_R[dc->ra], 1); - else - tcg_gen_sari_i32(cpu_R[dc->rd], cpu_R[dc->ra], 1); - } - break; - case 0x60: - LOG_DIS("ext8s r%d r%d\n", dc->rd, dc->ra); - tcg_gen_ext8s_i32(cpu_R[dc->rd], cpu_R[dc->ra]); - break; - case 0x61: - LOG_DIS("ext16s r%d r%d\n", dc->rd, dc->ra); - tcg_gen_ext16s_i32(cpu_R[dc->rd], cpu_R[dc->ra]); - break; - case 0x64: - case 0x66: - case 0x74: - case 0x76: - /* wdc. */ - LOG_DIS("wdc r%d\n", dc->ra); - trap_userspace(dc, true); - break; - case 0x68: - /* wic. */ - LOG_DIS("wic r%d\n", dc->ra); - trap_userspace(dc, true); - break; - case 0xe0: - if (trap_illegal(dc, !dc->cpu->cfg.use_pcmp_instr)) { - return; - } - if (dc->cpu->cfg.use_pcmp_instr) { - tcg_gen_clzi_i32(cpu_R[dc->rd], cpu_R[dc->ra], 32); - } - break; - case 0x1e0: - /* swapb */ - LOG_DIS("swapb r%d r%d\n", dc->rd, dc->ra); - tcg_gen_bswap32_i32(cpu_R[dc->rd], cpu_R[dc->ra]); - break; - case 0x1e2: - /*swaph */ - LOG_DIS("swaph r%d r%d\n", dc->rd, dc->ra); - tcg_gen_rotri_i32(cpu_R[dc->rd], cpu_R[dc->ra], 16); - break; - default: - cpu_abort(cs, "unknown bit oc=%x op=%x rd=%d ra=%d rb=%d\n", - dc->pc, op, dc->rd, dc->ra, dc->rb); - break; - } + tcg_set_insn_start_param(dc->insn_start, 1, iflags); } -static inline void sync_jmpstate(DisasContext *dc) +static bool do_load(DisasContext *dc, int rd, TCGv addr, MemOp mop, + int mem_index, bool rev) { - if (dc->jmp == JMP_DIRECT || dc->jmp == JMP_DIRECT_CC) { - if (dc->jmp == JMP_DIRECT) { - tcg_gen_movi_i32(env_btaken, 1); + MemOp size = mop & MO_SIZE; + + /* + * When doing reverse accesses we need to do two things. + * + * 1. Reverse the address wrt endianness. + * 2. Byteswap the data lanes on the way back into the CPU core. + */ + if (rev) { + if (size > MO_8) { + mop ^= MO_BSWAP; + } + if (size < MO_32) { + tcg_gen_xori_tl(addr, addr, 3 - size); } - dc->jmp = JMP_INDIRECT; - tcg_gen_movi_i64(env_btarget, dc->jmp_pc); } + + if (size > MO_8 && + (dc->tb_flags & MSR_EE) && + dc->cpu->cfg.unaligned_exceptions) { + record_unaligned_ess(dc, rd, size, false); + mop |= MO_ALIGN; + } + + tcg_gen_qemu_ld_i32(reg_for_write(dc, rd), addr, mem_index, mop); + + tcg_temp_free(addr); + return true; } -static void dec_imm(DisasContext *dc) +static bool trans_lbu(DisasContext *dc, arg_typea *arg) { - LOG_DIS("imm %x\n", dc->imm << 16); - tcg_gen_movi_i32(env_imm, (dc->imm << 16)); - dc->tb_flags |= IMM_FLAG; - dc->clear_imm = 0; + TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + return do_load(dc, arg->rd, addr, MO_UB, dc->mem_index, false); } -static inline void compute_ldst_addr(DisasContext *dc, bool ea, TCGv t) +static bool trans_lbur(DisasContext *dc, arg_typea *arg) { - bool extimm = dc->tb_flags & IMM_FLAG; - /* Should be set to true if r1 is used by loadstores. */ - bool stackprot = false; - TCGv_i32 t32; + TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + return do_load(dc, arg->rd, addr, MO_UB, dc->mem_index, true); +} - /* All load/stores use ra. */ - if (dc->ra == 1 && dc->cpu->cfg.stackprot) { - stackprot = true; +static bool trans_lbuea(DisasContext *dc, arg_typea *arg) +{ + if (trap_userspace(dc, true)) { + return true; } +#ifdef CONFIG_USER_ONLY + return true; +#else + TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + return do_load(dc, arg->rd, addr, MO_UB, MMU_NOMMU_IDX, false); +#endif +} - /* Treat the common cases first. 
*/ - if (!dc->type_b) { - if (ea) { - int addr_size = dc->cpu->cfg.addr_size; +static bool trans_lbui(DisasContext *dc, arg_typeb *arg) +{ + TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); + return do_load(dc, arg->rd, addr, MO_UB, dc->mem_index, false); +} - if (addr_size == 32) { - tcg_gen_extu_i32_tl(t, cpu_R[dc->rb]); - return; - } +static bool trans_lhu(DisasContext *dc, arg_typea *arg) +{ + TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + return do_load(dc, arg->rd, addr, MO_TEUW, dc->mem_index, false); +} - tcg_gen_concat_i32_i64(t, cpu_R[dc->rb], cpu_R[dc->ra]); - if (addr_size < 64) { - /* Mask off out of range bits. */ - tcg_gen_andi_i64(t, t, MAKE_64BIT_MASK(0, addr_size)); - } - return; - } +static bool trans_lhur(DisasContext *dc, arg_typea *arg) +{ + TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + return do_load(dc, arg->rd, addr, MO_TEUW, dc->mem_index, true); +} - /* If any of the regs is r0, set t to the value of the other reg. */ - if (dc->ra == 0) { - tcg_gen_extu_i32_tl(t, cpu_R[dc->rb]); - return; - } else if (dc->rb == 0) { - tcg_gen_extu_i32_tl(t, cpu_R[dc->ra]); - return; - } +static bool trans_lhuea(DisasContext *dc, arg_typea *arg) +{ + if (trap_userspace(dc, true)) { + return true; + } +#ifdef CONFIG_USER_ONLY + return true; +#else + TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + return do_load(dc, arg->rd, addr, MO_TEUW, MMU_NOMMU_IDX, false); +#endif +} - if (dc->rb == 1 && dc->cpu->cfg.stackprot) { - stackprot = true; - } +static bool trans_lhui(DisasContext *dc, arg_typeb *arg) +{ + TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); + return do_load(dc, arg->rd, addr, MO_TEUW, dc->mem_index, false); +} - t32 = tcg_temp_new_i32(); - tcg_gen_add_i32(t32, cpu_R[dc->ra], cpu_R[dc->rb]); - tcg_gen_extu_i32_tl(t, t32); - tcg_temp_free_i32(t32); +static bool trans_lw(DisasContext *dc, arg_typea *arg) +{ + TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + return do_load(dc, arg->rd, addr, MO_TEUL, dc->mem_index, false); +} - if (stackprot) { - gen_helper_stackprot(cpu_env, t); - } - return; - } - /* Immediate. 
*/ - t32 = tcg_temp_new_i32(); - if (!extimm) { - tcg_gen_addi_i32(t32, cpu_R[dc->ra], (int16_t)dc->imm); - } else { - tcg_gen_add_i32(t32, cpu_R[dc->ra], *(dec_alu_op_b(dc))); - } - tcg_gen_extu_i32_tl(t, t32); - tcg_temp_free_i32(t32); +static bool trans_lwr(DisasContext *dc, arg_typea *arg) +{ + TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + return do_load(dc, arg->rd, addr, MO_TEUL, dc->mem_index, true); +} - if (stackprot) { - gen_helper_stackprot(cpu_env, t); +static bool trans_lwea(DisasContext *dc, arg_typea *arg) +{ + if (trap_userspace(dc, true)) { + return true; } - return; +#ifdef CONFIG_USER_ONLY + return true; +#else + TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + return do_load(dc, arg->rd, addr, MO_TEUL, MMU_NOMMU_IDX, false); +#endif } -static void dec_load(DisasContext *dc) +static bool trans_lwi(DisasContext *dc, arg_typeb *arg) { - TCGv_i32 v; - TCGv addr; - unsigned int size; - bool rev = false, ex = false, ea = false; - int mem_index = cpu_mmu_index(&dc->cpu->env, false); - MemOp mop; + TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); + return do_load(dc, arg->rd, addr, MO_TEUL, dc->mem_index, false); +} - mop = dc->opcode & 3; - size = 1 << mop; - if (!dc->type_b) { - ea = extract32(dc->ir, 7, 1); - rev = extract32(dc->ir, 9, 1); - ex = extract32(dc->ir, 10, 1); - } - mop |= MO_TE; - if (rev) { - mop ^= MO_BSWAP; - } +static bool trans_lwx(DisasContext *dc, arg_typea *arg) +{ + TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); - if (trap_illegal(dc, size > 4)) { - return; - } + /* lwx does not throw unaligned access errors, so force alignment */ + tcg_gen_andi_tl(addr, addr, ~3); - if (trap_userspace(dc, ea)) { - return; + tcg_gen_qemu_ld_i32(cpu_res_val, addr, dc->mem_index, MO_TEUL); + tcg_gen_mov_tl(cpu_res_addr, addr); + tcg_temp_free(addr); + + if (arg->rd) { + tcg_gen_mov_i32(cpu_R[arg->rd], cpu_res_val); } - LOG_DIS("l%d%s%s%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "", - ex ? "x" : "", - ea ? "ea" : ""); + /* No support for AXI exclusive so always clear C */ + tcg_gen_movi_i32(cpu_msr_c, 0); + return true; +} - t_sync_flags(dc); - addr = tcg_temp_new(); - compute_ldst_addr(dc, ea, addr); - /* Extended addressing bypasses the MMU. */ - mem_index = ea ? MMU_NOMMU_IDX : mem_index; +static bool do_store(DisasContext *dc, int rd, TCGv addr, MemOp mop, + int mem_index, bool rev) +{ + MemOp size = mop & MO_SIZE; /* * When doing reverse accesses we need to do two things. @@ -958,925 +885,1025 @@ static void dec_load(DisasContext *dc) * 1. Reverse the address wrt endianness. * 2. Byteswap the data lanes on the way back into the CPU core. */ - if (rev && size != 4) { - /* Endian reverse the address. t is addr. */ - switch (size) { - case 1: - { - tcg_gen_xori_tl(addr, addr, 3); - break; - } - - case 2: - /* 00 -> 10 - 10 -> 00. */ - tcg_gen_xori_tl(addr, addr, 2); - break; - default: - cpu_abort(CPU(dc->cpu), "Invalid reverse size\n"); - break; + if (rev) { + if (size > MO_8) { + mop ^= MO_BSWAP; + } + if (size < MO_32) { + tcg_gen_xori_tl(addr, addr, 3 - size); } } - /* lwx does not throw unaligned access errors, so force alignment */ - if (ex) { - tcg_gen_andi_tl(addr, addr, ~3); + if (size > MO_8 && + (dc->tb_flags & MSR_EE) && + dc->cpu->cfg.unaligned_exceptions) { + record_unaligned_ess(dc, rd, size, true); + mop |= MO_ALIGN; } - /* If we get a fault on a dslot, the jmpstate better be in sync. 
*/ - sync_jmpstate(dc); + tcg_gen_qemu_st_i32(reg_for_read(dc, rd), addr, mem_index, mop); - /* Verify alignment if needed. */ - /* - * Microblaze gives MMU faults priority over faults due to - * unaligned addresses. That's why we speculatively do the load - * into v. If the load succeeds, we verify alignment of the - * address and if that succeeds we write into the destination reg. - */ - v = tcg_temp_new_i32(); - tcg_gen_qemu_ld_i32(v, addr, mem_index, mop); + tcg_temp_free(addr); + return true; +} - if (dc->cpu->cfg.unaligned_exceptions && size > 1) { - TCGv_i32 t0 = tcg_const_i32(0); - TCGv_i32 treg = tcg_const_i32(dc->rd); - TCGv_i32 tsize = tcg_const_i32(size - 1); +static bool trans_sb(DisasContext *dc, arg_typea *arg) +{ + TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + return do_store(dc, arg->rd, addr, MO_UB, dc->mem_index, false); +} - tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc); - gen_helper_memalign(cpu_env, addr, treg, t0, tsize); +static bool trans_sbr(DisasContext *dc, arg_typea *arg) +{ + TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + return do_store(dc, arg->rd, addr, MO_UB, dc->mem_index, true); +} - tcg_temp_free_i32(t0); - tcg_temp_free_i32(treg); - tcg_temp_free_i32(tsize); +static bool trans_sbea(DisasContext *dc, arg_typea *arg) +{ + if (trap_userspace(dc, true)) { + return true; } +#ifdef CONFIG_USER_ONLY + return true; +#else + TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + return do_store(dc, arg->rd, addr, MO_UB, MMU_NOMMU_IDX, false); +#endif +} - if (ex) { - tcg_gen_mov_tl(env_res_addr, addr); - tcg_gen_mov_i32(env_res_val, v); - } - if (dc->rd) { - tcg_gen_mov_i32(cpu_R[dc->rd], v); - } - tcg_temp_free_i32(v); +static bool trans_sbi(DisasContext *dc, arg_typeb *arg) +{ + TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); + return do_store(dc, arg->rd, addr, MO_UB, dc->mem_index, false); +} - if (ex) { /* lwx */ - /* no support for AXI exclusive so always clear C */ - write_carryi(dc, 0); +static bool trans_sh(DisasContext *dc, arg_typea *arg) +{ + TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + return do_store(dc, arg->rd, addr, MO_TEUW, dc->mem_index, false); +} + +static bool trans_shr(DisasContext *dc, arg_typea *arg) +{ + TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + return do_store(dc, arg->rd, addr, MO_TEUW, dc->mem_index, true); +} + +static bool trans_shea(DisasContext *dc, arg_typea *arg) +{ + if (trap_userspace(dc, true)) { + return true; } +#ifdef CONFIG_USER_ONLY + return true; +#else + TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + return do_store(dc, arg->rd, addr, MO_TEUW, MMU_NOMMU_IDX, false); +#endif +} - tcg_temp_free(addr); +static bool trans_shi(DisasContext *dc, arg_typeb *arg) +{ + TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); + return do_store(dc, arg->rd, addr, MO_TEUW, dc->mem_index, false); } -static void dec_store(DisasContext *dc) +static bool trans_sw(DisasContext *dc, arg_typea *arg) { - TCGv addr; - TCGLabel *swx_skip = NULL; - unsigned int size; - bool rev = false, ex = false, ea = false; - int mem_index = cpu_mmu_index(&dc->cpu->env, false); - MemOp mop; + TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + return do_store(dc, arg->rd, addr, MO_TEUL, dc->mem_index, false); +} - mop = dc->opcode & 3; - size = 1 << mop; - if (!dc->type_b) { - ea = extract32(dc->ir, 7, 1); - rev = extract32(dc->ir, 9, 1); - ex = extract32(dc->ir, 10, 1); - } - mop |= MO_TE; - if (rev) { - mop ^= MO_BSWAP; - } +static bool 
trans_swr(DisasContext *dc, arg_typea *arg) +{ + TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + return do_store(dc, arg->rd, addr, MO_TEUL, dc->mem_index, true); +} - if (trap_illegal(dc, size > 4)) { - return; +static bool trans_swea(DisasContext *dc, arg_typea *arg) +{ + if (trap_userspace(dc, true)) { + return true; } +#ifdef CONFIG_USER_ONLY + return true; +#else + TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + return do_store(dc, arg->rd, addr, MO_TEUL, MMU_NOMMU_IDX, false); +#endif +} + +static bool trans_swi(DisasContext *dc, arg_typeb *arg) +{ + TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); + return do_store(dc, arg->rd, addr, MO_TEUL, dc->mem_index, false); +} - trap_userspace(dc, ea); +static bool trans_swx(DisasContext *dc, arg_typea *arg) +{ + TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + TCGLabel *swx_done = gen_new_label(); + TCGLabel *swx_fail = gen_new_label(); + TCGv_i32 tval; - LOG_DIS("s%d%s%s%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "", - ex ? "x" : "", - ea ? "ea" : ""); - t_sync_flags(dc); - /* If we get a fault on a dslot, the jmpstate better be in sync. */ - sync_jmpstate(dc); - /* SWX needs a temp_local. */ - addr = ex ? tcg_temp_local_new() : tcg_temp_new(); - compute_ldst_addr(dc, ea, addr); - /* Extended addressing bypasses the MMU. */ - mem_index = ea ? MMU_NOMMU_IDX : mem_index; + /* swx does not throw unaligned access errors, so force alignment */ + tcg_gen_andi_tl(addr, addr, ~3); - if (ex) { /* swx */ - TCGv_i32 tval; + /* + * Compare the address vs the one we used during lwx. + * On mismatch, the operation fails. On match, addr dies at the + * branch, but we know we can use the equal version in the global. + * In either case, addr is no longer needed. + */ + tcg_gen_brcond_tl(TCG_COND_NE, cpu_res_addr, addr, swx_fail); + tcg_temp_free(addr); - /* swx does not throw unaligned access errors, so force alignment */ - tcg_gen_andi_tl(addr, addr, ~3); + /* + * Compare the value loaded during lwx with current contents of + * the reserved location. + */ + tval = tcg_temp_new_i32(); - write_carryi(dc, 1); - swx_skip = gen_new_label(); - tcg_gen_brcond_tl(TCG_COND_NE, env_res_addr, addr, swx_skip); + tcg_gen_atomic_cmpxchg_i32(tval, cpu_res_addr, cpu_res_val, + reg_for_write(dc, arg->rd), + dc->mem_index, MO_TEUL); - /* - * Compare the value loaded at lwx with current contents of - * the reserved location. - */ - tval = tcg_temp_new_i32(); + tcg_gen_brcond_i32(TCG_COND_NE, cpu_res_val, tval, swx_fail); + tcg_temp_free_i32(tval); - tcg_gen_atomic_cmpxchg_i32(tval, addr, env_res_val, - cpu_R[dc->rd], mem_index, - mop); + /* Success */ + tcg_gen_movi_i32(cpu_msr_c, 0); + tcg_gen_br(swx_done); - tcg_gen_brcond_i32(TCG_COND_NE, env_res_val, tval, swx_skip); - write_carryi(dc, 0); - tcg_temp_free_i32(tval); - } + /* Failure */ + gen_set_label(swx_fail); + tcg_gen_movi_i32(cpu_msr_c, 1); - if (rev && size != 4) { - /* Endian reverse the address. t is addr. */ - switch (size) { - case 1: - { - tcg_gen_xori_tl(addr, addr, 3); - break; - } + gen_set_label(swx_done); - case 2: - /* 00 -> 10 - 10 -> 00. */ - /* Force addr into the temp. */ - tcg_gen_xori_tl(addr, addr, 2); - break; - default: - cpu_abort(CPU(dc->cpu), "Invalid reverse size\n"); - break; - } - } + /* + * Prevent the saved address from working again without another ldx. + * Akin to the pseudocode setting reservation = 0. 
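+ *
+ * A hypothetical guest-side retry loop built on this lwx/swx pairing
+ * (illustrative assembly only; r6 holds the word address):
+ *
+ *   retry: lwx   r5, r6, r0    # load word, set reservation
+ *          addik r5, r5, 1     # modify
+ *          swx   r5, r6, r0    # conditional store, MSR[C]=1 on failure
+ *          addc  r7, r0, r0    # r7 = MSR[C]
+ *          bneid r7, retry     # loop on failure
+ *          nop                 # delay slot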
+ */ + tcg_gen_movi_tl(cpu_res_addr, -1); + return true; +} - if (!ex) { - tcg_gen_qemu_st_i32(cpu_R[dc->rd], addr, mem_index, mop); +static void setup_dslot(DisasContext *dc, bool type_b) +{ + dc->tb_flags_to_set |= D_FLAG; + if (type_b && (dc->tb_flags & IMM_FLAG)) { + dc->tb_flags_to_set |= BIMM_FLAG; } +} - /* Verify alignment if needed. */ - if (dc->cpu->cfg.unaligned_exceptions && size > 1) { - TCGv_i32 t1 = tcg_const_i32(1); - TCGv_i32 treg = tcg_const_i32(dc->rd); - TCGv_i32 tsize = tcg_const_i32(size - 1); - - tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc); - /* FIXME: if the alignment is wrong, we should restore the value - * in memory. One possible way to achieve this is to probe - * the MMU prior to the memaccess, thay way we could put - * the alignment checks in between the probe and the mem - * access. - */ - gen_helper_memalign(cpu_env, addr, treg, t1, tsize); +static bool do_branch(DisasContext *dc, int dest_rb, int dest_imm, + bool delay, bool abs, int link) +{ + uint32_t add_pc; - tcg_temp_free_i32(t1); - tcg_temp_free_i32(treg); - tcg_temp_free_i32(tsize); + if (delay) { + setup_dslot(dc, dest_rb < 0); } - if (ex) { - gen_set_label(swx_skip); + if (link) { + tcg_gen_movi_i32(cpu_R[link], dc->base.pc_next); } - tcg_temp_free(addr); + /* Store the branch taken destination into btarget. */ + add_pc = abs ? 0 : dc->base.pc_next; + if (dest_rb > 0) { + dc->jmp_dest = -1; + tcg_gen_addi_i32(cpu_btarget, cpu_R[dest_rb], add_pc); + } else { + dc->jmp_dest = add_pc + dest_imm; + tcg_gen_movi_i32(cpu_btarget, dc->jmp_dest); + } + dc->jmp_cond = TCG_COND_ALWAYS; + return true; } -static inline void eval_cc(DisasContext *dc, unsigned int cc, - TCGv_i32 d, TCGv_i32 a) +#define DO_BR(NAME, NAMEI, DELAY, ABS, LINK) \ + static bool trans_##NAME(DisasContext *dc, arg_typea_br *arg) \ + { return do_branch(dc, arg->rb, 0, DELAY, ABS, LINK ? arg->rd : 0); } \ + static bool trans_##NAMEI(DisasContext *dc, arg_typeb_br *arg) \ + { return do_branch(dc, -1, arg->imm, DELAY, ABS, LINK ? arg->rd : 0); } + +DO_BR(br, bri, false, false, false) +DO_BR(bra, brai, false, true, false) +DO_BR(brd, brid, true, false, false) +DO_BR(brad, braid, true, true, false) +DO_BR(brld, brlid, true, false, true) +DO_BR(brald, bralid, true, true, true) + +static bool do_bcc(DisasContext *dc, int dest_rb, int dest_imm, + TCGCond cond, int ra, bool delay) { - static const int mb_to_tcg_cc[] = { - [CC_EQ] = TCG_COND_EQ, - [CC_NE] = TCG_COND_NE, - [CC_LT] = TCG_COND_LT, - [CC_LE] = TCG_COND_LE, - [CC_GE] = TCG_COND_GE, - [CC_GT] = TCG_COND_GT, - }; + TCGv_i32 zero, next; - switch (cc) { - case CC_EQ: - case CC_NE: - case CC_LT: - case CC_LE: - case CC_GE: - case CC_GT: - tcg_gen_setcondi_i32(mb_to_tcg_cc[cc], d, a, 0); - break; - default: - cpu_abort(CPU(dc->cpu), "Unknown condition code %x.\n", cc); - break; + if (delay) { + setup_dslot(dc, dest_rb < 0); } -} -static void eval_cond_jmp(DisasContext *dc, TCGv_i64 pc_true, TCGv_i64 pc_false) -{ - TCGv_i64 tmp_btaken = tcg_temp_new_i64(); - TCGv_i64 tmp_zero = tcg_const_i64(0); + dc->jmp_cond = cond; - tcg_gen_extu_i32_i64(tmp_btaken, env_btaken); - tcg_gen_movcond_i64(TCG_COND_NE, cpu_SR[SR_PC], - tmp_btaken, tmp_zero, - pc_true, pc_false); + /* Cache the condition register in cpu_bvalue across any delay slot. */ + tcg_gen_mov_i32(cpu_bvalue, reg_for_read(dc, ra)); - tcg_temp_free_i64(tmp_btaken); - tcg_temp_free_i64(tmp_zero); + /* Store the branch taken destination into btarget. 
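+ * The not-taken fall-through address is folded in by the movcond
+ * below, so on exit btarget always holds the final destination:
+ *
+ *   btarget = (ra <cond> 0) ? taken_dest : pc_next + (delay + 1) * 4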
*/ + if (dest_rb > 0) { + dc->jmp_dest = -1; + tcg_gen_addi_i32(cpu_btarget, cpu_R[dest_rb], dc->base.pc_next); + } else { + dc->jmp_dest = dc->base.pc_next + dest_imm; + tcg_gen_movi_i32(cpu_btarget, dc->jmp_dest); + } + + /* Compute the final destination into btarget. */ + zero = tcg_const_i32(0); + next = tcg_const_i32(dc->base.pc_next + (delay + 1) * 4); + tcg_gen_movcond_i32(dc->jmp_cond, cpu_btarget, + reg_for_read(dc, ra), zero, + cpu_btarget, next); + tcg_temp_free_i32(zero); + tcg_temp_free_i32(next); + + return true; } -static void dec_setup_dslot(DisasContext *dc) +#define DO_BCC(NAME, COND) \ + static bool trans_##NAME(DisasContext *dc, arg_typea_bc *arg) \ + { return do_bcc(dc, arg->rb, 0, COND, arg->ra, false); } \ + static bool trans_##NAME##d(DisasContext *dc, arg_typea_bc *arg) \ + { return do_bcc(dc, arg->rb, 0, COND, arg->ra, true); } \ + static bool trans_##NAME##i(DisasContext *dc, arg_typeb_bc *arg) \ + { return do_bcc(dc, -1, arg->imm, COND, arg->ra, false); } \ + static bool trans_##NAME##id(DisasContext *dc, arg_typeb_bc *arg) \ + { return do_bcc(dc, -1, arg->imm, COND, arg->ra, true); } + +DO_BCC(beq, TCG_COND_EQ) +DO_BCC(bge, TCG_COND_GE) +DO_BCC(bgt, TCG_COND_GT) +DO_BCC(ble, TCG_COND_LE) +DO_BCC(blt, TCG_COND_LT) +DO_BCC(bne, TCG_COND_NE) + +static bool trans_brk(DisasContext *dc, arg_typea_br *arg) { - TCGv_i32 tmp = tcg_const_i32(dc->type_b && (dc->tb_flags & IMM_FLAG)); - - dc->delayed_branch = 2; - dc->tb_flags |= D_FLAG; + if (trap_userspace(dc, true)) { + return true; + } + tcg_gen_mov_i32(cpu_pc, reg_for_read(dc, arg->rb)); + if (arg->rd) { + tcg_gen_movi_i32(cpu_R[arg->rd], dc->base.pc_next); + } + tcg_gen_ori_i32(cpu_msr, cpu_msr, MSR_BIP); + tcg_gen_movi_tl(cpu_res_addr, -1); - tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUMBState, bimm)); - tcg_temp_free_i32(tmp); + dc->base.is_jmp = DISAS_UPDATE; + return true; } -static void dec_bcc(DisasContext *dc) +static bool trans_brki(DisasContext *dc, arg_typeb_br *arg) { - unsigned int cc; - unsigned int dslot; - - cc = EXTRACT_FIELD(dc->ir, 21, 23); - dslot = dc->ir & (1 << 25); - LOG_DIS("bcc%s r%d %x\n", dslot ? "d" : "", dc->ra, dc->imm); + uint32_t imm = arg->imm; - dc->delayed_branch = 1; - if (dslot) { - dec_setup_dslot(dc); + if (trap_userspace(dc, imm != 0x8 && imm != 0x18)) { + return true; } + tcg_gen_movi_i32(cpu_pc, imm); + if (arg->rd) { + tcg_gen_movi_i32(cpu_R[arg->rd], dc->base.pc_next); + } + tcg_gen_movi_tl(cpu_res_addr, -1); - if (dec_alu_op_b_is_small_imm(dc)) { - int32_t offset = (int32_t)((int16_t)dc->imm); /* sign-extend. */ +#ifdef CONFIG_USER_ONLY + switch (imm) { + case 0x8: /* syscall trap */ + gen_raise_exception_sync(dc, EXCP_SYSCALL); + break; + case 0x18: /* debug trap */ + gen_raise_exception_sync(dc, EXCP_DEBUG); + break; + default: /* eliminated with trap_userspace check */ + g_assert_not_reached(); + } +#else + uint32_t msr_to_set = 0; - tcg_gen_movi_i64(env_btarget, dc->pc + offset); - dc->jmp = JMP_DIRECT_CC; - dc->jmp_pc = dc->pc + offset; - } else { - dc->jmp = JMP_INDIRECT; - tcg_gen_extu_i32_i64(env_btarget, *(dec_alu_op_b(dc))); - tcg_gen_addi_i64(env_btarget, env_btarget, dc->pc); - tcg_gen_andi_i64(env_btarget, env_btarget, UINT32_MAX); + if (imm != 0x18) { + msr_to_set |= MSR_BIP; } - eval_cc(dc, cc, env_btaken, cpu_R[dc->ra]); + if (imm == 0x8 || imm == 0x18) { + /* MSR_UM and MSR_VM are in tb_flags, so we know their value. 
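+ * UMS and VMS sit one bit above UM and VM in the MSR, so the single
+ * left shift below moves each live bit into its saved slot
+ * (UM -> UMS, VM -> VMS) before the live bits themselves are cleared.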
*/ + msr_to_set |= (dc->tb_flags & (MSR_UM | MSR_VM)) << 1; + tcg_gen_andi_i32(cpu_msr, cpu_msr, + ~(MSR_VMS | MSR_UMS | MSR_VM | MSR_UM)); + } + tcg_gen_ori_i32(cpu_msr, cpu_msr, msr_to_set); + dc->base.is_jmp = DISAS_UPDATE; +#endif + + return true; } -static void dec_br(DisasContext *dc) +static bool trans_mbar(DisasContext *dc, arg_mbar *arg) { - unsigned int dslot, link, abs, mbar; - - dslot = dc->ir & (1 << 20); - abs = dc->ir & (1 << 19); - link = dc->ir & (1 << 18); + int mbar_imm = arg->imm; - /* Memory barrier. */ - mbar = (dc->ir >> 16) & 31; - if (mbar == 2 && dc->imm == 4) { - uint16_t mbar_imm = dc->rd; + /* Data access memory barrier. */ + if ((mbar_imm & 2) == 0) { + tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); + } - LOG_DIS("mbar %d\n", mbar_imm); + /* Sleep. */ + if (mbar_imm & 16) { + TCGv_i32 tmp_1; - /* Data access memory barrier. */ - if ((mbar_imm & 2) == 0) { - tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); + if (trap_userspace(dc, true)) { + /* Sleep is a privileged instruction. */ + return true; } - /* mbar IMM & 16 decodes to sleep. */ - if (mbar_imm & 16) { - TCGv_i32 tmp_hlt = tcg_const_i32(EXCP_HLT); - TCGv_i32 tmp_1 = tcg_const_i32(1); + t_sync_flags(dc); - LOG_DIS("sleep\n"); + tmp_1 = tcg_const_i32(1); + tcg_gen_st_i32(tmp_1, cpu_env, + -offsetof(MicroBlazeCPU, env) + +offsetof(CPUState, halted)); + tcg_temp_free_i32(tmp_1); - if (trap_userspace(dc, true)) { - /* Sleep is a privileged instruction. */ - return; - } + tcg_gen_movi_i32(cpu_pc, dc->base.pc_next + 4); - t_sync_flags(dc); - tcg_gen_st_i32(tmp_1, cpu_env, - -offsetof(MicroBlazeCPU, env) - +offsetof(CPUState, halted)); - tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc + 4); - gen_helper_raise_exception(cpu_env, tmp_hlt); - tcg_temp_free_i32(tmp_hlt); - tcg_temp_free_i32(tmp_1); - return; - } - /* Break the TB. */ - dc->cpustate_changed = 1; - return; + gen_raise_exception(dc, EXCP_HLT); } - LOG_DIS("br%s%s%s%s imm=%x\n", - abs ? "a" : "", link ? "l" : "", - dc->type_b ? "i" : "", dslot ? "d" : "", - dc->imm); + /* + * If !(mbar_imm & 1), this is an instruction access memory barrier + * and we need to end the TB so that we recognize self-modified + * code immediately. + * + * However, there are some data mbars that need the TB break + * (and return to main loop) to recognize interrupts right away. + * E.g. recognizing a change to an interrupt controller register. + * + * Therefore, choose to end the TB always. 
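+ *
+ * For reference, the imm bits tested above decode as:
+ *   bit 0 clear -> instruction access barrier (self-modifying code)
+ *   bit 1 clear -> data access barrier (the tcg_gen_mb above)
+ *   bit 4 set   -> sleep until interrupt (privileged, raises EXCP_HLT)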
+ */ + dc->cpustate_changed = 1; + return true; +} - dc->delayed_branch = 1; - if (dslot) { - dec_setup_dslot(dc); +static bool do_rts(DisasContext *dc, arg_typeb_bc *arg, int to_set) +{ + if (trap_userspace(dc, to_set)) { + return true; } - if (link && dc->rd) - tcg_gen_movi_i32(cpu_R[dc->rd], dc->pc); - - dc->jmp = JMP_INDIRECT; - if (abs) { - tcg_gen_movi_i32(env_btaken, 1); - tcg_gen_extu_i32_i64(env_btarget, *(dec_alu_op_b(dc))); - if (link && !dslot) { - if (!(dc->tb_flags & IMM_FLAG) && (dc->imm == 8 || dc->imm == 0x18)) - t_gen_raise_exception(dc, EXCP_BREAK); - if (dc->imm == 0) { - if (trap_userspace(dc, true)) { - return; - } + dc->tb_flags_to_set |= to_set; + setup_dslot(dc, true); - t_gen_raise_exception(dc, EXCP_DEBUG); - } - } - } else { - if (dec_alu_op_b_is_small_imm(dc)) { - dc->jmp = JMP_DIRECT; - dc->jmp_pc = dc->pc + (int32_t)((int16_t)dc->imm); - } else { - tcg_gen_movi_i32(env_btaken, 1); - tcg_gen_extu_i32_i64(env_btarget, *(dec_alu_op_b(dc))); - tcg_gen_addi_i64(env_btarget, env_btarget, dc->pc); - tcg_gen_andi_i64(env_btarget, env_btarget, UINT32_MAX); - } - } + dc->jmp_cond = TCG_COND_ALWAYS; + dc->jmp_dest = -1; + tcg_gen_addi_i32(cpu_btarget, reg_for_read(dc, arg->ra), arg->imm); + return true; } -static inline void do_rti(DisasContext *dc) -{ - TCGv_i32 t0, t1; - t0 = tcg_temp_new_i32(); - t1 = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(t1, cpu_SR[SR_MSR]); - tcg_gen_shri_i32(t0, t1, 1); - tcg_gen_ori_i32(t1, t1, MSR_IE); - tcg_gen_andi_i32(t0, t0, (MSR_VM | MSR_UM)); +#define DO_RTS(NAME, IFLAG) \ + static bool trans_##NAME(DisasContext *dc, arg_typeb_bc *arg) \ + { return do_rts(dc, arg, IFLAG); } - tcg_gen_andi_i32(t1, t1, ~(MSR_VM | MSR_UM)); - tcg_gen_or_i32(t1, t1, t0); - msr_write(dc, t1); - tcg_temp_free_i32(t1); - tcg_temp_free_i32(t0); - dc->tb_flags &= ~DRTI_FLAG; +DO_RTS(rtbd, DRTB_FLAG) +DO_RTS(rtid, DRTI_FLAG) +DO_RTS(rted, DRTE_FLAG) +DO_RTS(rtsd, 0) + +static bool trans_zero(DisasContext *dc, arg_zero *arg) +{ + /* If opcode_0_illegal, trap. */ + if (dc->cpu->cfg.opcode_0_illegal) { + trap_illegal(dc, true); + return true; + } + /* + * Otherwise, this is "add r0, r0, r0". + * Continue to trans_add so that MSR[C] gets cleared. + */ + return false; } -static inline void do_rtb(DisasContext *dc) +static void msr_read(DisasContext *dc, TCGv_i32 d) { - TCGv_i32 t0, t1; - t0 = tcg_temp_new_i32(); - t1 = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(t1, cpu_SR[SR_MSR]); - tcg_gen_andi_i32(t1, t1, ~MSR_BIP); - tcg_gen_shri_i32(t0, t1, 1); - tcg_gen_andi_i32(t0, t0, (MSR_VM | MSR_UM)); + TCGv_i32 t; - tcg_gen_andi_i32(t1, t1, ~(MSR_VM | MSR_UM)); - tcg_gen_or_i32(t1, t1, t0); - msr_write(dc, t1); - tcg_temp_free_i32(t1); - tcg_temp_free_i32(t0); - dc->tb_flags &= ~DRTB_FLAG; + /* Replicate the cpu_msr_c boolean into the proper bit and the copy. */ + t = tcg_temp_new_i32(); + tcg_gen_muli_i32(t, cpu_msr_c, MSR_C | MSR_CC); + tcg_gen_or_i32(d, cpu_msr, t); + tcg_temp_free_i32(t); } -static inline void do_rte(DisasContext *dc) +#ifndef CONFIG_USER_ONLY +static void msr_write(DisasContext *dc, TCGv_i32 v) { - TCGv_i32 t0, t1; - t0 = tcg_temp_new_i32(); - t1 = tcg_temp_new_i32(); + dc->cpustate_changed = 1; - tcg_gen_extrl_i64_i32(t1, cpu_SR[SR_MSR]); - tcg_gen_ori_i32(t1, t1, MSR_EE); - tcg_gen_andi_i32(t1, t1, ~MSR_EIP); - tcg_gen_shri_i32(t0, t1, 1); - tcg_gen_andi_i32(t0, t0, (MSR_VM | MSR_UM)); + /* Install MSR_C. 
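+ * The carry is kept unpacked in the cpu_msr_c global (always 0 or 1),
+ * so a write splits the incoming image in two, roughly:
+ *
+ *   cpu_msr_c = extract32(v, 2, 1);              # MSR[C] is bit 2
+ *   cpu_msr   = v & ~(MSR_C | MSR_CC | MSR_PVR); # packed copy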
*/ + tcg_gen_extract_i32(cpu_msr_c, v, 2, 1); - tcg_gen_andi_i32(t1, t1, ~(MSR_VM | MSR_UM)); - tcg_gen_or_i32(t1, t1, t0); - msr_write(dc, t1); - tcg_temp_free_i32(t1); - tcg_temp_free_i32(t0); - dc->tb_flags &= ~DRTE_FLAG; + /* Clear MSR_C and MSR_CC; MSR_PVR is not writable, and is always clear. */ + tcg_gen_andi_i32(cpu_msr, v, ~(MSR_C | MSR_CC | MSR_PVR)); } +#endif -static void dec_rts(DisasContext *dc) +static bool do_msrclrset(DisasContext *dc, arg_type_msr *arg, bool set) { - unsigned int b_bit, i_bit, e_bit; - TCGv_i64 tmp64; + uint32_t imm = arg->imm; - i_bit = dc->ir & (1 << 21); - b_bit = dc->ir & (1 << 22); - e_bit = dc->ir & (1 << 23); - - if (trap_userspace(dc, i_bit || b_bit || e_bit)) { - return; + if (trap_userspace(dc, imm != MSR_C)) { + return true; } - dec_setup_dslot(dc); + if (arg->rd) { + msr_read(dc, cpu_R[arg->rd]); + } - if (i_bit) { - LOG_DIS("rtid ir=%x\n", dc->ir); - dc->tb_flags |= DRTI_FLAG; - } else if (b_bit) { - LOG_DIS("rtbd ir=%x\n", dc->ir); - dc->tb_flags |= DRTB_FLAG; - } else if (e_bit) { - LOG_DIS("rted ir=%x\n", dc->ir); - dc->tb_flags |= DRTE_FLAG; - } else - LOG_DIS("rts ir=%x\n", dc->ir); + /* + * Handle the carry bit separately. + * This is the only bit that userspace can modify. + */ + if (imm & MSR_C) { + tcg_gen_movi_i32(cpu_msr_c, set); + } - dc->jmp = JMP_INDIRECT; - tcg_gen_movi_i32(env_btaken, 1); + /* + * MSR_C and MSR_CC set above. + * MSR_PVR is not writable, and is always clear. + */ + imm &= ~(MSR_C | MSR_CC | MSR_PVR); - tmp64 = tcg_temp_new_i64(); - tcg_gen_extu_i32_i64(env_btarget, *(dec_alu_op_b(dc))); - tcg_gen_extu_i32_i64(tmp64, cpu_R[dc->ra]); - tcg_gen_add_i64(env_btarget, env_btarget, tmp64); - tcg_gen_andi_i64(env_btarget, env_btarget, UINT32_MAX); - tcg_temp_free_i64(tmp64); + if (imm != 0) { + if (set) { + tcg_gen_ori_i32(cpu_msr, cpu_msr, imm); + } else { + tcg_gen_andi_i32(cpu_msr, cpu_msr, ~imm); + } + dc->cpustate_changed = 1; + } + return true; } -static int dec_check_fpuv2(DisasContext *dc) +static bool trans_msrclr(DisasContext *dc, arg_type_msr *arg) { - if ((dc->cpu->cfg.use_fpu != 2) && (dc->tb_flags & MSR_EE_FLAG)) { - tcg_gen_movi_i64(cpu_SR[SR_ESR], ESR_EC_FPU); - t_gen_raise_exception(dc, EXCP_HW_EXCP); - } - return (dc->cpu->cfg.use_fpu == 2) ? 
PVR2_USE_FPU2_MASK : 0; + return do_msrclrset(dc, arg, false); } -static void dec_fpu(DisasContext *dc) +static bool trans_msrset(DisasContext *dc, arg_type_msr *arg) { - unsigned int fpu_insn; + return do_msrclrset(dc, arg, true); +} - if (trap_illegal(dc, !dc->cpu->cfg.use_fpu)) { - return; +static bool trans_mts(DisasContext *dc, arg_mts *arg) +{ + if (trap_userspace(dc, true)) { + return true; } - fpu_insn = (dc->ir >> 7) & 7; - - switch (fpu_insn) { - case 0: - gen_helper_fadd(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra], - cpu_R[dc->rb]); - break; +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else + if (arg->e && arg->rs != 0x1003) { + qemu_log_mask(LOG_GUEST_ERROR, + "Invalid extended mts reg 0x%x\n", arg->rs); + return true; + } - case 1: - gen_helper_frsub(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra], - cpu_R[dc->rb]); - break; + TCGv_i32 src = reg_for_read(dc, arg->ra); + switch (arg->rs) { + case SR_MSR: + msr_write(dc, src); + break; + case SR_FSR: + tcg_gen_st_i32(src, cpu_env, offsetof(CPUMBState, fsr)); + break; + case 0x800: + tcg_gen_st_i32(src, cpu_env, offsetof(CPUMBState, slr)); + break; + case 0x802: + tcg_gen_st_i32(src, cpu_env, offsetof(CPUMBState, shr)); + break; - case 2: - gen_helper_fmul(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra], - cpu_R[dc->rb]); - break; + case 0x1000: /* PID */ + case 0x1001: /* ZPR */ + case 0x1002: /* TLBX */ + case 0x1003: /* TLBLO */ + case 0x1004: /* TLBHI */ + case 0x1005: /* TLBSX */ + { + TCGv_i32 tmp_ext = tcg_const_i32(arg->e); + TCGv_i32 tmp_reg = tcg_const_i32(arg->rs & 7); + + gen_helper_mmu_write(cpu_env, tmp_ext, tmp_reg, src); + tcg_temp_free_i32(tmp_reg); + tcg_temp_free_i32(tmp_ext); + } + break; - case 3: - gen_helper_fdiv(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra], - cpu_R[dc->rb]); - break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "Invalid mts reg 0x%x\n", arg->rs); + return true; + } + dc->cpustate_changed = 1; + return true; +#endif +} - case 4: - switch ((dc->ir >> 4) & 7) { - case 0: - gen_helper_fcmp_un(cpu_R[dc->rd], cpu_env, - cpu_R[dc->ra], cpu_R[dc->rb]); - break; - case 1: - gen_helper_fcmp_lt(cpu_R[dc->rd], cpu_env, - cpu_R[dc->ra], cpu_R[dc->rb]); - break; - case 2: - gen_helper_fcmp_eq(cpu_R[dc->rd], cpu_env, - cpu_R[dc->ra], cpu_R[dc->rb]); - break; - case 3: - gen_helper_fcmp_le(cpu_R[dc->rd], cpu_env, - cpu_R[dc->ra], cpu_R[dc->rb]); - break; - case 4: - gen_helper_fcmp_gt(cpu_R[dc->rd], cpu_env, - cpu_R[dc->ra], cpu_R[dc->rb]); - break; - case 5: - gen_helper_fcmp_ne(cpu_R[dc->rd], cpu_env, - cpu_R[dc->ra], cpu_R[dc->rb]); - break; - case 6: - gen_helper_fcmp_ge(cpu_R[dc->rd], cpu_env, - cpu_R[dc->ra], cpu_R[dc->rb]); - break; - default: - qemu_log_mask(LOG_UNIMP, - "unimplemented fcmp fpu_insn=%x pc=%x" - " opc=%x\n", - fpu_insn, dc->pc, dc->opcode); - dc->abort_at_next_insn = 1; - break; - } - break; +static bool trans_mfs(DisasContext *dc, arg_mfs *arg) +{ + TCGv_i32 dest = reg_for_write(dc, arg->rd); - case 5: - if (!dec_check_fpuv2(dc)) { - return; + if (arg->e) { + switch (arg->rs) { + case SR_EAR: + { + TCGv_i64 t64 = tcg_temp_new_i64(); + tcg_gen_ld_i64(t64, cpu_env, offsetof(CPUMBState, ear)); + tcg_gen_extrh_i64_i32(dest, t64); + tcg_temp_free_i64(t64); } - gen_helper_flt(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra]); + return true; +#ifndef CONFIG_USER_ONLY + case 0x1003: /* TLBLO */ + /* Handled below. */ break; +#endif + case 0x2006 ... 0x2009: + /* High bits of PVR6-9 not implemented. 
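+ * An extended (e-bit) mfs reads the upper half of a 64-bit SPR, as
+ * with EAR above; PVR6-9 are only 32 bits wide here, so their upper
+ * half reads back as zero.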
*/ + tcg_gen_movi_i32(dest, 0); + return true; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "Invalid extended mfs reg 0x%x\n", arg->rs); + return true; + } + } - case 6: - if (!dec_check_fpuv2(dc)) { - return; - } - gen_helper_fint(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra]); - break; + switch (arg->rs) { + case SR_PC: + tcg_gen_movi_i32(dest, dc->base.pc_next); + break; + case SR_MSR: + msr_read(dc, dest); + break; + case SR_EAR: + { + TCGv_i64 t64 = tcg_temp_new_i64(); + tcg_gen_ld_i64(t64, cpu_env, offsetof(CPUMBState, ear)); + tcg_gen_extrl_i64_i32(dest, t64); + tcg_temp_free_i64(t64); + } + break; + case SR_ESR: + tcg_gen_ld_i32(dest, cpu_env, offsetof(CPUMBState, esr)); + break; + case SR_FSR: + tcg_gen_ld_i32(dest, cpu_env, offsetof(CPUMBState, fsr)); + break; + case SR_BTR: + tcg_gen_ld_i32(dest, cpu_env, offsetof(CPUMBState, btr)); + break; + case SR_EDR: + tcg_gen_ld_i32(dest, cpu_env, offsetof(CPUMBState, edr)); + break; + case 0x800: + tcg_gen_ld_i32(dest, cpu_env, offsetof(CPUMBState, slr)); + break; + case 0x802: + tcg_gen_ld_i32(dest, cpu_env, offsetof(CPUMBState, shr)); + break; - case 7: - if (!dec_check_fpuv2(dc)) { - return; - } - gen_helper_fsqrt(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra]); - break; +#ifndef CONFIG_USER_ONLY + case 0x1000: /* PID */ + case 0x1001: /* ZPR */ + case 0x1002: /* TLBX */ + case 0x1003: /* TLBLO */ + case 0x1004: /* TLBHI */ + case 0x1005: /* TLBSX */ + { + TCGv_i32 tmp_ext = tcg_const_i32(arg->e); + TCGv_i32 tmp_reg = tcg_const_i32(arg->rs & 7); + + gen_helper_mmu_read(dest, cpu_env, tmp_ext, tmp_reg); + tcg_temp_free_i32(tmp_reg); + tcg_temp_free_i32(tmp_ext); + } + break; +#endif - default: - qemu_log_mask(LOG_UNIMP, "unimplemented FPU insn fpu_insn=%x pc=%x" - " opc=%x\n", - fpu_insn, dc->pc, dc->opcode); - dc->abort_at_next_insn = 1; - break; + case 0x2000 ... 0x200c: + tcg_gen_ld_i32(dest, cpu_env, + offsetof(CPUMBState, pvr.regs[arg->rs - 0x2000])); + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "Invalid mfs reg 0x%x\n", arg->rs); + break; } + return true; } -static void dec_null(DisasContext *dc) +static void do_rti(DisasContext *dc) { - if (trap_illegal(dc, true)) { - return; - } - qemu_log_mask(LOG_GUEST_ERROR, "unknown insn pc=%x opc=%x\n", dc->pc, dc->opcode); - dc->abort_at_next_insn = 1; + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_shri_i32(tmp, cpu_msr, 1); + tcg_gen_ori_i32(cpu_msr, cpu_msr, MSR_IE); + tcg_gen_andi_i32(tmp, tmp, MSR_VM | MSR_UM); + tcg_gen_andi_i32(cpu_msr, cpu_msr, ~(MSR_VM | MSR_UM)); + tcg_gen_or_i32(cpu_msr, cpu_msr, tmp); + + tcg_temp_free_i32(tmp); + dc->tb_flags &= ~DRTI_FLAG; +} + +static void do_rtb(DisasContext *dc) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_shri_i32(tmp, cpu_msr, 1); + tcg_gen_andi_i32(cpu_msr, cpu_msr, ~(MSR_VM | MSR_UM | MSR_BIP)); + tcg_gen_andi_i32(tmp, tmp, (MSR_VM | MSR_UM)); + tcg_gen_or_i32(cpu_msr, cpu_msr, tmp); + + tcg_temp_free_i32(tmp); + dc->tb_flags &= ~DRTB_FLAG; +} + +static void do_rte(DisasContext *dc) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_shri_i32(tmp, cpu_msr, 1); + tcg_gen_ori_i32(cpu_msr, cpu_msr, MSR_EE); + tcg_gen_andi_i32(tmp, tmp, (MSR_VM | MSR_UM)); + tcg_gen_andi_i32(cpu_msr, cpu_msr, ~(MSR_VM | MSR_UM | MSR_EIP)); + tcg_gen_or_i32(cpu_msr, cpu_msr, tmp); + + tcg_temp_free_i32(tmp); + dc->tb_flags &= ~DRTE_FLAG; } /* Insns connected to FSL or AXI stream attached devices. 
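 *
 * A sketch of how the four front ends below map onto the two helpers
 * (ctrl is the variant-selecting bit field from the encoding, and the
 * rfslN names are the usual assembler spelling):
 *
 *   get  rd, rfslN   ->  do_get(dc, rd, 0, N, ctrl)    # static id
 *   getd rd, rb      ->  do_get(dc, rd, rb, 0, ctrl)   # id = rb & 0xf
 *   put  ra, rfslN   ->  do_put(dc, ra, 0, N, ctrl)
 *   putd ra, rb      ->  do_put(dc, ra, rb, 0, ctrl)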
*/ -static void dec_stream(DisasContext *dc) +static bool do_get(DisasContext *dc, int rd, int rb, int imm, int ctrl) { TCGv_i32 t_id, t_ctrl; - int ctrl; - - LOG_DIS("%s%s imm=%x\n", dc->rd ? "get" : "put", - dc->type_b ? "" : "d", dc->imm); if (trap_userspace(dc, true)) { - return; + return true; } t_id = tcg_temp_new_i32(); - if (dc->type_b) { - tcg_gen_movi_i32(t_id, dc->imm & 0xf); - ctrl = dc->imm >> 10; + if (rb) { + tcg_gen_andi_i32(t_id, cpu_R[rb], 0xf); } else { - tcg_gen_andi_i32(t_id, cpu_R[dc->rb], 0xf); - ctrl = dc->imm >> 5; + tcg_gen_movi_i32(t_id, imm); } t_ctrl = tcg_const_i32(ctrl); + gen_helper_get(reg_for_write(dc, rd), t_id, t_ctrl); + tcg_temp_free_i32(t_id); + tcg_temp_free_i32(t_ctrl); + return true; +} - if (dc->rd == 0) { - gen_helper_put(t_id, t_ctrl, cpu_R[dc->ra]); +static bool trans_get(DisasContext *dc, arg_get *arg) +{ + return do_get(dc, arg->rd, 0, arg->imm, arg->ctrl); +} + +static bool trans_getd(DisasContext *dc, arg_getd *arg) +{ + return do_get(dc, arg->rd, arg->rb, 0, arg->ctrl); +} + +static bool do_put(DisasContext *dc, int ra, int rb, int imm, int ctrl) +{ + TCGv_i32 t_id, t_ctrl; + + if (trap_userspace(dc, true)) { + return true; + } + + t_id = tcg_temp_new_i32(); + if (rb) { + tcg_gen_andi_i32(t_id, cpu_R[rb], 0xf); } else { - gen_helper_get(cpu_R[dc->rd], t_id, t_ctrl); + tcg_gen_movi_i32(t_id, imm); } + + t_ctrl = tcg_const_i32(ctrl); + gen_helper_put(t_id, t_ctrl, reg_for_read(dc, ra)); tcg_temp_free_i32(t_id); tcg_temp_free_i32(t_ctrl); + return true; } -static struct decoder_info { - struct { - uint32_t bits; - uint32_t mask; - }; - void (*dec)(DisasContext *dc); -} decinfo[] = { - {DEC_ADD, dec_add}, - {DEC_SUB, dec_sub}, - {DEC_AND, dec_and}, - {DEC_XOR, dec_xor}, - {DEC_OR, dec_or}, - {DEC_BIT, dec_bit}, - {DEC_BARREL, dec_barrel}, - {DEC_LD, dec_load}, - {DEC_ST, dec_store}, - {DEC_IMM, dec_imm}, - {DEC_BR, dec_br}, - {DEC_BCC, dec_bcc}, - {DEC_RTS, dec_rts}, - {DEC_FPU, dec_fpu}, - {DEC_MUL, dec_mul}, - {DEC_DIV, dec_div}, - {DEC_MSR, dec_msr}, - {DEC_STREAM, dec_stream}, - {{0, 0}, dec_null} -}; +static bool trans_put(DisasContext *dc, arg_put *arg) +{ + return do_put(dc, arg->ra, 0, arg->imm, arg->ctrl); +} -static inline void decode(DisasContext *dc, uint32_t ir) +static bool trans_putd(DisasContext *dc, arg_putd *arg) { - int i; + return do_put(dc, arg->ra, arg->rb, 0, arg->ctrl); +} - dc->ir = ir; - LOG_DIS("%8.8x\t", dc->ir); +static void mb_tr_init_disas_context(DisasContextBase *dcb, CPUState *cs) +{ + DisasContext *dc = container_of(dcb, DisasContext, base); + MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs); + int bound; - if (ir == 0) { - trap_illegal(dc, dc->cpu->cfg.opcode_0_illegal); - /* Don't decode nop/zero instructions any further. */ - return; - } + dc->cpu = cpu; + dc->tb_flags = dc->base.tb->flags; + dc->cpustate_changed = 0; + dc->ext_imm = dc->base.tb->cs_base; + dc->r0 = NULL; + dc->r0_set = false; + dc->mem_index = cpu_mmu_index(&cpu->env, false); + dc->jmp_cond = dc->tb_flags & D_FLAG ? TCG_COND_ALWAYS : TCG_COND_NEVER; + dc->jmp_dest = -1; + + bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4; + dc->base.max_insns = MIN(dc->base.max_insns, bound); +} - /* bit 2 seems to indicate insn type. 
*/ - dc->type_b = ir & (1 << 29); +static void mb_tr_tb_start(DisasContextBase *dcb, CPUState *cs) +{ +} - dc->opcode = EXTRACT_FIELD(ir, 26, 31); - dc->rd = EXTRACT_FIELD(ir, 21, 25); - dc->ra = EXTRACT_FIELD(ir, 16, 20); - dc->rb = EXTRACT_FIELD(ir, 11, 15); - dc->imm = EXTRACT_FIELD(ir, 0, 15); +static void mb_tr_insn_start(DisasContextBase *dcb, CPUState *cs) +{ + DisasContext *dc = container_of(dcb, DisasContext, base); - /* Large switch for all insns. */ - for (i = 0; i < ARRAY_SIZE(decinfo); i++) { - if ((dc->opcode & decinfo[i].mask) == decinfo[i].bits) { - decinfo[i].dec(dc); - break; - } - } + tcg_gen_insn_start(dc->base.pc_next, dc->tb_flags & ~MSR_TB_MASK); + dc->insn_start = tcg_last_op(); } -/* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +static bool mb_tr_breakpoint_check(DisasContextBase *dcb, CPUState *cs, + const CPUBreakpoint *bp) { + DisasContext *dc = container_of(dcb, DisasContext, base); + + gen_raise_exception_sync(dc, EXCP_DEBUG); + + /* + * The address covered by the breakpoint must be included in + * [tb->pc, tb->pc + tb->size) in order to for it to be + * properly cleared -- thus we increment the PC here so that + * the logic setting tb->size below does the right thing. + */ + dc->base.pc_next += 4; + return true; +} + +static void mb_tr_translate_insn(DisasContextBase *dcb, CPUState *cs) +{ + DisasContext *dc = container_of(dcb, DisasContext, base); CPUMBState *env = cs->env_ptr; - MicroBlazeCPU *cpu = env_archcpu(env); - uint32_t pc_start; - struct DisasContext ctx; - struct DisasContext *dc = &ctx; - uint32_t page_start, org_flags; - uint32_t npc; - int num_insns; - - pc_start = tb->pc; - dc->cpu = cpu; - dc->tb = tb; - org_flags = dc->synced_flags = dc->tb_flags = tb->flags; + uint32_t ir; - dc->is_jmp = DISAS_NEXT; - dc->jmp = 0; - dc->delayed_branch = !!(dc->tb_flags & D_FLAG); - if (dc->delayed_branch) { - dc->jmp = JMP_INDIRECT; + /* TODO: This should raise an exception, not terminate qemu. */ + if (dc->base.pc_next & 3) { + cpu_abort(cs, "Microblaze: unaligned PC=%x\n", + (uint32_t)dc->base.pc_next); } - dc->pc = pc_start; - dc->singlestep_enabled = cs->singlestep_enabled; - dc->cpustate_changed = 0; - dc->abort_at_next_insn = 0; - if (pc_start & 3) { - cpu_abort(cs, "Microblaze: unaligned PC=%x\n", pc_start); + dc->tb_flags_to_set = 0; + + ir = cpu_ldl_code(env, dc->base.pc_next); + if (!decode(dc, ir)) { + trap_illegal(dc, true); } - page_start = pc_start & TARGET_PAGE_MASK; - num_insns = 0; + if (dc->r0) { + tcg_temp_free_i32(dc->r0); + dc->r0 = NULL; + dc->r0_set = false; + } - gen_tb_start(tb); - do - { - tcg_gen_insn_start(dc->pc); - num_insns++; + /* Discard the imm global when its contents cannot be used. */ + if ((dc->tb_flags & ~dc->tb_flags_to_set) & IMM_FLAG) { + tcg_gen_discard_i32(cpu_imm); + } -#if SIM_COMPAT - if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { - tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc); - gen_helper_debug(); - } -#endif + dc->tb_flags &= ~(IMM_FLAG | BIMM_FLAG | D_FLAG); + dc->tb_flags |= dc->tb_flags_to_set; + dc->base.pc_next += 4; - if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) { - t_gen_raise_exception(dc, EXCP_DEBUG); - dc->is_jmp = DISAS_UPDATE; - /* The address covered by the breakpoint must be included in - [tb->pc, tb->pc + tb->size) in order to for it to be - properly cleared -- thus we increment the PC here so that - the logic setting tb->size below does the right thing. 
-            dc->pc += 4;
-            break;
+    if (dc->jmp_cond != TCG_COND_NEVER && !(dc->tb_flags & D_FLAG)) {
+        if (dc->tb_flags & DRTI_FLAG) {
+            do_rti(dc);
+        } else if (dc->tb_flags & DRTB_FLAG) {
+            do_rtb(dc);
+        } else if (dc->tb_flags & DRTE_FLAG) {
+            do_rte(dc);
         }
+        dc->base.is_jmp = DISAS_JUMP;
+    }

-        /* Pretty disas. */
-        LOG_DIS("%8.8x:\t", dc->pc);
+    /* Force an exit if the per-tb cpu state has changed. */
+    if (dc->base.is_jmp == DISAS_NEXT && dc->cpustate_changed) {
+        dc->base.is_jmp = DISAS_UPDATE;
+        tcg_gen_movi_i32(cpu_pc, dc->base.pc_next);
+    }
+}

-        if (num_insns == max_insns && (tb_cflags(tb) & CF_LAST_IO)) {
-            gen_io_start();
-        }
+static void mb_tr_tb_stop(DisasContextBase *dcb, CPUState *cs)
+{
+    DisasContext *dc = container_of(dcb, DisasContext, base);

-        dc->clear_imm = 1;
-        decode(dc, cpu_ldl_code(env, dc->pc));
-        if (dc->clear_imm)
-            dc->tb_flags &= ~IMM_FLAG;
-        dc->pc += 4;
-
-        if (dc->delayed_branch) {
-            dc->delayed_branch--;
-            if (!dc->delayed_branch) {
-                if (dc->tb_flags & DRTI_FLAG)
-                    do_rti(dc);
-                if (dc->tb_flags & DRTB_FLAG)
-                    do_rtb(dc);
-                if (dc->tb_flags & DRTE_FLAG)
-                    do_rte(dc);
-                /* Clear the delay slot flag. */
-                dc->tb_flags &= ~D_FLAG;
-                /* If it is a direct jump, try direct chaining. */
-                if (dc->jmp == JMP_INDIRECT) {
-                    TCGv_i64 tmp_pc = tcg_const_i64(dc->pc);
-                    eval_cond_jmp(dc, env_btarget, tmp_pc);
-                    tcg_temp_free_i64(tmp_pc);
-
-                    dc->is_jmp = DISAS_JUMP;
-                } else if (dc->jmp == JMP_DIRECT) {
-                    t_sync_flags(dc);
-                    gen_goto_tb(dc, 0, dc->jmp_pc);
-                    dc->is_jmp = DISAS_TB_JUMP;
-                } else if (dc->jmp == JMP_DIRECT_CC) {
-                    TCGLabel *l1 = gen_new_label();
-                    t_sync_flags(dc);
-                    /* Conditional jmp. */
-                    tcg_gen_brcondi_i32(TCG_COND_NE, env_btaken, 0, l1);
-                    gen_goto_tb(dc, 1, dc->pc);
-                    gen_set_label(l1);
-                    gen_goto_tb(dc, 0, dc->jmp_pc);
-
-                    dc->is_jmp = DISAS_TB_JUMP;
-                }
-                break;
-            }
-        }
-        if (cs->singlestep_enabled) {
-            break;
-        }
-    } while (!dc->is_jmp && !dc->cpustate_changed
-             && !tcg_op_buf_full()
-             && !singlestep
-             && (dc->pc - page_start < TARGET_PAGE_SIZE)
-             && num_insns < max_insns);
-
-    npc = dc->pc;
-    if (dc->jmp == JMP_DIRECT || dc->jmp == JMP_DIRECT_CC) {
-        if (dc->tb_flags & D_FLAG) {
-            dc->is_jmp = DISAS_UPDATE;
-            tcg_gen_movi_i64(cpu_SR[SR_PC], npc);
-            sync_jmpstate(dc);
-        } else
-            npc = dc->jmp_pc;
-    }
-
-    /* Force an update if the per-tb cpu state has changed. */
-    if (dc->is_jmp == DISAS_NEXT
-        && (dc->cpustate_changed || org_flags != dc->tb_flags)) {
-        dc->is_jmp = DISAS_UPDATE;
-        tcg_gen_movi_i64(cpu_SR[SR_PC], npc);
+    if (dc->base.is_jmp == DISAS_NORETURN) {
+        /* We have already exited the TB. */
+        return;
     }
+    t_sync_flags(dc);

-    if (unlikely(cs->singlestep_enabled)) {
-        TCGv_i32 tmp = tcg_const_i32(EXCP_DEBUG);
+    switch (dc->base.is_jmp) {
+    case DISAS_TOO_MANY:
+        gen_goto_tb(dc, 0, dc->base.pc_next);
+        return;

-        if (dc->is_jmp != DISAS_JUMP) {
-            tcg_gen_movi_i64(cpu_SR[SR_PC], npc);
+    case DISAS_UPDATE:
+        if (unlikely(cs->singlestep_enabled)) {
+            gen_raise_exception(dc, EXCP_DEBUG);
+        } else {
+            tcg_gen_exit_tb(NULL, 0);
         }
-        gen_helper_raise_exception(cpu_env, tmp);
-        tcg_temp_free_i32(tmp);
-    } else {
-        switch(dc->is_jmp) {
-            case DISAS_NEXT:
-                gen_goto_tb(dc, 1, npc);
-                break;
-            default:
-            case DISAS_JUMP:
-            case DISAS_UPDATE:
-                /* indicate that the hash table must be used
-                   to find the next TB */
-                tcg_gen_exit_tb(NULL, 0);
-                break;
-            case DISAS_TB_JUMP:
-                /* nothing more to generate */
-                break;
+        return;
+
+    case DISAS_JUMP:
+        if (dc->jmp_dest != -1 && !cs->singlestep_enabled) {
+            /* Direct jump. */
+            tcg_gen_discard_i32(cpu_btarget);
+
+            if (dc->jmp_cond != TCG_COND_ALWAYS) {
+                /* Conditional direct jump. */
+                TCGLabel *taken = gen_new_label();
+                TCGv_i32 tmp = tcg_temp_new_i32();
+
+                /*
+                 * Copy bvalue to a temp now, so we can discard bvalue.
+                 * This can avoid writing bvalue to memory when the
+                 * delay slot cannot raise an exception.
+                 */
+                tcg_gen_mov_i32(tmp, cpu_bvalue);
+                tcg_gen_discard_i32(cpu_bvalue);
+
+                tcg_gen_brcondi_i32(dc->jmp_cond, tmp, 0, taken);
+                gen_goto_tb(dc, 1, dc->base.pc_next);
+                gen_set_label(taken);
+            }
+            gen_goto_tb(dc, 0, dc->jmp_dest);
+            return;
         }

-    gen_tb_end(tb, num_insns);
-    tb->size = dc->pc - pc_start;
-    tb->icount = num_insns;
+        /* Indirect jump (or direct jump w/ singlestep) */
+        tcg_gen_mov_i32(cpu_pc, cpu_btarget);
+        tcg_gen_discard_i32(cpu_btarget);
+
+        if (unlikely(cs->singlestep_enabled)) {
+            gen_raise_exception(dc, EXCP_DEBUG);
+        } else {
+            tcg_gen_exit_tb(NULL, 0);
+        }
+        return;

-#ifdef DEBUG_DISAS
-#if !SIM_COMPAT
-    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
-        && qemu_log_in_addr_range(pc_start)) {
-        FILE *logfile = qemu_log_lock();
-        qemu_log("--------------\n");
-        log_target_disas(cs, pc_start, dc->pc - pc_start);
-        qemu_log_unlock(logfile);
+    default:
+        g_assert_not_reached();
     }
-#endif
-#endif
-    assert(!dc->abort_at_next_insn);
+}
+
+static void mb_tr_disas_log(const DisasContextBase *dcb, CPUState *cs)
+{
+    qemu_log("IN: %s\n", lookup_symbol(dcb->pc_first));
+    log_target_disas(cs, dcb->pc_first, dcb->tb->size);
+}
+
+static const TranslatorOps mb_tr_ops = {
+    .init_disas_context = mb_tr_init_disas_context,
+    .tb_start = mb_tr_tb_start,
+    .insn_start = mb_tr_insn_start,
+    .breakpoint_check = mb_tr_breakpoint_check,
+    .translate_insn = mb_tr_translate_insn,
+    .tb_stop = mb_tr_tb_stop,
+    .disas_log = mb_tr_disas_log,
+};
+
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
+{
+    DisasContext dc;
+    translator_loop(&mb_tr_ops, &dc.base, cpu, tb, max_insns);
 }
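With mb_tr_ops filled in, gen_intermediate_code() above reduces to a single call into the common translator loop. The hook sequence that translator_loop() drives looks roughly like the following -- a simplified sketch of what lives in accel/tcg/translator.c, omitting breakpoint, I/O and TCG-buffer handling; not the actual implementation:

    static void translator_loop_sketch(const TranslatorOps *ops,
                                       DisasContextBase *db, CPUState *cpu,
                                       TranslationBlock *tb, int max_insns)
    {
        db->tb = tb;
        db->pc_first = tb->pc;
        db->pc_next = db->pc_first;
        db->is_jmp = DISAS_NEXT;
        db->num_insns = 0;
        db->max_insns = max_insns;

        ops->init_disas_context(db, cpu);   /* may lower db->max_insns */
        ops->tb_start(db, cpu);

        while (db->is_jmp == DISAS_NEXT) {
            db->num_insns++;
            ops->insn_start(db, cpu);
            ops->translate_insn(db, cpu);   /* advances db->pc_next */
            if (db->is_jmp == DISAS_NEXT && db->num_insns >= db->max_insns) {
                /* This is the DISAS_TOO_MANY that mb_tr_tb_stop() handles. */
                db->is_jmp = DISAS_TOO_MANY;
            }
        }

        ops->tb_stop(db, cpu);
        tb->size = db->pc_next - db->pc_first;
        tb->icount = db->num_insns;
    }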
"user" : "kernel", + (bool)(env->msr & MSR_EIP), + (bool)(env->msr & MSR_IE)); + + iflags = env->iflags; + qemu_fprintf(f, "iflags: 0x%08x", iflags); + if (iflags & IMM_FLAG) { + qemu_fprintf(f, " IMM(0x%08x)", env->imm); + } + if (iflags & BIMM_FLAG) { + qemu_fprintf(f, " BIMM"); + } + if (iflags & D_FLAG) { + qemu_fprintf(f, " D(btarget=0x%08x)", env->btarget); } + if (iflags & DRTI_FLAG) { + qemu_fprintf(f, " DRTI"); + } + if (iflags & DRTE_FLAG) { + qemu_fprintf(f, " DRTE"); + } + if (iflags & DRTB_FLAG) { + qemu_fprintf(f, " DRTB"); + } + if (iflags & ESR_ESS_FLAG) { + qemu_fprintf(f, " ESR_ESS(0x%04x)", iflags & ESR_ESS_MASK); + } + + qemu_fprintf(f, "\nesr=0x%04x fsr=0x%02x btr=0x%08x edr=0x%x\n" + "ear=0x" TARGET_FMT_lx " slr=0x%x shr=0x%x\n", + env->esr, env->fsr, env->btr, env->edr, + env->ear, env->slr, env->shr); - qemu_fprintf(f, "IN: PC=%" PRIx64 " %s\n", - env->sregs[SR_PC], lookup_symbol(env->sregs[SR_PC])); - qemu_fprintf(f, "rmsr=%" PRIx64 " resr=%" PRIx64 " rear=%" PRIx64 " " - "debug=%x imm=%x iflags=%x fsr=%" PRIx64 " " - "rbtr=%" PRIx64 "\n", - env->sregs[SR_MSR], env->sregs[SR_ESR], env->sregs[SR_EAR], - env->debug, env->imm, env->iflags, env->sregs[SR_FSR], - env->sregs[SR_BTR]); - qemu_fprintf(f, "btaken=%d btarget=%" PRIx64 " mode=%s(saved=%s) " - "eip=%d ie=%d\n", - env->btaken, env->btarget, - (env->sregs[SR_MSR] & MSR_UM) ? "user" : "kernel", - (env->sregs[SR_MSR] & MSR_UMS) ? "user" : "kernel", - (bool)(env->sregs[SR_MSR] & MSR_EIP), - (bool)(env->sregs[SR_MSR] & MSR_IE)); for (i = 0; i < 12; i++) { - qemu_fprintf(f, "rpvr%2.2d=%8.8x ", i, env->pvr.regs[i]); - if ((i + 1) % 4 == 0) { - qemu_fprintf(f, "\n"); - } + qemu_fprintf(f, "rpvr%-2d=%08x%c", + i, env->pvr.regs[i], i % 4 == 3 ? '\n' : ' '); } - /* Registers that aren't modeled are reported as 0 */ - qemu_fprintf(f, "redr=%" PRIx64 " rpid=0 rzpr=0 rtlbx=0 rtlbsx=0 " - "rtlblo=0 rtlbhi=0\n", env->sregs[SR_EDR]); - qemu_fprintf(f, "slr=%x shr=%x\n", env->slr, env->shr); for (i = 0; i < 32; i++) { - qemu_fprintf(f, "r%2.2d=%8.8x ", i, env->regs[i]); - if ((i + 1) % 4 == 0) - qemu_fprintf(f, "\n"); - } - qemu_fprintf(f, "\n\n"); + qemu_fprintf(f, "r%2.2d=%08x%c", + i, env->regs[i], i % 4 == 3 ? '\n' : ' '); + } + qemu_fprintf(f, "\n"); } void mb_tcg_init(void) { - int i; +#define R(X) { &cpu_R[X], offsetof(CPUMBState, regs[X]), "r" #X } +#define SP(X) { &cpu_##X, offsetof(CPUMBState, X), #X } + + static const struct { + TCGv_i32 *var; int ofs; char name[8]; + } i32s[] = { + /* + * Note that r0 is handled specially in reg_for_read + * and reg_for_write. Nothing should touch cpu_R[0]. + * Leave that element NULL, which will assert quickly + * inside the tcg generator functions. 

 void mb_tcg_init(void)
 {
-    int i;
+#define R(X) { &cpu_R[X], offsetof(CPUMBState, regs[X]), "r" #X }
+#define SP(X) { &cpu_##X, offsetof(CPUMBState, X), #X }
+
+    static const struct {
+        TCGv_i32 *var; int ofs; char name[8];
+    } i32s[] = {
+        /*
+         * Note that r0 is handled specially in reg_for_read
+         * and reg_for_write. Nothing should touch cpu_R[0].
+         * Leave that element NULL, which will assert quickly
+         * inside the tcg generator functions.
+         */
+        R(1), R(2), R(3), R(4), R(5), R(6), R(7),
+        R(8), R(9), R(10), R(11), R(12), R(13), R(14), R(15),
+        R(16), R(17), R(18), R(19), R(20), R(21), R(22), R(23),
+        R(24), R(25), R(26), R(27), R(28), R(29), R(30), R(31),
+
+        SP(pc),
+        SP(msr),
+        SP(msr_c),
+        SP(imm),
+        SP(iflags),
+        SP(bvalue),
+        SP(btarget),
+        SP(res_val),
+    };

-    env_debug = tcg_global_mem_new_i32(cpu_env,
-                    offsetof(CPUMBState, debug),
-                    "debug0");
-    env_iflags = tcg_global_mem_new_i32(cpu_env,
-                    offsetof(CPUMBState, iflags),
-                    "iflags");
-    env_imm = tcg_global_mem_new_i32(cpu_env,
-                    offsetof(CPUMBState, imm),
-                    "imm");
-    env_btarget = tcg_global_mem_new_i64(cpu_env,
-                    offsetof(CPUMBState, btarget),
-                    "btarget");
-    env_btaken = tcg_global_mem_new_i32(cpu_env,
-                    offsetof(CPUMBState, btaken),
-                    "btaken");
-    env_res_addr = tcg_global_mem_new(cpu_env,
-                    offsetof(CPUMBState, res_addr),
-                    "res_addr");
-    env_res_val = tcg_global_mem_new_i32(cpu_env,
-                    offsetof(CPUMBState, res_val),
-                    "res_val");
-    for (i = 0; i < ARRAY_SIZE(cpu_R); i++) {
-        cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
-                        offsetof(CPUMBState, regs[i]),
-                        regnames[i]);
-    }
-    for (i = 0; i < ARRAY_SIZE(cpu_SR); i++) {
-        cpu_SR[i] = tcg_global_mem_new_i64(cpu_env,
-                        offsetof(CPUMBState, sregs[i]),
-                        special_regnames[i]);
+#undef R
+#undef SP
+
+    for (int i = 0; i < ARRAY_SIZE(i32s); ++i) {
+        *i32s[i].var =
+            tcg_global_mem_new_i32(cpu_env, i32s[i].ofs, i32s[i].name);
     }
+
+    cpu_res_addr =
+        tcg_global_mem_new(cpu_env, offsetof(CPUMBState, res_addr), "res_addr");
 }

 void restore_state_to_opc(CPUMBState *env, TranslationBlock *tb,
                           target_ulong *data)
 {
-    env->sregs[SR_PC] = data[0];
+    env->pc = data[0];
+    env->iflags = data[1];
 }
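One detail worth calling out in the microblaze hunks above: restore_state_to_opc() now reads two values because mb_tr_insn_start() records two per instruction, and the slot order must agree on both sides. Side by side, quoting only lines already in the patch:

    /* at translation time, in mb_tr_insn_start(): */
    tcg_gen_insn_start(dc->base.pc_next,              /* -> data[0] */
                       dc->tb_flags & ~MSR_TB_MASK);  /* -> data[1] */

    /* at exception-unwind time, in restore_state_to_opc(): */
    env->pc = data[0];
    env->iflags = data[1];

Recording a value on one side without consuming it on the other would silently corrupt unwind state.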
diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
index 55b68d1246..e43a3b4686 100644
--- a/target/ppc/misc_helper.c
+++ b/target/ppc/misc_helper.c
@@ -234,25 +234,20 @@ target_ulong helper_clcs(CPUPPCState *env, uint32_t arg)
     case 0x0CUL:
         /* Instruction cache line size */
         return env->icache_line_size;
-        break;
     case 0x0DUL:
         /* Data cache line size */
         return env->dcache_line_size;
-        break;
     case 0x0EUL:
         /* Minimum cache line size */
         return (env->icache_line_size < env->dcache_line_size) ?
                env->icache_line_size : env->dcache_line_size;
-        break;
     case 0x0FUL:
         /* Maximum cache line size */
         return (env->icache_line_size > env->dcache_line_size) ?
               env->icache_line_size : env->dcache_line_size;
-        break;
     default:
         /* Undefined */
         return 0;
-        break;
     }
 }

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 793af99067..a156573d28 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -3955,12 +3955,6 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
     }                                                         \
 }

-static bool float16_eq_quiet(uint16_t a, uint16_t b, float_status *s)
-{
-    FloatRelation compare = float16_compare_quiet(a, b, s);
-    return compare == float_relation_equal;
-}
-
 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
@@ -4017,12 +4011,6 @@ GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)

-static bool float16_lt(uint16_t a, uint16_t b, float_status *s)
-{
-    FloatRelation compare = float16_compare(a, b, s);
-    return compare == float_relation_less;
-}
-
 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
@@ -4030,13 +4018,6 @@ GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)

-static bool float16_le(uint16_t a, uint16_t b, float_status *s)
-{
-    FloatRelation compare = float16_compare(a, b, s);
-    return compare == float_relation_less ||
-           compare == float_relation_equal;
-}
-
 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
@@ -4091,12 +4072,6 @@ GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)

-static bool float16_unordered_quiet(uint16_t a, uint16_t b, float_status *s)
-{
-    FloatRelation compare = float16_compare_quiet(a, b, s);
-    return compare == float_relation_unordered;
-}
-
 GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet)
 GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet)
 GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet)
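The float16 helpers deleted above are not lost functionality: identically shaped inline versions are available generically from include/fpu/softfloat.h, which is what allows vector_helper.c to drop its local copies. The removed bodies also document a deliberate split: eq and unordered use the quiet compare, while lt and le use the signaling one, matching IEEE 754 semantics for ordered relations. The shape of such a wrapper, reproducing the removed float16_lt (assumes QEMU's softfloat types):

    static bool float16_lt(uint16_t a, uint16_t b, float_status *s)
    {
        /* Signaling compare: raises invalid-operation on any NaN input. */
        return float16_compare(a, b, s) == float_relation_less;
    }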
diff --git a/target/rx/cpu-qom.h b/target/rx/cpu-qom.h
index 3e81856ef5..9054762326 100644
--- a/target/rx/cpu-qom.h
+++ b/target/rx/cpu-qom.h
@@ -25,11 +25,12 @@

 #define TYPE_RX62N_CPU RX_CPU_TYPE_NAME("rx62n")

-#define RXCPU_CLASS(klass) \
+typedef struct RXCPU RXCPU;
+#define RX_CPU_CLASS(klass) \
     OBJECT_CLASS_CHECK(RXCPUClass, (klass), TYPE_RX_CPU)
-#define RXCPU(obj) \
+#define RX_CPU(obj) \
     OBJECT_CHECK(RXCPU, (obj), TYPE_RX_CPU)
-#define RXCPU_GET_CLASS(obj) \
+#define RX_CPU_GET_CLASS(obj) \
     OBJECT_GET_CLASS(RXCPUClass, (obj), TYPE_RX_CPU)

 /*
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
index 219e05397b..23ee17a701 100644
--- a/target/rx/cpu.c
+++ b/target/rx/cpu.c
@@ -28,14 +28,14 @@

 static void rx_cpu_set_pc(CPUState *cs, vaddr value)
 {
-    RXCPU *cpu = RXCPU(cs);
+    RXCPU *cpu = RX_CPU(cs);

     cpu->env.pc = value;
 }

 static void rx_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb)
 {
-    RXCPU *cpu = RXCPU(cs);
+    RXCPU *cpu = RX_CPU(cs);

     cpu->env.pc = tb->pc;
 }
@@ -48,8 +48,8 @@ static bool rx_cpu_has_work(CPUState *cs)

 static void rx_cpu_reset(DeviceState *dev)
 {
-    RXCPU *cpu = RXCPU(dev);
-    RXCPUClass *rcc = RXCPU_GET_CLASS(cpu);
+    RXCPU *cpu = RX_CPU(dev);
+    RXCPUClass *rcc = RX_CPU_GET_CLASS(cpu);
     CPURXState *env = &cpu->env;
     uint32_t *resetvec;

@@ -108,7 +108,7 @@ static ObjectClass *rx_cpu_class_by_name(const char *cpu_model)
 static void rx_cpu_realize(DeviceState *dev, Error **errp)
 {
     CPUState *cs = CPU(dev);
-    RXCPUClass *rcc = RXCPU_GET_CLASS(dev);
+    RXCPUClass *rcc = RX_CPU_GET_CLASS(dev);
     Error *local_err = NULL;

     cpu_exec_realizefn(cs, &local_err);
@@ -164,7 +164,7 @@ static bool rx_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
 static void rx_cpu_init(Object *obj)
 {
     CPUState *cs = CPU(obj);
-    RXCPU *cpu = RXCPU(obj);
+    RXCPU *cpu = RX_CPU(obj);
     CPURXState *env = &cpu->env;

     cpu_set_cpustate_pointers(cpu);
@@ -176,7 +176,7 @@ static void rx_cpu_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     CPUClass *cc = CPU_CLASS(klass);
-    RXCPUClass *rcc = RXCPU_CLASS(klass);
+    RXCPUClass *rcc = RX_CPU_CLASS(klass);

     device_class_set_parent_realize(dc, rx_cpu_realize,
                                     &rcc->parent_realize);
diff --git a/target/rx/cpu.h b/target/rx/cpu.h
index d1fb1ef3ca..0b4b998c7b 100644
--- a/target/rx/cpu.h
+++ b/target/rx/cpu.h
@@ -115,7 +115,6 @@ struct RXCPU {
     CPURXState env;
 };

-typedef struct RXCPU RXCPU;
 typedef RXCPU ArchCPU;

 #define ENV_OFFSET offsetof(RXCPU, env)
diff --git a/target/rx/gdbstub.c b/target/rx/gdbstub.c
index 9391e8151e..c811d4810b 100644
--- a/target/rx/gdbstub.c
+++ b/target/rx/gdbstub.c
@@ -22,7 +22,7 @@

 int rx_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
 {
-    RXCPU *cpu = RXCPU(cs);
+    RXCPU *cpu = RX_CPU(cs);
     CPURXState *env = &cpu->env;

     switch (n) {
@@ -54,7 +54,7 @@ int rx_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)

 int rx_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
 {
-    RXCPU *cpu = RXCPU(cs);
+    RXCPU *cpu = RX_CPU(cs);
     CPURXState *env = &cpu->env;
     uint32_t psw;
     switch (n) {
diff --git a/target/rx/helper.c b/target/rx/helper.c
index a6a337a311..3e380a94fe 100644
--- a/target/rx/helper.c
+++ b/target/rx/helper.c
@@ -44,7 +44,7 @@ void rx_cpu_unpack_psw(CPURXState *env, uint32_t psw, int rte)
 #define INT_FLAGS (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIR)
 void rx_cpu_do_interrupt(CPUState *cs)
 {
-    RXCPU *cpu = RXCPU(cs);
+    RXCPU *cpu = RX_CPU(cs);
     CPURXState *env = &cpu->env;
     int do_irq = cs->interrupt_request & INT_FLAGS;
     uint32_t save_psw;
@@ -121,7 +121,7 @@ void rx_cpu_do_interrupt(CPUState *cs)

 bool rx_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
 {
-    RXCPU *cpu = RXCPU(cs);
+    RXCPU *cpu = RX_CPU(cs);
     CPURXState *env = &cpu->env;
     int accept = 0;
     /* hardware interrupt (Normal) */
diff --git a/target/rx/translate.c b/target/rx/translate.c
index da9713d362..482278edd2 100644
--- a/target/rx/translate.c
+++ b/target/rx/translate.c
@@ -128,7 +128,7 @@ static int bdsp_s(DisasContext *ctx, int d)

 void rx_cpu_dump_state(CPUState *cs, FILE *f, int flags)
 {
-    RXCPU *cpu = RXCPU(cs);
+    RXCPU *cpu = RX_CPU(cs);
     CPURXState *env = &cpu->env;
     int i;
     uint32_t psw;
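The RX renames above align the cast macros with the usual QOM convention (macro RX_CPU() spelled like TYPE_RX_CPU, leaving RXCPU free as the struct name), and the RXCPU typedef moves into cpu-qom.h so that OBJECT_CHECK() can see the type wherever the macro expands. The general pattern, sketched for a hypothetical FOO type -- names here are placeholders, not from the patch:

    #define TYPE_FOO "foo"

    typedef struct FooState FooState;   /* typedef visible to the macro */

    #define FOO(obj) \
        OBJECT_CHECK(FooState, (obj), TYPE_FOO)        /* checked downcast */
    #define FOO_CLASS(klass) \
        OBJECT_CLASS_CHECK(FooClass, (klass), TYPE_FOO)
    #define FOO_GET_CLASS(obj) \
        OBJECT_GET_CLASS(FooClass, (obj), TYPE_FOO)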
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 6192d83e8c..60c863d9e1 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -1542,7 +1542,6 @@ static void _decode_opc(DisasContext * ctx)
         tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
                             MO_TEUL | MO_UNALN);
         return;
-        break;
     case 0x40e9: /* movua.l @Rm+,R0 */
         CHECK_SH4A
         /* Load non-boundary-aligned data */
         tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
                             MO_TEUL | MO_UNALN);
         tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
         return;
-        break;
     case 0x0029: /* movt Rn */
         tcg_gen_mov_i32(REG(B11_8), cpu_sr_t);
         return;
@@ -1638,7 +1636,6 @@ static void _decode_opc(DisasContext * ctx)
         CHECK_SH4A
         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
         return;
-        break;
     case 0x4024: /* rotcl Rn */
         {
             TCGv tmp = tcg_temp_new();
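The ppc and sh4 hunks in this section share one mechanical cleanup: a break placed directly after a return can never execute, and compilers such as clang (with -Wunreachable-code) will warn about it. Reduced to a minimal form (illustrative names, not from the patch):

    /* Before the cleanup: */
    static int cache_line_size(int sel, int icache, int dcache)
    {
        switch (sel) {
        case 0x0c:
            return icache;
            break;      /* unreachable -- this is the line the patch drops */
        default:
            return 0;
        }
    }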