Diffstat (limited to 'target')
-rw-r--r--  target/arm/cpu-qom.h | 2
-rw-r--r--  target/arm/cpu.c | 3
-rw-r--r--  target/arm/cpu.h | 7
-rw-r--r--  target/arm/cpu64.c | 10
-rw-r--r--  target/arm/helper-a64.c | 11
-rw-r--r--  target/arm/helper-sve.h | 5
-rw-r--r--  target/arm/helper.c | 1
-rw-r--r--  target/arm/helper.h | 161
-rw-r--r--  target/arm/kvm64.c | 1
-rw-r--r--  target/arm/mte_helper.c | 19
-rw-r--r--  target/arm/neon-dp.decode | 8
-rw-r--r--  target/arm/sve.decode | 35
-rw-r--r--  target/arm/sve_helper.c | 70
-rw-r--r--  target/arm/translate-a64.c | 117
-rw-r--r--  target/arm/translate-neon.c.inc | 765
-rw-r--r--  target/arm/translate-sve.c | 397
-rw-r--r--  target/arm/translate-vfp.c.inc | 810
-rw-r--r--  target/arm/translate.h | 1
-rw-r--r--  target/arm/vec_helper.c | 601
-rw-r--r--  target/arm/vfp-uncond.decode | 27
-rw-r--r--  target/arm/vfp.decode | 34
-rw-r--r--  target/arm/vfp_helper.c | 244
-rw-r--r--  target/cris/translate.c | 7
-rw-r--r--  target/cris/translate_v10.c.inc | 2
-rw-r--r--  target/i386/cpu.c | 190
-rw-r--r--  target/i386/cpu.h | 3
-rw-r--r--  target/i386/hvf/hvf-i386.h | 4
-rw-r--r--  target/i386/kvm.c | 2
-rw-r--r--  target/i386/sev.c | 1
-rw-r--r--  target/microblaze/cpu-param.h | 15
-rw-r--r--  target/microblaze/cpu.c | 20
-rw-r--r--  target/microblaze/cpu.h | 70
-rw-r--r--  target/microblaze/gdbstub.c | 189
-rw-r--r--  target/microblaze/helper.c | 181
-rw-r--r--  target/microblaze/helper.h | 49
-rw-r--r--  target/microblaze/insns.decode | 256
-rw-r--r--  target/microblaze/meson.build | 3
-rw-r--r--  target/microblaze/microblaze-decode.h | 59
-rw-r--r--  target/microblaze/mmu.c | 4
-rw-r--r--  target/microblaze/op_helper.c | 194
-rw-r--r--  target/microblaze/translate.c | 2901
-rw-r--r--  target/ppc/misc_helper.c | 5
-rw-r--r--  target/riscv/vector_helper.c | 25
-rw-r--r--  target/rx/cpu-qom.h | 7
-rw-r--r--  target/rx/cpu.c | 14
-rw-r--r--  target/rx/cpu.h | 1
-rw-r--r--  target/rx/gdbstub.c | 4
-rw-r--r--  target/rx/helper.c | 4
-rw-r--r--  target/rx/translate.c | 2
-rw-r--r--  target/sh4/translate.c | 3
50 files changed, 4378 insertions, 3166 deletions
diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h
index 56395b87f6..fdef05cacf 100644
--- a/target/arm/cpu-qom.h
+++ b/target/arm/cpu-qom.h
@@ -67,7 +67,7 @@ typedef struct ARMCPU ARMCPU;
#define AARCH64_CPU_CLASS(klass) \
OBJECT_CLASS_CHECK(AArch64CPUClass, (klass), TYPE_AARCH64_CPU)
#define AARCH64_CPU_GET_CLASS(obj) \
- OBJECT_GET_CLASS(AArch64CPUClass, (obj), TYPE_AArch64_CPU)
+ OBJECT_GET_CLASS(AArch64CPUClass, (obj), TYPE_AARCH64_CPU)
typedef struct AArch64CPUClass {
/*< private >*/
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 6b382fcd60..c179e0752d 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2143,7 +2143,8 @@ static void arm_max_initfn(Object *obj)
cpu->isar.id_isar6 = t;
t = cpu->isar.mvfr1;
- t = FIELD_DP32(t, MVFR1, FPHP, 2); /* v8.0 FP support */
+ t = FIELD_DP32(t, MVFR1, FPHP, 3); /* v8.2-FP16 */
+ t = FIELD_DP32(t, MVFR1, SIMDHP, 2); /* v8.2-FP16 */
cpu->isar.mvfr1 = t;
t = cpu->isar.mvfr2;
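
Note: FIELD_DP32()/FIELD_EX32() in the hunk above and in the cpu.h hunk that follows are QEMU's register-field deposit/extract macros. A minimal hand-rolled sketch of the same idea, for illustration only (the names dp32/ex32 below are made up, not QEMU's implementation):

    #include <stdint.h>

    /* Deposit 'val' into a 'len'-bit field of 'reg' starting at bit 'shift'. */
    static inline uint32_t dp32(uint32_t reg, unsigned shift, unsigned len, uint32_t val)
    {
        uint32_t mask = ((1u << len) - 1) << shift;
        return (reg & ~mask) | ((val << shift) & mask);
    }

    /* Extract the same field back out. */
    static inline uint32_t ex32(uint32_t reg, unsigned shift, unsigned len)
    {
        return (reg >> shift) & ((1u << len) - 1);
    }

Depositing 3 into MVFR1.FPHP this way advertises v8.2 half-precision arithmetic, which the reworked isar_feature_aa32_fp16_arith() below then tests with a matching extract-and-compare.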
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index ac857bdc2c..a1c7d8ebae 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3519,12 +3519,7 @@ static inline bool isar_feature_aa32_predinv(const ARMISARegisters *id)
static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id)
{
- /*
- * This is a placeholder for use by VCMA until the rest of
- * the ARMv8.2-FP16 extension is implemented for aa32 mode.
- * At which point we can properly set and check MVFR1.FPHP.
- */
- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1;
+ return FIELD_EX32(id->mvfr1, MVFR1, FPHP) >= 3;
}
static inline bool isar_feature_aa32_vfp_simd(const ARMISARegisters *id)
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index dd696183df..3c2b3d9599 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -704,12 +704,10 @@ static void aarch64_max_initfn(Object *obj)
u = FIELD_DP32(u, ID_DFR0, PERFMON, 5); /* v8.4-PMU */
cpu->isar.id_dfr0 = u;
- /*
- * FIXME: We do not yet support ARMv8.2-fp16 for AArch32 yet,
- * so do not set MVFR1.FPHP. Strictly speaking this is not legal,
- * but it is also not legal to enable SVE without support for FP16,
- * and enabling SVE in system mode is more useful in the short term.
- */
+ u = cpu->isar.mvfr1;
+ u = FIELD_DP32(u, MVFR1, FPHP, 3); /* v8.2-FP16 */
+ u = FIELD_DP32(u, MVFR1, SIMDHP, 2); /* v8.2-FP16 */
+ cpu->isar.mvfr1 = u;
#ifdef CONFIG_USER_ONLY
/* For usermode -cpu max we can use a larger and more efficient DCZ
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 8682630ff6..030821489b 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -234,17 +234,6 @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
* versions, these do a fully fused multiply-add or
* multiply-add-and-halve.
*/
-#define float16_two make_float16(0x4000)
-#define float16_three make_float16(0x4200)
-#define float16_one_point_five make_float16(0x3e00)
-
-#define float32_two make_float32(0x40000000)
-#define float32_three make_float32(0x40400000)
-#define float32_one_point_five make_float32(0x3fc00000)
-
-#define float64_two make_float64(0x4000000000000000ULL)
-#define float64_three make_float64(0x4008000000000000ULL)
-#define float64_one_point_five make_float64(0x3FF8000000000000ULL)
uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp)
{
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 63c4a087ca..4411c47120 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -269,11 +269,6 @@ DEF_HELPER_FLAGS_3(sve_uminv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_uminv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_uminv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
DEF_HELPER_FLAGS_4(sve_movz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_movz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_movz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 6b4f0eb533..44d666627a 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -5105,7 +5105,6 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = {
.access = PL2_RW,
.readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore },
{ .name = "HCR_EL2", .state = ARM_CP_STATE_BOTH,
- .type = ARM_CP_NO_RAW,
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
.access = PL2_RW,
.type = ARM_CP_CONST, .resetvalue = 0 },
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 759639a63a..8defd7c801 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -101,30 +101,43 @@ DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, tl, i32, i32, i32)
DEF_HELPER_1(vfp_get_fpscr, i32, env)
DEF_HELPER_2(vfp_set_fpscr, void, env, i32)
+DEF_HELPER_3(vfp_addh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_adds, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_addd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_subh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_subs, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_subd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_mulh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_muls, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_muld, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_divh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_divs, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_divd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_maxh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_maxs, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_maxd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_minh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_mins, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_mind, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_maxnumh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_maxnums, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr)
+DEF_HELPER_1(vfp_negh, f16, f16)
DEF_HELPER_1(vfp_negs, f32, f32)
DEF_HELPER_1(vfp_negd, f64, f64)
+DEF_HELPER_1(vfp_absh, f16, f16)
DEF_HELPER_1(vfp_abss, f32, f32)
DEF_HELPER_1(vfp_absd, f64, f64)
+DEF_HELPER_2(vfp_sqrth, f16, f16, env)
DEF_HELPER_2(vfp_sqrts, f32, f32, env)
DEF_HELPER_2(vfp_sqrtd, f64, f64, env)
+DEF_HELPER_3(vfp_cmph, void, f16, f16, env)
DEF_HELPER_3(vfp_cmps, void, f32, f32, env)
DEF_HELPER_3(vfp_cmpd, void, f64, f64, env)
+DEF_HELPER_3(vfp_cmpeh, void, f16, f16, env)
DEF_HELPER_3(vfp_cmpes, void, f32, f32, env)
DEF_HELPER_3(vfp_cmped, void, f64, f64, env)
@@ -151,6 +164,10 @@ DEF_HELPER_2(vfp_tosizh, s32, f16, ptr)
DEF_HELPER_2(vfp_tosizs, s32, f32, ptr)
DEF_HELPER_2(vfp_tosizd, s32, f64, ptr)
+DEF_HELPER_3(vfp_toshh_round_to_zero, i32, f16, i32, ptr)
+DEF_HELPER_3(vfp_toslh_round_to_zero, i32, f16, i32, ptr)
+DEF_HELPER_3(vfp_touhh_round_to_zero, i32, f16, i32, ptr)
+DEF_HELPER_3(vfp_toulh_round_to_zero, i32, f16, i32, ptr)
DEF_HELPER_3(vfp_toshs_round_to_zero, i32, f32, i32, ptr)
DEF_HELPER_3(vfp_tosls_round_to_zero, i32, f32, i32, ptr)
DEF_HELPER_3(vfp_touhs_round_to_zero, i32, f32, i32, ptr)
@@ -189,13 +206,14 @@ DEF_HELPER_3(vfp_sqtod, f64, i64, i32, ptr)
DEF_HELPER_3(vfp_uhtod, f64, i64, i32, ptr)
DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr)
DEF_HELPER_3(vfp_uqtod, f64, i64, i32, ptr)
+DEF_HELPER_3(vfp_shtoh, f16, i32, i32, ptr)
+DEF_HELPER_3(vfp_uhtoh, f16, i32, i32, ptr)
DEF_HELPER_3(vfp_sltoh, f16, i32, i32, ptr)
DEF_HELPER_3(vfp_ultoh, f16, i32, i32, ptr)
DEF_HELPER_3(vfp_sqtoh, f16, i64, i32, ptr)
DEF_HELPER_3(vfp_uqtoh, f16, i64, i32, ptr)
DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, ptr)
-DEF_HELPER_FLAGS_2(set_neon_rmode, TCG_CALL_NO_RWG, i32, i32, env)
DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f32, TCG_CALL_NO_RWG, f32, f16, ptr, i32)
DEF_HELPER_FLAGS_3(vfp_fcvt_f32_to_f16, TCG_CALL_NO_RWG, f16, f32, ptr, i32)
@@ -204,9 +222,8 @@ DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, ptr, i32)
DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr)
DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
+DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, ptr)
-DEF_HELPER_3(recps_f32, f32, env, f32, f32)
-DEF_HELPER_3(rsqrts_f32, f32, env, f32, f32)
DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
@@ -222,8 +239,10 @@ DEF_HELPER_3(shr_cc, i32, env, i32, i32)
DEF_HELPER_3(sar_cc, i32, env, i32, i32)
DEF_HELPER_3(ror_cc, i32, env, i32, i32)
+DEF_HELPER_FLAGS_2(rinth_exact, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, ptr)
+DEF_HELPER_FLAGS_2(rinth, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr)
@@ -587,6 +606,43 @@ DEF_HELPER_FLAGS_5(gvec_fcmlas_idx, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(gvec_fcmlad, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_paddh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_pmaxh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_pminh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_padds, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_pmaxs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_pmins, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_sstoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sitos, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ustoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uitos, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_tosszh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_tosizs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_touszh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_touizs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_vcvt_sf, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_uf, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_fs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_fu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_vcvt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_uh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_hs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_vcvt_rm_ss, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rm_us, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rm_sh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rm_uh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_vrint_rm_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vrint_rm_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_vrintx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -595,6 +651,21 @@ DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_fcgt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_fcge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_fcge0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_fceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_fceq0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_fcle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_fcle0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_fclt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_fclt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
@@ -607,8 +678,54 @@ DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fcge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fcge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fcgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fcgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_facge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_facge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_facgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_facgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmaxnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fminnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fminnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_recps_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmla_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmls_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmls_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_vfma_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vfma_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
@@ -623,6 +740,16 @@ DEF_HELPER_FLAGS_5(gvec_fmul_idx_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(gvec_fmul_idx_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_6(gvec_fmla_idx_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
@@ -758,6 +885,34 @@ DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_mul_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_mul_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_mul_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_mla_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mla_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mla_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_mls_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mls_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mls_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(neon_sqdmulh_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqdmulh_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#include "helper-sve.h"
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index 1169237905..ef1e960285 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -330,7 +330,6 @@ int kvm_arch_remove_hw_breakpoint(target_ulong addr,
switch (type) {
case GDB_BREAKPOINT_HW:
return delete_hw_breakpoint(addr);
- break;
case GDB_WATCHPOINT_READ:
case GDB_WATCHPOINT_WRITE:
case GDB_WATCHPOINT_ACCESS:
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index 104752041f..891306f5b0 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -514,11 +514,12 @@ void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val)
}
/* Record a tag check failure. */
-static void mte_check_fail(CPUARMState *env, int mmu_idx,
+static void mte_check_fail(CPUARMState *env, uint32_t desc,
uint64_t dirty_ptr, uintptr_t ra)
{
+ int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx);
- int el, reg_el, tcf, select;
+ int el, reg_el, tcf, select, is_write, syn;
uint64_t sctlr;
reg_el = regime_el(env, arm_mmu_idx);
@@ -546,9 +547,10 @@ static void mte_check_fail(CPUARMState *env, int mmu_idx,
*/
cpu_restore_state(env_cpu(env), ra, true);
env->exception.vaddress = dirty_ptr;
- raise_exception(env, EXCP_DATA_ABORT,
- syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, 0, 0x11),
- exception_target_el(env));
+
+ is_write = FIELD_EX32(desc, MTEDESC, WRITE);
+ syn = syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, is_write, 0x11);
+ raise_exception(env, EXCP_DATA_ABORT, syn, exception_target_el(env));
/* noreturn, but fall through to the assert anyway */
case 0:
@@ -639,8 +641,7 @@ uint64_t mte_check1(CPUARMState *env, uint32_t desc,
}
if (unlikely(!mte_probe1_int(env, desc, ptr, ra, bit55))) {
- int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
- mte_check_fail(env, mmu_idx, ptr, ra);
+ mte_check_fail(env, desc, ptr, ra);
}
return useronly_clean_ptr(ptr);
@@ -810,7 +811,7 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc,
fail_ofs = tag_first + n * TAG_GRANULE - ptr;
fail_ofs = ROUND_UP(fail_ofs, esize);
- mte_check_fail(env, mmu_idx, ptr + fail_ofs, ra);
+ mte_check_fail(env, desc, ptr + fail_ofs, ra);
}
done:
@@ -922,7 +923,7 @@ uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr)
fail:
/* Locate the first nibble that differs. */
i = ctz64(mem_tag ^ ptr_tag) >> 4;
- mte_check_fail(env, mmu_idx, align_ptr + i * TAG_GRANULE, ra);
+ mte_check_fail(env, desc, align_ptr + i * TAG_GRANULE, ra);
done:
return useronly_clean_ptr(ptr);
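
Side note on the mte_helper.c change above: mte_check_fail() now receives the packed 32-bit MTE descriptor rather than a bare mmu_idx, so it can also pull out whether the faulting access was a write when building the data-abort syndrome. A standalone sketch of that pack/extract style, with made-up field positions (not the real MTEDESC layout):

    #include <stdint.h>

    /* Hypothetical field positions, for illustration only. */
    #define DESC_MIDX_SHIFT   0   /* mmu index */
    #define DESC_MIDX_LEN     4
    #define DESC_WRITE_SHIFT  4   /* 1 if the faulting access was a store */

    static inline unsigned desc_midx(uint32_t desc)
    {
        return (desc >> DESC_MIDX_SHIFT) & ((1u << DESC_MIDX_LEN) - 1);
    }

    static inline unsigned desc_is_write(uint32_t desc)
    {
        return (desc >> DESC_WRITE_SHIFT) & 1;
    }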
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index 686f9fbf46..1e9e859291 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -254,6 +254,8 @@ VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
# We use size=0 for fp32 and size=1 for fp16 to match the 3-same encodings.
@2reg_vcvt .... ... . . . 1 ..... .... .... . q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=0 shift=%neon_rshift_i5
+@2reg_vcvt_f16 .... ... . . . 11 .... .... .... . q:1 . . .... \
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=1 shift=%neon_rshift_i4
VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d
VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s
@@ -370,7 +372,11 @@ VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_h
VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_b
# VCVT fixed<->float conversions
-# TODO: FP16 fixed<->float conversions are opc==0b1100 and 0b1101
+VCVT_SH_2sh 1111 001 0 1 . ...... .... 1100 0 . . 1 .... @2reg_vcvt_f16
+VCVT_UH_2sh 1111 001 1 1 . ...... .... 1100 0 . . 1 .... @2reg_vcvt_f16
+VCVT_HS_2sh 1111 001 0 1 . ...... .... 1101 0 . . 1 .... @2reg_vcvt_f16
+VCVT_HU_2sh 1111 001 1 1 . ...... .... 1101 0 . . 1 .... @2reg_vcvt_f16
+
VCVT_SF_2sh 1111 001 0 1 . ...... .... 1110 0 . . 1 .... @2reg_vcvt
VCVT_UF_2sh 1111 001 1 1 . ...... .... 1110 0 . . 1 .... @2reg_vcvt
VCVT_FS_2sh 1111 001 0 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 4f580a25e7..6425396ac1 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -150,13 +150,17 @@
@rd_rn_i6 ........ ... rn:5 ..... imm:s6 rd:5 &rri
# Two register operand, one immediate operand, with predicate,
-# element size encoded as TSZHL. User must fill in imm.
-@rdn_pg_tszimm ........ .. ... ... ... pg:3 ..... rd:5 \
- &rpri_esz rn=%reg_movprfx esz=%tszimm_esz
+# element size encoded as TSZHL.
+@rdn_pg_tszimm_shl ........ .. ... ... ... pg:3 ..... rd:5 \
+ &rpri_esz rn=%reg_movprfx esz=%tszimm_esz imm=%tszimm_shl
+@rdn_pg_tszimm_shr ........ .. ... ... ... pg:3 ..... rd:5 \
+ &rpri_esz rn=%reg_movprfx esz=%tszimm_esz imm=%tszimm_shr
# Similarly without predicate.
-@rd_rn_tszimm ........ .. ... ... ...... rn:5 rd:5 \
- &rri_esz esz=%tszimm16_esz
+@rd_rn_tszimm_shl ........ .. ... ... ...... rn:5 rd:5 \
+ &rri_esz esz=%tszimm16_esz imm=%tszimm16_shl
+@rd_rn_tszimm_shr ........ .. ... ... ...... rn:5 rd:5 \
+ &rri_esz esz=%tszimm16_esz imm=%tszimm16_shr
# Two register operand, one immediate operand, with 4-bit predicate.
# User must fill in imm.
@@ -289,14 +293,10 @@ UMINV 00000100 .. 001 011 001 ... ..... ..... @rd_pg_rn
### SVE Shift by Immediate - Predicated Group
# SVE bitwise shift by immediate (predicated)
-ASR_zpzi 00000100 .. 000 000 100 ... .. ... ..... \
- @rdn_pg_tszimm imm=%tszimm_shr
-LSR_zpzi 00000100 .. 000 001 100 ... .. ... ..... \
- @rdn_pg_tszimm imm=%tszimm_shr
-LSL_zpzi 00000100 .. 000 011 100 ... .. ... ..... \
- @rdn_pg_tszimm imm=%tszimm_shl
-ASRD 00000100 .. 000 100 100 ... .. ... ..... \
- @rdn_pg_tszimm imm=%tszimm_shr
+ASR_zpzi 00000100 .. 000 000 100 ... .. ... ..... @rdn_pg_tszimm_shr
+LSR_zpzi 00000100 .. 000 001 100 ... .. ... ..... @rdn_pg_tszimm_shr
+LSL_zpzi 00000100 .. 000 011 100 ... .. ... ..... @rdn_pg_tszimm_shl
+ASRD 00000100 .. 000 100 100 ... .. ... ..... @rdn_pg_tszimm_shr
# SVE bitwise shift by vector (predicated)
ASR_zpzz 00000100 .. 010 000 100 ... ..... ..... @rdn_pg_rm
@@ -400,12 +400,9 @@ RDVL 00000100 101 11111 01010 imm:s6 rd:5
### SVE Bitwise Shift - Unpredicated Group
# SVE bitwise shift by immediate (unpredicated)
-ASR_zzi 00000100 .. 1 ..... 1001 00 ..... ..... \
- @rd_rn_tszimm imm=%tszimm16_shr
-LSR_zzi 00000100 .. 1 ..... 1001 01 ..... ..... \
- @rd_rn_tszimm imm=%tszimm16_shr
-LSL_zzi 00000100 .. 1 ..... 1001 11 ..... ..... \
- @rd_rn_tszimm imm=%tszimm16_shl
+ASR_zzi 00000100 .. 1 ..... 1001 00 ..... ..... @rd_rn_tszimm_shr
+LSR_zzi 00000100 .. 1 ..... 1001 01 ..... ..... @rd_rn_tszimm_shr
+LSL_zzi 00000100 .. 1 ..... 1001 11 ..... ..... @rd_rn_tszimm_shl
# SVE bitwise shift by wide elements (unpredicated)
# Note esz != 3
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 382fa82bc8..4758d46f34 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -956,85 +956,43 @@ uint32_t HELPER(sve_pnext)(void *vd, void *vg, uint32_t pred_desc)
return flags;
}
-/* Store zero into every active element of Zd. We will use this for two
- * and three-operand predicated instructions for which logic dictates a
- * zero result. In particular, logical shift by element size, which is
- * otherwise undefined on the host.
- *
- * For element sizes smaller than uint64_t, we use tables to expand
- * the N bits of the controlling predicate to a byte mask, and clear
- * those bytes.
+/*
+ * Copy Zn into Zd, and store zero into inactive elements.
+ * If inv, store zeros into the active elements.
*/
-void HELPER(sve_clr_b)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] &= ~expand_pred_b(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_clr_h)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] &= ~expand_pred_h(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_clr_s)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] &= ~expand_pred_s(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_clr_d)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- if (pg[H1(i)] & 1) {
- d[i] = 0;
- }
- }
-}
-
-/* Copy Zn into Zd, and store zero into inactive elements. */
void HELPER(sve_movz_b)(void *vd, void *vn, void *vg, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
uint64_t *d = vd, *n = vn;
uint8_t *pg = vg;
+
for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & expand_pred_b(pg[H1(i)]);
+ d[i] = n[i] & (expand_pred_b(pg[H1(i)]) ^ inv);
}
}
void HELPER(sve_movz_h)(void *vd, void *vn, void *vg, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
uint64_t *d = vd, *n = vn;
uint8_t *pg = vg;
+
for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & expand_pred_h(pg[H1(i)]);
+ d[i] = n[i] & (expand_pred_h(pg[H1(i)]) ^ inv);
}
}
void HELPER(sve_movz_s)(void *vd, void *vn, void *vg, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
uint64_t *d = vd, *n = vn;
uint8_t *pg = vg;
+
for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & expand_pred_s(pg[H1(i)]);
+ d[i] = n[i] & (expand_pred_s(pg[H1(i)]) ^ inv);
}
}
@@ -1043,8 +1001,10 @@ void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc)
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
uint64_t *d = vd, *n = vn;
uint8_t *pg = vg;
+ uint8_t inv = simd_data(desc);
+
for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & -(uint64_t)(pg[H1(i)] & 1);
+ d[i] = n[i] & -(uint64_t)((pg[H1(i)] ^ inv) & 1);
}
}
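
Aside: the sve_helper.c hunks above fold the removed sve_clr_* helpers into sve_movz_*. The trick is that simd_data(desc) carries an "invert" bit, and -(uint64_t)bit expands to either all-zeros or all-ones, so XOR-ing it with the expanded predicate mask flips which lanes survive the AND. A self-contained sketch of the same idea (hypothetical helper name, not the QEMU code):

    #include <stdint.h>

    /* Keep the lanes of 'val' selected by 'mask'; if 'invert' is set,
     * keep the other lanes instead.  With invert == 0 this is the movz
     * behaviour (zero inactive lanes); with invert == 1 it is the old
     * sve_clr behaviour (zero active lanes). */
    static inline uint64_t keep_lanes(uint64_t val, uint64_t mask, int invert)
    {
        uint64_t inv = -(uint64_t)(invert & 1);   /* 0 or 0xffffffffffffffff */
        return val & (mask ^ inv);
    }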
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 0fc5e12fab..7188808341 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -678,6 +678,20 @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
tcg_temp_free_ptr(fpst);
}
+/* Expand a 3-operand + qc + operation using an out-of-line helper. */
+static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
+ int rm, gen_helper_gvec_3_ptr *fn)
+{
+ TCGv_ptr qc_ptr = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), qc_ptr,
+ is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
+ tcg_temp_free_ptr(qc_ptr);
+}
+
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
* than the 32 bit equivalent.
*/
@@ -1156,18 +1170,18 @@ static void do_vec_ld(DisasContext *s, int destidx, int element,
* unallocated-encoding checks (otherwise the syndrome information
* for the resulting exception will be incorrect).
*/
-static inline bool fp_access_check(DisasContext *s)
+static bool fp_access_check(DisasContext *s)
{
- assert(!s->fp_access_checked);
- s->fp_access_checked = true;
+ if (s->fp_excp_el) {
+ assert(!s->fp_access_checked);
+ s->fp_access_checked = true;
- if (!s->fp_excp_el) {
- return true;
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
+ syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
+ return false;
}
-
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
- syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
- return false;
+ s->fp_access_checked = true;
+ return true;
}
/* Check that SVE access is enabled. If it is, return true.
@@ -1176,10 +1190,14 @@ static inline bool fp_access_check(DisasContext *s)
bool sve_access_check(DisasContext *s)
{
if (s->sve_excp_el) {
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_sve_access_trap(),
- s->sve_excp_el);
+ assert(!s->sve_access_checked);
+ s->sve_access_checked = true;
+
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
+ syn_sve_access_trap(), s->sve_excp_el);
return false;
}
+ s->sve_access_checked = true;
return fp_access_check(s);
}
@@ -8613,8 +8631,8 @@ static void handle_scalar_simd_shli(DisasContext *s, bool insert,
int size = 32 - clz32(immh) - 1;
int immhb = immh << 3 | immb;
int shift = immhb - (8 << size);
- TCGv_i64 tcg_rn = new_tmp_a64(s);
- TCGv_i64 tcg_rd = new_tmp_a64(s);
+ TCGv_i64 tcg_rn;
+ TCGv_i64 tcg_rd;
if (!extract32(immh, 3, 1)) {
unallocated_encoding(s);
@@ -11730,6 +11748,15 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
}
return;
+ case 0x16: /* SQDMULH, SQRDMULH */
+ {
+ static gen_helper_gvec_3_ptr * const fns[2][2] = {
+ { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
+ { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
+ };
+ gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
+ }
+ return;
case 0x11:
if (!u) { /* CMTST */
gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
@@ -11841,16 +11868,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genenvfn = fns[size][u];
break;
}
- case 0x16: /* SQDMULH, SQRDMULH */
- {
- static NeonGenTwoOpEnvFn * const fns[2][2] = {
- { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
- { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
- };
- assert(size == 1 || size == 2);
- genenvfn = fns[size - 1][u];
- break;
- }
default:
g_assert_not_reached();
}
@@ -12997,9 +13014,6 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
fpop = deposit32(opcode, 5, 1, a);
fpop = deposit32(fpop, 6, 1, u);
- rd = extract32(insn, 0, 5);
- rn = extract32(insn, 5, 5);
-
switch (fpop) {
case 0x1d: /* SCVTF */
case 0x5d: /* UCVTF */
@@ -13484,6 +13498,56 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
data, gen_helper_gvec_fmlal_idx_a64);
}
return;
+
+ case 0x08: /* MUL */
+ if (!is_long && !is_scalar) {
+ static gen_helper_gvec_3 * const fns[3] = {
+ gen_helper_gvec_mul_idx_h,
+ gen_helper_gvec_mul_idx_s,
+ gen_helper_gvec_mul_idx_d,
+ };
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ index, fns[size - 1]);
+ return;
+ }
+ break;
+
+ case 0x10: /* MLA */
+ if (!is_long && !is_scalar) {
+ static gen_helper_gvec_4 * const fns[3] = {
+ gen_helper_gvec_mla_idx_h,
+ gen_helper_gvec_mla_idx_s,
+ gen_helper_gvec_mla_idx_d,
+ };
+ tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, rd),
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ index, fns[size - 1]);
+ return;
+ }
+ break;
+
+ case 0x14: /* MLS */
+ if (!is_long && !is_scalar) {
+ static gen_helper_gvec_4 * const fns[3] = {
+ gen_helper_gvec_mls_idx_h,
+ gen_helper_gvec_mls_idx_s,
+ gen_helper_gvec_mls_idx_d,
+ };
+ tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, rd),
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ index, fns[size - 1]);
+ return;
+ }
+ break;
}
if (size == 3) {
@@ -14529,6 +14593,7 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s)
s->base.pc_next += 4;
s->fp_access_checked = false;
+ s->sve_access_checked = false;
if (dc_isar_feature(aa64_bti, s)) {
if (s->base.num_insns == 1) {
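
Note on the translate-a64.c hunks above: the new gen_gvec_op3_qc() passes a pointer to the cumulative saturation flag (vfp.qc) in the pointer slot that FP helpers normally use for float_status, because SQDMULH/SQRDMULH set QC rather than raise FP exceptions. A rough per-lane sketch of a saturating doubling multiply-high that sets such a flag (illustrative only, not the QEMU helper):

    #include <stdint.h>

    /* 16-bit signed saturating doubling multiply, returning the high half.
     * Sets *qc when the doubled product saturates (only a == b == INT16_MIN). */
    static int16_t sqdmulh16(int16_t a, int16_t b, uint32_t *qc)
    {
        int64_t p = 2 * (int64_t)a * (int64_t)b;      /* doubling product */

        if (p > ((int64_t)INT16_MAX << 16)) {
            *qc = 1;                                  /* cumulative saturation */
            return INT16_MAX;
        }
        return p >> 16;                               /* high half */
    }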
diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc
index 9879731a52..2d4926316a 100644
--- a/target/arm/translate-neon.c.inc
+++ b/target/arm/translate-neon.c.inc
@@ -1033,122 +1033,54 @@ DO_3SAME_PAIR(VPADD, padd_u)
DO_3SAME_VQDMULH(VQDMULH, qdmulh)
DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
-static bool do_3same_fp(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn,
- bool reads_vd)
-{
- /*
- * FP operations handled elementwise 32 bits at a time.
- * If reads_vd is true then the old value of Vd will be
- * loaded before calling the callback function. This is
- * used for multiply-accumulate type operations.
- */
- TCGv_i32 tmp, tmp2;
- int pass;
-
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- return false;
- }
-
- /* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_simd_r32, s) &&
- ((a->vd | a->vn | a->vm) & 0x10)) {
- return false;
- }
-
- if ((a->vn | a->vm | a->vd) & a->q) {
- return false;
- }
-
- if (!vfp_access_check(s)) {
- return true;
- }
-
- TCGv_ptr fpstatus = fpstatus_ptr(FPST_STD);
- for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
- tmp = neon_load_reg(a->vn, pass);
- tmp2 = neon_load_reg(a->vm, pass);
- if (reads_vd) {
- TCGv_i32 tmp_rd = neon_load_reg(a->vd, pass);
- fn(tmp_rd, tmp, tmp2, fpstatus);
- neon_store_reg(a->vd, pass, tmp_rd);
- tcg_temp_free_i32(tmp);
- } else {
- fn(tmp, tmp, tmp2, fpstatus);
- neon_store_reg(a->vd, pass, tmp);
- }
- tcg_temp_free_i32(tmp2);
- }
- tcg_temp_free_ptr(fpstatus);
- return true;
-}
-
-/*
- * For all the functions using this macro, size == 1 means fp16,
- * which is an architecture extension we don't implement yet.
- */
-#define DO_3S_FP_GVEC(INSN,FUNC) \
- static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
- uint32_t rn_ofs, uint32_t rm_ofs, \
- uint32_t oprsz, uint32_t maxsz) \
+#define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC) \
+ static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rn_ofs, uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
{ \
- TCGv_ptr fpst = fpstatus_ptr(FPST_STD); \
+ TCGv_ptr fpst = fpstatus_ptr(FPST); \
tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \
oprsz, maxsz, 0, FUNC); \
tcg_temp_free_ptr(fpst); \
- } \
+ }
+
+#define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC) \
+ WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC) \
+ WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC) \
static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
{ \
if (a->size != 0) { \
- /* TODO fp16 support */ \
- return false; \
+ if (!dc_isar_feature(aa32_fp16_arith, s)) { \
+ return false; \
+ } \
+ return do_3same(s, a, gen_##INSN##_fp16_3s); \
} \
- return do_3same(s, a, gen_##INSN##_3s); \
- }
-
-
-DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s)
-DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s)
-DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s)
-DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s)
-
-/*
- * For all the functions using this macro, size == 1 means fp16,
- * which is an architecture extension we don't implement yet.
- */
-#define DO_3S_FP(INSN,FUNC,READS_VD) \
- static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
- { \
- if (a->size != 0) { \
- /* TODO fp16 support */ \
- return false; \
- } \
- return do_3same_fp(s, a, FUNC, READS_VD); \
- }
-
-DO_3S_FP(VCEQ, gen_helper_neon_ceq_f32, false)
-DO_3S_FP(VCGE, gen_helper_neon_cge_f32, false)
-DO_3S_FP(VCGT, gen_helper_neon_cgt_f32, false)
-DO_3S_FP(VACGE, gen_helper_neon_acge_f32, false)
-DO_3S_FP(VACGT, gen_helper_neon_acgt_f32, false)
-DO_3S_FP(VMAX, gen_helper_vfp_maxs, false)
-DO_3S_FP(VMIN, gen_helper_vfp_mins, false)
-
-static void gen_VMLA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
- TCGv_ptr fpstatus)
-{
- gen_helper_vfp_muls(vn, vn, vm, fpstatus);
- gen_helper_vfp_adds(vd, vd, vn, fpstatus);
-}
-
-static void gen_VMLS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
- TCGv_ptr fpstatus)
-{
- gen_helper_vfp_muls(vn, vn, vm, fpstatus);
- gen_helper_vfp_subs(vd, vd, vn, fpstatus);
-}
-
-DO_3S_FP(VMLA, gen_VMLA_fp_3s, true)
-DO_3S_FP(VMLS, gen_VMLS_fp_3s, true)
+ return do_3same(s, a, gen_##INSN##_fp32_3s); \
+ }
+
+
+DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
+DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
+DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
+DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
+DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
+DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
+DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
+DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
+DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
+DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
+DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
+DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
+DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
+DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
+DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
+DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
+DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
+
+WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
+WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
+WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
+WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)
static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
{
@@ -1157,11 +1089,12 @@ static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
}
if (a->size != 0) {
- /* TODO fp16 support */
- return false;
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+ return do_3same(s, a, gen_VMAXNM_fp16_3s);
}
-
- return do_3same_fp(s, a, gen_helper_vfp_maxnums, false);
+ return do_3same(s, a, gen_VMAXNM_fp32_3s);
}
static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
@@ -1171,98 +1104,18 @@ static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
}
if (a->size != 0) {
- /* TODO fp16 support */
- return false;
- }
-
- return do_3same_fp(s, a, gen_helper_vfp_minnums, false);
-}
-
-WRAP_ENV_FN(gen_VRECPS_tramp, gen_helper_recps_f32)
-
-static void gen_VRECPS_fp_3s(unsigned vece, uint32_t rd_ofs,
- uint32_t rn_ofs, uint32_t rm_ofs,
- uint32_t oprsz, uint32_t maxsz)
-{
- static const GVecGen3 ops = { .fni4 = gen_VRECPS_tramp };
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops);
-}
-
-static bool trans_VRECPS_fp_3s(DisasContext *s, arg_3same *a)
-{
- if (a->size != 0) {
- /* TODO fp16 support */
- return false;
- }
-
- return do_3same(s, a, gen_VRECPS_fp_3s);
-}
-
-WRAP_ENV_FN(gen_VRSQRTS_tramp, gen_helper_rsqrts_f32)
-
-static void gen_VRSQRTS_fp_3s(unsigned vece, uint32_t rd_ofs,
- uint32_t rn_ofs, uint32_t rm_ofs,
- uint32_t oprsz, uint32_t maxsz)
-{
- static const GVecGen3 ops = { .fni4 = gen_VRSQRTS_tramp };
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops);
-}
-
-static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a)
-{
- if (a->size != 0) {
- /* TODO fp16 support */
- return false;
- }
-
- return do_3same(s, a, gen_VRSQRTS_fp_3s);
-}
-
-static void gen_VFMA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
- TCGv_ptr fpstatus)
-{
- gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
-}
-
-static bool trans_VFMA_fp_3s(DisasContext *s, arg_3same *a)
-{
- if (!dc_isar_feature(aa32_simdfmac, s)) {
- return false;
- }
-
- if (a->size != 0) {
- /* TODO fp16 support */
- return false;
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+ return do_3same(s, a, gen_VMINNM_fp16_3s);
}
-
- return do_3same_fp(s, a, gen_VFMA_fp_3s, true);
+ return do_3same(s, a, gen_VMINNM_fp32_3s);
}
-static void gen_VFMS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
- TCGv_ptr fpstatus)
+static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
+ gen_helper_gvec_3_ptr *fn)
{
- gen_helper_vfp_negs(vn, vn);
- gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
-}
-
-static bool trans_VFMS_fp_3s(DisasContext *s, arg_3same *a)
-{
- if (!dc_isar_feature(aa32_simdfmac, s)) {
- return false;
- }
-
- if (a->size != 0) {
- /* TODO fp16 support */
- return false;
- }
-
- return do_3same_fp(s, a, gen_VFMS_fp_3s, true);
-}
-
-static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
-{
- /* FP operations handled pairwise 32 bits at a time */
- TCGv_i32 tmp, tmp2, tmp3;
+ /* FP pairwise operations */
TCGv_ptr fpstatus;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@@ -1281,26 +1134,14 @@ static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
assert(a->q == 0); /* enforced by decode patterns */
- /*
- * Note that we have to be careful not to clobber the source operands
- * in the "vm == vd" case by storing the result of the first pass too
- * early. Since Q is 0 there are always just two passes, so instead
- * of a complicated loop over each pass we just unroll.
- */
- fpstatus = fpstatus_ptr(FPST_STD);
- tmp = neon_load_reg(a->vn, 0);
- tmp2 = neon_load_reg(a->vn, 1);
- fn(tmp, tmp, tmp2, fpstatus);
- tcg_temp_free_i32(tmp2);
- tmp3 = neon_load_reg(a->vm, 0);
- tmp2 = neon_load_reg(a->vm, 1);
- fn(tmp3, tmp3, tmp2, fpstatus);
- tcg_temp_free_i32(tmp2);
+ fpstatus = fpstatus_ptr(a->size != 0 ? FPST_STD_F16 : FPST_STD);
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
+ vfp_reg_offset(1, a->vn),
+ vfp_reg_offset(1, a->vm),
+ fpstatus, 8, 8, 0, fn);
tcg_temp_free_ptr(fpstatus);
- neon_store_reg(a->vd, 0, tmp);
- neon_store_reg(a->vd, 1, tmp3);
return true;
}
@@ -1312,15 +1153,17 @@ static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
{ \
if (a->size != 0) { \
- /* TODO fp16 support */ \
- return false; \
+ if (!dc_isar_feature(aa32_fp16_arith, s)) { \
+ return false; \
+ } \
+ return do_3same_fp_pair(s, a, FUNC##h); \
} \
- return do_3same_fp_pair(s, a, FUNC); \
+ return do_3same_fp_pair(s, a, FUNC##s); \
}
-DO_3S_FP_PAIR(VPADD, gen_helper_vfp_adds)
-DO_3S_FP_PAIR(VPMAX, gen_helper_vfp_maxs)
-DO_3S_FP_PAIR(VPMIN, gen_helper_vfp_mins)
+DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd)
+DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax)
+DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin)
static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
{
@@ -1765,17 +1608,24 @@ static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
}
static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
- NeonGenTwoSingleOpFn *fn)
+ gen_helper_gvec_2_ptr *fn)
{
/* FP operations in 2-reg-and-shift group */
- TCGv_i32 tmp, shiftv;
- TCGv_ptr fpstatus;
- int pass;
+ int vec_size = a->q ? 16 : 8;
+ int rd_ofs = neon_reg_offset(a->vd, 0);
+ int rm_ofs = neon_reg_offset(a->vm, 0);
+ TCGv_ptr fpst;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
}
+ if (a->size != 0) {
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+ }
+
/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd | a->vm) & 0x10)) {
@@ -1790,15 +1640,9 @@ static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
return true;
}
- fpstatus = fpstatus_ptr(FPST_STD);
- shiftv = tcg_const_i32(a->shift);
- for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
- tmp = neon_load_reg(a->vm, pass);
- fn(tmp, tmp, shiftv, fpstatus);
- neon_store_reg(a->vd, pass, tmp);
- }
- tcg_temp_free_ptr(fpstatus);
- tcg_temp_free_i32(shiftv);
+ fpst = fpstatus_ptr(a->size ? FPST_STD_F16 : FPST_STD);
+ tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
+ tcg_temp_free_ptr(fpst);
return true;
}
@@ -1808,10 +1652,15 @@ static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
return do_fp_2sh(s, a, FUNC); \
}
-DO_FP_2SH(VCVT_SF, gen_helper_vfp_sltos)
-DO_FP_2SH(VCVT_UF, gen_helper_vfp_ultos)
-DO_FP_2SH(VCVT_FS, gen_helper_vfp_tosls_round_to_zero)
-DO_FP_2SH(VCVT_FU, gen_helper_vfp_touls_round_to_zero)
+DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
+DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
+DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
+DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)
+
+DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
+DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
+DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
+DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)
static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
{
@@ -2583,70 +2432,70 @@ static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
}
-/*
- * Rather than have a float-specific version of do_2scalar just for
- * three insns, we wrap a NeonGenTwoSingleOpFn to turn it into
- * a NeonGenTwoOpFn.
- */
-#define WRAP_FP_FN(WRAPNAME, FUNC) \
- static void WRAPNAME(TCGv_i32 rd, TCGv_i32 rn, TCGv_i32 rm) \
- { \
- TCGv_ptr fpstatus = fpstatus_ptr(FPST_STD); \
- FUNC(rd, rn, rm, fpstatus); \
- tcg_temp_free_ptr(fpstatus); \
+static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
+ gen_helper_gvec_3_ptr *fn)
+{
+ /* Two registers and a scalar, using gvec */
+ int vec_size = a->q ? 16 : 8;
+ int rd_ofs = neon_reg_offset(a->vd, 0);
+ int rn_ofs = neon_reg_offset(a->vn, 0);
+ int rm_ofs;
+ int idx;
+ TCGv_ptr fpstatus;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
}
-WRAP_FP_FN(gen_VMUL_F_mul, gen_helper_vfp_muls)
-WRAP_FP_FN(gen_VMUL_F_add, gen_helper_vfp_adds)
-WRAP_FP_FN(gen_VMUL_F_sub, gen_helper_vfp_subs)
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
-static bool trans_VMUL_F_2sc(DisasContext *s, arg_2scalar *a)
-{
- static NeonGenTwoOpFn * const opfn[] = {
- NULL,
- NULL, /* TODO: fp16 support */
- gen_VMUL_F_mul,
- NULL,
- };
+ if (!fn) {
+ /* Bad size (including size == 3, which is a different insn group) */
+ return false;
+ }
- return do_2scalar(s, a, opfn[a->size], NULL);
-}
+ if (a->q && ((a->vd | a->vn) & 1)) {
+ return false;
+ }
-static bool trans_VMLA_F_2sc(DisasContext *s, arg_2scalar *a)
-{
- static NeonGenTwoOpFn * const opfn[] = {
- NULL,
- NULL, /* TODO: fp16 support */
- gen_VMUL_F_mul,
- NULL,
- };
- static NeonGenTwoOpFn * const accfn[] = {
- NULL,
- NULL, /* TODO: fp16 support */
- gen_VMUL_F_add,
- NULL,
- };
+ if (!vfp_access_check(s)) {
+ return true;
+ }
- return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
+ /* a->vm is M:Vm, which encodes both register and index */
+ idx = extract32(a->vm, a->size + 2, 2);
+ a->vm = extract32(a->vm, 0, a->size + 2);
+ rm_ofs = neon_reg_offset(a->vm, 0);
+
+ fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
+ vec_size, vec_size, idx, fn);
+ tcg_temp_free_ptr(fpstatus);
+ return true;
}
-static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a)
-{
- static NeonGenTwoOpFn * const opfn[] = {
- NULL,
- NULL, /* TODO: fp16 support */
- gen_VMUL_F_mul,
- NULL,
- };
- static NeonGenTwoOpFn * const accfn[] = {
- NULL,
- NULL, /* TODO: fp16 support */
- gen_VMUL_F_sub,
- NULL,
- };
+#define DO_VMUL_F_2sc(NAME, FUNC) \
+ static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a) \
+ { \
+ static gen_helper_gvec_3_ptr * const opfn[] = { \
+ NULL, \
+ gen_helper_##FUNC##_h, \
+ gen_helper_##FUNC##_s, \
+ NULL, \
+ }; \
+ if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
+ return false; \
+ } \
+ return do_2scalar_fp_vec(s, a, opfn[a->size]); \
+ }
- return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
-}
+DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
+DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
+DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)
WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
@@ -3739,22 +3588,44 @@ static bool trans_VCNT(DisasContext *s, arg_2misc *a)
return do_2misc(s, a, gen_helper_neon_cnt_u8);
}
+static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
+ vece == MO_16 ? 0x7fff : 0x7fffffff,
+ oprsz, maxsz);
+}
+
static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
{
- if (a->size != 2) {
+ if (a->size == MO_16) {
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+ } else if (a->size != MO_32) {
return false;
}
- /* TODO: FP16 : size == 1 */
- return do_2misc(s, a, gen_helper_vfp_abss);
+ return do_2misc_vec(s, a, gen_VABS_F);
+}
+
+static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
+ vece == MO_16 ? 0x8000 : 0x80000000,
+ oprsz, maxsz);
}
static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
{
- if (a->size != 2) {
+ if (a->size == MO_16) {
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+ } else if (a->size != MO_32) {
return false;
}
- /* TODO: FP16 : size == 1 */
- return do_2misc(s, a, gen_helper_vfp_negs);
+ return do_2misc_vec(s, a, gen_VNEG_F);
}
static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
@@ -3808,226 +3679,100 @@ static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
return do_2misc(s, a, fn[a->size]);
}
-static bool do_2misc_fp(DisasContext *s, arg_2misc *a,
- NeonGenOneSingleOpFn *fn)
-{
- int pass;
- TCGv_ptr fpst;
-
- /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- return false;
- }
-
- /* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_simd_r32, s) &&
- ((a->vd | a->vm) & 0x10)) {
- return false;
- }
-
- if (a->size != 2) {
- /* TODO: FP16 will be the size == 1 case */
- return false;
- }
-
- if ((a->vd | a->vm) & a->q) {
- return false;
- }
-
- if (!vfp_access_check(s)) {
- return true;
- }
-
- fpst = fpstatus_ptr(FPST_STD);
- for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
- TCGv_i32 tmp = neon_load_reg(a->vm, pass);
- fn(tmp, tmp, fpst);
- neon_store_reg(a->vd, pass, tmp);
+#define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC) \
+ static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ static gen_helper_gvec_2_ptr * const fns[4] = { \
+ NULL, HFUNC, SFUNC, NULL, \
+ }; \
+ TCGv_ptr fpst; \
+ fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD); \
+ tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0, \
+ fns[vece]); \
+ tcg_temp_free_ptr(fpst); \
+ } \
+ static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
+ { \
+ if (a->size == MO_16) { \
+ if (!dc_isar_feature(aa32_fp16_arith, s)) { \
+ return false; \
+ } \
+ } else if (a->size != MO_32) { \
+ return false; \
+ } \
+ return do_2misc_vec(s, a, gen_##INSN); \
}
- tcg_temp_free_ptr(fpst);
- return true;
-}
-
-#define DO_2MISC_FP(INSN, FUNC) \
- static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
- { \
- return do_2misc_fp(s, a, FUNC); \
- }
+DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s)
+DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s)
+DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s)
+DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s)
+DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s)
+DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s)
+DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s)
+DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos)
+DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
+DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
+DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)
-DO_2MISC_FP(VRECPE_F, gen_helper_recpe_f32)
-DO_2MISC_FP(VRSQRTE_F, gen_helper_rsqrte_f32)
-DO_2MISC_FP(VCVT_FS, gen_helper_vfp_sitos)
-DO_2MISC_FP(VCVT_FU, gen_helper_vfp_uitos)
-DO_2MISC_FP(VCVT_SF, gen_helper_vfp_tosizs)
-DO_2MISC_FP(VCVT_UF, gen_helper_vfp_touizs)
+DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s)
static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
{
if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
return false;
}
- return do_2misc_fp(s, a, gen_helper_rints_exact);
+ return trans_VRINTX_impl(s, a);
}
-#define WRAP_FP_CMP0_FWD(WRAPNAME, FUNC) \
- static void WRAPNAME(TCGv_i32 d, TCGv_i32 m, TCGv_ptr fpst) \
- { \
- TCGv_i32 zero = tcg_const_i32(0); \
- FUNC(d, m, zero, fpst); \
- tcg_temp_free_i32(zero); \
- }
-#define WRAP_FP_CMP0_REV(WRAPNAME, FUNC) \
- static void WRAPNAME(TCGv_i32 d, TCGv_i32 m, TCGv_ptr fpst) \
- { \
- TCGv_i32 zero = tcg_const_i32(0); \
- FUNC(d, zero, m, fpst); \
- tcg_temp_free_i32(zero); \
- }
-
-#define DO_FP_CMP0(INSN, FUNC, REV) \
- WRAP_FP_CMP0_##REV(gen_##INSN, FUNC) \
- static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
- { \
- return do_2misc_fp(s, a, gen_##INSN); \
- }
-
-DO_FP_CMP0(VCGT0_F, gen_helper_neon_cgt_f32, FWD)
-DO_FP_CMP0(VCGE0_F, gen_helper_neon_cge_f32, FWD)
-DO_FP_CMP0(VCEQ0_F, gen_helper_neon_ceq_f32, FWD)
-DO_FP_CMP0(VCLE0_F, gen_helper_neon_cge_f32, REV)
-DO_FP_CMP0(VCLT0_F, gen_helper_neon_cgt_f32, REV)
-
-static bool do_vrint(DisasContext *s, arg_2misc *a, int rmode)
-{
- /*
- * Handle a VRINT* operation by iterating 32 bits at a time,
- * with a specified rounding mode in operation.
- */
- int pass;
- TCGv_ptr fpst;
- TCGv_i32 tcg_rmode;
-
- if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
- !arm_dc_feature(s, ARM_FEATURE_V8)) {
- return false;
- }
-
- /* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_simd_r32, s) &&
- ((a->vd | a->vm) & 0x10)) {
- return false;
- }
-
- if (a->size != 2) {
- /* TODO: FP16 will be the size == 1 case */
- return false;
- }
-
- if ((a->vd | a->vm) & a->q) {
- return false;
- }
-
- if (!vfp_access_check(s)) {
- return true;
- }
-
- fpst = fpstatus_ptr(FPST_STD);
- tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
- gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env);
- for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
- TCGv_i32 tmp = neon_load_reg(a->vm, pass);
- gen_helper_rints(tmp, tmp, fpst);
- neon_store_reg(a->vd, pass, tmp);
- }
- gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env);
- tcg_temp_free_i32(tcg_rmode);
- tcg_temp_free_ptr(fpst);
-
- return true;
-}
-
-#define DO_VRINT(INSN, RMODE) \
- static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
- { \
- return do_vrint(s, a, RMODE); \
- }
-
-DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
-DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
-DO_VRINT(VRINTZ, FPROUNDING_ZERO)
-DO_VRINT(VRINTM, FPROUNDING_NEGINF)
-DO_VRINT(VRINTP, FPROUNDING_POSINF)
-
-static bool do_vcvt(DisasContext *s, arg_2misc *a, int rmode, bool is_signed)
-{
- /*
- * Handle a VCVT* operation by iterating 32 bits at a time,
- * with a specified rounding mode in operation.
- */
- int pass;
- TCGv_ptr fpst;
- TCGv_i32 tcg_rmode, tcg_shift;
-
- if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
- !arm_dc_feature(s, ARM_FEATURE_V8)) {
- return false;
- }
-
- /* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_simd_r32, s) &&
- ((a->vd | a->vm) & 0x10)) {
- return false;
- }
-
- if (a->size != 2) {
- /* TODO: FP16 will be the size == 1 case */
- return false;
- }
-
- if ((a->vd | a->vm) & a->q) {
- return false;
- }
-
- if (!vfp_access_check(s)) {
- return true;
- }
-
- fpst = fpstatus_ptr(FPST_STD);
- tcg_shift = tcg_const_i32(0);
- tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
- gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env);
- for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
- TCGv_i32 tmp = neon_load_reg(a->vm, pass);
- if (is_signed) {
- gen_helper_vfp_tosls(tmp, tmp, tcg_shift, fpst);
- } else {
- gen_helper_vfp_touls(tmp, tmp, tcg_shift, fpst);
- }
- neon_store_reg(a->vd, pass, tmp);
- }
- gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env);
- tcg_temp_free_i32(tcg_rmode);
- tcg_temp_free_i32(tcg_shift);
- tcg_temp_free_ptr(fpst);
-
- return true;
-}
-
-#define DO_VCVT(INSN, RMODE, SIGNED) \
- static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
- { \
- return do_vcvt(s, a, RMODE, SIGNED); \
- }
-
-DO_VCVT(VCVTAU, FPROUNDING_TIEAWAY, false)
-DO_VCVT(VCVTAS, FPROUNDING_TIEAWAY, true)
-DO_VCVT(VCVTNU, FPROUNDING_TIEEVEN, false)
-DO_VCVT(VCVTNS, FPROUNDING_TIEEVEN, true)
-DO_VCVT(VCVTPU, FPROUNDING_POSINF, false)
-DO_VCVT(VCVTPS, FPROUNDING_POSINF, true)
-DO_VCVT(VCVTMU, FPROUNDING_NEGINF, false)
-DO_VCVT(VCVTMS, FPROUNDING_NEGINF, true)
+#define DO_VEC_RMODE(INSN, RMODE, OP) \
+ static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ static gen_helper_gvec_2_ptr * const fns[4] = { \
+ NULL, \
+ gen_helper_gvec_##OP##h, \
+ gen_helper_gvec_##OP##s, \
+ NULL, \
+ }; \
+ TCGv_ptr fpst; \
+ fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD); \
+ tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, \
+ arm_rmode_to_sf(RMODE), fns[vece]); \
+ tcg_temp_free_ptr(fpst); \
+ } \
+ static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
+ { \
+ if (!arm_dc_feature(s, ARM_FEATURE_V8)) { \
+ return false; \
+ } \
+ if (a->size == MO_16) { \
+ if (!dc_isar_feature(aa32_fp16_arith, s)) { \
+ return false; \
+ } \
+ } else if (a->size != MO_32) { \
+ return false; \
+ } \
+ return do_2misc_vec(s, a, gen_##INSN); \
+ }
+
+DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u)
+DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s)
+DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u)
+DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s)
+DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u)
+DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s)
+DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u)
+DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s)
+
+DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_)
+DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_)
+DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_)
+DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_)
+DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_)
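/*
 * For reference: the OP argument is token-pasted into the helper name,
 * so e.g. DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u) selects
 * gen_helper_gvec_vcvt_rm_uh for MO_16 and gen_helper_gvec_vcvt_rm_us
 * for MO_32, with arm_rmode_to_sf(RMODE) passed as the gvec data value
 * so the helper can apply the rounding mode at run time.
 */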
static bool trans_VSWP(DisasContext *s, arg_2misc *a)
{
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index d97cb37d83..e4cd6b6251 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -142,35 +142,76 @@ static int pred_gvec_reg_size(DisasContext *s)
return size_for_gvec(pred_full_reg_size(s));
}
+/* Invoke an out-of-line helper on 2 Zregs. */
+static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
+ int rd, int rn, int data)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vsz, vsz, data, fn);
+}
+
+/* Invoke an out-of-line helper on 3 Zregs. */
+static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
+ int rd, int rn, int rm, int data)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vsz, vsz, data, fn);
+}
+
+/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
+static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
+ int rd, int rn, int pg, int data)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ pred_full_reg_offset(s, pg),
+ vsz, vsz, data, fn);
+}
+
+/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
+static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
+ int rd, int rn, int rm, int pg, int data)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ pred_full_reg_offset(s, pg),
+ vsz, vsz, data, fn);
+}
+
/* Invoke a vector expander on two Zregs. */
-static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
- int esz, int rd, int rn)
+static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
+ int esz, int rd, int rn)
{
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- gvec_fn(esz, vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn), vsz, vsz);
- }
- return true;
+ unsigned vsz = vec_full_reg_size(s);
+ gvec_fn(esz, vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn), vsz, vsz);
}
/* Invoke a vector expander on three Zregs. */
-static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
- int esz, int rd, int rn, int rm)
+static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
+ int esz, int rd, int rn, int rm)
{
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- gvec_fn(esz, vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm), vsz, vsz);
- }
- return true;
+ unsigned vsz = vec_full_reg_size(s);
+ gvec_fn(esz, vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), vsz, vsz);
}
/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
- return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
+ if (sve_access_check(s)) {
+ gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
+ }
+ return true;
}
/* Initialize a Zreg with replications of a 64-bit immediate. */
@@ -180,52 +221,27 @@ static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}
-/* Invoke a vector expander on two Pregs. */
-static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
- int esz, int rd, int rn)
-{
- if (sve_access_check(s)) {
- unsigned psz = pred_gvec_reg_size(s);
- gvec_fn(esz, pred_full_reg_offset(s, rd),
- pred_full_reg_offset(s, rn), psz, psz);
- }
- return true;
-}
-
/* Invoke a vector expander on three Pregs. */
-static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
- int esz, int rd, int rn, int rm)
+static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
+ int rd, int rn, int rm)
{
- if (sve_access_check(s)) {
- unsigned psz = pred_gvec_reg_size(s);
- gvec_fn(esz, pred_full_reg_offset(s, rd),
- pred_full_reg_offset(s, rn),
- pred_full_reg_offset(s, rm), psz, psz);
- }
- return true;
+ unsigned psz = pred_gvec_reg_size(s);
+ gvec_fn(MO_64, pred_full_reg_offset(s, rd),
+ pred_full_reg_offset(s, rn),
+ pred_full_reg_offset(s, rm), psz, psz);
}
-/* Invoke a vector operation on four Pregs. */
-static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
- int rd, int rn, int rm, int rg)
+/* Invoke a vector move on two Pregs. */
+static bool do_mov_p(DisasContext *s, int rd, int rn)
{
if (sve_access_check(s)) {
unsigned psz = pred_gvec_reg_size(s);
- tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
- pred_full_reg_offset(s, rn),
- pred_full_reg_offset(s, rm),
- pred_full_reg_offset(s, rg),
- psz, psz, gvec_op);
+ tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
+ pred_full_reg_offset(s, rn), psz, psz);
}
return true;
}
-/* Invoke a vector move on two Pregs. */
-static bool do_mov_p(DisasContext *s, int rd, int rn)
-{
- return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
-}
-
/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
@@ -273,24 +289,32 @@ const uint64_t pred_esz_masks[4] = {
*** SVE Logical - Unpredicated Group
*/
+static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
+{
+ if (sve_access_check(s)) {
+ gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
+ }
+ return true;
+}
+
static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_and);
}
static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_or);
}
static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_xor);
}
static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_andc);
}
/*
@@ -299,32 +323,32 @@ static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_add);
}
static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_sub);
}
static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
}
static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
}
static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
}
static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
}
/*
@@ -333,16 +357,11 @@ static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
- unsigned vsz = vec_full_reg_size(s);
if (fn == NULL) {
return false;
}
if (sve_access_check(s)) {
- tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- pred_full_reg_offset(s, a->pg),
- vsz, vsz, 0, fn);
+ gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
}
return true;
}
@@ -356,12 +375,7 @@ static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
};
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm),
- pred_full_reg_offset(s, pg),
- vsz, vsz, 0, fns[esz]);
+ gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}
#define DO_ZPZZ(NAME, name) \
@@ -433,11 +447,7 @@ static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
return false;
}
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- pred_full_reg_offset(s, a->pg),
- vsz, vsz, 0, fn);
+ gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
}
return true;
}
@@ -608,48 +618,29 @@ static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
*** SVE Shift by Immediate - Predicated Group
*/
-/* Store zero into every active element of Zd. We will use this for two
- * and three-operand predicated instructions for which logic dictates a
- * zero result.
+/*
+ * Copy Zn into Zd, storing zeros into inactive elements.
+ * If invert, store zeros into the active elements.
*/
-static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
-{
- static gen_helper_gvec_2 * const fns[4] = {
- gen_helper_sve_clr_b, gen_helper_sve_clr_h,
- gen_helper_sve_clr_s, gen_helper_sve_clr_d,
- };
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
- pred_full_reg_offset(s, pg),
- vsz, vsz, 0, fns[esz]);
- }
- return true;
-}
-
-/* Copy Zn into Zd, storing zeros into inactive elements. */
-static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
+static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
+ int esz, bool invert)
{
static gen_helper_gvec_3 * const fns[4] = {
gen_helper_sve_movz_b, gen_helper_sve_movz_h,
gen_helper_sve_movz_s, gen_helper_sve_movz_d,
};
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- pred_full_reg_offset(s, pg),
- vsz, vsz, 0, fns[esz]);
+
+ if (sve_access_check(s)) {
+ gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
+ }
+ return true;
}
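/*
 * Sketch only (not the real sve_helper.c code): the invert flag is
 * carried in the gvec descriptor's data field, so the out-of-line
 * routine can recover it with simd_data() and flip which lanes are
 * zeroed, roughly:
 *
 *     bool invert = simd_data(desc) & 1;
 *     keep = pg_bit ^ invert;   // copy active (or, inverted, inactive) lanes
 */
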
static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
gen_helper_gvec_3 *fn)
{
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- pred_full_reg_offset(s, a->pg),
- vsz, vsz, a->imm, fn);
+ gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}
return true;
}
@@ -682,7 +673,7 @@ static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
/* Shift by element size is architecturally valid.
For logical shifts, it is a zeroing operation. */
if (a->imm >= (8 << a->esz)) {
- return do_clr_zp(s, a->rd, a->pg, a->esz);
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
} else {
return do_zpzi_ool(s, a, fns[a->esz]);
}
@@ -700,7 +691,7 @@ static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
/* Shift by element size is architecturally valid.
For logical shifts, it is a zeroing operation. */
if (a->imm >= (8 << a->esz)) {
- return do_clr_zp(s, a->rd, a->pg, a->esz);
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
} else {
return do_zpzi_ool(s, a, fns[a->esz]);
}
@@ -718,7 +709,7 @@ static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
/* Shift by element size is architecturally valid. For arithmetic
right shift for division, it is a zeroing operation. */
if (a->imm >= (8 << a->esz)) {
- return do_clr_zp(s, a->rd, a->pg, a->esz);
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
} else {
return do_zpzi_ool(s, a, fns[a->esz]);
}
@@ -799,11 +790,7 @@ static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
return false;
}
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, 0, fn);
+ gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
}
return true;
}
@@ -977,11 +964,7 @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, a->imm, fn);
+ gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}
return true;
}
@@ -1022,10 +1005,7 @@ static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
return false;
}
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vsz, vsz, 0, fns[a->esz]);
+ gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
}
return true;
}
@@ -1042,11 +1022,7 @@ static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
return false;
}
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, 0, fns[a->esz]);
+ gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
}
return true;
}
@@ -1068,6 +1044,11 @@ static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
int mofs = pred_full_reg_offset(s, a->rm);
int gofs = pred_full_reg_offset(s, a->pg);
+ if (!a->s) {
+ tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
+ return true;
+ }
+
if (psz == 8) {
/* Do the operation and the flags generation in temps. */
TCGv_i64 pd = tcg_temp_new_i64();
@@ -1127,19 +1108,24 @@ static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_and_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else if (a->rn == a->rm) {
- if (a->pg == a->rn) {
- return do_mov_p(s, a->rd, a->rn);
- } else {
- return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
+
+ if (!a->s) {
+ if (!sve_access_check(s)) {
+ return true;
+ }
+ if (a->rn == a->rm) {
+ if (a->pg == a->rn) {
+ do_mov_p(s, a->rd, a->rn);
+ } else {
+ gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
+ }
+ return true;
+ } else if (a->pg == a->rn || a->pg == a->rm) {
+ gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
+ return true;
}
- } else if (a->pg == a->rn || a->pg == a->rm) {
- return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
}
+ return do_pppp_flags(s, a, &op);
}
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1163,13 +1149,14 @@ static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_bic_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else if (a->pg == a->rn) {
- return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
+
+ if (!a->s && a->pg == a->rn) {
+ if (sve_access_check(s)) {
+ gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
+ }
+ return true;
}
+ return do_pppp_flags(s, a, &op);
}
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1193,41 +1180,22 @@ static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_eor_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
-}
-
-static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
-{
- tcg_gen_and_i64(pn, pn, pg);
- tcg_gen_andc_i64(pm, pm, pg);
- tcg_gen_or_i64(pd, pn, pm);
-}
-
-static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
- TCGv_vec pm, TCGv_vec pg)
-{
- tcg_gen_and_vec(vece, pn, pn, pg);
- tcg_gen_andc_vec(vece, pm, pm, pg);
- tcg_gen_or_vec(vece, pd, pn, pm);
+ return do_pppp_flags(s, a, &op);
}
static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
- static const GVecGen4 op = {
- .fni8 = gen_sel_pg_i64,
- .fniv = gen_sel_pg_vec,
- .fno = gen_helper_sve_sel_pppp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- };
if (a->s) {
return false;
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
}
+ if (sve_access_check(s)) {
+ unsigned psz = pred_gvec_reg_size(s);
+ tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
+ pred_full_reg_offset(s, a->pg),
+ pred_full_reg_offset(s, a->rn),
+ pred_full_reg_offset(s, a->rm), psz, psz);
+ }
+ return true;
}
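/*
 * Reference: tcg_gen_gvec_bitsel(vece, d, a, b, c, ...) computes
 * d = (b & a) | (c & ~a) per element, which with a = Pg, b = Pn, c = Pm
 * is exactly the select that the removed gen_sel_pg_i64()/gen_sel_pg_vec()
 * pair spelled out with and/andc/or.
 */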
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1251,13 +1219,11 @@ static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_orr_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else if (a->pg == a->rn && a->rn == a->rm) {
+
+ if (!a->s && a->pg == a->rn && a->rn == a->rm) {
return do_mov_p(s, a->rd, a->rn);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
}
+ return do_pppp_flags(s, a, &op);
}
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1281,11 +1247,7 @@ static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_orn_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
+ return do_pppp_flags(s, a, &op);
}
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1309,11 +1271,7 @@ static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_nor_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
+ return do_pppp_flags(s, a, &op);
}
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1337,11 +1295,7 @@ static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_nand_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
+ return do_pppp_flags(s, a, &op);
}
/*
@@ -2103,10 +2057,7 @@ static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
};
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vsz, vsz, 0, fns[a->esz]);
+ gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
}
return true;
}
@@ -2119,11 +2070,7 @@ static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
};
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, 0, fns[a->esz]);
+ gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
}
return true;
}
@@ -2296,11 +2243,7 @@ static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
gen_helper_gvec_3 *fn)
{
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, data, fn);
+ gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}
return true;
}
@@ -2745,12 +2688,8 @@ static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
{
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- pred_full_reg_offset(s, a->pg),
- vsz, vsz, a->esz, gen_helper_sve_splice);
+ gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
+ a->rd, a->rn, a->rm, a->pg, 0);
}
return true;
}
@@ -3429,11 +3368,7 @@ static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
};
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, 0, fns[a->u][a->sz]);
+ gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, 0);
}
return true;
}
@@ -3446,11 +3381,7 @@ static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
};
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, a->index, fns[a->u][a->sz]);
+ gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->index);
}
return true;
}
@@ -3872,10 +3803,6 @@ static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
return true; \
}
-#define float16_two make_float16(0x4000)
-#define float32_two make_float32(0x40000000)
-#define float64_two make_float64(0x4000000000000000ULL)
-
DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
@@ -5093,8 +5020,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
/* Zero the inactive elements. */
gen_set_label(over);
- do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
- return true;
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
@@ -5877,8 +5803,5 @@ static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
- if (sve_access_check(s)) {
- do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
- }
- return true;
+ return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
}
diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc
index 4eeafb494a..28e0dba5f1 100644
--- a/target/arm/translate-vfp.c.inc
+++ b/target/arm/translate-vfp.c.inc
@@ -190,18 +190,22 @@ static bool vfp_access_check(DisasContext *s)
static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
{
uint32_t rd, rn, rm;
- bool dp = a->dp;
+ int sz = a->sz;
if (!dc_isar_feature(aa32_vsel, s)) {
return false;
}
- if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) {
+ if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist */
- if (dp && !dc_isar_feature(aa32_simd_r32, s) &&
+ if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
((a->vm | a->vn | a->vd) & 0x10)) {
return false;
}
@@ -214,7 +218,7 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
return true;
}
- if (dp) {
+ if (sz == 3) {
TCGv_i64 frn, frm, dest;
TCGv_i64 tmp, zero, zf, nf, vf;
@@ -307,6 +311,10 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
tcg_temp_free_i32(tmp);
break;
}
+ /* For fp16 the top half is always zero */
+ if (sz == 1) {
+ tcg_gen_andi_i32(dest, dest, 0xffff);
+ }
neon_store_reg32(dest, rd);
tcg_temp_free_i32(frn);
tcg_temp_free_i32(frm);
@@ -333,7 +341,7 @@ static const uint8_t fp_decode_rm[] = {
static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
{
uint32_t rd, rm;
- bool dp = a->dp;
+ int sz = a->sz;
TCGv_ptr fpst;
TCGv_i32 tcg_rmode;
int rounding = fp_decode_rm[a->rm];
@@ -342,12 +350,16 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
return false;
}
- if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) {
+ if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist */
- if (dp && !dc_isar_feature(aa32_simd_r32, s) &&
+ if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
((a->vm | a->vd) & 0x10)) {
return false;
}
@@ -359,12 +371,16 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR);
+ if (sz == 1) {
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ } else {
+ fpst = fpstatus_ptr(FPST_FPCR);
+ }
tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- if (dp) {
+ if (sz == 3) {
TCGv_i64 tcg_op;
TCGv_i64 tcg_res;
tcg_op = tcg_temp_new_i64();
@@ -380,7 +396,11 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
tcg_op = tcg_temp_new_i32();
tcg_res = tcg_temp_new_i32();
neon_load_reg32(tcg_op, rm);
- gen_helper_rints(tcg_res, tcg_op, fpst);
+ if (sz == 1) {
+ gen_helper_rinth(tcg_res, tcg_op, fpst);
+ } else {
+ gen_helper_rints(tcg_res, tcg_op, fpst);
+ }
neon_store_reg32(tcg_res, rd);
tcg_temp_free_i32(tcg_op);
tcg_temp_free_i32(tcg_res);
@@ -396,7 +416,7 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
{
uint32_t rd, rm;
- bool dp = a->dp;
+ int sz = a->sz;
TCGv_ptr fpst;
TCGv_i32 tcg_rmode, tcg_shift;
int rounding = fp_decode_rm[a->rm];
@@ -406,12 +426,16 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
return false;
}
- if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) {
+ if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist */
- if (dp && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
+ if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
return false;
}
@@ -422,14 +446,18 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR);
+ if (sz == 1) {
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ } else {
+ fpst = fpstatus_ptr(FPST_FPCR);
+ }
tcg_shift = tcg_const_i32(0);
tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- if (dp) {
+ if (sz == 3) {
TCGv_i64 tcg_double, tcg_res;
TCGv_i32 tcg_tmp;
tcg_double = tcg_temp_new_i64();
@@ -451,10 +479,18 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
tcg_single = tcg_temp_new_i32();
tcg_res = tcg_temp_new_i32();
neon_load_reg32(tcg_single, rm);
- if (is_signed) {
- gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
+ if (sz == 1) {
+ if (is_signed) {
+ gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
+ } else {
+ gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
+ }
} else {
- gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
+ if (is_signed) {
+ gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
+ } else {
+ gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
+ }
}
neon_store_reg32(tcg_res, rd);
tcg_temp_free_i32(tcg_res);
@@ -773,6 +809,40 @@ static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
return true;
}
+static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
+{
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (a->rt == 15) {
+ /* UNPREDICTABLE; we choose to UNDEF */
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (a->l) {
+ /* VFP to general purpose register */
+ tmp = tcg_temp_new_i32();
+ neon_load_reg32(tmp, a->vn);
+ tcg_gen_andi_i32(tmp, tmp, 0xffff);
+ store_reg(s, a->rt, tmp);
+ } else {
+ /* general purpose register to VFP */
+ tmp = load_reg(s, a->rt);
+ tcg_gen_andi_i32(tmp, tmp, 0xffff);
+ neon_store_reg32(tmp, a->vn);
+ tcg_temp_free_i32(tmp);
+ }
+
+ return true;
+}
+
static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
{
TCGv_i32 tmp;
@@ -886,6 +956,41 @@ static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
return true;
}
+static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
+{
+ uint32_t offset;
+ TCGv_i32 addr, tmp;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
+ offset = a->imm << 1;
+ if (!a->u) {
+ offset = -offset;
+ }
+
+ /* For thumb, use of PC is UNPREDICTABLE. */
+ addr = add_reg_for_lit(s, a->rn, offset);
+ tmp = tcg_temp_new_i32();
+ if (a->l) {
+ gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
+ neon_store_reg32(tmp, a->vd);
+ } else {
+ neon_load_reg32(tmp, a->vd);
+ gen_aa32_st16(s, tmp, addr, get_mem_index(s));
+ }
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(addr);
+
+ return true;
+}
+
static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
{
uint32_t offset;
@@ -1266,6 +1371,54 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
return true;
}
+static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
+ int vd, int vn, int vm, bool reads_vd)
+{
+ /*
+ * Do a half-precision operation. Functionally this is
+ * the same as do_vfp_3op_sp(), except:
+ * - it uses FPST_FPCR_F16
+ * - it doesn't need the VFP vector handling (fp16 is a
+ * v8 feature, and in v8 VFP vectors don't exist)
+ * - it does the aa32_fp16_arith feature test
+ */
+ TCGv_i32 f0, f1, fd;
+ TCGv_ptr fpst;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (s->vec_len != 0 || s->vec_stride != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ f0 = tcg_temp_new_i32();
+ f1 = tcg_temp_new_i32();
+ fd = tcg_temp_new_i32();
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+
+ neon_load_reg32(f0, vn);
+ neon_load_reg32(f1, vm);
+
+ if (reads_vd) {
+ neon_load_reg32(fd, vd);
+ }
+ fn(fd, f0, f1, fpst);
+ neon_store_reg32(fd, vd);
+
+ tcg_temp_free_i32(f0);
+ tcg_temp_free_i32(f1);
+ tcg_temp_free_i32(fd);
+ tcg_temp_free_ptr(fpst);
+
+ return true;
+}
+
static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
int vd, int vn, int vm, bool reads_vd)
{
@@ -1421,6 +1574,38 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
return true;
}
+static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
+{
+ /*
+ * Do a half-precision operation. Functionally this is
+ * the same as do_vfp_2op_sp(), except:
+ * - it doesn't need the VFP vector handling (fp16 is a
+ * v8 feature, and in v8 VFP vectors don't exist)
+ * - it does the aa32_fp16_arith feature test
+ */
+ TCGv_i32 f0;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (s->vec_len != 0 || s->vec_stride != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ f0 = tcg_temp_new_i32();
+ neon_load_reg32(f0, vm);
+ fn(f0, f0);
+ neon_store_reg32(f0, vd);
+ tcg_temp_free_i32(f0);
+
+ return true;
+}
+
static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
{
uint32_t delta_m = 0;
@@ -1499,6 +1684,21 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
return true;
}
+static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /* Note that order of inputs to the add matters for NaNs */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ gen_helper_vfp_mulh(tmp, vn, vm, fpst);
+ gen_helper_vfp_addh(vd, vd, tmp, fpst);
+ tcg_temp_free_i32(tmp);
+}
+
+static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
+}
+
static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* Note that order of inputs to the add matters for NaNs */
@@ -1529,6 +1729,25 @@ static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
}
+static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /*
+ * VMLS: vd = vd + -(vn * vm)
+ * Note that order of inputs to the add matters for NaNs.
+ */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ gen_helper_vfp_mulh(tmp, vn, vm, fpst);
+ gen_helper_vfp_negh(tmp, tmp);
+ gen_helper_vfp_addh(vd, vd, tmp, fpst);
+ tcg_temp_free_i32(tmp);
+}
+
+static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
+}
+
static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/*
@@ -1567,6 +1786,27 @@ static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
}
+static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /*
+ * VNMLS: -fd + (fn * fm)
+ * Note that it isn't valid to replace (-A + B) with (B - A) or similar
+ * plausible looking simplifications because this will give wrong results
+ * for NaNs.
+ */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ gen_helper_vfp_mulh(tmp, vn, vm, fpst);
+ gen_helper_vfp_negh(vd, vd);
+ gen_helper_vfp_addh(vd, vd, tmp, fpst);
+ tcg_temp_free_i32(tmp);
+}
+
+static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
+}
+
static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/*
@@ -1609,6 +1849,23 @@ static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
}
+static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /* VNMLA: -fd + -(fn * fm) */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ gen_helper_vfp_mulh(tmp, vn, vm, fpst);
+ gen_helper_vfp_negh(tmp, tmp);
+ gen_helper_vfp_negh(vd, vd);
+ gen_helper_vfp_addh(vd, vd, tmp, fpst);
+ tcg_temp_free_i32(tmp);
+}
+
+static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
+}
+
static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* VNMLA: -fd + -(fn * fm) */
@@ -1643,6 +1900,11 @@ static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
}
+static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
+}
+
static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
{
return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
@@ -1653,6 +1915,18 @@ static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
}
+static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /* VNMUL: -(fn * fm) */
+ gen_helper_vfp_mulh(vd, vn, vm, fpst);
+ gen_helper_vfp_negh(vd, vd);
+}
+
+static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
+}
+
static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* VNMUL: -(fn * fm) */
@@ -1677,6 +1951,11 @@ static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
}
+static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
+}
+
static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
{
return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
@@ -1687,6 +1966,11 @@ static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
}
+static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
+}
+
static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
{
return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
@@ -1697,6 +1981,11 @@ static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
}
+static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
+}
+
static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
{
return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
@@ -1707,6 +1996,24 @@ static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
}
+static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
+{
+ if (!dc_isar_feature(aa32_vminmaxnm, s)) {
+ return false;
+ }
+ return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
+ a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
+{
+ if (!dc_isar_feature(aa32_vminmaxnm, s)) {
+ return false;
+ }
+ return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
+ a->vd, a->vn, a->vm, false);
+}
+
static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
{
if (!dc_isar_feature(aa32_vminmaxnm, s)) {
@@ -1743,6 +2050,69 @@ static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
a->vd, a->vn, a->vm, false);
}
+static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
+{
+ /*
+ * VFNMA : fd = muladd(-fd, fn, fm)
+ * VFNMS : fd = muladd(-fd, -fn, fm)
+ * VFMA : fd = muladd( fd, fn, fm)
+ * VFMS : fd = muladd( fd, -fn, fm)
+ *
+ * These are fused multiply-add, and must be done as one floating
+ * point operation with no rounding between the multiplication and
+ * addition steps. NB that doing the negations here as separate
+ * steps is correct: an input NaN should come out with its sign
+ * bit flipped if it is a negated input.
+ */
+ TCGv_ptr fpst;
+ TCGv_i32 vn, vm, vd;
+
+ /*
+ * Present in VFPv4 only, and only with the FP16 extension.
+ * Note that we can't rely on the SIMDFMAC check alone, because
+ * in a Neon-no-VFP core that ID register field will be non-zero.
+ */
+ if (!dc_isar_feature(aa32_fp16_arith, s) ||
+ !dc_isar_feature(aa32_simdfmac, s) ||
+ !dc_isar_feature(aa32_fpsp_v2, s)) {
+ return false;
+ }
+
+ if (s->vec_len != 0 || s->vec_stride != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vn = tcg_temp_new_i32();
+ vm = tcg_temp_new_i32();
+ vd = tcg_temp_new_i32();
+
+ neon_load_reg32(vn, a->vn);
+ neon_load_reg32(vm, a->vm);
+ if (neg_n) {
+ /* VFNMS, VFMS */
+ gen_helper_vfp_negh(vn, vn);
+ }
+ neon_load_reg32(vd, a->vd);
+ if (neg_d) {
+ /* VFNMA, VFNMS */
+ gen_helper_vfp_negh(vd, vd);
+ }
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
+ neon_store_reg32(vd, a->vd);
+
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(vn);
+ tcg_temp_free_i32(vm);
+ tcg_temp_free_i32(vd);
+
+ return true;
+}
+
static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
{
/*
@@ -1808,26 +2178,6 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
return true;
}
-static bool trans_VFMA_sp(DisasContext *s, arg_VFMA_sp *a)
-{
- return do_vfm_sp(s, a, false, false);
-}
-
-static bool trans_VFMS_sp(DisasContext *s, arg_VFMS_sp *a)
-{
- return do_vfm_sp(s, a, true, false);
-}
-
-static bool trans_VFNMA_sp(DisasContext *s, arg_VFNMA_sp *a)
-{
- return do_vfm_sp(s, a, false, true);
-}
-
-static bool trans_VFNMS_sp(DisasContext *s, arg_VFNMS_sp *a)
-{
- return do_vfm_sp(s, a, true, true);
-}
-
static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
{
/*
@@ -1899,24 +2249,43 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
return true;
}
-static bool trans_VFMA_dp(DisasContext *s, arg_VFMA_dp *a)
-{
- return do_vfm_dp(s, a, false, false);
-}
+#define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD) \
+ static bool trans_##INSN##_##PREC(DisasContext *s, \
+ arg_##INSN##_##PREC *a) \
+ { \
+ return do_vfm_##PREC(s, a, NEGN, NEGD); \
+ }
-static bool trans_VFMS_dp(DisasContext *s, arg_VFMS_dp *a)
-{
- return do_vfm_dp(s, a, true, false);
-}
+#define MAKE_VFM_TRANS_FNS(PREC) \
+ MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
+ MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
+ MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
+ MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)
-static bool trans_VFNMA_dp(DisasContext *s, arg_VFNMA_dp *a)
-{
- return do_vfm_dp(s, a, false, true);
-}
+MAKE_VFM_TRANS_FNS(hp)
+MAKE_VFM_TRANS_FNS(sp)
+MAKE_VFM_TRANS_FNS(dp)
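/*
 * For reference, one expansion of the macros above,
 * MAKE_ONE_VFM_TRANS_FN(VFMA, hp, false, false):
 *
 *     static bool trans_VFMA_hp(DisasContext *s, arg_VFMA_hp *a)
 *     {
 *         return do_vfm_hp(s, a, false, false);
 *     }
 */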
-static bool trans_VFNMS_dp(DisasContext *s, arg_VFNMS_dp *a)
+static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
{
- return do_vfm_dp(s, a, true, true);
+ TCGv_i32 fd;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (s->vec_len != 0 || s->vec_stride != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm));
+ neon_store_reg32(fd, a->vd);
+ tcg_temp_free_i32(fd);
+ return true;
}
static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
@@ -2024,34 +2393,27 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
return true;
}
-static bool trans_VMOV_reg_sp(DisasContext *s, arg_VMOV_reg_sp *a)
-{
- return do_vfp_2op_sp(s, tcg_gen_mov_i32, a->vd, a->vm);
-}
+#define DO_VFP_2OP(INSN, PREC, FN) \
+ static bool trans_##INSN##_##PREC(DisasContext *s, \
+ arg_##INSN##_##PREC *a) \
+ { \
+ return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \
+ }
-static bool trans_VMOV_reg_dp(DisasContext *s, arg_VMOV_reg_dp *a)
-{
- return do_vfp_2op_dp(s, tcg_gen_mov_i64, a->vd, a->vm);
-}
+DO_VFP_2OP(VMOV_reg, sp, tcg_gen_mov_i32)
+DO_VFP_2OP(VMOV_reg, dp, tcg_gen_mov_i64)
-static bool trans_VABS_sp(DisasContext *s, arg_VABS_sp *a)
-{
- return do_vfp_2op_sp(s, gen_helper_vfp_abss, a->vd, a->vm);
-}
+DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh)
+DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss)
+DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd)
-static bool trans_VABS_dp(DisasContext *s, arg_VABS_dp *a)
-{
- return do_vfp_2op_dp(s, gen_helper_vfp_absd, a->vd, a->vm);
-}
+DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh)
+DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs)
+DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd)
-static bool trans_VNEG_sp(DisasContext *s, arg_VNEG_sp *a)
+static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
{
- return do_vfp_2op_sp(s, gen_helper_vfp_negs, a->vd, a->vm);
-}
-
-static bool trans_VNEG_dp(DisasContext *s, arg_VNEG_dp *a)
-{
- return do_vfp_2op_dp(s, gen_helper_vfp_negd, a->vd, a->vm);
+ gen_helper_vfp_sqrth(vd, vm, cpu_env);
}
static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
@@ -2059,19 +2421,52 @@ static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
gen_helper_vfp_sqrts(vd, vm, cpu_env);
}
-static bool trans_VSQRT_sp(DisasContext *s, arg_VSQRT_sp *a)
-{
- return do_vfp_2op_sp(s, gen_VSQRT_sp, a->vd, a->vm);
-}
-
static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
{
gen_helper_vfp_sqrtd(vd, vm, cpu_env);
}
-static bool trans_VSQRT_dp(DisasContext *s, arg_VSQRT_dp *a)
+DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp)
+DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp)
+DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp)
+
+static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
{
- return do_vfp_2op_dp(s, gen_VSQRT_dp, a->vd, a->vm);
+ TCGv_i32 vd, vm;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ /* Vm/M bits must be zero for the Z variant */
+ if (a->z && a->vm != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vd = tcg_temp_new_i32();
+ vm = tcg_temp_new_i32();
+
+ neon_load_reg32(vd, a->vd);
+ if (a->z) {
+ tcg_gen_movi_i32(vm, 0);
+ } else {
+ neon_load_reg32(vm, a->vm);
+ }
+
+ if (a->e) {
+ gen_helper_vfp_cmpeh(vd, vm, cpu_env);
+ } else {
+ gen_helper_vfp_cmph(vd, vm, cpu_env);
+ }
+
+ tcg_temp_free_i32(vd);
+ tcg_temp_free_i32(vm);
+
+ return true;
}
static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
@@ -2289,6 +2684,29 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
return true;
}
+static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ neon_load_reg32(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ gen_helper_rinth(tmp, tmp, fpst);
+ neon_store_reg32(tmp, a->vd);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(tmp);
+ return true;
+}
+
static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
{
TCGv_ptr fpst;
@@ -2344,6 +2762,34 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
return true;
}
+static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+ TCGv_i32 tcg_rmode;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ neon_load_reg32(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ tcg_rmode = tcg_const_i32(float_round_to_zero);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+ gen_helper_rinth(tmp, tmp, fpst);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+ neon_store_reg32(tmp, a->vd);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(tcg_rmode);
+ tcg_temp_free_i32(tmp);
+ return true;
+}
+
static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
{
TCGv_ptr fpst;
@@ -2409,6 +2855,29 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
return true;
}
+static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ neon_load_reg32(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ gen_helper_rinth_exact(tmp, tmp, fpst);
+ neon_store_reg32(tmp, a->vd);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(tmp);
+ return true;
+}
+
static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
{
TCGv_ptr fpst;
@@ -2520,6 +2989,35 @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
return true;
}
+static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
+{
+ TCGv_i32 vm;
+ TCGv_ptr fpst;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vm = tcg_temp_new_i32();
+ neon_load_reg32(vm, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ if (a->s) {
+ /* i32 -> f16 */
+ gen_helper_vfp_sitoh(vm, vm, fpst);
+ } else {
+ /* u32 -> f16 */
+ gen_helper_vfp_uitoh(vm, vm, fpst);
+ }
+ neon_store_reg32(vm, a->vd);
+ tcg_temp_free_i32(vm);
+ tcg_temp_free_ptr(fpst);
+ return true;
+}
+
static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
{
TCGv_i32 vm;
@@ -2618,6 +3116,65 @@ static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
return true;
}
+static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
+{
+ TCGv_i32 vd, shift;
+ TCGv_ptr fpst;
+ int frac_bits;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
+
+ vd = tcg_temp_new_i32();
+ neon_load_reg32(vd, a->vd);
+
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ shift = tcg_const_i32(frac_bits);
+
+ /* Switch on op:U:sx bits */
+ switch (a->opc) {
+ case 0:
+ gen_helper_vfp_shtoh(vd, vd, shift, fpst);
+ break;
+ case 1:
+ gen_helper_vfp_sltoh(vd, vd, shift, fpst);
+ break;
+ case 2:
+ gen_helper_vfp_uhtoh(vd, vd, shift, fpst);
+ break;
+ case 3:
+ gen_helper_vfp_ultoh(vd, vd, shift, fpst);
+ break;
+ case 4:
+ gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
+ break;
+ case 5:
+ gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
+ break;
+ case 6:
+ gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
+ break;
+ case 7:
+ gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ neon_store_reg32(vd, a->vd);
+ tcg_temp_free_i32(vd);
+ tcg_temp_free_i32(shift);
+ tcg_temp_free_ptr(fpst);
+ return true;
+}
+
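/*
 * Decode of a->opc in trans_VCVT_fix_hp above (op:U:sx, as the helper
 * names suggest): bit 2 selects fp16 -> fixed-point (cases 4-7) rather
 * than fixed-point -> fp16 (cases 0-3), bit 1 selects unsigned, and
 * bit 0 selects a 32-bit rather than 16-bit fixed-point operand, which
 * is also why frac_bits is 32 - imm when (opc & 1) and 16 - imm otherwise.
 */
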
static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
{
TCGv_i32 vd, shift;
@@ -2742,6 +3299,42 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
return true;
}
+static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
+{
+ TCGv_i32 vm;
+ TCGv_ptr fpst;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ vm = tcg_temp_new_i32();
+ neon_load_reg32(vm, a->vm);
+
+ if (a->s) {
+ if (a->rz) {
+ gen_helper_vfp_tosizh(vm, vm, fpst);
+ } else {
+ gen_helper_vfp_tosih(vm, vm, fpst);
+ }
+ } else {
+ if (a->rz) {
+ gen_helper_vfp_touizh(vm, vm, fpst);
+ } else {
+ gen_helper_vfp_touih(vm, vm, fpst);
+ }
+ }
+ neon_store_reg32(vm, a->vd);
+ tcg_temp_free_i32(vm);
+ tcg_temp_free_ptr(fpst);
+ return true;
+}
+
static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
TCGv_i32 vm;
@@ -2895,3 +3488,56 @@ static bool trans_NOCP(DisasContext *s, arg_NOCP *a)
return false;
}
+
+static bool trans_VINS(DisasContext *s, arg_VINS *a)
+{
+ TCGv_i32 rd, rm;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (s->vec_len != 0 || s->vec_stride != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /* Insert low half of Vm into high half of Vd */
+ rm = tcg_temp_new_i32();
+ rd = tcg_temp_new_i32();
+ neon_load_reg32(rm, a->vm);
+ neon_load_reg32(rd, a->vd);
+ tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
+ neon_store_reg32(rd, a->vd);
+ tcg_temp_free_i32(rm);
+ tcg_temp_free_i32(rd);
+ return true;
+}
+
+static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
+{
+ TCGv_i32 rm;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (s->vec_len != 0 || s->vec_stride != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /* Set Vd to high half of Vm */
+ rm = tcg_temp_new_i32();
+ neon_load_reg32(rm, a->vm);
+ tcg_gen_shri_i32(rm, rm, 16);
+ neon_store_reg32(rm, a->vd);
+ tcg_temp_free_i32(rm);
+ return true;
+}
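/*
 * Worked example for the two insns above: with Vm = 0x12345678 and
 * Vd = 0xaaaabbbb, VINS (deposit into bits [31:16]) leaves
 * Vd = 0x5678bbbb, while VMOVX (logical shift right by 16) writes
 * Vd = 0x00001234.
 */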
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 6d6d4c0f42..423b0e08df 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -64,6 +64,7 @@ typedef struct DisasContext {
* that it is set at the point where we actually touch the FP regs.
*/
bool fp_access_checked;
+ bool sve_access_checked;
/* ARMv8 single-step state (this is distinct from the QEMU gdbstub
* single-step support).
*/
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 7d76412ee0..a973454e4f 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -37,19 +37,24 @@
#endif
/* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */
-static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2,
- int16_t src3, uint32_t *sat)
+static int16_t do_sqrdmlah_h(int16_t src1, int16_t src2, int16_t src3,
+ bool neg, bool round, uint32_t *sat)
{
- /* Simplify:
+ /*
+ * Simplify:
* = ((a3 << 16) + ((e1 * e2) << 1) + (1 << 15)) >> 16
* = ((a3 << 15) + (e1 * e2) + (1 << 14)) >> 15
*/
int32_t ret = (int32_t)src1 * src2;
- ret = ((int32_t)src3 << 15) + ret + (1 << 14);
+ if (neg) {
+ ret = -ret;
+ }
+ ret += ((int32_t)src3 << 15) + (round << 14);
ret >>= 15;
+
if (ret != (int16_t)ret) {
*sat = 1;
- ret = (ret < 0 ? -0x8000 : 0x7fff);
+ ret = (ret < 0 ? INT16_MIN : INT16_MAX);
}
return ret;
}
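/*
 * Worked example of the saturation path above (round, non-negated):
 * src1 = src2 = INT16_MIN, src3 = 0 gives
 * ret = ((-32768) * (-32768) + (1 << 14)) >> 15 = 32768, which is not
 * representable as int16_t, so *sat is set and the result is clamped
 * to INT16_MAX.
 */
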
@@ -58,8 +63,9 @@ uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1,
uint32_t src2, uint32_t src3)
{
uint32_t *sat = &env->vfp.qc[0];
- uint16_t e1 = inl_qrdmlah_s16(src1, src2, src3, sat);
- uint16_t e2 = inl_qrdmlah_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat);
+ uint16_t e1 = do_sqrdmlah_h(src1, src2, src3, false, true, sat);
+ uint16_t e2 = do_sqrdmlah_h(src1 >> 16, src2 >> 16, src3 >> 16,
+ false, true, sat);
return deposit32(e1, 16, 16, e2);
}
@@ -73,35 +79,18 @@ void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm,
uintptr_t i;
for (i = 0; i < opr_sz / 2; ++i) {
- d[i] = inl_qrdmlah_s16(n[i], m[i], d[i], vq);
+ d[i] = do_sqrdmlah_h(n[i], m[i], d[i], false, true, vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
-/* Signed saturating rounding doubling multiply-subtract high half, 16-bit */
-static int16_t inl_qrdmlsh_s16(int16_t src1, int16_t src2,
- int16_t src3, uint32_t *sat)
-{
- /* Similarly, using subtraction:
- * = ((a3 << 16) - ((e1 * e2) << 1) + (1 << 15)) >> 16
- * = ((a3 << 15) - (e1 * e2) + (1 << 14)) >> 15
- */
- int32_t ret = (int32_t)src1 * src2;
- ret = ((int32_t)src3 << 15) - ret + (1 << 14);
- ret >>= 15;
- if (ret != (int16_t)ret) {
- *sat = 1;
- ret = (ret < 0 ? -0x8000 : 0x7fff);
- }
- return ret;
-}
-
uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1,
uint32_t src2, uint32_t src3)
{
uint32_t *sat = &env->vfp.qc[0];
- uint16_t e1 = inl_qrdmlsh_s16(src1, src2, src3, sat);
- uint16_t e2 = inl_qrdmlsh_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat);
+ uint16_t e1 = do_sqrdmlah_h(src1, src2, src3, true, true, sat);
+ uint16_t e2 = do_sqrdmlah_h(src1 >> 16, src2 >> 16, src3 >> 16,
+ true, true, sat);
return deposit32(e1, 16, 16, e2);
}
@@ -115,19 +104,47 @@ void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm,
uintptr_t i;
for (i = 0; i < opr_sz / 2; ++i) {
- d[i] = inl_qrdmlsh_s16(n[i], m[i], d[i], vq);
+ d[i] = do_sqrdmlah_h(n[i], m[i], d[i], true, true, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqdmulh_h)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int16_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, false, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqrdmulh_h)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int16_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, true, vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
/* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */
-static int32_t inl_qrdmlah_s32(int32_t src1, int32_t src2,
- int32_t src3, uint32_t *sat)
+static int32_t do_sqrdmlah_s(int32_t src1, int32_t src2, int32_t src3,
+ bool neg, bool round, uint32_t *sat)
{
    /* Simplify similarly to do_sqrdmlah_h above. */
int64_t ret = (int64_t)src1 * src2;
- ret = ((int64_t)src3 << 31) + ret + (1 << 30);
+ if (neg) {
+ ret = -ret;
+ }
+ ret += ((int64_t)src3 << 31) + (round << 30);
ret >>= 31;
+
if (ret != (int32_t)ret) {
*sat = 1;
ret = (ret < 0 ? INT32_MIN : INT32_MAX);
@@ -139,7 +156,7 @@ uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1,
int32_t src2, int32_t src3)
{
uint32_t *sat = &env->vfp.qc[0];
- return inl_qrdmlah_s32(src1, src2, src3, sat);
+ return do_sqrdmlah_s(src1, src2, src3, false, true, sat);
}
void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm,
@@ -152,31 +169,16 @@ void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm,
uintptr_t i;
for (i = 0; i < opr_sz / 4; ++i) {
- d[i] = inl_qrdmlah_s32(n[i], m[i], d[i], vq);
+ d[i] = do_sqrdmlah_s(n[i], m[i], d[i], false, true, vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
-/* Signed saturating rounding doubling multiply-subtract high half, 32-bit */
-static int32_t inl_qrdmlsh_s32(int32_t src1, int32_t src2,
- int32_t src3, uint32_t *sat)
-{
- /* Simplify similarly to int_qrdmlsh_s16 above. */
- int64_t ret = (int64_t)src1 * src2;
- ret = ((int64_t)src3 << 31) - ret + (1 << 30);
- ret >>= 31;
- if (ret != (int32_t)ret) {
- *sat = 1;
- ret = (ret < 0 ? INT32_MIN : INT32_MAX);
- }
- return ret;
-}
-
uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1,
int32_t src2, int32_t src3)
{
uint32_t *sat = &env->vfp.qc[0];
- return inl_qrdmlsh_s32(src1, src2, src3, sat);
+ return do_sqrdmlah_s(src1, src2, src3, true, true, sat);
}
void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm,
@@ -189,7 +191,31 @@ void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm,
uintptr_t i;
for (i = 0; i < opr_sz / 4; ++i) {
- d[i] = inl_qrdmlsh_s32(n[i], m[i], d[i], vq);
+ d[i] = do_sqrdmlah_s(n[i], m[i], d[i], true, true, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqdmulh_s)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int32_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, false, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqrdmulh_s)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int32_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, true, vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
@@ -630,6 +656,81 @@ void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm,
clear_tail(d, opr_sz, simd_maxsz(desc));
}
+/*
+ * Floating point comparisons producing an integer result (all 1s or all 0s).
+ * Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do.
+ * Softfloat routines return 0/1, which we convert to the 0/-1 Neon requires.
+ */
+static uint16_t float16_ceq(float16 op1, float16 op2, float_status *stat)
+{
+ return -float16_eq_quiet(op1, op2, stat);
+}
+
+static uint32_t float32_ceq(float32 op1, float32 op2, float_status *stat)
+{
+ return -float32_eq_quiet(op1, op2, stat);
+}
+
+static uint16_t float16_cge(float16 op1, float16 op2, float_status *stat)
+{
+ return -float16_le(op2, op1, stat);
+}
+
+static uint32_t float32_cge(float32 op1, float32 op2, float_status *stat)
+{
+ return -float32_le(op2, op1, stat);
+}
+
+static uint16_t float16_cgt(float16 op1, float16 op2, float_status *stat)
+{
+ return -float16_lt(op2, op1, stat);
+}
+
+static uint32_t float32_cgt(float32 op1, float32 op2, float_status *stat)
+{
+ return -float32_lt(op2, op1, stat);
+}
+
+static uint16_t float16_acge(float16 op1, float16 op2, float_status *stat)
+{
+ return -float16_le(float16_abs(op2), float16_abs(op1), stat);
+}
+
+static uint32_t float32_acge(float32 op1, float32 op2, float_status *stat)
+{
+ return -float32_le(float32_abs(op2), float32_abs(op1), stat);
+}
+
+static uint16_t float16_acgt(float16 op1, float16 op2, float_status *stat)
+{
+ return -float16_lt(float16_abs(op2), float16_abs(op1), stat);
+}
+
+static uint32_t float32_acgt(float32 op1, float32 op2, float_status *stat)
+{
+ return -float32_lt(float32_abs(op2), float32_abs(op1), stat);
+}
+
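(Editorial note: the negation above is the usual trick for turning softfloat's 0/1 predicate result into the all-zeros/all-ones element mask Neon wants; in an unsigned type, -1 wraps to every bit set.)

    uint32_t hit  = -(uint32_t)1;   /* 0xffffffff: comparison true  */
    uint32_t miss = -(uint32_t)0;   /* 0x00000000: comparison false */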
+static int16_t vfp_tosszh(float16 x, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ if (float16_is_any_nan(x)) {
+ float_raise(float_flag_invalid, fpst);
+ return 0;
+ }
+ return float16_to_int16_round_to_zero(x, fpst);
+}
+
+static uint16_t vfp_touszh(float16 x, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ if (float16_is_any_nan(x)) {
+ float_raise(float_flag_invalid, fpst);
+ return 0;
+ }
+ return float16_to_uint16_round_to_zero(x, fpst);
+}
+
#define DO_2OP(NAME, FUNC, TYPE) \
void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
{ \
@@ -649,7 +750,44 @@ DO_2OP(gvec_frsqrte_h, helper_rsqrte_f16, float16)
DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32)
DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64)
+DO_2OP(gvec_vrintx_h, float16_round_to_int, float16)
+DO_2OP(gvec_vrintx_s, float32_round_to_int, float32)
+
+DO_2OP(gvec_sitos, helper_vfp_sitos, int32_t)
+DO_2OP(gvec_uitos, helper_vfp_uitos, uint32_t)
+DO_2OP(gvec_tosizs, helper_vfp_tosizs, float32)
+DO_2OP(gvec_touizs, helper_vfp_touizs, float32)
+DO_2OP(gvec_sstoh, int16_to_float16, int16_t)
+DO_2OP(gvec_ustoh, uint16_to_float16, uint16_t)
+DO_2OP(gvec_tosszh, vfp_tosszh, float16)
+DO_2OP(gvec_touszh, vfp_touszh, float16)
+
+#define WRAP_CMP0_FWD(FN, CMPOP, TYPE) \
+ static TYPE TYPE##_##FN##0(TYPE op, float_status *stat) \
+ { \
+ return TYPE##_##CMPOP(op, TYPE##_zero, stat); \
+ }
+
+#define WRAP_CMP0_REV(FN, CMPOP, TYPE) \
+ static TYPE TYPE##_##FN##0(TYPE op, float_status *stat) \
+ { \
+ return TYPE##_##CMPOP(TYPE##_zero, op, stat); \
+ }
+
+#define DO_2OP_CMP0(FN, CMPOP, DIRN) \
+ WRAP_CMP0_##DIRN(FN, CMPOP, float16) \
+ WRAP_CMP0_##DIRN(FN, CMPOP, float32) \
+ DO_2OP(gvec_f##FN##0_h, float16_##FN##0, float16) \
+ DO_2OP(gvec_f##FN##0_s, float32_##FN##0, float32)
+
+DO_2OP_CMP0(cgt, cgt, FWD)
+DO_2OP_CMP0(cge, cge, FWD)
+DO_2OP_CMP0(ceq, ceq, FWD)
+DO_2OP_CMP0(clt, cgt, REV)
+DO_2OP_CMP0(cle, cge, REV)
+
#undef DO_2OP
+#undef DO_2OP_CMP0
/* Floating-point trigonometric starting value.
* See the ARM ARM pseudocode function FPTrigSMul.
@@ -681,11 +819,71 @@ static float64 float64_ftsmul(float64 op1, uint64_t op2, float_status *stat)
return result;
}
+static float16 float16_abd(float16 op1, float16 op2, float_status *stat)
+{
+ return float16_abs(float16_sub(op1, op2, stat));
+}
+
static float32 float32_abd(float32 op1, float32 op2, float_status *stat)
{
return float32_abs(float32_sub(op1, op2, stat));
}
+/*
+ * Reciprocal step. These are the AArch32 versions, which use a
+ * non-fused multiply-and-subtract.
+ */
+static float16 float16_recps_nf(float16 op1, float16 op2, float_status *stat)
+{
+ op1 = float16_squash_input_denormal(op1, stat);
+ op2 = float16_squash_input_denormal(op2, stat);
+
+ if ((float16_is_infinity(op1) && float16_is_zero(op2)) ||
+ (float16_is_infinity(op2) && float16_is_zero(op1))) {
+ return float16_two;
+ }
+ return float16_sub(float16_two, float16_mul(op1, op2, stat), stat);
+}
+
+static float32 float32_recps_nf(float32 op1, float32 op2, float_status *stat)
+{
+ op1 = float32_squash_input_denormal(op1, stat);
+ op2 = float32_squash_input_denormal(op2, stat);
+
+ if ((float32_is_infinity(op1) && float32_is_zero(op2)) ||
+ (float32_is_infinity(op2) && float32_is_zero(op1))) {
+ return float32_two;
+ }
+ return float32_sub(float32_two, float32_mul(op1, op2, stat), stat);
+}
+
+/* Reciprocal square-root step. AArch32 non-fused semantics. */
+static float16 float16_rsqrts_nf(float16 op1, float16 op2, float_status *stat)
+{
+ op1 = float16_squash_input_denormal(op1, stat);
+ op2 = float16_squash_input_denormal(op2, stat);
+
+ if ((float16_is_infinity(op1) && float16_is_zero(op2)) ||
+ (float16_is_infinity(op2) && float16_is_zero(op1))) {
+ return float16_one_point_five;
+ }
+ op1 = float16_sub(float16_three, float16_mul(op1, op2, stat), stat);
+ return float16_div(op1, float16_two, stat);
+}
+
+static float32 float32_rsqrts_nf(float32 op1, float32 op2, float_status *stat)
+{
+ op1 = float32_squash_input_denormal(op1, stat);
+ op2 = float32_squash_input_denormal(op2, stat);
+
+ if ((float32_is_infinity(op1) && float32_is_zero(op2)) ||
+ (float32_is_infinity(op2) && float32_is_zero(op1))) {
+ return float32_one_point_five;
+ }
+ op1 = float32_sub(float32_three, float32_mul(op1, op2, stat), stat);
+ return float32_div(op1, float32_two, stat);
+}
+
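(Editorial context: these step values feed the guest's Newton-Raphson refinement. VRECPS returns 2 - a*b and VRSQRTS returns (3 - a*b) / 2, so a typical guest sequence refines an initial VRECPE/VRSQRTE estimate as in the host-side sketch below; the helper name is illustrative only.)

    /* x approximates 1/d; each step roughly doubles the number of good bits. */
    static float refine_recip(float d, float x)
    {
        for (int i = 0; i < 2; i++) {
            x = x * (2.0f - d * x);            /* x * VRECPS(d, x) */
        }
        return x;
    }
    /* The rsqrt case is analogous: x = x * ((3.0f - (d * x) * x) / 2.0f). */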
#define DO_3OP(NAME, FUNC, TYPE) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
{ \
@@ -713,8 +911,42 @@ DO_3OP(gvec_ftsmul_h, float16_ftsmul, float16)
DO_3OP(gvec_ftsmul_s, float32_ftsmul, float32)
DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64)
+DO_3OP(gvec_fabd_h, float16_abd, float16)
DO_3OP(gvec_fabd_s, float32_abd, float32)
+DO_3OP(gvec_fceq_h, float16_ceq, float16)
+DO_3OP(gvec_fceq_s, float32_ceq, float32)
+
+DO_3OP(gvec_fcge_h, float16_cge, float16)
+DO_3OP(gvec_fcge_s, float32_cge, float32)
+
+DO_3OP(gvec_fcgt_h, float16_cgt, float16)
+DO_3OP(gvec_fcgt_s, float32_cgt, float32)
+
+DO_3OP(gvec_facge_h, float16_acge, float16)
+DO_3OP(gvec_facge_s, float32_acge, float32)
+
+DO_3OP(gvec_facgt_h, float16_acgt, float16)
+DO_3OP(gvec_facgt_s, float32_acgt, float32)
+
+DO_3OP(gvec_fmax_h, float16_max, float16)
+DO_3OP(gvec_fmax_s, float32_max, float32)
+
+DO_3OP(gvec_fmin_h, float16_min, float16)
+DO_3OP(gvec_fmin_s, float32_min, float32)
+
+DO_3OP(gvec_fmaxnum_h, float16_maxnum, float16)
+DO_3OP(gvec_fmaxnum_s, float32_maxnum, float32)
+
+DO_3OP(gvec_fminnum_h, float16_minnum, float16)
+DO_3OP(gvec_fminnum_s, float32_minnum, float32)
+
+DO_3OP(gvec_recps_nf_h, float16_recps_nf, float16)
+DO_3OP(gvec_recps_nf_s, float32_recps_nf, float32)
+
+DO_3OP(gvec_rsqrts_nf_h, float16_rsqrts_nf, float16)
+DO_3OP(gvec_rsqrts_nf_s, float32_rsqrts_nf, float32)
+
#ifdef TARGET_AARCH64
DO_3OP(gvec_recps_h, helper_recpsf_f16, float16)
@@ -728,36 +960,176 @@ DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
#endif
#undef DO_3OP
+/* Non-fused multiply-add (unlike float16_muladd etc, which are fused) */
+static float16 float16_muladd_nf(float16 dest, float16 op1, float16 op2,
+ float_status *stat)
+{
+ return float16_add(dest, float16_mul(op1, op2, stat), stat);
+}
+
+static float32 float32_muladd_nf(float32 dest, float32 op1, float32 op2,
+ float_status *stat)
+{
+ return float32_add(dest, float32_mul(op1, op2, stat), stat);
+}
+
+static float16 float16_mulsub_nf(float16 dest, float16 op1, float16 op2,
+ float_status *stat)
+{
+ return float16_sub(dest, float16_mul(op1, op2, stat), stat);
+}
+
+static float32 float32_mulsub_nf(float32 dest, float32 op1, float32 op2,
+ float_status *stat)
+{
+ return float32_sub(dest, float32_mul(op1, op2, stat), stat);
+}
+
+/* Fused versions; these have the semantics Neon VFMA/VFMS want */
+static float16 float16_muladd_f(float16 dest, float16 op1, float16 op2,
+ float_status *stat)
+{
+ return float16_muladd(op1, op2, dest, 0, stat);
+}
+
+static float32 float32_muladd_f(float32 dest, float32 op1, float32 op2,
+ float_status *stat)
+{
+ return float32_muladd(op1, op2, dest, 0, stat);
+}
+
+static float16 float16_mulsub_f(float16 dest, float16 op1, float16 op2,
+ float_status *stat)
+{
+ return float16_muladd(float16_chs(op1), op2, dest, 0, stat);
+}
+
+static float32 float32_mulsub_f(float32 dest, float32 op1, float32 op2,
+ float_status *stat)
+{
+ return float32_muladd(float32_chs(op1), op2, dest, 0, stat);
+}
+
+#define DO_MULADD(NAME, FUNC, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] = FUNC(d[i], n[i], m[i], stat); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_MULADD(gvec_fmla_h, float16_muladd_nf, float16)
+DO_MULADD(gvec_fmla_s, float32_muladd_nf, float32)
+
+DO_MULADD(gvec_fmls_h, float16_mulsub_nf, float16)
+DO_MULADD(gvec_fmls_s, float32_mulsub_nf, float32)
+
+DO_MULADD(gvec_vfma_h, float16_muladd_f, float16)
+DO_MULADD(gvec_vfma_s, float32_muladd_f, float32)
+
+DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16)
+DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32)
+
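(Editorial illustration of the fused/non-fused split above: the _nf helpers round the product and then the sum, while the muladd-based ones round once at the end. Assuming C99 <math.h>, the host-side difference looks like this; whether a given input pair diverges depends on whether a*b needs more than 24 significand bits.)

    #include <math.h>

    static float mla_nonfused(float acc, float a, float b)
    {
        return acc + a * b;       /* two roundings, like gvec_fmla_* */
    }

    static float mla_fused(float acc, float a, float b)
    {
        return fmaf(a, b, acc);   /* one rounding, like gvec_vfma_* */
    }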
/* For the indexed ops, SVE applies the index per 128-bit vector segment.
* For AdvSIMD, there is of course only one such vector segment.
*/
#define DO_MUL_IDX(NAME, TYPE, H) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
{ \
- intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \
+ intptr_t i, j, oprsz = simd_oprsz(desc); \
+ intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
intptr_t idx = simd_data(desc); \
TYPE *d = vd, *n = vn, *m = vm; \
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
TYPE mm = m[H(i + idx)]; \
for (j = 0; j < segment; j++) { \
- d[i + j] = TYPE##_mul(n[i + j], mm, stat); \
+ d[i + j] = n[i + j] * mm; \
} \
} \
clear_tail(d, oprsz, simd_maxsz(desc)); \
}
-DO_MUL_IDX(gvec_fmul_idx_h, float16, H2)
-DO_MUL_IDX(gvec_fmul_idx_s, float32, H4)
-DO_MUL_IDX(gvec_fmul_idx_d, float64, )
+DO_MUL_IDX(gvec_mul_idx_h, uint16_t, H2)
+DO_MUL_IDX(gvec_mul_idx_s, uint32_t, H4)
+DO_MUL_IDX(gvec_mul_idx_d, uint64_t, )
#undef DO_MUL_IDX
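(Editorial sketch of the per-segment indexing described above, for the uint32_t case and assuming <stdint.h>; the real helpers additionally clamp the segment to MIN(16, oprsz) so that 8-byte Neon vectors work.)

    static void mul_idx_u32(uint32_t *d, const uint32_t *n, const uint32_t *m,
                            int oprsz_bytes, int idx)
    {
        int segment = 16 / sizeof(uint32_t);        /* 4 elements per 128 bits */
        for (int i = 0; i < oprsz_bytes / (int)sizeof(uint32_t); i += segment) {
            uint32_t mm = m[i + idx];               /* m[1], then m[5], ... for idx = 1 */
            for (int j = 0; j < segment; j++) {
                d[i + j] = n[i + j] * mm;
            }
        }
    }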
+#define DO_MLA_IDX(NAME, TYPE, OP, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
+{ \
+ intptr_t i, j, oprsz = simd_oprsz(desc); \
+ intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
+ intptr_t idx = simd_data(desc); \
+ TYPE *d = vd, *n = vn, *m = vm, *a = va; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
+ TYPE mm = m[H(i + idx)]; \
+ for (j = 0; j < segment; j++) { \
+ d[i + j] = a[i + j] OP n[i + j] * mm; \
+ } \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_MLA_IDX(gvec_mla_idx_h, uint16_t, +, H2)
+DO_MLA_IDX(gvec_mla_idx_s, uint32_t, +, H4)
+DO_MLA_IDX(gvec_mla_idx_d, uint64_t, +, )
+
+DO_MLA_IDX(gvec_mls_idx_h, uint16_t, -, H2)
+DO_MLA_IDX(gvec_mls_idx_s, uint32_t, -, H4)
+DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, )
+
+#undef DO_MLA_IDX
+
+#define DO_FMUL_IDX(NAME, ADD, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+{ \
+ intptr_t i, j, oprsz = simd_oprsz(desc); \
+ intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
+ intptr_t idx = simd_data(desc); \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
+ TYPE mm = m[H(i + idx)]; \
+ for (j = 0; j < segment; j++) { \
+ d[i + j] = TYPE##_##ADD(d[i + j], \
+ TYPE##_mul(n[i + j], mm, stat), stat); \
+ } \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+#define float16_nop(N, M, S) (M)
+#define float32_nop(N, M, S) (M)
+#define float64_nop(N, M, S) (M)
+
+DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16, H2)
+DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32, H4)
+DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64, )
+
+/*
+ * Non-fused multiply-accumulate operations, for Neon. NB that unlike
+ * the fused ops below, these accumulate both from and into Vd.
+ */
+DO_FMUL_IDX(gvec_fmla_nf_idx_h, add, float16, H2)
+DO_FMUL_IDX(gvec_fmla_nf_idx_s, add, float32, H4)
+DO_FMUL_IDX(gvec_fmls_nf_idx_h, sub, float16, H2)
+DO_FMUL_IDX(gvec_fmls_nf_idx_s, sub, float32, H4)
+
+#undef float16_nop
+#undef float32_nop
+#undef float64_nop
+#undef DO_FMUL_IDX
+
#define DO_FMLA_IDX(NAME, TYPE, H) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
void *stat, uint32_t desc) \
{ \
- intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \
+ intptr_t i, j, oprsz = simd_oprsz(desc); \
+ intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \
intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \
TYPE *d = vd, *n = vn, *m = vm, *a = va; \
@@ -1452,3 +1824,116 @@ DO_ABA(gvec_uaba_s, uint32_t)
DO_ABA(gvec_uaba_d, uint64_t)
#undef DO_ABA
+
+#define DO_NEON_PAIRWISE(NAME, OP) \
+ void HELPER(NAME##s)(void *vd, void *vn, void *vm, \
+ void *stat, uint32_t oprsz) \
+ { \
+ float_status *fpst = stat; \
+ float32 *d = vd; \
+ float32 *n = vn; \
+ float32 *m = vm; \
+ float32 r0, r1; \
+ \
+ /* Read all inputs before writing outputs in case vm == vd */ \
+ r0 = float32_##OP(n[H4(0)], n[H4(1)], fpst); \
+ r1 = float32_##OP(m[H4(0)], m[H4(1)], fpst); \
+ \
+ d[H4(0)] = r0; \
+ d[H4(1)] = r1; \
+ } \
+ \
+ void HELPER(NAME##h)(void *vd, void *vn, void *vm, \
+ void *stat, uint32_t oprsz) \
+ { \
+ float_status *fpst = stat; \
+ float16 *d = vd; \
+ float16 *n = vn; \
+ float16 *m = vm; \
+ float16 r0, r1, r2, r3; \
+ \
+ /* Read all inputs before writing outputs in case vm == vd */ \
+ r0 = float16_##OP(n[H2(0)], n[H2(1)], fpst); \
+ r1 = float16_##OP(n[H2(2)], n[H2(3)], fpst); \
+ r2 = float16_##OP(m[H2(0)], m[H2(1)], fpst); \
+ r3 = float16_##OP(m[H2(2)], m[H2(3)], fpst); \
+ \
+        d[H2(0)] = r0;                                                  \
+        d[H2(1)] = r1;                                                  \
+        d[H2(2)] = r2;                                                  \
+        d[H2(3)] = r3;                                                  \
+ }
+
+DO_NEON_PAIRWISE(neon_padd, add)
+DO_NEON_PAIRWISE(neon_pmax, max)
+DO_NEON_PAIRWISE(neon_pmin, min)
+
+#undef DO_NEON_PAIRWISE
+
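(Editorial note: this is the Neon pairwise layout; reading every input before writing is what makes the Dd == Dm case safe.)

    VPADD.F32 Dd, Dn, Dm   =>   Dd[0] = Dn[0] + Dn[1]
                                Dd[1] = Dm[0] + Dm[1]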
+#define DO_VCVT_FIXED(NAME, FUNC, TYPE) \
+ void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
+ { \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ int shift = simd_data(desc); \
+ TYPE *d = vd, *n = vn; \
+ float_status *fpst = stat; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] = FUNC(n[i], shift, fpst); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+ }
+
+DO_VCVT_FIXED(gvec_vcvt_sf, helper_vfp_sltos, uint32_t)
+DO_VCVT_FIXED(gvec_vcvt_uf, helper_vfp_ultos, uint32_t)
+DO_VCVT_FIXED(gvec_vcvt_fs, helper_vfp_tosls_round_to_zero, uint32_t)
+DO_VCVT_FIXED(gvec_vcvt_fu, helper_vfp_touls_round_to_zero, uint32_t)
+DO_VCVT_FIXED(gvec_vcvt_sh, helper_vfp_shtoh, uint16_t)
+DO_VCVT_FIXED(gvec_vcvt_uh, helper_vfp_uhtoh, uint16_t)
+DO_VCVT_FIXED(gvec_vcvt_hs, helper_vfp_toshh_round_to_zero, uint16_t)
+DO_VCVT_FIXED(gvec_vcvt_hu, helper_vfp_touhh_round_to_zero, uint16_t)
+
+#undef DO_VCVT_FIXED
+
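(Editorial worked example, assuming <math.h> for ldexpf: simd_data(desc) holds the number of fractional bits, so gvec_vcvt_sf with shift = 8 turns the fixed-point integer 384 into 384 * 2^-8 = 1.5, and gvec_vcvt_fs maps 1.5 back to 384 with round-to-zero. On the host the same relation is just a scalbn:)

    float as_float = ldexpf(384.0f, -8);       /* 1.5f */
    int   as_fixed = (int)(1.5f * (1 << 8));   /* 384, truncating like _round_to_zero */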
+#define DO_VCVT_RMODE(NAME, FUNC, TYPE) \
+ void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
+ { \
+ float_status *fpst = stat; \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ uint32_t rmode = simd_data(desc); \
+ uint32_t prev_rmode = get_float_rounding_mode(fpst); \
+ TYPE *d = vd, *n = vn; \
+ set_float_rounding_mode(rmode, fpst); \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] = FUNC(n[i], 0, fpst); \
+ } \
+ set_float_rounding_mode(prev_rmode, fpst); \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+ }
+
+DO_VCVT_RMODE(gvec_vcvt_rm_ss, helper_vfp_tosls, uint32_t)
+DO_VCVT_RMODE(gvec_vcvt_rm_us, helper_vfp_touls, uint32_t)
+DO_VCVT_RMODE(gvec_vcvt_rm_sh, helper_vfp_toshh, uint16_t)
+DO_VCVT_RMODE(gvec_vcvt_rm_uh, helper_vfp_touhh, uint16_t)
+
+#undef DO_VCVT_RMODE
+
+#define DO_VRINT_RMODE(NAME, FUNC, TYPE) \
+ void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
+ { \
+ float_status *fpst = stat; \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ uint32_t rmode = simd_data(desc); \
+ uint32_t prev_rmode = get_float_rounding_mode(fpst); \
+ TYPE *d = vd, *n = vn; \
+ set_float_rounding_mode(rmode, fpst); \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] = FUNC(n[i], fpst); \
+ } \
+ set_float_rounding_mode(prev_rmode, fpst); \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+ }
+
+DO_VRINT_RMODE(gvec_vrint_rm_h, helper_rinth, uint16_t)
+DO_VRINT_RMODE(gvec_vrint_rm_s, helper_rints, uint32_t)
+
+#undef DO_VRINT_RMODE
diff --git a/target/arm/vfp-uncond.decode b/target/arm/vfp-uncond.decode
index 34ca164266..8891ab3d54 100644
--- a/target/arm/vfp-uncond.decode
+++ b/target/arm/vfp-uncond.decode
@@ -44,10 +44,15 @@
@vfp_dnm_s ................................ vm=%vm_sp vn=%vn_sp vd=%vd_sp
@vfp_dnm_d ................................ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+VSEL 1111 1110 0. cc:2 .... .... 1001 .0.0 .... \
+ vm=%vm_sp vn=%vn_sp vd=%vd_sp sz=1
VSEL 1111 1110 0. cc:2 .... .... 1010 .0.0 .... \
- vm=%vm_sp vn=%vn_sp vd=%vd_sp dp=0
+ vm=%vm_sp vn=%vn_sp vd=%vd_sp sz=2
VSEL 1111 1110 0. cc:2 .... .... 1011 .0.0 .... \
- vm=%vm_dp vn=%vn_dp vd=%vd_dp dp=1
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp sz=3
+
+VMAXNM_hp 1111 1110 1.00 .... .... 1001 .0.0 .... @vfp_dnm_s
+VMINNM_hp 1111 1110 1.00 .... .... 1001 .1.0 .... @vfp_dnm_s
VMAXNM_sp 1111 1110 1.00 .... .... 1010 .0.0 .... @vfp_dnm_s
VMINNM_sp 1111 1110 1.00 .... .... 1010 .1.0 .... @vfp_dnm_s
@@ -55,13 +60,23 @@ VMINNM_sp 1111 1110 1.00 .... .... 1010 .1.0 .... @vfp_dnm_s
VMAXNM_dp 1111 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d
VMINNM_dp 1111 1110 1.00 .... .... 1011 .1.0 .... @vfp_dnm_d
+VRINT 1111 1110 1.11 10 rm:2 .... 1001 01.0 .... \
+ vm=%vm_sp vd=%vd_sp sz=1
VRINT 1111 1110 1.11 10 rm:2 .... 1010 01.0 .... \
- vm=%vm_sp vd=%vd_sp dp=0
+ vm=%vm_sp vd=%vd_sp sz=2
VRINT 1111 1110 1.11 10 rm:2 .... 1011 01.0 .... \
- vm=%vm_dp vd=%vd_dp dp=1
+ vm=%vm_dp vd=%vd_dp sz=3
# VCVT float to int with specified rounding mode; Vd is always single-precision
+VCVT 1111 1110 1.11 11 rm:2 .... 1001 op:1 1.0 .... \
+ vm=%vm_sp vd=%vd_sp sz=1
VCVT 1111 1110 1.11 11 rm:2 .... 1010 op:1 1.0 .... \
- vm=%vm_sp vd=%vd_sp dp=0
+ vm=%vm_sp vd=%vd_sp sz=2
VCVT 1111 1110 1.11 11 rm:2 .... 1011 op:1 1.0 .... \
- vm=%vm_dp vd=%vd_sp dp=1
+ vm=%vm_dp vd=%vd_sp sz=3
+
+VMOVX 1111 1110 1.11 0000 .... 1010 01 . 0 .... \
+ vd=%vd_sp vm=%vm_sp
+
+VINS 1111 1110 1.11 0000 .... 1010 11 . 0 .... \
+ vd=%vd_sp vm=%vm_sp
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index 2c793e3e87..51f143b4a5 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -74,13 +74,13 @@ VDUP ---- 1110 1 b:1 q:1 0 .... rt:4 1011 . 0 e:1 1 0000 \
vn=%vn_dp
VMSR_VMRS ---- 1110 111 l:1 reg:4 rt:4 1010 0001 0000
+VMOV_half ---- 1110 000 l:1 .... rt:4 1001 . 001 0000 vn=%vn_sp
VMOV_single ---- 1110 000 l:1 .... rt:4 1010 . 001 0000 vn=%vn_sp
VMOV_64_sp ---- 1100 010 op:1 rt2:4 rt:4 1010 00.1 .... vm=%vm_sp
VMOV_64_dp ---- 1100 010 op:1 rt2:4 rt:4 1011 00.1 .... vm=%vm_dp
-# Note that the half-precision variants of VLDR and VSTR are
-# not part of this decodetree at all because they have bits [9:8] == 0b01
+VLDR_VSTR_hp ---- 1101 u:1 .0 l:1 rn:4 .... 1001 imm:8 vd=%vd_sp
VLDR_VSTR_sp ---- 1101 u:1 .0 l:1 rn:4 .... 1010 imm:8 vd=%vd_sp
VLDR_VSTR_dp ---- 1101 u:1 .0 l:1 rn:4 .... 1011 imm:8 vd=%vd_dp
@@ -103,33 +103,47 @@ VLDM_VSTM_dp ---- 1101 0.1 l:1 rn:4 .... 1011 imm:8 \
vd=%vd_dp p=1 u=0 w=1
# 3-register VFP data-processing; bits [23,21:20,6] identify the operation.
+VMLA_hp ---- 1110 0.00 .... .... 1001 .0.0 .... @vfp_dnm_s
VMLA_sp ---- 1110 0.00 .... .... 1010 .0.0 .... @vfp_dnm_s
VMLA_dp ---- 1110 0.00 .... .... 1011 .0.0 .... @vfp_dnm_d
+VMLS_hp ---- 1110 0.00 .... .... 1001 .1.0 .... @vfp_dnm_s
VMLS_sp ---- 1110 0.00 .... .... 1010 .1.0 .... @vfp_dnm_s
VMLS_dp ---- 1110 0.00 .... .... 1011 .1.0 .... @vfp_dnm_d
+VNMLS_hp ---- 1110 0.01 .... .... 1001 .0.0 .... @vfp_dnm_s
VNMLS_sp ---- 1110 0.01 .... .... 1010 .0.0 .... @vfp_dnm_s
VNMLS_dp ---- 1110 0.01 .... .... 1011 .0.0 .... @vfp_dnm_d
+VNMLA_hp ---- 1110 0.01 .... .... 1001 .1.0 .... @vfp_dnm_s
VNMLA_sp ---- 1110 0.01 .... .... 1010 .1.0 .... @vfp_dnm_s
VNMLA_dp ---- 1110 0.01 .... .... 1011 .1.0 .... @vfp_dnm_d
+VMUL_hp ---- 1110 0.10 .... .... 1001 .0.0 .... @vfp_dnm_s
VMUL_sp ---- 1110 0.10 .... .... 1010 .0.0 .... @vfp_dnm_s
VMUL_dp ---- 1110 0.10 .... .... 1011 .0.0 .... @vfp_dnm_d
+VNMUL_hp ---- 1110 0.10 .... .... 1001 .1.0 .... @vfp_dnm_s
VNMUL_sp ---- 1110 0.10 .... .... 1010 .1.0 .... @vfp_dnm_s
VNMUL_dp ---- 1110 0.10 .... .... 1011 .1.0 .... @vfp_dnm_d
+VADD_hp ---- 1110 0.11 .... .... 1001 .0.0 .... @vfp_dnm_s
VADD_sp ---- 1110 0.11 .... .... 1010 .0.0 .... @vfp_dnm_s
VADD_dp ---- 1110 0.11 .... .... 1011 .0.0 .... @vfp_dnm_d
+VSUB_hp ---- 1110 0.11 .... .... 1001 .1.0 .... @vfp_dnm_s
VSUB_sp ---- 1110 0.11 .... .... 1010 .1.0 .... @vfp_dnm_s
VSUB_dp ---- 1110 0.11 .... .... 1011 .1.0 .... @vfp_dnm_d
+VDIV_hp ---- 1110 1.00 .... .... 1001 .0.0 .... @vfp_dnm_s
VDIV_sp ---- 1110 1.00 .... .... 1010 .0.0 .... @vfp_dnm_s
VDIV_dp ---- 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d
+VFMA_hp ---- 1110 1.10 .... .... 1001 .0. 0 .... @vfp_dnm_s
+VFMS_hp ---- 1110 1.10 .... .... 1001 .1. 0 .... @vfp_dnm_s
+VFNMA_hp ---- 1110 1.01 .... .... 1001 .0. 0 .... @vfp_dnm_s
+VFNMS_hp ---- 1110 1.01 .... .... 1001 .1. 0 .... @vfp_dnm_s
+
VFMA_sp ---- 1110 1.10 .... .... 1010 .0. 0 .... @vfp_dnm_s
VFMS_sp ---- 1110 1.10 .... .... 1010 .1. 0 .... @vfp_dnm_s
VFNMA_sp ---- 1110 1.01 .... .... 1010 .0. 0 .... @vfp_dnm_s
@@ -140,6 +154,8 @@ VFMS_dp ---- 1110 1.10 .... .... 1011 .1.0 .... @vfp_dnm_d
VFNMA_dp ---- 1110 1.01 .... .... 1011 .0.0 .... @vfp_dnm_d
VFNMS_dp ---- 1110 1.01 .... .... 1011 .1.0 .... @vfp_dnm_d
+VMOV_imm_hp ---- 1110 1.11 .... .... 1001 0000 .... \
+ vd=%vd_sp imm=%vmov_imm
VMOV_imm_sp ---- 1110 1.11 .... .... 1010 0000 .... \
vd=%vd_sp imm=%vmov_imm
VMOV_imm_dp ---- 1110 1.11 .... .... 1011 0000 .... \
@@ -148,15 +164,20 @@ VMOV_imm_dp ---- 1110 1.11 .... .... 1011 0000 .... \
VMOV_reg_sp ---- 1110 1.11 0000 .... 1010 01.0 .... @vfp_dm_ss
VMOV_reg_dp ---- 1110 1.11 0000 .... 1011 01.0 .... @vfp_dm_dd
+VABS_hp ---- 1110 1.11 0000 .... 1001 11.0 .... @vfp_dm_ss
VABS_sp ---- 1110 1.11 0000 .... 1010 11.0 .... @vfp_dm_ss
VABS_dp ---- 1110 1.11 0000 .... 1011 11.0 .... @vfp_dm_dd
+VNEG_hp ---- 1110 1.11 0001 .... 1001 01.0 .... @vfp_dm_ss
VNEG_sp ---- 1110 1.11 0001 .... 1010 01.0 .... @vfp_dm_ss
VNEG_dp ---- 1110 1.11 0001 .... 1011 01.0 .... @vfp_dm_dd
+VSQRT_hp ---- 1110 1.11 0001 .... 1001 11.0 .... @vfp_dm_ss
VSQRT_sp ---- 1110 1.11 0001 .... 1010 11.0 .... @vfp_dm_ss
VSQRT_dp ---- 1110 1.11 0001 .... 1011 11.0 .... @vfp_dm_dd
+VCMP_hp ---- 1110 1.11 010 z:1 .... 1001 e:1 1.0 .... \
+ vd=%vd_sp vm=%vm_sp
VCMP_sp ---- 1110 1.11 010 z:1 .... 1010 e:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCMP_dp ---- 1110 1.11 010 z:1 .... 1011 e:1 1.0 .... \
@@ -175,12 +196,15 @@ VCVT_f16_f32 ---- 1110 1.11 0011 .... 1010 t:1 1.0 .... \
VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \
vd=%vd_sp vm=%vm_dp
+VRINTR_hp ---- 1110 1.11 0110 .... 1001 01.0 .... @vfp_dm_ss
VRINTR_sp ---- 1110 1.11 0110 .... 1010 01.0 .... @vfp_dm_ss
VRINTR_dp ---- 1110 1.11 0110 .... 1011 01.0 .... @vfp_dm_dd
+VRINTZ_hp ---- 1110 1.11 0110 .... 1001 11.0 .... @vfp_dm_ss
VRINTZ_sp ---- 1110 1.11 0110 .... 1010 11.0 .... @vfp_dm_ss
VRINTZ_dp ---- 1110 1.11 0110 .... 1011 11.0 .... @vfp_dm_dd
+VRINTX_hp ---- 1110 1.11 0111 .... 1001 01.0 .... @vfp_dm_ss
VRINTX_sp ---- 1110 1.11 0111 .... 1010 01.0 .... @vfp_dm_ss
VRINTX_dp ---- 1110 1.11 0111 .... 1011 01.0 .... @vfp_dm_dd
@@ -190,6 +214,8 @@ VCVT_sp ---- 1110 1.11 0111 .... 1010 11.0 .... @vfp_dm_ds
VCVT_dp ---- 1110 1.11 0111 .... 1011 11.0 .... @vfp_dm_sd
# VCVT from integer to floating point: Vm always single; Vd depends on size
+VCVT_int_hp ---- 1110 1.11 1000 .... 1001 s:1 1.0 .... \
+ vd=%vd_sp vm=%vm_sp
VCVT_int_sp ---- 1110 1.11 1000 .... 1010 s:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCVT_int_dp ---- 1110 1.11 1000 .... 1011 s:1 1.0 .... \
@@ -203,12 +229,16 @@ VJCVT ---- 1110 1.11 1001 .... 1011 11.0 .... @vfp_dm_sd
# We assemble bits 18 (op), 16 (u) and 7 (sx) into a single opc field
# for the convenience of the trans_VCVT_fix functions.
%vcvt_fix_op 18:1 16:1 7:1
+VCVT_fix_hp ---- 1110 1.11 1.1. .... 1001 .1.0 .... \
+ vd=%vd_sp imm=%vm_sp opc=%vcvt_fix_op
VCVT_fix_sp ---- 1110 1.11 1.1. .... 1010 .1.0 .... \
vd=%vd_sp imm=%vm_sp opc=%vcvt_fix_op
VCVT_fix_dp ---- 1110 1.11 1.1. .... 1011 .1.0 .... \
vd=%vd_dp imm=%vm_sp opc=%vcvt_fix_op
# VCVT float to integer (VCVT and VCVTR): Vd always single; Vm depends on size
+VCVT_hp_int ---- 1110 1.11 110 s:1 .... 1001 rz:1 1.0 .... \
+ vd=%vd_sp vm=%vm_sp
VCVT_sp_int ---- 1110 1.11 110 s:1 .... 1010 rz:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCVT_dp_int ---- 1110 1.11 110 s:1 .... 1011 rz:1 1.0 .... \
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 64266ece62..5666393ef7 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -236,6 +236,11 @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val)
#define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
#define VFP_BINOP(name) \
+dh_ctype_f16 VFP_HELPER(name, h)(dh_ctype_f16 a, dh_ctype_f16 b, void *fpstp) \
+{ \
+ float_status *fpst = fpstp; \
+ return float16_ ## name(a, b, fpst); \
+} \
float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \
{ \
float_status *fpst = fpstp; \
@@ -256,6 +261,11 @@ VFP_BINOP(minnum)
VFP_BINOP(maxnum)
#undef VFP_BINOP
+dh_ctype_f16 VFP_HELPER(neg, h)(dh_ctype_f16 a)
+{
+ return float16_chs(a);
+}
+
float32 VFP_HELPER(neg, s)(float32 a)
{
return float32_chs(a);
@@ -266,6 +276,11 @@ float64 VFP_HELPER(neg, d)(float64 a)
return float64_chs(a);
}
+dh_ctype_f16 VFP_HELPER(abs, h)(dh_ctype_f16 a)
+{
+ return float16_abs(a);
+}
+
float32 VFP_HELPER(abs, s)(float32 a)
{
return float32_abs(a);
@@ -276,6 +291,11 @@ float64 VFP_HELPER(abs, d)(float64 a)
return float64_abs(a);
}
+dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, CPUARMState *env)
+{
+ return float16_sqrt(a, &env->vfp.fp_status_f16);
+}
+
float32 VFP_HELPER(sqrt, s)(float32 a, CPUARMState *env)
{
return float32_sqrt(a, &env->vfp.fp_status);
@@ -310,19 +330,20 @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
}
/* XXX: check quiet/signaling case */
-#define DO_VFP_cmp(p, type) \
-void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \
+#define DO_VFP_cmp(P, FLOATTYPE, ARGTYPE, FPST) \
+void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
{ \
softfloat_to_vfp_compare(env, \
- type ## _compare_quiet(a, b, &env->vfp.fp_status)); \
+ FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \
} \
-void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \
+void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
{ \
softfloat_to_vfp_compare(env, \
- type ## _compare(a, b, &env->vfp.fp_status)); \
+ FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
}
-DO_VFP_cmp(s, float32)
-DO_VFP_cmp(d, float64)
+DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16)
+DO_VFP_cmp(s, float32, float32, fp_status)
+DO_VFP_cmp(d, float64, float64, fp_status)
#undef DO_VFP_cmp
/* Integer to float and float to integer conversions */
@@ -373,13 +394,13 @@ float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env)
}
/* VFP3 fixed point conversion. */
-#define VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
-float##fsz HELPER(vfp_##name##to##p)(uint##isz##_t x, uint32_t shift, \
+#define VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \
+ftype HELPER(vfp_##name##to##p)(uint##isz##_t x, uint32_t shift, \
void *fpstp) \
{ return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); }
-#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, ROUND, suff) \
-uint##isz##_t HELPER(vfp_to##name##p##suff)(float##fsz x, uint32_t shift, \
+#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, ROUND, suff) \
+uint##isz##_t HELPER(vfp_to##name##p##suff)(ftype x, uint32_t shift, \
void *fpst) \
{ \
if (unlikely(float##fsz##_is_any_nan(x))) { \
@@ -389,116 +410,42 @@ uint##isz##_t HELPER(vfp_to##name##p##suff)(float##fsz x, uint32_t shift, \
return float##fsz##_to_##itype##_scalbn(x, ROUND, shift, fpst); \
}
-#define VFP_CONV_FIX(name, p, fsz, isz, itype) \
-VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, \
+#define VFP_CONV_FIX(name, p, fsz, ftype, isz, itype) \
+VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \
float_round_to_zero, _round_to_zero) \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, \
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \
get_float_rounding_mode(fpst), )
-#define VFP_CONV_FIX_A64(name, p, fsz, isz, itype) \
-VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, \
+#define VFP_CONV_FIX_A64(name, p, fsz, ftype, isz, itype) \
+VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \
get_float_rounding_mode(fpst), )
-VFP_CONV_FIX(sh, d, 64, 64, int16)
-VFP_CONV_FIX(sl, d, 64, 64, int32)
-VFP_CONV_FIX_A64(sq, d, 64, 64, int64)
-VFP_CONV_FIX(uh, d, 64, 64, uint16)
-VFP_CONV_FIX(ul, d, 64, 64, uint32)
-VFP_CONV_FIX_A64(uq, d, 64, 64, uint64)
-VFP_CONV_FIX(sh, s, 32, 32, int16)
-VFP_CONV_FIX(sl, s, 32, 32, int32)
-VFP_CONV_FIX_A64(sq, s, 32, 64, int64)
-VFP_CONV_FIX(uh, s, 32, 32, uint16)
-VFP_CONV_FIX(ul, s, 32, 32, uint32)
-VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
+VFP_CONV_FIX(sh, d, 64, float64, 64, int16)
+VFP_CONV_FIX(sl, d, 64, float64, 64, int32)
+VFP_CONV_FIX_A64(sq, d, 64, float64, 64, int64)
+VFP_CONV_FIX(uh, d, 64, float64, 64, uint16)
+VFP_CONV_FIX(ul, d, 64, float64, 64, uint32)
+VFP_CONV_FIX_A64(uq, d, 64, float64, 64, uint64)
+VFP_CONV_FIX(sh, s, 32, float32, 32, int16)
+VFP_CONV_FIX(sl, s, 32, float32, 32, int32)
+VFP_CONV_FIX_A64(sq, s, 32, float32, 64, int64)
+VFP_CONV_FIX(uh, s, 32, float32, 32, uint16)
+VFP_CONV_FIX(ul, s, 32, float32, 32, uint32)
+VFP_CONV_FIX_A64(uq, s, 32, float32, 64, uint64)
+VFP_CONV_FIX(sh, h, 16, dh_ctype_f16, 32, int16)
+VFP_CONV_FIX(sl, h, 16, dh_ctype_f16, 32, int32)
+VFP_CONV_FIX_A64(sq, h, 16, dh_ctype_f16, 64, int64)
+VFP_CONV_FIX(uh, h, 16, dh_ctype_f16, 32, uint16)
+VFP_CONV_FIX(ul, h, 16, dh_ctype_f16, 32, uint32)
+VFP_CONV_FIX_A64(uq, h, 16, dh_ctype_f16, 64, uint64)
#undef VFP_CONV_FIX
#undef VFP_CONV_FIX_FLOAT
#undef VFP_CONV_FLOAT_FIX_ROUND
#undef VFP_CONV_FIX_A64
-uint32_t HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst)
-{
- return int32_to_float16_scalbn(x, -shift, fpst);
-}
-
-uint32_t HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
-{
- return uint32_to_float16_scalbn(x, -shift, fpst);
-}
-
-uint32_t HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst)
-{
- return int64_to_float16_scalbn(x, -shift, fpst);
-}
-
-uint32_t HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst)
-{
- return uint64_to_float16_scalbn(x, -shift, fpst);
-}
-
-uint32_t HELPER(vfp_toshh)(uint32_t x, uint32_t shift, void *fpst)
-{
- if (unlikely(float16_is_any_nan(x))) {
- float_raise(float_flag_invalid, fpst);
- return 0;
- }
- return float16_to_int16_scalbn(x, get_float_rounding_mode(fpst),
- shift, fpst);
-}
-
-uint32_t HELPER(vfp_touhh)(uint32_t x, uint32_t shift, void *fpst)
-{
- if (unlikely(float16_is_any_nan(x))) {
- float_raise(float_flag_invalid, fpst);
- return 0;
- }
- return float16_to_uint16_scalbn(x, get_float_rounding_mode(fpst),
- shift, fpst);
-}
-
-uint32_t HELPER(vfp_toslh)(uint32_t x, uint32_t shift, void *fpst)
-{
- if (unlikely(float16_is_any_nan(x))) {
- float_raise(float_flag_invalid, fpst);
- return 0;
- }
- return float16_to_int32_scalbn(x, get_float_rounding_mode(fpst),
- shift, fpst);
-}
-
-uint32_t HELPER(vfp_toulh)(uint32_t x, uint32_t shift, void *fpst)
-{
- if (unlikely(float16_is_any_nan(x))) {
- float_raise(float_flag_invalid, fpst);
- return 0;
- }
- return float16_to_uint32_scalbn(x, get_float_rounding_mode(fpst),
- shift, fpst);
-}
-
-uint64_t HELPER(vfp_tosqh)(uint32_t x, uint32_t shift, void *fpst)
-{
- if (unlikely(float16_is_any_nan(x))) {
- float_raise(float_flag_invalid, fpst);
- return 0;
- }
- return float16_to_int64_scalbn(x, get_float_rounding_mode(fpst),
- shift, fpst);
-}
-
-uint64_t HELPER(vfp_touqh)(uint32_t x, uint32_t shift, void *fpst)
-{
- if (unlikely(float16_is_any_nan(x))) {
- float_raise(float_flag_invalid, fpst);
- return 0;
- }
- return float16_to_uint64_scalbn(x, get_float_rounding_mode(fpst),
- shift, fpst);
-}
-
/* Set the current fp rounding mode and return the old one.
* The argument is a softfloat float_round_ value.
*/
@@ -512,23 +459,6 @@ uint32_t HELPER(set_rmode)(uint32_t rmode, void *fpstp)
return prev_rmode;
}
-/* Set the current fp rounding mode in the standard fp status and return
- * the old one. This is for NEON instructions that need to change the
- * rounding mode but wish to use the standard FPSCR values for everything
- * else. Always set the rounding mode back to the correct value after
- * modifying it.
- * The argument is a softfloat float_round_ value.
- */
-uint32_t HELPER(set_neon_rmode)(uint32_t rmode, CPUARMState *env)
-{
- float_status *fp_status = &env->vfp.standard_fp_status;
-
- uint32_t prev_rmode = get_float_rounding_mode(fp_status);
- set_float_rounding_mode(rmode, fp_status);
-
- return prev_rmode;
-}
-
/* Half precision conversions. */
float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode)
{
@@ -582,38 +512,6 @@ uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
return r;
}
-#define float32_two make_float32(0x40000000)
-#define float32_three make_float32(0x40400000)
-#define float32_one_point_five make_float32(0x3fc00000)
-
-float32 HELPER(recps_f32)(CPUARMState *env, float32 a, float32 b)
-{
- float_status *s = &env->vfp.standard_fp_status;
- if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
- (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
- if (!(float32_is_zero(a) || float32_is_zero(b))) {
- float_raise(float_flag_input_denormal, s);
- }
- return float32_two;
- }
- return float32_sub(float32_two, float32_mul(a, b, s), s);
-}
-
-float32 HELPER(rsqrts_f32)(CPUARMState *env, float32 a, float32 b)
-{
- float_status *s = &env->vfp.standard_fp_status;
- float32 product;
- if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
- (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
- if (!(float32_is_zero(a) || float32_is_zero(b))) {
- float_raise(float_flag_input_denormal, s);
- }
- return float32_one_point_five;
- }
- product = float32_mul(a, b, s);
- return float32_div(float32_sub(float32_three, product, s), float32_two, s);
-}
-
/* NEON helpers. */
/* Constants 256 and 512 are used in some helpers; we avoid relying on
@@ -1056,6 +954,13 @@ uint32_t HELPER(rsqrte_u32)(uint32_t a)
}
/* VFPv4 fused multiply-accumulate */
+dh_ctype_f16 VFP_HELPER(muladd, h)(dh_ctype_f16 a, dh_ctype_f16 b,
+ dh_ctype_f16 c, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ return float16_muladd(a, b, c, 0, fpst);
+}
+
float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp)
{
float_status *fpst = fpstp;
@@ -1069,6 +974,11 @@ float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
}
/* ARMv8 round to integral */
+dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, void *fp_status)
+{
+ return float16_round_to_int(x, fp_status);
+}
+
float32 HELPER(rints_exact)(float32 x, void *fp_status)
{
return float32_round_to_int(x, fp_status);
@@ -1079,6 +989,22 @@ float64 HELPER(rintd_exact)(float64 x, void *fp_status)
return float64_round_to_int(x, fp_status);
}
+dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, void *fp_status)
+{
+ int old_flags = get_float_exception_flags(fp_status), new_flags;
+ float16 ret;
+
+ ret = float16_round_to_int(x, fp_status);
+
+ /* Suppress any inexact exceptions the conversion produced */
+ if (!(old_flags & float_flag_inexact)) {
+ new_flags = get_float_exception_flags(fp_status);
+ set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
+ }
+
+ return ret;
+}
+
float32 HELPER(rints)(float32 x, void *fp_status)
{
int old_flags = get_float_exception_flags(fp_status), new_flags;
diff --git a/target/cris/translate.c b/target/cris/translate.c
index ee5e359c77..c312e6f8a6 100644
--- a/target/cris/translate.c
+++ b/target/cris/translate.c
@@ -1178,12 +1178,11 @@ static inline void t_gen_zext(TCGv d, TCGv s, int size)
static char memsize_char(int size)
{
switch (size) {
- case 1: return 'b'; break;
- case 2: return 'w'; break;
- case 4: return 'd'; break;
+ case 1: return 'b';
+ case 2: return 'w';
+ case 4: return 'd';
default:
return 'x';
- break;
}
}
#endif
diff --git a/target/cris/translate_v10.c.inc b/target/cris/translate_v10.c.inc
index ae34a0d1a3..7f38fd215e 100644
--- a/target/cris/translate_v10.c.inc
+++ b/target/cris/translate_v10.c.inc
@@ -1026,10 +1026,8 @@ static unsigned int dec10_ind(CPUCRISState *env, DisasContext *dc)
switch (dc->opcode) {
case CRISV10_IND_MOVE_M_R:
return dec10_ind_move_m_r(env, dc, size);
- break;
case CRISV10_IND_MOVE_R_M:
return dec10_ind_move_r_m(dc, size);
- break;
case CRISV10_IND_CMP:
LOG_DIS("cmp size=%d op=%d %d\n", size, dc->src, dc->dst);
cris_cc_mask(dc, CC_MASK_NZVC);
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 588f32e136..49d8958528 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -338,15 +338,68 @@ static void encode_cache_cpuid80000006(CPUCacheInfo *l2,
}
}
+/*
+ * Definitions used for building CPUID Leaf 0x8000001D and 0x8000001E
+ * Please refer to the AMD64 Architecture Programmer’s Manual Volume 3.
+ * Define the constants to build the cpu topology. Right now, TOPOEXT
+ * feature is enabled only on EPYC. So, these constants are based on
+ * EPYC supported configurations. We may need to handle the cases if
+ * these values change in future.
+ */
+/* Maximum core complexes in a node */
+#define MAX_CCX 2
+/* Maximum cores in a core complex */
+#define MAX_CORES_IN_CCX 4
+/* Maximum cores in a node */
+#define MAX_CORES_IN_NODE 8
+/* Maximum nodes in a socket */
+#define MAX_NODES_PER_SOCKET 4
+
+/*
+ * Figure out the number of nodes required to build this config.
+ * Max cores in a node is 8
+ */
+static int nodes_in_socket(int nr_cores)
+{
+ int nodes;
+
+ nodes = DIV_ROUND_UP(nr_cores, MAX_CORES_IN_NODE);
+
+ /* Hardware does not support config with 3 nodes, return 4 in that case */
+ return (nodes == 3) ? 4 : nodes;
+}
+
+/*
+ * Decide the number of cores in a core complex with the given nr_cores, using
+ * the constants MAX_CCX, MAX_CORES_IN_CCX, MAX_CORES_IN_NODE and
+ * MAX_NODES_PER_SOCKET. Maintain symmetry as much as possible.
+ * L3 cache is shared across all cores in a core complex, so this also
+ * tells us how many cores are sharing the L3 cache.
+ */
+static int cores_in_core_complex(int nr_cores)
+{
+ int nodes;
+
+ /* Check if we can fit all the cores in one core complex */
+ if (nr_cores <= MAX_CORES_IN_CCX) {
+ return nr_cores;
+ }
+ /* Get the number of nodes required to build this config */
+ nodes = nodes_in_socket(nr_cores);
+
+ /*
+     * Divide the cores across all the core complexes;
+     * return the rounded-up value.
+ */
+ return DIV_ROUND_UP(nr_cores, nodes * MAX_CCX);
+}
+
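(Editorial worked example; the numbers follow directly from the two helpers above:)

    nodes_in_socket(12)       = DIV_ROUND_UP(12, 8)     = 2
    cores_in_core_complex(12) = DIV_ROUND_UP(12, 2 * 2) = 3   /* 2 nodes * MAX_CCX */
    cores_in_core_complex(4)  = 4                             /* fits in one CCX   */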
/* Encode cache info for CPUID[8000001D] */
-static void encode_cache_cpuid8000001d(CPUCacheInfo *cache,
- X86CPUTopoInfo *topo_info,
- uint32_t *eax, uint32_t *ebx,
- uint32_t *ecx, uint32_t *edx)
+static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, CPUState *cs,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
{
uint32_t l3_cores;
- unsigned nodes = MAX(topo_info->nodes_per_pkg, 1);
-
assert(cache->size == cache->line_size * cache->associativity *
cache->partitions * cache->sets);
@@ -355,13 +408,10 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache,
/* L3 is shared among multiple cores */
if (cache->level == 3) {
- l3_cores = DIV_ROUND_UP((topo_info->dies_per_pkg *
- topo_info->cores_per_die *
- topo_info->threads_per_core),
- nodes);
- *eax |= (l3_cores - 1) << 14;
+ l3_cores = cores_in_core_complex(cs->nr_cores);
+ *eax |= ((l3_cores * cs->nr_threads) - 1) << 14;
} else {
- *eax |= ((topo_info->threads_per_core - 1) << 14);
+ *eax |= ((cs->nr_threads - 1) << 14);
}
assert(cache->line_size > 0);
@@ -381,17 +431,55 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache,
(cache->complex_indexing ? CACHE_COMPLEX_IDX : 0);
}
+/* Data structure to hold the configuration info for a given core index */
+struct core_topology {
+ /* core complex id of the current core index */
+ int ccx_id;
+ /*
+ * Adjusted core index for this core in the topology
+ * This can be 0,1,2,3 with max 4 cores in a core complex
+ */
+ int core_id;
+ /* Node id for this core index */
+ int node_id;
+ /* Number of nodes in this config */
+ int num_nodes;
+};
+
+/*
+ * Build the configuration to closely match the EPYC hardware, using the EPYC
+ * hardware configuration values (MAX_CCX, MAX_CORES_IN_CCX, MAX_CORES_IN_NODE)
+ * for now. This could change in the future.
+ * nr_cores : Total number of cores in the config
+ * core_id : Core index of the current CPU
+ * topo : Data structure to hold all the config info for this core index
+ */
+static void build_core_topology(int nr_cores, int core_id,
+ struct core_topology *topo)
+{
+ int nodes, cores_in_ccx;
+
+ /* First get the number of nodes required */
+ nodes = nodes_in_socket(nr_cores);
+
+ cores_in_ccx = cores_in_core_complex(nr_cores);
+
+ topo->node_id = core_id / (cores_in_ccx * MAX_CCX);
+ topo->ccx_id = (core_id % (cores_in_ccx * MAX_CCX)) / cores_in_ccx;
+ topo->core_id = core_id % cores_in_ccx;
+ topo->num_nodes = nodes;
+}
+
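(Editorial worked example for the helper above, with nr_cores = 16 and core_id = 10:)

    nodes          = nodes_in_socket(16)       = 2
    cores_in_ccx   = cores_in_core_complex(16) = 4
    topo.node_id   = 10 / (4 * 2)              = 1
    topo.ccx_id    = (10 % 8) / 4              = 0
    topo.core_id   = 10 % 4                    = 2
    topo.num_nodes = 2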
/* Encode cache info for CPUID[8000001E] */
-static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu,
+static void encode_topo_cpuid8000001e(CPUState *cs, X86CPU *cpu,
uint32_t *eax, uint32_t *ebx,
uint32_t *ecx, uint32_t *edx)
{
- X86CPUTopoIDs topo_ids = {0};
- unsigned long nodes = MAX(topo_info->nodes_per_pkg, 1);
+ struct core_topology topo = {0};
+ unsigned long nodes;
int shift;
- x86_topo_ids_from_apicid_epyc(cpu->apic_id, topo_info, &topo_ids);
-
+ build_core_topology(cs->nr_cores, cpu->core_id, &topo);
*eax = cpu->apic_id;
/*
* CPUID_Fn8000001E_EBX
@@ -408,8 +496,12 @@ static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu,
* 3 Core complex id
* 1:0 Core id
*/
- *ebx = ((topo_info->threads_per_core - 1) << 8) | (topo_ids.node_id << 3) |
- (topo_ids.core_id);
+ if (cs->nr_threads - 1) {
+ *ebx = ((cs->nr_threads - 1) << 8) | (topo.node_id << 3) |
+ (topo.ccx_id << 2) | topo.core_id;
+ } else {
+ *ebx = (topo.node_id << 4) | (topo.ccx_id << 3) | topo.core_id;
+ }
/*
* CPUID_Fn8000001E_ECX
* 31:11 Reserved
@@ -418,8 +510,9 @@ static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu,
* 2 Socket id
* 1:0 Node id
*/
- if (nodes <= 4) {
- *ecx = ((nodes - 1) << 8) | (topo_ids.pkg_id << 2) | topo_ids.node_id;
+ if (topo.num_nodes <= 4) {
+ *ecx = ((topo.num_nodes - 1) << 8) | (cpu->socket_id << 2) |
+ topo.node_id;
} else {
/*
* Node id fix up. Actual hardware supports up to 4 nodes. But with
@@ -434,10 +527,10 @@ static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu,
* number of nodes. find_last_bit returns last set bit(0 based). Left
* shift(+1) the socket id to represent all the nodes.
*/
- nodes -= 1;
+ nodes = topo.num_nodes - 1;
shift = find_last_bit(&nodes, 8);
- *ecx = (nodes << 8) | (topo_ids.pkg_id << (shift + 1)) |
- topo_ids.node_id;
+ *ecx = ((topo.num_nodes - 1) << 8) | (cpu->socket_id << (shift + 1)) |
+ topo.node_id;
}
*edx = 0;
}
@@ -1638,10 +1731,6 @@ typedef struct X86CPUDefinition {
FeatureWordArray features;
const char *model_id;
CPUCaches *cache_info;
-
- /* Use AMD EPYC encoding for apic id */
- bool use_epyc_apic_id_encoding;
-
/*
* Definitions for alternative versions of CPU model.
* List is terminated by item with version == 0.
@@ -1683,18 +1772,6 @@ static const X86CPUVersionDefinition *x86_cpu_def_get_versions(X86CPUDefinition
return def->versions ?: default_version_list;
}
-bool cpu_x86_use_epyc_apic_id_encoding(const char *cpu_type)
-{
- X86CPUClass *xcc = X86_CPU_CLASS(object_class_by_name(cpu_type));
-
- assert(xcc);
- if (xcc->model && xcc->model->cpudef) {
- return xcc->model->cpudef->use_epyc_apic_id_encoding;
- } else {
- return false;
- }
-}
-
static CPUCaches epyc_cache_info = {
.l1d_cache = &(CPUCacheInfo) {
.type = DATA_CACHE,
@@ -3995,7 +4072,6 @@ static X86CPUDefinition builtin_x86_defs[] = {
.xlevel = 0x8000001E,
.model_id = "AMD EPYC Processor",
.cache_info = &epyc_cache_info,
- .use_epyc_apic_id_encoding = 1,
.versions = (X86CPUVersionDefinition[]) {
{ .version = 1 },
{
@@ -4123,7 +4199,6 @@ static X86CPUDefinition builtin_x86_defs[] = {
.xlevel = 0x8000001E,
.model_id = "AMD EPYC-Rome Processor",
.cache_info = &epyc_rome_cache_info,
- .use_epyc_apic_id_encoding = 1,
},
};
@@ -4872,6 +4947,7 @@ static void x86_cpu_class_check_missing_features(X86CPUClass *xcc,
new->value = g_strdup("type");
*next = new;
next = &new->next;
+ error_free(err);
}
x86_cpu_filter_features(xc, false);
@@ -5489,7 +5565,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
uint32_t signature[3];
X86CPUTopoInfo topo_info;
- topo_info.nodes_per_pkg = env->nr_nodes;
topo_info.dies_per_pkg = env->nr_dies;
topo_info.cores_per_die = cs->nr_cores;
topo_info.threads_per_core = cs->nr_threads;
@@ -5678,7 +5753,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*ecx |= CPUID_TOPOLOGY_LEVEL_SMT;
break;
case 1:
- *eax = env->pkg_offset;
+ *eax = apicid_pkg_offset(&topo_info);
*ebx = cs->nr_cores * cs->nr_threads;
*ecx |= CPUID_TOPOLOGY_LEVEL_CORE;
break;
@@ -5712,7 +5787,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*ecx |= CPUID_TOPOLOGY_LEVEL_CORE;
break;
case 2:
- *eax = env->pkg_offset;
+ *eax = apicid_pkg_offset(&topo_info);
*ebx = env->nr_dies * cs->nr_cores * cs->nr_threads;
*ecx |= CPUID_TOPOLOGY_LEVEL_DIE;
break;
@@ -5889,11 +5964,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
/*
* Bits 15:12 is "The number of bits in the initial
* Core::X86::Apic::ApicId[ApicId] value that indicate
- * thread ID within a package". This is already stored at
- * CPUX86State::pkg_offset.
+ * thread ID within a package".
* Bits 7:0 is "The number of threads in the package is NC+1"
*/
- *ecx = (env->pkg_offset << 12) |
+ *ecx = (apicid_pkg_offset(&topo_info) << 12) |
((cs->nr_cores * cs->nr_threads) - 1);
} else {
*ecx = 0;
@@ -5921,20 +5995,20 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
}
switch (count) {
case 0: /* L1 dcache info */
- encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache,
- &topo_info, eax, ebx, ecx, edx);
+ encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, cs,
+ eax, ebx, ecx, edx);
break;
case 1: /* L1 icache info */
- encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache,
- &topo_info, eax, ebx, ecx, edx);
+ encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache, cs,
+ eax, ebx, ecx, edx);
break;
case 2: /* L2 cache info */
- encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache,
- &topo_info, eax, ebx, ecx, edx);
+ encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache, cs,
+ eax, ebx, ecx, edx);
break;
case 3: /* L3 cache info */
- encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache,
- &topo_info, eax, ebx, ecx, edx);
+ encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache, cs,
+ eax, ebx, ecx, edx);
break;
default: /* end of info */
*eax = *ebx = *ecx = *edx = 0;
@@ -5943,7 +6017,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
break;
case 0x8000001E:
assert(cpu->core_id <= 255);
- encode_topo_cpuid8000001e(&topo_info, cpu, eax, ebx, ecx, edx);
+ encode_topo_cpuid8000001e(cs, cpu,
+ eax, ebx, ecx, edx);
break;
case 0xC0000000:
*eax = env->cpuid_xlevel2;
@@ -6949,7 +7024,6 @@ static void x86_cpu_initfn(Object *obj)
FeatureWord w;
env->nr_dies = 1;
- env->nr_nodes = 1;
cpu_set_cpustate_pointers(cpu);
object_property_add(obj, "family", "int",
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index e1a5c174dc..d3097be6a5 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1629,8 +1629,6 @@ typedef struct CPUX86State {
TPRAccess tpr_access_type;
unsigned nr_dies;
- unsigned nr_nodes;
- unsigned pkg_offset;
} CPUX86State;
struct kvm_msrs;
@@ -1919,7 +1917,6 @@ void cpu_clear_apic_feature(CPUX86State *env);
void host_cpuid(uint32_t function, uint32_t count,
uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
void host_vendor_fms(char *vendor, int *family, int *model, int *stepping);
-bool cpu_x86_use_epyc_apic_id_encoding(const char *cpu_type);
/* helper.c */
bool x86_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
diff --git a/target/i386/hvf/hvf-i386.h b/target/i386/hvf/hvf-i386.h
index ef20c73eca..e0edffd077 100644
--- a/target/i386/hvf/hvf-i386.h
+++ b/target/i386/hvf/hvf-i386.h
@@ -57,13 +57,13 @@ typedef struct hvf_vcpu_caps {
uint64_t vmx_cap_preemption_timer;
} hvf_vcpu_caps;
-typedef struct HVFState {
+struct HVFState {
AccelState parent;
hvf_slot slots[32];
int num_slots;
hvf_vcpu_caps *hvf_caps;
-} HVFState;
+};
extern HVFState *hvf_state;
void hvf_set_phys_mem(MemoryRegionSection *, bool);
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 6f18d940a5..205b68bc0c 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -4607,7 +4607,7 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
if (iommu) {
int ret;
MSIMessage src, dst;
- X86IOMMUClass *class = X86_IOMMU_GET_CLASS(iommu);
+ X86IOMMUClass *class = X86_IOMMU_DEVICE_GET_CLASS(iommu);
if (!class->int_remap) {
return 0;
diff --git a/target/i386/sev.c b/target/i386/sev.c
index c3ecf86704..de4818da6d 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -500,6 +500,7 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len)
if (!g_file_get_contents(filename, &base64, &sz, &error)) {
error_report("failed to read '%s' (%s)", filename, error->message);
+ g_error_free(error);
return -1;
}
diff --git a/target/microblaze/cpu-param.h b/target/microblaze/cpu-param.h
index 4abbc62d50..4d8297fa94 100644
--- a/target/microblaze/cpu-param.h
+++ b/target/microblaze/cpu-param.h
@@ -8,9 +8,24 @@
#ifndef MICROBLAZE_CPU_PARAM_H
#define MICROBLAZE_CPU_PARAM_H 1
+/*
+ * While system mode can address up to 64 bits of address space,
+ * this is done via the lea/sea instructions, which are system-only
+ * (as they also bypass the mmu).
+ *
+ * We can improve the user-only experience by only exposing 32 bits
+ * of address space.
+ */
+#ifdef CONFIG_USER_ONLY
+#define TARGET_LONG_BITS 32
+#define TARGET_PHYS_ADDR_SPACE_BITS 32
+#define TARGET_VIRT_ADDR_SPACE_BITS 32
+#else
#define TARGET_LONG_BITS 64
#define TARGET_PHYS_ADDR_SPACE_BITS 64
#define TARGET_VIRT_ADDR_SPACE_BITS 64
+#endif
+
/* FIXME: MB uses variable pages down to 1K but linux only uses 4k. */
#define TARGET_PAGE_BITS 12
#define NB_MMU_MODES 3
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
index 51e5c85b10..6392524135 100644
--- a/target/microblaze/cpu.c
+++ b/target/microblaze/cpu.c
@@ -79,7 +79,17 @@ static void mb_cpu_set_pc(CPUState *cs, vaddr value)
{
MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
- cpu->env.sregs[SR_PC] = value;
+ cpu->env.pc = value;
+ /* Ensure D_FLAG and IMM_FLAG are clear for the new PC */
+ cpu->env.iflags = 0;
+}
+
+static void mb_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb)
+{
+ MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
+
+ cpu->env.pc = tb->pc;
+ cpu->env.iflags = tb->flags & IFLAGS_TB_MASK;
}
static bool mb_cpu_has_work(CPUState *cs)
@@ -117,13 +127,13 @@ static void mb_cpu_reset(DeviceState *dev)
/* Disable stack protector. */
env->shr = ~0;
- env->sregs[SR_PC] = cpu->cfg.base_vectors;
+ env->pc = cpu->cfg.base_vectors;
#if defined(CONFIG_USER_ONLY)
/* start in user mode with interrupts enabled. */
- env->sregs[SR_MSR] = MSR_EE | MSR_IE | MSR_VM | MSR_UM;
+ mb_cpu_write_msr(env, MSR_EE | MSR_IE | MSR_VM | MSR_UM);
#else
- env->sregs[SR_MSR] = 0;
+ mb_cpu_write_msr(env, 0);
mmu_init(&env->mmu);
env->mmu.c_mmu = 3;
env->mmu.c_mmu_tlb_access = 3;
@@ -317,9 +327,11 @@ static void mb_cpu_class_init(ObjectClass *oc, void *data)
cc->class_by_name = mb_cpu_class_by_name;
cc->has_work = mb_cpu_has_work;
cc->do_interrupt = mb_cpu_do_interrupt;
+ cc->do_unaligned_access = mb_cpu_do_unaligned_access;
cc->cpu_exec_interrupt = mb_cpu_exec_interrupt;
cc->dump_state = mb_cpu_dump_state;
cc->set_pc = mb_cpu_set_pc;
+ cc->synchronize_from_tb = mb_cpu_synchronize_from_tb;
cc->gdb_read_register = mb_cpu_gdb_read_register;
cc->gdb_write_register = mb_cpu_gdb_write_register;
cc->tlb_fill = mb_cpu_tlb_fill;
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
index a31134b65c..a25a2b427f 100644
--- a/target/microblaze/cpu.h
+++ b/target/microblaze/cpu.h
@@ -31,7 +31,7 @@ typedef struct CPUMBState CPUMBState;
#define EXCP_MMU 1
#define EXCP_IRQ 2
-#define EXCP_BREAK 3
+#define EXCP_SYSCALL 3 /* user-only */
#define EXCP_HW_BREAK 4
#define EXCP_HW_EXCP 5
@@ -79,10 +79,13 @@ typedef struct CPUMBState CPUMBState;
/* Exception State Register (ESR) Fields */
#define ESR_DIZ (1<<11) /* Zone Protection */
+#define ESR_W (1<<11) /* Unaligned word access */
#define ESR_S (1<<10) /* Store instruction */
#define ESR_ESS_FSL_OFFSET 5
+#define ESR_ESS_MASK (0x7f << 5)
+
#define ESR_EC_FSL 0
#define ESR_EC_UNALIGNED_DATA 1
#define ESR_EC_ILLEGAL_OP 2
@@ -228,15 +231,22 @@ typedef struct CPUMBState CPUMBState;
#define STREAM_CONTROL (1 << 3)
#define STREAM_NONBLOCK (1 << 4)
+#define TARGET_INSN_START_EXTRA_WORDS 1
+
struct CPUMBState {
- uint32_t debug;
- uint32_t btaken;
- uint64_t btarget;
- uint32_t bimm;
+ uint32_t bvalue; /* TCG temporary, only valid during a TB */
+ uint32_t btarget; /* Full resolved branch destination */
uint32_t imm;
uint32_t regs[32];
- uint64_t sregs[14];
+ uint32_t pc;
+ uint32_t msr; /* All bits of MSR except MSR[C] and MSR[CC] */
+ uint32_t msr_c; /* MSR[C], in low bit; other bits must be 0 */
+ target_ulong ear;
+ uint32_t esr;
+ uint32_t fsr;
+ uint32_t btr;
+ uint32_t edr;
float_status fp_status;
/* Stack protectors. Yes, it's a hw feature. */
uint32_t slr, shr;
@@ -247,14 +257,23 @@ struct CPUMBState {
uint32_t res_val;
/* Internal flags. */
-#define IMM_FLAG 4
-#define MSR_EE_FLAG (1 << 8)
+#define IMM_FLAG (1 << 0)
+#define BIMM_FLAG (1 << 1)
+#define ESR_ESS_FLAG (1 << 2) /* indicates ESR_ESS_MASK is present */
+/* MSR_EE (1 << 8) -- these 3 are not in iflags but tb_flags */
+/* MSR_UM (1 << 11) */
+/* MSR_VM (1 << 13) */
+/* ESR_ESS_MASK [11:5] -- unwind into iflags for unaligned excp */
#define DRTI_FLAG (1 << 16)
#define DRTE_FLAG (1 << 17)
#define DRTB_FLAG (1 << 18)
#define D_FLAG (1 << 19) /* Bit in ESR. */
+
/* TB dependent CPUMBState. */
-#define IFLAGS_TB_MASK (D_FLAG | IMM_FLAG | DRTI_FLAG | DRTE_FLAG | DRTB_FLAG)
+#define IFLAGS_TB_MASK (D_FLAG | BIMM_FLAG | IMM_FLAG | \
+ DRTI_FLAG | DRTE_FLAG | DRTB_FLAG)
+#define MSR_TB_MASK (MSR_UM | MSR_VM | MSR_EE)
+
uint32_t iflags;
#if !defined(CONFIG_USER_ONLY)
@@ -317,11 +336,30 @@ struct MicroBlazeCPU {
void mb_cpu_do_interrupt(CPUState *cs);
bool mb_cpu_exec_interrupt(CPUState *cs, int int_req);
+void mb_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
+ MMUAccessType access_type,
+ int mmu_idx, uintptr_t retaddr);
void mb_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
hwaddr mb_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
int mb_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int mb_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+static inline uint32_t mb_cpu_read_msr(const CPUMBState *env)
+{
+ /* Replicate MSR[C] to MSR[CC]. */
+ return env->msr | (env->msr_c * (MSR_C | MSR_CC));
+}
+
+static inline void mb_cpu_write_msr(CPUMBState *env, uint32_t val)
+{
+ env->msr_c = (val >> 2) & 1;
+ /*
+ * Clear both MSR[C] and MSR[CC] from the saved copy.
+ * MSR_PVR is not writable and is always clear.
+ */
+ env->msr = val & ~(MSR_C | MSR_CC | MSR_PVR);
+}
+
void mb_tcg_init(void);
/* you can call this signal handler from your SIGBUS and SIGSEGV
signal handlers to inform the virtual CPU of exceptions. non zero
@@ -348,13 +386,15 @@ typedef MicroBlazeCPU ArchCPU;
#include "exec/cpu-all.h"
+/* Ensure there is no overlap between the two masks. */
+QEMU_BUILD_BUG_ON(MSR_TB_MASK & IFLAGS_TB_MASK);
+
static inline void cpu_get_tb_cpu_state(CPUMBState *env, target_ulong *pc,
target_ulong *cs_base, uint32_t *flags)
{
- *pc = env->sregs[SR_PC];
- *cs_base = 0;
- *flags = (env->iflags & IFLAGS_TB_MASK) |
- (env->sregs[SR_MSR] & (MSR_UM | MSR_VM | MSR_EE));
+ *pc = env->pc;
+ *flags = (env->iflags & IFLAGS_TB_MASK) | (env->msr & MSR_TB_MASK);
+ *cs_base = (*flags & IMM_FLAG ? env->imm : 0);
}
#if !defined(CONFIG_USER_ONLY)
@@ -369,11 +409,11 @@ static inline int cpu_mmu_index(CPUMBState *env, bool ifetch)
MicroBlazeCPU *cpu = env_archcpu(env);
/* Are we in nommu mode?. */
- if (!(env->sregs[SR_MSR] & MSR_VM) || !cpu->cfg.use_mmu) {
+ if (!(env->msr & MSR_VM) || !cpu->cfg.use_mmu) {
return MMU_NOMMU_IDX;
}
- if (env->sregs[SR_MSR] & MSR_UM) {
+ if (env->msr & MSR_UM) {
return MMU_USER_IDX;
}
return MMU_KERNEL_IDX;
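With MSR[C] split out into msr_c, the carry can be updated as a plain 0/1 value while reads still present both MSR[C] and its read-only alias MSR[CC]. A rough illustration of the round trip through the new accessors, assuming the usual MicroBlaze definitions MSR_C == (1 << 2) and MSR_CC == (1u << 31) and that cpu.h and <assert.h> are in scope (sketch only, not part of the patch):

    void example_msr_roundtrip(CPUMBState *env)
    {
        mb_cpu_write_msr(env, MSR_EE | MSR_C);        /* request carry set */

        /* The carry now lives only in msr_c ... */
        assert(env->msr_c == 1);
        assert((env->msr & (MSR_C | MSR_CC)) == 0);

        /* ... and reappears in both MSR[C] and MSR[CC] on read. */
        uint32_t msr = mb_cpu_read_msr(env);
        assert((msr & MSR_C) && (msr & MSR_CC));
    }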
diff --git a/target/microblaze/gdbstub.c b/target/microblaze/gdbstub.c
index 73e8973597..08d6a0e807 100644
--- a/target/microblaze/gdbstub.c
+++ b/target/microblaze/gdbstub.c
@@ -21,58 +21,80 @@
#include "cpu.h"
#include "exec/gdbstub.h"
+/*
+ * GDB expects SREGs in the following order:
+ * PC, MSR, EAR, ESR, FSR, BTR, EDR, PID, ZPR, TLBX, TLBSX, TLBLO, TLBHI.
+ *
+ * PID, ZPR, TLBx, TLBsx, TLBLO, and TLBHI aren't modeled, so we don't
+ * map them to anything and return a value of 0 instead.
+ */
+
+enum {
+ GDB_PC = 32 + 0,
+ GDB_MSR = 32 + 1,
+ GDB_EAR = 32 + 2,
+ GDB_ESR = 32 + 3,
+ GDB_FSR = 32 + 4,
+ GDB_BTR = 32 + 5,
+ GDB_PVR0 = 32 + 6,
+ GDB_PVR11 = 32 + 17,
+ GDB_EDR = 32 + 18,
+ GDB_SLR = 32 + 25,
+ GDB_SHR = 32 + 26,
+};
+
int mb_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
{
MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
+ CPUClass *cc = CPU_GET_CLASS(cs);
CPUMBState *env = &cpu->env;
- /*
- * GDB expects SREGs in the following order:
- * PC, MSR, EAR, ESR, FSR, BTR, EDR, PID, ZPR, TLBX, TLBSX, TLBLO, TLBHI.
- * They aren't stored in this order, so make a map.
- * PID, ZPR, TLBx, TLBsx, TLBLO, and TLBHI aren't modeled, so we don't
- * map them to anything and return a value of 0 instead.
- */
- static const uint8_t sreg_map[6] = {
- SR_PC,
- SR_MSR,
- SR_EAR,
- SR_ESR,
- SR_FSR,
- SR_BTR
- };
+ uint32_t val;
- /*
- * GDB expects registers to be reported in this order:
- * R0-R31
- * PC-BTR
- * PVR0-PVR11
- * EDR-TLBHI
- * SLR-SHR
- */
- if (n < 32) {
- return gdb_get_reg32(mem_buf, env->regs[n]);
- } else {
- n -= 32;
- switch (n) {
- case 0 ... 5:
- return gdb_get_reg32(mem_buf, env->sregs[sreg_map[n]]);
+ if (n > cc->gdb_num_core_regs) {
+ return 0;
+ }
+
+ switch (n) {
+ case 1 ... 31:
+ val = env->regs[n];
+ break;
+ case GDB_PC:
+ val = env->pc;
+ break;
+ case GDB_MSR:
+ val = mb_cpu_read_msr(env);
+ break;
+ case GDB_EAR:
+ val = env->ear;
+ break;
+ case GDB_ESR:
+ val = env->esr;
+ break;
+ case GDB_FSR:
+ val = env->fsr;
+ break;
+ case GDB_BTR:
+ val = env->btr;
+ break;
+ case GDB_PVR0 ... GDB_PVR11:
/* PVR12 is intentionally skipped */
- case 6 ... 17:
- n -= 6;
- return gdb_get_reg32(mem_buf, env->pvr.regs[n]);
- case 18:
- return gdb_get_reg32(mem_buf, env->sregs[SR_EDR]);
+ val = env->pvr.regs[n - GDB_PVR0];
+ break;
+ case GDB_EDR:
+ val = env->edr;
+ break;
+ case GDB_SLR:
+ val = env->slr;
+ break;
+ case GDB_SHR:
+ val = env->shr;
+ break;
+ default:
/* Other SRegs aren't modeled, so report a value of 0 */
- case 19 ... 24:
- return gdb_get_reg32(mem_buf, 0);
- case 25:
- return gdb_get_reg32(mem_buf, env->slr);
- case 26:
- return gdb_get_reg32(mem_buf, env->shr);
- default:
- return 0;
- }
+ val = 0;
+ break;
}
+ return gdb_get_reg32(mem_buf, val);
}
int mb_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
@@ -82,60 +104,47 @@ int mb_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
CPUMBState *env = &cpu->env;
uint32_t tmp;
- /*
- * GDB expects SREGs in the following order:
- * PC, MSR, EAR, ESR, FSR, BTR, EDR, PID, ZPR, TLBX, TLBSX, TLBLO, TLBHI.
- * They aren't stored in this order, so make a map.
- * PID, ZPR, TLBx, TLBsx, TLBLO, and TLBHI aren't modeled, so we don't
- * map them to anything.
- */
- static const uint8_t sreg_map[6] = {
- SR_PC,
- SR_MSR,
- SR_EAR,
- SR_ESR,
- SR_FSR,
- SR_BTR
- };
-
if (n > cc->gdb_num_core_regs) {
return 0;
}
tmp = ldl_p(mem_buf);
- /*
- * GDB expects registers to be reported in this order:
- * R0-R31
- * PC-BTR
- * PVR0-PVR11
- * EDR-TLBHI
- * SLR-SHR
- */
- if (n < 32) {
+ switch (n) {
+ case 1 ... 31:
env->regs[n] = tmp;
- } else {
- n -= 32;
- switch (n) {
- case 0 ... 5:
- env->sregs[sreg_map[n]] = tmp;
- break;
+ break;
+ case GDB_PC:
+ env->pc = tmp;
+ break;
+ case GDB_MSR:
+ mb_cpu_write_msr(env, tmp);
+ break;
+ case GDB_EAR:
+ env->ear = tmp;
+ break;
+ case GDB_ESR:
+ env->esr = tmp;
+ break;
+ case GDB_FSR:
+ env->fsr = tmp;
+ break;
+ case GDB_BTR:
+ env->btr = tmp;
+ break;
+ case GDB_PVR0 ... GDB_PVR11:
/* PVR12 is intentionally skipped */
- case 6 ... 17:
- n -= 6;
- env->pvr.regs[n] = tmp;
- break;
- /* Only EDR is modeled in these indeces, so ignore the rest */
- case 18:
- env->sregs[SR_EDR] = tmp;
- break;
- case 25:
- env->slr = tmp;
- break;
- case 26:
- env->shr = tmp;
- break;
- }
+ env->pvr.regs[n - GDB_PVR0] = tmp;
+ break;
+ case GDB_EDR:
+ env->edr = tmp;
+ break;
+ case GDB_SLR:
+ env->slr = tmp;
+ break;
+ case GDB_SHR:
+ env->shr = tmp;
+ break;
}
return 4;
}
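As a quick sanity check of the numbering used by the GDB_* enum above (the 32 GPRs come first, the special registers follow), compile-time assertions such as the following would hold; illustration only, not part of the patch:

    QEMU_BUILD_BUG_ON(GDB_PC  != 32);   /* first special register */
    QEMU_BUILD_BUG_ON(GDB_MSR != 33);
    QEMU_BUILD_BUG_ON(GDB_EDR != 50);   /* 32 + 18, after PVR0..PVR11 */
    QEMU_BUILD_BUG_ON(GDB_SHR != 58);   /* 32 + 26, last modeled register */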
diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c
index ab2ceeb055..00090526da 100644
--- a/target/microblaze/helper.c
+++ b/target/microblaze/helper.c
@@ -24,8 +24,6 @@
#include "qemu/host-utils.h"
#include "exec/log.h"
-#define D(x)
-
#if defined(CONFIG_USER_ONLY)
void mb_cpu_do_interrupt(CPUState *cs)
@@ -35,7 +33,7 @@ void mb_cpu_do_interrupt(CPUState *cs)
cs->exception_index = -1;
env->res_addr = RES_ADDR_NONE;
- env->regs[14] = env->sregs[SR_PC];
+ env->regs[14] = env->pc;
}
bool mb_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
@@ -85,15 +83,15 @@ bool mb_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
qemu_log_mask(CPU_LOG_MMU, "mmu=%d miss v=%" VADDR_PRIx "\n",
mmu_idx, address);
- env->sregs[SR_EAR] = address;
+ env->ear = address;
switch (lu.err) {
case ERR_PROT:
- env->sregs[SR_ESR] = access_type == MMU_INST_FETCH ? 17 : 16;
- env->sregs[SR_ESR] |= (access_type == MMU_DATA_STORE) << 10;
+ env->esr = access_type == MMU_INST_FETCH ? 17 : 16;
+ env->esr |= (access_type == MMU_DATA_STORE) << 10;
break;
case ERR_MISS:
- env->sregs[SR_ESR] = access_type == MMU_INST_FETCH ? 19 : 18;
- env->sregs[SR_ESR] |= (access_type == MMU_DATA_STORE) << 10;
+ env->esr = access_type == MMU_INST_FETCH ? 19 : 18;
+ env->esr |= (access_type == MMU_DATA_STORE) << 10;
break;
default:
abort();
@@ -112,12 +110,14 @@ void mb_cpu_do_interrupt(CPUState *cs)
{
MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
CPUMBState *env = &cpu->env;
- uint32_t t;
+ uint32_t t, msr = mb_cpu_read_msr(env);
/* IMM flag cannot propagate across a branch and into the dslot. */
- assert(!((env->iflags & D_FLAG) && (env->iflags & IMM_FLAG)));
+ assert((env->iflags & (D_FLAG | IMM_FLAG)) != (D_FLAG | IMM_FLAG));
+ /* BIMM flag cannot be set without D_FLAG. */
+ assert((env->iflags & (D_FLAG | BIMM_FLAG)) != BIMM_FLAG);
+ /* RTI flags are private to translate. */
assert(!(env->iflags & (DRTI_FLAG | DRTE_FLAG | DRTB_FLAG)));
-/* assert(env->sregs[SR_MSR] & (MSR_EE)); Only for HW exceptions. */
env->res_addr = RES_ADDR_NONE;
switch (cs->exception_index) {
case EXCP_HW_EXCP:
@@ -126,80 +126,79 @@ void mb_cpu_do_interrupt(CPUState *cs)
return;
}
- env->regs[17] = env->sregs[SR_PC] + 4;
- env->sregs[SR_ESR] &= ~(1 << 12);
+ env->regs[17] = env->pc + 4;
+ env->esr &= ~(1 << 12);
/* Exception breaks branch + dslot sequence? */
if (env->iflags & D_FLAG) {
- env->sregs[SR_ESR] |= 1 << 12 ;
- env->sregs[SR_BTR] = env->btarget;
+            env->esr |= 1 << 12;
+ env->btr = env->btarget;
}
/* Disable the MMU. */
- t = (env->sregs[SR_MSR] & (MSR_VM | MSR_UM)) << 1;
- env->sregs[SR_MSR] &= ~(MSR_VMS | MSR_UMS | MSR_VM | MSR_UM);
- env->sregs[SR_MSR] |= t;
+ t = (msr & (MSR_VM | MSR_UM)) << 1;
+ msr &= ~(MSR_VMS | MSR_UMS | MSR_VM | MSR_UM);
+ msr |= t;
/* Exception in progress. */
- env->sregs[SR_MSR] |= MSR_EIP;
+ msr |= MSR_EIP;
+ mb_cpu_write_msr(env, msr);
qemu_log_mask(CPU_LOG_INT,
- "hw exception at pc=%" PRIx64 " ear=%" PRIx64 " "
- "esr=%" PRIx64 " iflags=%x\n",
- env->sregs[SR_PC], env->sregs[SR_EAR],
- env->sregs[SR_ESR], env->iflags);
+ "hw exception at pc=%x ear=%" PRIx64 " "
+ "esr=%x iflags=%x\n",
+ env->pc, env->ear,
+ env->esr, env->iflags);
log_cpu_state_mask(CPU_LOG_INT, cs, 0);
- env->iflags &= ~(IMM_FLAG | D_FLAG);
- env->sregs[SR_PC] = cpu->cfg.base_vectors + 0x20;
+ env->iflags = 0;
+ env->pc = cpu->cfg.base_vectors + 0x20;
break;
case EXCP_MMU:
- env->regs[17] = env->sregs[SR_PC];
+ env->regs[17] = env->pc;
+
+ qemu_log_mask(CPU_LOG_INT,
+ "MMU exception at pc=%x iflags=%x ear=%" PRIx64 "\n",
+ env->pc, env->iflags, env->ear);
- env->sregs[SR_ESR] &= ~(1 << 12);
+ env->esr &= ~(1 << 12);
/* Exception breaks branch + dslot sequence? */
if (env->iflags & D_FLAG) {
- D(qemu_log("D_FLAG set at exception bimm=%d\n", env->bimm));
- env->sregs[SR_ESR] |= 1 << 12 ;
- env->sregs[SR_BTR] = env->btarget;
+            env->esr |= 1 << 12;
+ env->btr = env->btarget;
/* Reexecute the branch. */
env->regs[17] -= 4;
/* was the branch immprefixed?. */
- if (env->bimm) {
- qemu_log_mask(CPU_LOG_INT,
- "bimm exception at pc=%" PRIx64 " "
- "iflags=%x\n",
- env->sregs[SR_PC], env->iflags);
+ if (env->iflags & BIMM_FLAG) {
env->regs[17] -= 4;
log_cpu_state_mask(CPU_LOG_INT, cs, 0);
}
} else if (env->iflags & IMM_FLAG) {
- D(qemu_log("IMM_FLAG set at exception\n"));
env->regs[17] -= 4;
}
/* Disable the MMU. */
- t = (env->sregs[SR_MSR] & (MSR_VM | MSR_UM)) << 1;
- env->sregs[SR_MSR] &= ~(MSR_VMS | MSR_UMS | MSR_VM | MSR_UM);
- env->sregs[SR_MSR] |= t;
+ t = (msr & (MSR_VM | MSR_UM)) << 1;
+ msr &= ~(MSR_VMS | MSR_UMS | MSR_VM | MSR_UM);
+ msr |= t;
/* Exception in progress. */
- env->sregs[SR_MSR] |= MSR_EIP;
+ msr |= MSR_EIP;
+ mb_cpu_write_msr(env, msr);
qemu_log_mask(CPU_LOG_INT,
- "exception at pc=%" PRIx64 " ear=%" PRIx64 " "
- "iflags=%x\n",
- env->sregs[SR_PC], env->sregs[SR_EAR], env->iflags);
+ "exception at pc=%x ear=%" PRIx64 " iflags=%x\n",
+ env->pc, env->ear, env->iflags);
log_cpu_state_mask(CPU_LOG_INT, cs, 0);
- env->iflags &= ~(IMM_FLAG | D_FLAG);
- env->sregs[SR_PC] = cpu->cfg.base_vectors + 0x20;
+ env->iflags = 0;
+ env->pc = cpu->cfg.base_vectors + 0x20;
break;
case EXCP_IRQ:
- assert(!(env->sregs[SR_MSR] & (MSR_EIP | MSR_BIP)));
- assert(env->sregs[SR_MSR] & MSR_IE);
- assert(!(env->iflags & D_FLAG));
+ assert(!(msr & (MSR_EIP | MSR_BIP)));
+ assert(msr & MSR_IE);
+ assert(!(env->iflags & (D_FLAG | IMM_FLAG)));
- t = (env->sregs[SR_MSR] & (MSR_VM | MSR_UM)) << 1;
+ t = (msr & (MSR_VM | MSR_UM)) << 1;
#if 0
#include "disas/disas.h"
@@ -209,53 +208,47 @@ void mb_cpu_do_interrupt(CPUState *cs)
{
const char *sym;
- sym = lookup_symbol(env->sregs[SR_PC]);
+ sym = lookup_symbol(env->pc);
if (sym
&& (!strcmp("netif_rx", sym)
|| !strcmp("process_backlog", sym))) {
- qemu_log(
- "interrupt at pc=%x msr=%x %x iflags=%x sym=%s\n",
- env->sregs[SR_PC], env->sregs[SR_MSR], t, env->iflags,
- sym);
+ qemu_log("interrupt at pc=%x msr=%x %x iflags=%x sym=%s\n",
+ env->pc, msr, t, env->iflags, sym);
log_cpu_state(cs, 0);
}
}
#endif
qemu_log_mask(CPU_LOG_INT,
- "interrupt at pc=%" PRIx64 " msr=%" PRIx64 " %x "
- "iflags=%x\n",
- env->sregs[SR_PC], env->sregs[SR_MSR], t, env->iflags);
+ "interrupt at pc=%x msr=%x %x iflags=%x\n",
+ env->pc, msr, t, env->iflags);
- env->sregs[SR_MSR] &= ~(MSR_VMS | MSR_UMS | MSR_VM \
- | MSR_UM | MSR_IE);
- env->sregs[SR_MSR] |= t;
+ msr &= ~(MSR_VMS | MSR_UMS | MSR_VM | MSR_UM | MSR_IE);
+ msr |= t;
+ mb_cpu_write_msr(env, msr);
- env->regs[14] = env->sregs[SR_PC];
- env->sregs[SR_PC] = cpu->cfg.base_vectors + 0x10;
+ env->regs[14] = env->pc;
+ env->iflags = 0;
+ env->pc = cpu->cfg.base_vectors + 0x10;
//log_cpu_state_mask(CPU_LOG_INT, cs, 0);
break;
- case EXCP_BREAK:
case EXCP_HW_BREAK:
- assert(!(env->iflags & IMM_FLAG));
- assert(!(env->iflags & D_FLAG));
- t = (env->sregs[SR_MSR] & (MSR_VM | MSR_UM)) << 1;
+ assert(!(env->iflags & (D_FLAG | IMM_FLAG)));
+
+ t = (msr & (MSR_VM | MSR_UM)) << 1;
qemu_log_mask(CPU_LOG_INT,
- "break at pc=%" PRIx64 " msr=%" PRIx64 " %x "
- "iflags=%x\n",
- env->sregs[SR_PC], env->sregs[SR_MSR], t, env->iflags);
+ "break at pc=%x msr=%x %x iflags=%x\n",
+ env->pc, msr, t, env->iflags);
log_cpu_state_mask(CPU_LOG_INT, cs, 0);
- env->sregs[SR_MSR] &= ~(MSR_VMS | MSR_UMS | MSR_VM | MSR_UM);
- env->sregs[SR_MSR] |= t;
- env->sregs[SR_MSR] |= MSR_BIP;
- if (cs->exception_index == EXCP_HW_BREAK) {
- env->regs[16] = env->sregs[SR_PC];
- env->sregs[SR_MSR] |= MSR_BIP;
- env->sregs[SR_PC] = cpu->cfg.base_vectors + 0x18;
- } else
- env->sregs[SR_PC] = env->btarget;
+ msr &= ~(MSR_VMS | MSR_UMS | MSR_VM | MSR_UM);
+ msr |= t;
+ msr |= MSR_BIP;
+ env->regs[16] = env->pc;
+ env->iflags = 0;
+ env->pc = cpu->cfg.base_vectors + 0x18;
+ mb_cpu_write_msr(env, msr);
break;
default:
cpu_abort(cs, "unhandled exception type=%d\n",
@@ -293,8 +286,8 @@ bool mb_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
CPUMBState *env = &cpu->env;
if ((interrupt_request & CPU_INTERRUPT_HARD)
- && (env->sregs[SR_MSR] & MSR_IE)
- && !(env->sregs[SR_MSR] & (MSR_EIP | MSR_BIP))
+ && (env->msr & MSR_IE)
+ && !(env->msr & (MSR_EIP | MSR_BIP))
&& !(env->iflags & (D_FLAG | IMM_FLAG))) {
cs->exception_index = EXCP_IRQ;
mb_cpu_do_interrupt(cs);
@@ -302,3 +295,31 @@ bool mb_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
}
return false;
}
+
+void mb_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
+ MMUAccessType access_type,
+ int mmu_idx, uintptr_t retaddr)
+{
+ MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
+ uint32_t esr, iflags;
+
+ /* Recover the pc and iflags from the corresponding insn_start. */
+ cpu_restore_state(cs, retaddr, true);
+ iflags = cpu->env.iflags;
+
+ qemu_log_mask(CPU_LOG_INT,
+ "Unaligned access addr=" TARGET_FMT_lx " pc=%x iflags=%x\n",
+ (target_ulong)addr, cpu->env.pc, iflags);
+
+ esr = ESR_EC_UNALIGNED_DATA;
+ if (likely(iflags & ESR_ESS_FLAG)) {
+ esr |= iflags & ESR_ESS_MASK;
+ } else {
+ qemu_log_mask(LOG_UNIMP, "Unaligned access without ESR_ESS_FLAG\n");
+ }
+
+ cpu->env.ear = addr;
+ cpu->env.esr = esr;
+ cs->exception_index = EXCP_HW_EXCP;
+ cpu_loop_exit(cs);
+}
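The ESR value is reassembled from iflags here because the translator records the would-be ESR[ESS] details (store/word/destination register) alongside the insn_start data, and cpu_restore_state() brings them back for the faulting instruction. As a worked example with hypothetical values (not taken from the patch), a misaligned store-word through r5 would unwind roughly as:

    uint32_t iflags = ESR_ESS_FLAG | ESR_W | ESR_S | (5 << ESR_ESS_FSL_OFFSET);
    uint32_t esr    = ESR_EC_UNALIGNED_DATA | (iflags & ESR_ESS_MASK);
    /* esr now encodes: unaligned-data cause, word-sized, store, rD = r5;
       ESR_ESS_FLAG itself lies outside ESR_ESS_MASK and is dropped. */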
diff --git a/target/microblaze/helper.h b/target/microblaze/helper.h
index 2f8bdea22b..f740835fcb 100644
--- a/target/microblaze/helper.h
+++ b/target/microblaze/helper.h
@@ -1,36 +1,31 @@
-DEF_HELPER_2(raise_exception, void, env, i32)
-DEF_HELPER_1(debug, void, env)
-DEF_HELPER_FLAGS_3(carry, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
-DEF_HELPER_2(cmp, i32, i32, i32)
-DEF_HELPER_2(cmpu, i32, i32, i32)
+DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, i32)
-DEF_HELPER_3(divs, i32, env, i32, i32)
-DEF_HELPER_3(divu, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(divs, TCG_CALL_NO_WG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(divu, TCG_CALL_NO_WG, i32, env, i32, i32)
-DEF_HELPER_3(fadd, i32, env, i32, i32)
-DEF_HELPER_3(frsub, i32, env, i32, i32)
-DEF_HELPER_3(fmul, i32, env, i32, i32)
-DEF_HELPER_3(fdiv, i32, env, i32, i32)
-DEF_HELPER_2(flt, i32, env, i32)
-DEF_HELPER_2(fint, i32, env, i32)
-DEF_HELPER_2(fsqrt, i32, env, i32)
+DEF_HELPER_FLAGS_3(fadd, TCG_CALL_NO_WG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(frsub, TCG_CALL_NO_WG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(fmul, TCG_CALL_NO_WG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(fdiv, TCG_CALL_NO_WG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_2(flt, TCG_CALL_NO_WG, i32, env, i32)
+DEF_HELPER_FLAGS_2(fint, TCG_CALL_NO_WG, i32, env, i32)
+DEF_HELPER_FLAGS_2(fsqrt, TCG_CALL_NO_WG, i32, env, i32)
-DEF_HELPER_3(fcmp_un, i32, env, i32, i32)
-DEF_HELPER_3(fcmp_lt, i32, env, i32, i32)
-DEF_HELPER_3(fcmp_eq, i32, env, i32, i32)
-DEF_HELPER_3(fcmp_le, i32, env, i32, i32)
-DEF_HELPER_3(fcmp_gt, i32, env, i32, i32)
-DEF_HELPER_3(fcmp_ne, i32, env, i32, i32)
-DEF_HELPER_3(fcmp_ge, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(fcmp_un, TCG_CALL_NO_WG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(fcmp_lt, TCG_CALL_NO_WG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(fcmp_eq, TCG_CALL_NO_WG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(fcmp_le, TCG_CALL_NO_WG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(fcmp_gt, TCG_CALL_NO_WG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(fcmp_ne, TCG_CALL_NO_WG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(fcmp_ge, TCG_CALL_NO_WG, i32, env, i32, i32)
DEF_HELPER_FLAGS_2(pcmpbf, TCG_CALL_NO_RWG_SE, i32, i32, i32)
#if !defined(CONFIG_USER_ONLY)
-DEF_HELPER_3(mmu_read, i32, env, i32, i32)
-DEF_HELPER_4(mmu_write, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_3(mmu_read, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_4(mmu_write, TCG_CALL_NO_RWG, void, env, i32, i32, i32)
#endif
-DEF_HELPER_5(memalign, void, env, tl, i32, i32, i32)
-DEF_HELPER_2(stackprot, void, env, tl)
+DEF_HELPER_FLAGS_2(stackprot, TCG_CALL_NO_WG, void, env, tl)
-DEF_HELPER_2(get, i32, i32, i32)
-DEF_HELPER_3(put, void, i32, i32, i32)
+DEF_HELPER_FLAGS_2(get, TCG_CALL_NO_RWG, i32, i32, i32)
+DEF_HELPER_FLAGS_3(put, TCG_CALL_NO_RWG, void, i32, i32, i32)
diff --git a/target/microblaze/insns.decode b/target/microblaze/insns.decode
new file mode 100644
index 0000000000..fb0f0e6838
--- /dev/null
+++ b/target/microblaze/insns.decode
@@ -0,0 +1,256 @@
+#
+# MicroBlaze instruction decode definitions.
+#
+# Copyright (c) 2020 Richard Henderson <rth@twiddle.net>
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+#
+
+&typea0 rd ra
+&typea rd ra rb
+&typea_br rd rb
+&typea_bc ra rb
+&typeb rd ra imm
+&typeb_br rd imm
+&typeb_bc ra imm
+&type_msr rd imm
+
+# Include any IMM prefix in the value reported.
+%extimm 0:s16 !function=typeb_imm
+
+@typea ...... rd:5 ra:5 rb:5 ... .... .... &typea
+@typeb ...... rd:5 ra:5 ................ &typeb imm=%extimm
+
+# Officially typea, but with rb==0, which is not used.
+@typea0 ...... rd:5 ra:5 ................ &typea0
+
+# Officially typea, but with ra as opcode.
+@typea_br ...... rd:5 ..... rb:5 ........... &typea_br
+
+# Officially typea, but with rd as opcode.
+@typea_bc ...... ..... ra:5 rb:5 ........... &typea_bc
+
+# Officially typeb, but any immediate extension is unused.
+@typeb_bs ...... rd:5 ra:5 ..... ...... imm:5 &typeb
+
+# Officially typeb, but with ra as opcode.
+@typeb_br ...... rd:5 ..... ................ &typeb_br imm=%extimm
+
+# Officially typeb, but with rd as opcode.
+@typeb_bc ...... ..... ra:5 ................ &typeb_bc imm=%extimm
+
+# For convenience, extract the two imm_w/imm_s fields, then pack
+# them back together as "imm". Doing this makes it easiest to
+# match the required zero at bit 5.
+%ieimm 6:5 0:5
+@typeb_ie ...... rd:5 ra:5 ..... ..... . ..... &typeb imm=%ieimm
+
+@type_msr ...... rd:5 ...... imm:15 &type_msr
+
+###
+
+{
+ zero 000000 00000 00000 00000 000 0000 0000
+ add 000000 ..... ..... ..... 000 0000 0000 @typea
+}
+addc 000010 ..... ..... ..... 000 0000 0000 @typea
+addk 000100 ..... ..... ..... 000 0000 0000 @typea
+addkc 000110 ..... ..... ..... 000 0000 0000 @typea
+
+addi 001000 ..... ..... ................ @typeb
+addic 001010 ..... ..... ................ @typeb
+addik 001100 ..... ..... ................ @typeb
+addikc 001110 ..... ..... ................ @typeb
+
+and 100001 ..... ..... ..... 000 0000 0000 @typea
+andi 101001 ..... ..... ................ @typeb
+
+andn 100011 ..... ..... ..... 000 0000 0000 @typea
+andni 101011 ..... ..... ................ @typeb
+
+beq 100111 00000 ..... ..... 000 0000 0000 @typea_bc
+bge 100111 00101 ..... ..... 000 0000 0000 @typea_bc
+bgt 100111 00100 ..... ..... 000 0000 0000 @typea_bc
+ble 100111 00011 ..... ..... 000 0000 0000 @typea_bc
+blt 100111 00010 ..... ..... 000 0000 0000 @typea_bc
+bne 100111 00001 ..... ..... 000 0000 0000 @typea_bc
+
+beqd 100111 10000 ..... ..... 000 0000 0000 @typea_bc
+bged 100111 10101 ..... ..... 000 0000 0000 @typea_bc
+bgtd 100111 10100 ..... ..... 000 0000 0000 @typea_bc
+bled 100111 10011 ..... ..... 000 0000 0000 @typea_bc
+bltd 100111 10010 ..... ..... 000 0000 0000 @typea_bc
+bned 100111 10001 ..... ..... 000 0000 0000 @typea_bc
+
+beqi 101111 00000 ..... ................ @typeb_bc
+bgei 101111 00101 ..... ................ @typeb_bc
+bgti 101111 00100 ..... ................ @typeb_bc
+blei 101111 00011 ..... ................ @typeb_bc
+blti 101111 00010 ..... ................ @typeb_bc
+bnei 101111 00001 ..... ................ @typeb_bc
+
+beqid 101111 10000 ..... ................ @typeb_bc
+bgeid 101111 10101 ..... ................ @typeb_bc
+bgtid 101111 10100 ..... ................ @typeb_bc
+bleid 101111 10011 ..... ................ @typeb_bc
+bltid 101111 10010 ..... ................ @typeb_bc
+bneid 101111 10001 ..... ................ @typeb_bc
+
+br 100110 ..... 00000 ..... 000 0000 0000 @typea_br
+bra 100110 ..... 01000 ..... 000 0000 0000 @typea_br
+brd 100110 ..... 10000 ..... 000 0000 0000 @typea_br
+brad 100110 ..... 11000 ..... 000 0000 0000 @typea_br
+brld 100110 ..... 10100 ..... 000 0000 0000 @typea_br
+brald 100110 ..... 11100 ..... 000 0000 0000 @typea_br
+
+bri 101110 ..... 00000 ................ @typeb_br
+brai 101110 ..... 01000 ................ @typeb_br
+brid 101110 ..... 10000 ................ @typeb_br
+braid 101110 ..... 11000 ................ @typeb_br
+brlid 101110 ..... 10100 ................ @typeb_br
+bralid 101110 ..... 11100 ................ @typeb_br
+
+brk 100110 ..... 01100 ..... 000 0000 0000 @typea_br
+brki 101110 ..... 01100 ................ @typeb_br
+
+bsrl 010001 ..... ..... ..... 000 0000 0000 @typea
+bsra 010001 ..... ..... ..... 010 0000 0000 @typea
+bsll 010001 ..... ..... ..... 100 0000 0000 @typea
+
+bsrli 011001 ..... ..... 00000 000000 ..... @typeb_bs
+bsrai 011001 ..... ..... 00000 010000 ..... @typeb_bs
+bslli 011001 ..... ..... 00000 100000 ..... @typeb_bs
+
+bsefi 011001 ..... ..... 01000 .....0 ..... @typeb_ie
+bsifi 011001 ..... ..... 10000 .....0 ..... @typeb_ie
+
+clz 100100 ..... ..... 00000 000 1110 0000 @typea0
+
+cmp 000101 ..... ..... ..... 000 0000 0001 @typea
+cmpu 000101 ..... ..... ..... 000 0000 0011 @typea
+
+fadd 010110 ..... ..... ..... 0000 000 0000 @typea
+frsub 010110 ..... ..... ..... 0001 000 0000 @typea
+fmul 010110 ..... ..... ..... 0010 000 0000 @typea
+fdiv 010110 ..... ..... ..... 0011 000 0000 @typea
+fcmp_un 010110 ..... ..... ..... 0100 000 0000 @typea
+fcmp_lt 010110 ..... ..... ..... 0100 001 0000 @typea
+fcmp_eq 010110 ..... ..... ..... 0100 010 0000 @typea
+fcmp_le 010110 ..... ..... ..... 0100 011 0000 @typea
+fcmp_gt 010110 ..... ..... ..... 0100 100 0000 @typea
+fcmp_ne 010110 ..... ..... ..... 0100 101 0000 @typea
+fcmp_ge 010110 ..... ..... ..... 0100 110 0000 @typea
+
+# Note that flt and fint, unlike fsqrt, are documented as having the RB
+# operand which is unused. So allow the field to be non-zero but discard
+# the value and treat as 2-operand insns.
+flt 010110 ..... ..... ----- 0101 000 0000 @typea0
+fint 010110 ..... ..... ----- 0110 000 0000 @typea0
+fsqrt 010110 ..... ..... 00000 0111 000 0000 @typea0
+
+get 011011 rd:5 00000 0 ctrl:5 000000 imm:4
+getd 010011 rd:5 00000 rb:5 0 ctrl:5 00000
+
+idiv 010010 ..... ..... ..... 000 0000 0000 @typea
+idivu 010010 ..... ..... ..... 000 0000 0010 @typea
+
+imm 101100 00000 00000 imm:16
+
+lbu 110000 ..... ..... ..... 0000 000 0000 @typea
+lbur 110000 ..... ..... ..... 0100 000 0000 @typea
+lbuea 110000 ..... ..... ..... 0001 000 0000 @typea
+lbui 111000 ..... ..... ................ @typeb
+
+lhu 110001 ..... ..... ..... 0000 000 0000 @typea
+lhur 110001 ..... ..... ..... 0100 000 0000 @typea
+lhuea 110001 ..... ..... ..... 0001 000 0000 @typea
+lhui 111001 ..... ..... ................ @typeb
+
+lw 110010 ..... ..... ..... 0000 000 0000 @typea
+lwr 110010 ..... ..... ..... 0100 000 0000 @typea
+lwea 110010 ..... ..... ..... 0001 000 0000 @typea
+lwx 110010 ..... ..... ..... 1000 000 0000 @typea
+lwi 111010 ..... ..... ................ @typeb
+
+mbar 101110 imm:5 00010 0000 0000 0000 0100
+
+mfs 100101 rd:5 0 e:1 000 10 rs:14
+mts 100101 0 e:1 000 ra:5 11 rs:14
+
+msrclr 100101 ..... 100010 ............... @type_msr
+msrset 100101 ..... 100000 ............... @type_msr
+
+mul 010000 ..... ..... ..... 000 0000 0000 @typea
+mulh 010000 ..... ..... ..... 000 0000 0001 @typea
+mulhu 010000 ..... ..... ..... 000 0000 0011 @typea
+mulhsu 010000 ..... ..... ..... 000 0000 0010 @typea
+muli 011000 ..... ..... ................ @typeb
+
+or 100000 ..... ..... ..... 000 0000 0000 @typea
+ori 101000 ..... ..... ................ @typeb
+
+pcmpbf 100000 ..... ..... ..... 100 0000 0000 @typea
+pcmpeq 100010 ..... ..... ..... 100 0000 0000 @typea
+pcmpne 100011 ..... ..... ..... 100 0000 0000 @typea
+
+put 011011 00000 ra:5 1 ctrl:5 000000 imm:4
+putd 010011 00000 ra:5 rb:5 1 ctrl:5 00000
+
+rsub 000001 ..... ..... ..... 000 0000 0000 @typea
+rsubc 000011 ..... ..... ..... 000 0000 0000 @typea
+rsubk 000101 ..... ..... ..... 000 0000 0000 @typea
+rsubkc 000111 ..... ..... ..... 000 0000 0000 @typea
+
+rsubi 001001 ..... ..... ................ @typeb
+rsubic 001011 ..... ..... ................ @typeb
+rsubik 001101 ..... ..... ................ @typeb
+rsubikc 001111 ..... ..... ................ @typeb
+
+rtbd 101101 10010 ..... ................ @typeb_bc
+rtid 101101 10001 ..... ................ @typeb_bc
+rted 101101 10100 ..... ................ @typeb_bc
+rtsd 101101 10000 ..... ................ @typeb_bc
+
+sb 110100 ..... ..... ..... 0000 000 0000 @typea
+sbr 110100 ..... ..... ..... 0100 000 0000 @typea
+sbea 110100 ..... ..... ..... 0001 000 0000 @typea
+sbi 111100 ..... ..... ................ @typeb
+
+sh 110101 ..... ..... ..... 0000 000 0000 @typea
+shr 110101 ..... ..... ..... 0100 000 0000 @typea
+shea 110101 ..... ..... ..... 0001 000 0000 @typea
+shi 111101 ..... ..... ................ @typeb
+
+sw 110110 ..... ..... ..... 0000 000 0000 @typea
+swr 110110 ..... ..... ..... 0100 000 0000 @typea
+swea 110110 ..... ..... ..... 0001 000 0000 @typea
+swx 110110 ..... ..... ..... 1000 000 0000 @typea
+swi 111110 ..... ..... ................ @typeb
+
+sext8 100100 ..... ..... 00000 000 0110 0000 @typea0
+sext16 100100 ..... ..... 00000 000 0110 0001 @typea0
+
+sra 100100 ..... ..... 00000 000 0000 0001 @typea0
+src 100100 ..... ..... 00000 000 0010 0001 @typea0
+srl 100100 ..... ..... 00000 000 0100 0001 @typea0
+
+swapb 100100 ..... ..... 00000 001 1110 0000 @typea0
+swaph 100100 ..... ..... 00000 001 1110 0010 @typea0
+
+# Cache operations have no effect in qemu: discard the arguments.
+wdic 100100 00000 ----- ----- -00 -11- 01-0 # wdc
+wdic 100100 00000 ----- ----- 000 0110 1000 # wic
+
+xor 100010 ..... ..... ..... 000 0000 0000 @typea
+xori 101010 ..... ..... ................ @typeb
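The %extimm field (!function=typeb_imm) is what lets a preceding imm prefix widen a Type-B immediate: with IMM_FLAG set, the decoded 16-bit field is deposited into the low half of the prefix value, otherwise the sign-extended field is used as-is (typeb_imm() appears in translate.c below). A worked example with made-up values, assuming the imm prefix has been latched into the upper half of ext_imm; sketch only:

    /* after "imm 0x1234": */
    uint32_t ext_imm = 0x1234u << 16;
    int32_t  field   = (int16_t)0x5678;                    /* s16 field of addi */
    uint32_t imm     = deposit32(ext_imm, 0, 16, field);   /* 0x12345678 */

    /* without the prefix, the sign-extended field alone is used:
       (int16_t)0x8000  ->  0xffff8000 */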
diff --git a/target/microblaze/meson.build b/target/microblaze/meson.build
index b8fe4afe61..639c3f73a8 100644
--- a/target/microblaze/meson.build
+++ b/target/microblaze/meson.build
@@ -1,4 +1,7 @@
+gen = decodetree.process('insns.decode')
+
microblaze_ss = ss.source_set()
+microblaze_ss.add(gen)
microblaze_ss.add(files(
'cpu.c',
'gdbstub.c',
diff --git a/target/microblaze/microblaze-decode.h b/target/microblaze/microblaze-decode.h
deleted file mode 100644
index 17b2f29fff..0000000000
--- a/target/microblaze/microblaze-decode.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * MicroBlaze insn decoding macros.
- *
- * Copyright (c) 2009 Edgar E. Iglesias <edgar.iglesias@gmail.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef TARGET_MICROBLAZE_MICROBLAZE_DECODE_H
-#define TARGET_MICROBLAZE_MICROBLAZE_DECODE_H
-
-/* Convenient binary macros. */
-#define HEX__(n) 0x##n##LU
-#define B8__(x) ((x&0x0000000FLU)?1:0) \
- + ((x&0x000000F0LU)?2:0) \
- + ((x&0x00000F00LU)?4:0) \
- + ((x&0x0000F000LU)?8:0) \
- + ((x&0x000F0000LU)?16:0) \
- + ((x&0x00F00000LU)?32:0) \
- + ((x&0x0F000000LU)?64:0) \
- + ((x&0xF0000000LU)?128:0)
-#define B8(d) ((unsigned char)B8__(HEX__(d)))
-
-/* Decode logic, value and mask. */
-#define DEC_ADD {B8(00000000), B8(00110001)}
-#define DEC_SUB {B8(00000001), B8(00110001)}
-#define DEC_AND {B8(00100001), B8(00110101)}
-#define DEC_XOR {B8(00100010), B8(00110111)}
-#define DEC_OR {B8(00100000), B8(00110111)}
-#define DEC_BIT {B8(00100100), B8(00111111)}
-#define DEC_MSR {B8(00100101), B8(00111111)}
-
-#define DEC_BARREL {B8(00010001), B8(00110111)}
-#define DEC_MUL {B8(00010000), B8(00110111)}
-#define DEC_DIV {B8(00010010), B8(00110111)}
-#define DEC_FPU {B8(00010110), B8(00111111)}
-
-#define DEC_LD {B8(00110000), B8(00110100)}
-#define DEC_ST {B8(00110100), B8(00110100)}
-#define DEC_IMM {B8(00101100), B8(00111111)}
-
-#define DEC_BR {B8(00100110), B8(00110111)}
-#define DEC_BCC {B8(00100111), B8(00110111)}
-#define DEC_RTS {B8(00101101), B8(00111111)}
-
-#define DEC_STREAM {B8(00010011), B8(00110111)}
-
-#endif
diff --git a/target/microblaze/mmu.c b/target/microblaze/mmu.c
index 6763421ba2..6e583d78d9 100644
--- a/target/microblaze/mmu.c
+++ b/target/microblaze/mmu.c
@@ -250,8 +250,8 @@ void mmu_write(CPUMBState *env, bool ext, uint32_t rn, uint32_t v)
if (rn == MMU_R_TLBHI) {
if (i < 3 && !(v & TLB_VALID) && qemu_loglevel_mask(~0))
qemu_log_mask(LOG_GUEST_ERROR,
- "invalidating index %x at pc=%" PRIx64 "\n",
- i, env->sregs[SR_PC]);
+ "invalidating index %x at pc=%x\n",
+ i, env->pc);
env->mmu.tids[i] = env->mmu.regs[MMU_R_PID] & 0xff;
mmu_flush_idx(env, i);
}
diff --git a/target/microblaze/op_helper.c b/target/microblaze/op_helper.c
index f3b17a95b3..4614e99db3 100644
--- a/target/microblaze/op_helper.c
+++ b/target/microblaze/op_helper.c
@@ -26,8 +26,6 @@
#include "exec/cpu_ldst.h"
#include "fpu/softfloat.h"
-#define D(x)
-
void helper_put(uint32_t id, uint32_t ctrl, uint32_t data)
{
int test = ctrl & STREAM_TEST;
@@ -71,85 +69,27 @@ void helper_raise_exception(CPUMBState *env, uint32_t index)
cpu_loop_exit(cs);
}
-void helper_debug(CPUMBState *env)
-{
- int i;
-
- qemu_log("PC=%" PRIx64 "\n", env->sregs[SR_PC]);
- qemu_log("rmsr=%" PRIx64 " resr=%" PRIx64 " rear=%" PRIx64 " "
- "debug[%x] imm=%x iflags=%x\n",
- env->sregs[SR_MSR], env->sregs[SR_ESR], env->sregs[SR_EAR],
- env->debug, env->imm, env->iflags);
- qemu_log("btaken=%d btarget=%" PRIx64 " mode=%s(saved=%s) eip=%d ie=%d\n",
- env->btaken, env->btarget,
- (env->sregs[SR_MSR] & MSR_UM) ? "user" : "kernel",
- (env->sregs[SR_MSR] & MSR_UMS) ? "user" : "kernel",
- (bool)(env->sregs[SR_MSR] & MSR_EIP),
- (bool)(env->sregs[SR_MSR] & MSR_IE));
- for (i = 0; i < 32; i++) {
- qemu_log("r%2.2d=%8.8x ", i, env->regs[i]);
- if ((i + 1) % 4 == 0)
- qemu_log("\n");
- }
- qemu_log("\n\n");
-}
-
-static inline uint32_t compute_carry(uint32_t a, uint32_t b, uint32_t cin)
-{
- uint32_t cout = 0;
-
- if ((b == ~0) && cin)
- cout = 1;
- else if ((~0 - a) < (b + cin))
- cout = 1;
- return cout;
-}
-
-uint32_t helper_cmp(uint32_t a, uint32_t b)
-{
- uint32_t t;
-
- t = b + ~a + 1;
- if ((b & 0x80000000) ^ (a & 0x80000000))
- t = (t & 0x7fffffff) | (b & 0x80000000);
- return t;
-}
-
-uint32_t helper_cmpu(uint32_t a, uint32_t b)
+static bool check_divz(CPUMBState *env, uint32_t a, uint32_t b, uintptr_t ra)
{
- uint32_t t;
+ if (unlikely(b == 0)) {
+ env->msr |= MSR_DZ;
- t = b + ~a + 1;
- if ((b & 0x80000000) ^ (a & 0x80000000))
- t = (t & 0x7fffffff) | (a & 0x80000000);
- return t;
-}
+ if ((env->msr & MSR_EE) &&
+ env_archcpu(env)->cfg.div_zero_exception) {
+ CPUState *cs = env_cpu(env);
-uint32_t helper_carry(uint32_t a, uint32_t b, uint32_t cf)
-{
- return compute_carry(a, b, cf);
-}
-
-static inline int div_prepare(CPUMBState *env, uint32_t a, uint32_t b)
-{
- MicroBlazeCPU *cpu = env_archcpu(env);
-
- if (b == 0) {
- env->sregs[SR_MSR] |= MSR_DZ;
-
- if ((env->sregs[SR_MSR] & MSR_EE) && cpu->cfg.div_zero_exception) {
- env->sregs[SR_ESR] = ESR_EC_DIVZERO;
- helper_raise_exception(env, EXCP_HW_EXCP);
+ env->esr = ESR_EC_DIVZERO;
+ cs->exception_index = EXCP_HW_EXCP;
+ cpu_loop_exit_restore(cs, ra);
}
- return 0;
+ return false;
}
- env->sregs[SR_MSR] &= ~MSR_DZ;
- return 1;
+ return true;
}
uint32_t helper_divs(CPUMBState *env, uint32_t a, uint32_t b)
{
- if (!div_prepare(env, a, b)) {
+ if (!check_divz(env, a, b, GETPC())) {
return 0;
}
return (int32_t)a / (int32_t)b;
@@ -157,43 +97,46 @@ uint32_t helper_divs(CPUMBState *env, uint32_t a, uint32_t b)
uint32_t helper_divu(CPUMBState *env, uint32_t a, uint32_t b)
{
- if (!div_prepare(env, a, b)) {
+ if (!check_divz(env, a, b, GETPC())) {
return 0;
}
return a / b;
}
/* raise FPU exception. */
-static void raise_fpu_exception(CPUMBState *env)
+static void raise_fpu_exception(CPUMBState *env, uintptr_t ra)
{
- env->sregs[SR_ESR] = ESR_EC_FPU;
- helper_raise_exception(env, EXCP_HW_EXCP);
+ CPUState *cs = env_cpu(env);
+
+ env->esr = ESR_EC_FPU;
+ cs->exception_index = EXCP_HW_EXCP;
+ cpu_loop_exit_restore(cs, ra);
}
-static void update_fpu_flags(CPUMBState *env, int flags)
+static void update_fpu_flags(CPUMBState *env, int flags, uintptr_t ra)
{
int raise = 0;
if (flags & float_flag_invalid) {
- env->sregs[SR_FSR] |= FSR_IO;
+ env->fsr |= FSR_IO;
raise = 1;
}
if (flags & float_flag_divbyzero) {
- env->sregs[SR_FSR] |= FSR_DZ;
+ env->fsr |= FSR_DZ;
raise = 1;
}
if (flags & float_flag_overflow) {
- env->sregs[SR_FSR] |= FSR_OF;
+ env->fsr |= FSR_OF;
raise = 1;
}
if (flags & float_flag_underflow) {
- env->sregs[SR_FSR] |= FSR_UF;
+ env->fsr |= FSR_UF;
raise = 1;
}
if (raise
&& (env->pvr.regs[2] & PVR2_FPU_EXC_MASK)
- && (env->sregs[SR_MSR] & MSR_EE)) {
- raise_fpu_exception(env);
+ && (env->msr & MSR_EE)) {
+ raise_fpu_exception(env, ra);
}
}
@@ -208,7 +151,7 @@ uint32_t helper_fadd(CPUMBState *env, uint32_t a, uint32_t b)
fd.f = float32_add(fa.f, fb.f, &env->fp_status);
flags = get_float_exception_flags(&env->fp_status);
- update_fpu_flags(env, flags);
+ update_fpu_flags(env, flags, GETPC());
return fd.l;
}
@@ -222,7 +165,7 @@ uint32_t helper_frsub(CPUMBState *env, uint32_t a, uint32_t b)
fb.l = b;
fd.f = float32_sub(fb.f, fa.f, &env->fp_status);
flags = get_float_exception_flags(&env->fp_status);
- update_fpu_flags(env, flags);
+ update_fpu_flags(env, flags, GETPC());
return fd.l;
}
@@ -236,7 +179,7 @@ uint32_t helper_fmul(CPUMBState *env, uint32_t a, uint32_t b)
fb.l = b;
fd.f = float32_mul(fa.f, fb.f, &env->fp_status);
flags = get_float_exception_flags(&env->fp_status);
- update_fpu_flags(env, flags);
+ update_fpu_flags(env, flags, GETPC());
return fd.l;
}
@@ -251,7 +194,7 @@ uint32_t helper_fdiv(CPUMBState *env, uint32_t a, uint32_t b)
fb.l = b;
fd.f = float32_div(fb.f, fa.f, &env->fp_status);
flags = get_float_exception_flags(&env->fp_status);
- update_fpu_flags(env, flags);
+ update_fpu_flags(env, flags, GETPC());
return fd.l;
}
@@ -266,7 +209,7 @@ uint32_t helper_fcmp_un(CPUMBState *env, uint32_t a, uint32_t b)
if (float32_is_signaling_nan(fa.f, &env->fp_status) ||
float32_is_signaling_nan(fb.f, &env->fp_status)) {
- update_fpu_flags(env, float_flag_invalid);
+ update_fpu_flags(env, float_flag_invalid, GETPC());
r = 1;
}
@@ -289,7 +232,7 @@ uint32_t helper_fcmp_lt(CPUMBState *env, uint32_t a, uint32_t b)
fb.l = b;
r = float32_lt(fb.f, fa.f, &env->fp_status);
flags = get_float_exception_flags(&env->fp_status);
- update_fpu_flags(env, flags & float_flag_invalid);
+ update_fpu_flags(env, flags & float_flag_invalid, GETPC());
return r;
}
@@ -305,7 +248,7 @@ uint32_t helper_fcmp_eq(CPUMBState *env, uint32_t a, uint32_t b)
fb.l = b;
r = float32_eq_quiet(fa.f, fb.f, &env->fp_status);
flags = get_float_exception_flags(&env->fp_status);
- update_fpu_flags(env, flags & float_flag_invalid);
+ update_fpu_flags(env, flags & float_flag_invalid, GETPC());
return r;
}
@@ -321,7 +264,7 @@ uint32_t helper_fcmp_le(CPUMBState *env, uint32_t a, uint32_t b)
set_float_exception_flags(0, &env->fp_status);
r = float32_le(fa.f, fb.f, &env->fp_status);
flags = get_float_exception_flags(&env->fp_status);
- update_fpu_flags(env, flags & float_flag_invalid);
+ update_fpu_flags(env, flags & float_flag_invalid, GETPC());
return r;
@@ -337,7 +280,7 @@ uint32_t helper_fcmp_gt(CPUMBState *env, uint32_t a, uint32_t b)
set_float_exception_flags(0, &env->fp_status);
r = float32_lt(fa.f, fb.f, &env->fp_status);
flags = get_float_exception_flags(&env->fp_status);
- update_fpu_flags(env, flags & float_flag_invalid);
+ update_fpu_flags(env, flags & float_flag_invalid, GETPC());
return r;
}
@@ -351,7 +294,7 @@ uint32_t helper_fcmp_ne(CPUMBState *env, uint32_t a, uint32_t b)
set_float_exception_flags(0, &env->fp_status);
r = !float32_eq_quiet(fa.f, fb.f, &env->fp_status);
flags = get_float_exception_flags(&env->fp_status);
- update_fpu_flags(env, flags & float_flag_invalid);
+ update_fpu_flags(env, flags & float_flag_invalid, GETPC());
return r;
}
@@ -366,7 +309,7 @@ uint32_t helper_fcmp_ge(CPUMBState *env, uint32_t a, uint32_t b)
set_float_exception_flags(0, &env->fp_status);
r = !float32_lt(fa.f, fb.f, &env->fp_status);
flags = get_float_exception_flags(&env->fp_status);
- update_fpu_flags(env, flags & float_flag_invalid);
+ update_fpu_flags(env, flags & float_flag_invalid, GETPC());
return r;
}
@@ -390,7 +333,7 @@ uint32_t helper_fint(CPUMBState *env, uint32_t a)
fa.l = a;
r = float32_to_int32(fa.f, &env->fp_status);
flags = get_float_exception_flags(&env->fp_status);
- update_fpu_flags(env, flags);
+ update_fpu_flags(env, flags, GETPC());
return r;
}
@@ -404,7 +347,7 @@ uint32_t helper_fsqrt(CPUMBState *env, uint32_t a)
fa.l = a;
fd.l = float32_sqrt(fa.f, &env->fp_status);
flags = get_float_exception_flags(&env->fp_status);
- update_fpu_flags(env, flags);
+ update_fpu_flags(env, flags, GETPC());
return fd.l;
}
@@ -422,37 +365,19 @@ uint32_t helper_pcmpbf(uint32_t a, uint32_t b)
return 0;
}
-void helper_memalign(CPUMBState *env, target_ulong addr,
- uint32_t dr, uint32_t wr,
- uint32_t mask)
-{
- if (addr & mask) {
- qemu_log_mask(CPU_LOG_INT,
- "unaligned access addr=" TARGET_FMT_lx
- " mask=%x, wr=%d dr=r%d\n",
- addr, mask, wr, dr);
- env->sregs[SR_EAR] = addr;
- env->sregs[SR_ESR] = ESR_EC_UNALIGNED_DATA | (wr << 10) \
- | (dr & 31) << 5;
- if (mask == 3) {
- env->sregs[SR_ESR] |= 1 << 11;
- }
- if (!(env->sregs[SR_MSR] & MSR_EE)) {
- return;
- }
- helper_raise_exception(env, EXCP_HW_EXCP);
- }
-}
-
void helper_stackprot(CPUMBState *env, target_ulong addr)
{
if (addr < env->slr || addr > env->shr) {
+ CPUState *cs = env_cpu(env);
+
qemu_log_mask(CPU_LOG_INT, "Stack protector violation at "
TARGET_FMT_lx " %x %x\n",
addr, env->slr, env->shr);
- env->sregs[SR_EAR] = addr;
- env->sregs[SR_ESR] = ESR_EC_STACKPROT;
- helper_raise_exception(env, EXCP_HW_EXCP);
+
+ env->ear = addr;
+ env->esr = ESR_EC_STACKPROT;
+ cs->exception_index = EXCP_HW_EXCP;
+ cpu_loop_exit_restore(cs, GETPC());
}
}
@@ -473,32 +398,33 @@ void mb_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr,
int mmu_idx, MemTxAttrs attrs,
MemTxResult response, uintptr_t retaddr)
{
- MicroBlazeCPU *cpu;
- CPUMBState *env;
+ MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
+ CPUMBState *env = &cpu->env;
+
qemu_log_mask(CPU_LOG_INT, "Transaction failed: vaddr 0x%" VADDR_PRIx
" physaddr 0x" TARGET_FMT_plx " size %d access type %s\n",
addr, physaddr, size,
access_type == MMU_INST_FETCH ? "INST_FETCH" :
(access_type == MMU_DATA_LOAD ? "DATA_LOAD" : "DATA_STORE"));
- cpu = MICROBLAZE_CPU(cs);
- env = &cpu->env;
- cpu_restore_state(cs, retaddr, true);
- if (!(env->sregs[SR_MSR] & MSR_EE)) {
+ if (!(env->msr & MSR_EE)) {
return;
}
- env->sregs[SR_EAR] = addr;
if (access_type == MMU_INST_FETCH) {
- if ((env->pvr.regs[2] & PVR2_IOPB_BUS_EXC_MASK)) {
- env->sregs[SR_ESR] = ESR_EC_INSN_BUS;
- helper_raise_exception(env, EXCP_HW_EXCP);
+ if (!cpu->cfg.iopb_bus_exception) {
+ return;
}
+ env->esr = ESR_EC_INSN_BUS;
} else {
- if ((env->pvr.regs[2] & PVR2_DOPB_BUS_EXC_MASK)) {
- env->sregs[SR_ESR] = ESR_EC_DATA_BUS;
- helper_raise_exception(env, EXCP_HW_EXCP);
+ if (!cpu->cfg.dopb_bus_exception) {
+ return;
}
+ env->esr = ESR_EC_DATA_BUS;
}
+
+ env->ear = addr;
+ cs->exception_index = EXCP_HW_EXCP;
+ cpu_loop_exit_restore(cs, retaddr);
}
#endif
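Throughout op_helper.c the pattern for raising a precise exception is now: record the cause in esr/ear, set exception_index, and let cpu_loop_exit_restore() unwind using the return address from GETPC() taken in the outermost helper. A minimal sketch of that pattern for a hypothetical helper (illustration only, not part of the patch):

    void helper_example_trap(CPUMBState *env)
    {
        CPUState *cs = env_cpu(env);

        env->esr = ESR_EC_ILLEGAL_OP;         /* record the cause */
        cs->exception_index = EXCP_HW_EXCP;   /* select the exception vector */
        /* GETPC() lets QEMU recover pc/iflags of the faulting guest insn. */
        cpu_loop_exit_restore(cs, GETPC());
    }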
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
index a96cb21d96..a8a3249185 100644
--- a/target/microblaze/translate.c
+++ b/target/microblaze/translate.c
@@ -24,7 +24,6 @@
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "exec/helper-proto.h"
-#include "microblaze-decode.h"
#include "exec/cpu_ldst.h"
#include "exec/helper-gen.h"
#include "exec/translator.h"
@@ -33,106 +32,99 @@
#include "trace-tcg.h"
#include "exec/log.h"
-
-#define SIM_COMPAT 0
-#define DISAS_GNU 1
-#define DISAS_MB 1
-#if DISAS_MB && !SIM_COMPAT
-# define LOG_DIS(...) qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__)
-#else
-# define LOG_DIS(...) do { } while (0)
-#endif
-
-#define D(x)
-
#define EXTRACT_FIELD(src, start, end) \
(((src) >> start) & ((1 << (end - start + 1)) - 1))
/* is_jmp field values */
#define DISAS_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */
#define DISAS_UPDATE DISAS_TARGET_1 /* cpu state was modified dynamically */
-#define DISAS_TB_JUMP DISAS_TARGET_2 /* only pc was modified statically */
-static TCGv_i32 env_debug;
static TCGv_i32 cpu_R[32];
-static TCGv_i64 cpu_SR[14];
-static TCGv_i32 env_imm;
-static TCGv_i32 env_btaken;
-static TCGv_i64 env_btarget;
-static TCGv_i32 env_iflags;
-static TCGv env_res_addr;
-static TCGv_i32 env_res_val;
+static TCGv_i32 cpu_pc;
+static TCGv_i32 cpu_msr;
+static TCGv_i32 cpu_msr_c;
+static TCGv_i32 cpu_imm;
+static TCGv_i32 cpu_bvalue;
+static TCGv_i32 cpu_btarget;
+static TCGv_i32 cpu_iflags;
+static TCGv cpu_res_addr;
+static TCGv_i32 cpu_res_val;
#include "exec/gen-icount.h"
/* This is the state at translation time. */
typedef struct DisasContext {
+ DisasContextBase base;
MicroBlazeCPU *cpu;
- uint32_t pc;
- /* Decoder. */
- int type_b;
- uint32_t ir;
- uint8_t opcode;
- uint8_t rd, ra, rb;
- uint16_t imm;
+ /* TCG op of the current insn_start. */
+ TCGOp *insn_start;
+
+ TCGv_i32 r0;
+ bool r0_set;
+ /* Decoder. */
+ uint32_t ext_imm;
unsigned int cpustate_changed;
- unsigned int delayed_branch;
- unsigned int tb_flags, synced_flags; /* tb dependent flags. */
- unsigned int clear_imm;
- int is_jmp;
-
-#define JMP_NOJMP 0
-#define JMP_DIRECT 1
-#define JMP_DIRECT_CC 2
-#define JMP_INDIRECT 3
- unsigned int jmp;
- uint32_t jmp_pc;
-
- int abort_at_next_insn;
- struct TranslationBlock *tb;
- int singlestep_enabled;
+ unsigned int tb_flags;
+ unsigned int tb_flags_to_set;
+ int mem_index;
+
+ /* Condition under which to jump, including NEVER and ALWAYS. */
+ TCGCond jmp_cond;
+
+ /* Immediate branch-taken destination, or -1 for indirect. */
+ uint32_t jmp_dest;
} DisasContext;
-static const char *regnames[] =
+static int typeb_imm(DisasContext *dc, int x)
{
- "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
- "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
- "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
- "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
-};
+ if (dc->tb_flags & IMM_FLAG) {
+ return deposit32(dc->ext_imm, 0, 16, x);
+ }
+ return x;
+}
-static const char *special_regnames[] =
-{
- "rpc", "rmsr", "sr2", "rear", "sr4", "resr", "sr6", "rfsr",
- "sr8", "sr9", "sr10", "rbtr", "sr12", "redr"
-};
+/* Include the auto-generated decoder. */
+#include "decode-insns.c.inc"
-static inline void t_sync_flags(DisasContext *dc)
+static void t_sync_flags(DisasContext *dc)
{
/* Synch the tb dependent flags between translator and runtime. */
- if (dc->tb_flags != dc->synced_flags) {
- tcg_gen_movi_i32(env_iflags, dc->tb_flags);
- dc->synced_flags = dc->tb_flags;
+ if ((dc->tb_flags ^ dc->base.tb->flags) & IFLAGS_TB_MASK) {
+ tcg_gen_movi_i32(cpu_iflags, dc->tb_flags & IFLAGS_TB_MASK);
}
}
-static inline void t_gen_raise_exception(DisasContext *dc, uint32_t index)
+static void gen_raise_exception(DisasContext *dc, uint32_t index)
{
TCGv_i32 tmp = tcg_const_i32(index);
- t_sync_flags(dc);
- tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc);
gen_helper_raise_exception(cpu_env, tmp);
tcg_temp_free_i32(tmp);
- dc->is_jmp = DISAS_UPDATE;
+ dc->base.is_jmp = DISAS_NORETURN;
+}
+
+static void gen_raise_exception_sync(DisasContext *dc, uint32_t index)
+{
+ t_sync_flags(dc);
+ tcg_gen_movi_i32(cpu_pc, dc->base.pc_next);
+ gen_raise_exception(dc, index);
+}
+
+static void gen_raise_hw_excp(DisasContext *dc, uint32_t esr_ec)
+{
+ TCGv_i32 tmp = tcg_const_i32(esr_ec);
+ tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUMBState, esr));
+ tcg_temp_free_i32(tmp);
+
+ gen_raise_exception_sync(dc, EXCP_HW_EXCP);
}
static inline bool use_goto_tb(DisasContext *dc, target_ulong dest)
{
#ifndef CONFIG_USER_ONLY
- return (dc->tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
+ return (dc->base.pc_first & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
#else
return true;
#endif
@@ -140,42 +132,20 @@ static inline bool use_goto_tb(DisasContext *dc, target_ulong dest)
static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest)
{
- if (use_goto_tb(dc, dest)) {
+ if (dc->base.singlestep_enabled) {
+ TCGv_i32 tmp = tcg_const_i32(EXCP_DEBUG);
+ tcg_gen_movi_i32(cpu_pc, dest);
+ gen_helper_raise_exception(cpu_env, tmp);
+ tcg_temp_free_i32(tmp);
+ } else if (use_goto_tb(dc, dest)) {
tcg_gen_goto_tb(n);
- tcg_gen_movi_i64(cpu_SR[SR_PC], dest);
- tcg_gen_exit_tb(dc->tb, n);
+ tcg_gen_movi_i32(cpu_pc, dest);
+ tcg_gen_exit_tb(dc->base.tb, n);
} else {
- tcg_gen_movi_i64(cpu_SR[SR_PC], dest);
+ tcg_gen_movi_i32(cpu_pc, dest);
tcg_gen_exit_tb(NULL, 0);
}
-}
-
-static void read_carry(DisasContext *dc, TCGv_i32 d)
-{
- tcg_gen_extrl_i64_i32(d, cpu_SR[SR_MSR]);
- tcg_gen_shri_i32(d, d, 31);
-}
-
-/*
- * write_carry sets the carry bits in MSR based on bit 0 of v.
- * v[31:1] are ignored.
- */
-static void write_carry(DisasContext *dc, TCGv_i32 v)
-{
- TCGv_i64 t0 = tcg_temp_new_i64();
- tcg_gen_extu_i32_i64(t0, v);
- /* Deposit bit 0 into MSR_C and the alias MSR_CC. */
- tcg_gen_deposit_i64(cpu_SR[SR_MSR], cpu_SR[SR_MSR], t0, 2, 1);
- tcg_gen_deposit_i64(cpu_SR[SR_MSR], cpu_SR[SR_MSR], t0, 31, 1);
- tcg_temp_free_i64(t0);
-}
-
-static void write_carryi(DisasContext *dc, bool carry)
-{
- TCGv_i32 t0 = tcg_temp_new_i32();
- tcg_gen_movi_i32(t0, carry);
- write_carry(dc, t0);
- tcg_temp_free_i32(t0);
+ dc->base.is_jmp = DISAS_NORETURN;
}
/*
@@ -184,10 +154,9 @@ static void write_carryi(DisasContext *dc, bool carry)
*/
static bool trap_illegal(DisasContext *dc, bool cond)
{
- if (cond && (dc->tb_flags & MSR_EE_FLAG)
+ if (cond && (dc->tb_flags & MSR_EE)
&& dc->cpu->cfg.illegal_opcode_exception) {
- tcg_gen_movi_i64(cpu_SR[SR_ESR], ESR_EC_ILLEGAL_OP);
- t_gen_raise_exception(dc, EXCP_HW_EXCP);
+ gen_raise_hw_excp(dc, ESR_EC_ILLEGAL_OP);
}
return cond;
}
@@ -198,759 +167,717 @@ static bool trap_illegal(DisasContext *dc, bool cond)
*/
static bool trap_userspace(DisasContext *dc, bool cond)
{
- int mem_index = cpu_mmu_index(&dc->cpu->env, false);
- bool cond_user = cond && mem_index == MMU_USER_IDX;
+ bool cond_user = cond && dc->mem_index == MMU_USER_IDX;
- if (cond_user && (dc->tb_flags & MSR_EE_FLAG)) {
- tcg_gen_movi_i64(cpu_SR[SR_ESR], ESR_EC_PRIVINSN);
- t_gen_raise_exception(dc, EXCP_HW_EXCP);
+ if (cond_user && (dc->tb_flags & MSR_EE)) {
+ gen_raise_hw_excp(dc, ESR_EC_PRIVINSN);
}
return cond_user;
}
-/* True if ALU operand b is a small immediate that may deserve
- faster treatment. */
-static inline int dec_alu_op_b_is_small_imm(DisasContext *dc)
+static TCGv_i32 reg_for_read(DisasContext *dc, int reg)
{
- /* Immediate insn without the imm prefix ? */
- return dc->type_b && !(dc->tb_flags & IMM_FLAG);
+ if (likely(reg != 0)) {
+ return cpu_R[reg];
+ }
+ if (!dc->r0_set) {
+ if (dc->r0 == NULL) {
+ dc->r0 = tcg_temp_new_i32();
+ }
+ tcg_gen_movi_i32(dc->r0, 0);
+ dc->r0_set = true;
+ }
+ return dc->r0;
}
-static inline TCGv_i32 *dec_alu_op_b(DisasContext *dc)
+static TCGv_i32 reg_for_write(DisasContext *dc, int reg)
{
- if (dc->type_b) {
- if (dc->tb_flags & IMM_FLAG)
- tcg_gen_ori_i32(env_imm, env_imm, dc->imm);
- else
- tcg_gen_movi_i32(env_imm, (int32_t)((int16_t)dc->imm));
- return &env_imm;
- } else
- return &cpu_R[dc->rb];
+ if (likely(reg != 0)) {
+ return cpu_R[reg];
+ }
+ if (dc->r0 == NULL) {
+ dc->r0 = tcg_temp_new_i32();
+ }
+ return dc->r0;
}
-static void dec_add(DisasContext *dc)
+static bool do_typea(DisasContext *dc, arg_typea *arg, bool side_effects,
+ void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32))
{
- unsigned int k, c;
- TCGv_i32 cf;
+ TCGv_i32 rd, ra, rb;
- k = dc->opcode & 4;
- c = dc->opcode & 2;
-
- LOG_DIS("add%s%s%s r%d r%d r%d\n",
- dc->type_b ? "i" : "", k ? "k" : "", c ? "c" : "",
- dc->rd, dc->ra, dc->rb);
+ if (arg->rd == 0 && !side_effects) {
+ return true;
+ }
- /* Take care of the easy cases first. */
- if (k) {
- /* k - keep carry, no need to update MSR. */
- /* If rd == r0, it's a nop. */
- if (dc->rd) {
- tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
+ rd = reg_for_write(dc, arg->rd);
+ ra = reg_for_read(dc, arg->ra);
+ rb = reg_for_read(dc, arg->rb);
+ fn(rd, ra, rb);
+ return true;
+}
- if (c) {
- /* c - Add carry into the result. */
- cf = tcg_temp_new_i32();
+static bool do_typea0(DisasContext *dc, arg_typea0 *arg, bool side_effects,
+ void (*fn)(TCGv_i32, TCGv_i32))
+{
+ TCGv_i32 rd, ra;
- read_carry(dc, cf);
- tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->rd], cf);
- tcg_temp_free_i32(cf);
- }
- }
- return;
+ if (arg->rd == 0 && !side_effects) {
+ return true;
}
- /* From now on, we can assume k is zero. So we need to update MSR. */
- /* Extract carry. */
- cf = tcg_temp_new_i32();
- if (c) {
- read_carry(dc, cf);
- } else {
- tcg_gen_movi_i32(cf, 0);
+ rd = reg_for_write(dc, arg->rd);
+ ra = reg_for_read(dc, arg->ra);
+ fn(rd, ra);
+ return true;
+}
+
+static bool do_typeb_imm(DisasContext *dc, arg_typeb *arg, bool side_effects,
+ void (*fni)(TCGv_i32, TCGv_i32, int32_t))
+{
+ TCGv_i32 rd, ra;
+
+ if (arg->rd == 0 && !side_effects) {
+ return true;
}
- if (dc->rd) {
- TCGv_i32 ncf = tcg_temp_new_i32();
- gen_helper_carry(ncf, cpu_R[dc->ra], *(dec_alu_op_b(dc)), cf);
- tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
- tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->rd], cf);
- write_carry(dc, ncf);
- tcg_temp_free_i32(ncf);
- } else {
- gen_helper_carry(cf, cpu_R[dc->ra], *(dec_alu_op_b(dc)), cf);
- write_carry(dc, cf);
+ rd = reg_for_write(dc, arg->rd);
+ ra = reg_for_read(dc, arg->ra);
+ fni(rd, ra, arg->imm);
+ return true;
+}
+
+static bool do_typeb_val(DisasContext *dc, arg_typeb *arg, bool side_effects,
+ void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ TCGv_i32 rd, ra, imm;
+
+ if (arg->rd == 0 && !side_effects) {
+ return true;
}
- tcg_temp_free_i32(cf);
+
+ rd = reg_for_write(dc, arg->rd);
+ ra = reg_for_read(dc, arg->ra);
+ imm = tcg_const_i32(arg->imm);
+
+ fn(rd, ra, imm);
+
+ tcg_temp_free_i32(imm);
+ return true;
}
-static void dec_sub(DisasContext *dc)
+#define DO_TYPEA(NAME, SE, FN) \
+ static bool trans_##NAME(DisasContext *dc, arg_typea *a) \
+ { return do_typea(dc, a, SE, FN); }
+
+#define DO_TYPEA_CFG(NAME, CFG, SE, FN) \
+ static bool trans_##NAME(DisasContext *dc, arg_typea *a) \
+ { return dc->cpu->cfg.CFG && do_typea(dc, a, SE, FN); }
+
+#define DO_TYPEA0(NAME, SE, FN) \
+ static bool trans_##NAME(DisasContext *dc, arg_typea0 *a) \
+ { return do_typea0(dc, a, SE, FN); }
+
+#define DO_TYPEA0_CFG(NAME, CFG, SE, FN) \
+ static bool trans_##NAME(DisasContext *dc, arg_typea0 *a) \
+ { return dc->cpu->cfg.CFG && do_typea0(dc, a, SE, FN); }
+
+#define DO_TYPEBI(NAME, SE, FNI) \
+ static bool trans_##NAME(DisasContext *dc, arg_typeb *a) \
+ { return do_typeb_imm(dc, a, SE, FNI); }
+
+#define DO_TYPEBI_CFG(NAME, CFG, SE, FNI) \
+ static bool trans_##NAME(DisasContext *dc, arg_typeb *a) \
+ { return dc->cpu->cfg.CFG && do_typeb_imm(dc, a, SE, FNI); }
+
+#define DO_TYPEBV(NAME, SE, FN) \
+ static bool trans_##NAME(DisasContext *dc, arg_typeb *a) \
+ { return do_typeb_val(dc, a, SE, FN); }
+
+#define ENV_WRAPPER2(NAME, HELPER) \
+ static void NAME(TCGv_i32 out, TCGv_i32 ina) \
+ { HELPER(out, cpu_env, ina); }
+
+#define ENV_WRAPPER3(NAME, HELPER) \
+ static void NAME(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) \
+ { HELPER(out, cpu_env, ina, inb); }
+
+/* No input carry, but output carry. */
+static void gen_add(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
{
- unsigned int u, cmp, k, c;
- TCGv_i32 cf, na;
+ TCGv_i32 zero = tcg_const_i32(0);
- u = dc->imm & 2;
- k = dc->opcode & 4;
- c = dc->opcode & 2;
- cmp = (dc->imm & 1) && (!dc->type_b) && k;
+ tcg_gen_add2_i32(out, cpu_msr_c, ina, zero, inb, zero);
- if (cmp) {
- LOG_DIS("cmp%s r%d, r%d ir=%x\n", u ? "u" : "", dc->rd, dc->ra, dc->ir);
- if (dc->rd) {
- if (u)
- gen_helper_cmpu(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
- else
- gen_helper_cmp(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
- }
- return;
- }
+ tcg_temp_free_i32(zero);
+}
- LOG_DIS("sub%s%s r%d, r%d r%d\n",
- k ? "k" : "", c ? "c" : "", dc->rd, dc->ra, dc->rb);
+/* Input and output carry. */
+static void gen_addc(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
+{
+ TCGv_i32 zero = tcg_const_i32(0);
+ TCGv_i32 tmp = tcg_temp_new_i32();
- /* Take care of the easy cases first. */
- if (k) {
- /* k - keep carry, no need to update MSR. */
- /* If rd == r0, it's a nop. */
- if (dc->rd) {
- tcg_gen_sub_i32(cpu_R[dc->rd], *(dec_alu_op_b(dc)), cpu_R[dc->ra]);
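+    /*
+     * Fold the carry-in into ina first, then add inb; the carry-outs of
+     * the two steps accumulate in cpu_msr_c (at most one can be set).
+     */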
+ tcg_gen_add2_i32(tmp, cpu_msr_c, ina, zero, cpu_msr_c, zero);
+ tcg_gen_add2_i32(out, cpu_msr_c, tmp, cpu_msr_c, inb, zero);
- if (c) {
- /* c - Add carry into the result. */
- cf = tcg_temp_new_i32();
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(zero);
+}
- read_carry(dc, cf);
- tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->rd], cf);
- tcg_temp_free_i32(cf);
- }
- }
- return;
- }
+/* Input carry, but no output carry. */
+static void gen_addkc(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
+{
+ tcg_gen_add_i32(out, ina, inb);
+ tcg_gen_add_i32(out, out, cpu_msr_c);
+}
- /* From now on, we can assume k is zero. So we need to update MSR. */
- /* Extract carry. And complement a into na. */
- cf = tcg_temp_new_i32();
- na = tcg_temp_new_i32();
- if (c) {
- read_carry(dc, cf);
- } else {
- tcg_gen_movi_i32(cf, 1);
- }
+DO_TYPEA(add, true, gen_add)
+DO_TYPEA(addc, true, gen_addc)
+DO_TYPEA(addk, false, tcg_gen_add_i32)
+DO_TYPEA(addkc, true, gen_addkc)
- /* d = b + ~a + c. carry defaults to 1. */
- tcg_gen_not_i32(na, cpu_R[dc->ra]);
+DO_TYPEBV(addi, true, gen_add)
+DO_TYPEBV(addic, true, gen_addc)
+DO_TYPEBI(addik, false, tcg_gen_addi_i32)
+DO_TYPEBV(addikc, true, gen_addkc)
- if (dc->rd) {
- TCGv_i32 ncf = tcg_temp_new_i32();
- gen_helper_carry(ncf, na, *(dec_alu_op_b(dc)), cf);
- tcg_gen_add_i32(cpu_R[dc->rd], na, *(dec_alu_op_b(dc)));
- tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->rd], cf);
- write_carry(dc, ncf);
- tcg_temp_free_i32(ncf);
- } else {
- gen_helper_carry(cf, na, *(dec_alu_op_b(dc)), cf);
- write_carry(dc, cf);
- }
- tcg_temp_free_i32(cf);
- tcg_temp_free_i32(na);
+static void gen_andni(TCGv_i32 out, TCGv_i32 ina, int32_t imm)
+{
+ tcg_gen_andi_i32(out, ina, ~imm);
}
-static void dec_pattern(DisasContext *dc)
+DO_TYPEA(and, false, tcg_gen_and_i32)
+DO_TYPEBI(andi, false, tcg_gen_andi_i32)
+DO_TYPEA(andn, false, tcg_gen_andc_i32)
+DO_TYPEBI(andni, false, gen_andni)
+
+static void gen_bsra(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
{
- unsigned int mode;
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_andi_i32(tmp, inb, 31);
+ tcg_gen_sar_i32(out, ina, tmp);
+ tcg_temp_free_i32(tmp);
+}
- if (trap_illegal(dc, !dc->cpu->cfg.use_pcmp_instr)) {
- return;
- }
+static void gen_bsrl(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_andi_i32(tmp, inb, 31);
+ tcg_gen_shr_i32(out, ina, tmp);
+ tcg_temp_free_i32(tmp);
+}
- mode = dc->opcode & 3;
- switch (mode) {
- case 0:
- /* pcmpbf. */
- LOG_DIS("pcmpbf r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
- if (dc->rd)
- gen_helper_pcmpbf(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
- break;
- case 2:
- LOG_DIS("pcmpeq r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
- if (dc->rd) {
- tcg_gen_setcond_i32(TCG_COND_EQ, cpu_R[dc->rd],
- cpu_R[dc->ra], cpu_R[dc->rb]);
- }
- break;
- case 3:
- LOG_DIS("pcmpne r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
- if (dc->rd) {
- tcg_gen_setcond_i32(TCG_COND_NE, cpu_R[dc->rd],
- cpu_R[dc->ra], cpu_R[dc->rb]);
- }
- break;
- default:
- cpu_abort(CPU(dc->cpu),
- "unsupported pattern insn opcode=%x\n", dc->opcode);
- break;
- }
+static void gen_bsll(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_andi_i32(tmp, inb, 31);
+ tcg_gen_shl_i32(out, ina, tmp);
+ tcg_temp_free_i32(tmp);
}
-static void dec_and(DisasContext *dc)
+static void gen_bsefi(TCGv_i32 out, TCGv_i32 ina, int32_t imm)
{
- unsigned int not;
+ /* Note that decodetree has extracted and reassembled imm_w/imm_s. */
+ int imm_w = extract32(imm, 5, 5);
+ int imm_s = extract32(imm, 0, 5);
+
+ if (imm_w + imm_s > 32 || imm_w == 0) {
+ /* These inputs have an undefined behavior. */
+ qemu_log_mask(LOG_GUEST_ERROR, "bsefi: Bad input w=%d s=%d\n",
+ imm_w, imm_s);
+ } else {
+ tcg_gen_extract_i32(out, ina, imm_s, imm_w);
+ }
+}
- if (!dc->type_b && (dc->imm & (1 << 10))) {
- dec_pattern(dc);
- return;
+static void gen_bsifi(TCGv_i32 out, TCGv_i32 ina, int32_t imm)
+{
+ /* Note that decodetree has extracted and reassembled imm_w/imm_s. */
+ int imm_w = extract32(imm, 5, 5);
+ int imm_s = extract32(imm, 0, 5);
+ int width = imm_w - imm_s + 1;
+
+ if (imm_w < imm_s) {
+ /* These inputs have an undefined behavior. */
+ qemu_log_mask(LOG_GUEST_ERROR, "bsifi: Bad input w=%d s=%d\n",
+ imm_w, imm_s);
+ } else {
+ tcg_gen_deposit_i32(out, out, ina, imm_s, width);
}
+}
- not = dc->opcode & (1 << 1);
- LOG_DIS("and%s\n", not ? "n" : "");
+DO_TYPEA_CFG(bsra, use_barrel, false, gen_bsra)
+DO_TYPEA_CFG(bsrl, use_barrel, false, gen_bsrl)
+DO_TYPEA_CFG(bsll, use_barrel, false, gen_bsll)
- if (!dc->rd)
- return;
+DO_TYPEBI_CFG(bsrai, use_barrel, false, tcg_gen_sari_i32)
+DO_TYPEBI_CFG(bsrli, use_barrel, false, tcg_gen_shri_i32)
+DO_TYPEBI_CFG(bslli, use_barrel, false, tcg_gen_shli_i32)
+
+DO_TYPEBI_CFG(bsefi, use_barrel, false, gen_bsefi)
+DO_TYPEBI_CFG(bsifi, use_barrel, false, gen_bsifi)
- if (not) {
- tcg_gen_andc_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
- } else
- tcg_gen_and_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
+static void gen_clz(TCGv_i32 out, TCGv_i32 ina)
+{
+ tcg_gen_clzi_i32(out, ina, 32);
}
-static void dec_or(DisasContext *dc)
+DO_TYPEA0_CFG(clz, use_pcmp_instr, false, gen_clz)
+
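+/*
+ * cmp/cmpu: out = inb - ina, with bit 31 replaced by the result of
+ * "ina > inb" (signed for cmp, unsigned for cmpu).
+ */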
+static void gen_cmp(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
{
- if (!dc->type_b && (dc->imm & (1 << 10))) {
- dec_pattern(dc);
- return;
- }
+ TCGv_i32 lt = tcg_temp_new_i32();
- LOG_DIS("or r%d r%d r%d imm=%x\n", dc->rd, dc->ra, dc->rb, dc->imm);
- if (dc->rd)
- tcg_gen_or_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
+ tcg_gen_setcond_i32(TCG_COND_LT, lt, inb, ina);
+ tcg_gen_sub_i32(out, inb, ina);
+ tcg_gen_deposit_i32(out, out, lt, 31, 1);
+ tcg_temp_free_i32(lt);
}
-static void dec_xor(DisasContext *dc)
+static void gen_cmpu(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
{
- if (!dc->type_b && (dc->imm & (1 << 10))) {
- dec_pattern(dc);
- return;
- }
+ TCGv_i32 lt = tcg_temp_new_i32();
- LOG_DIS("xor r%d\n", dc->rd);
- if (dc->rd)
- tcg_gen_xor_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
+ tcg_gen_setcond_i32(TCG_COND_LTU, lt, inb, ina);
+ tcg_gen_sub_i32(out, inb, ina);
+ tcg_gen_deposit_i32(out, out, lt, 31, 1);
+ tcg_temp_free_i32(lt);
}
-static inline void msr_read(DisasContext *dc, TCGv_i32 d)
+DO_TYPEA(cmp, false, gen_cmp)
+DO_TYPEA(cmpu, false, gen_cmpu)
+
+ENV_WRAPPER3(gen_fadd, gen_helper_fadd)
+ENV_WRAPPER3(gen_frsub, gen_helper_frsub)
+ENV_WRAPPER3(gen_fmul, gen_helper_fmul)
+ENV_WRAPPER3(gen_fdiv, gen_helper_fdiv)
+ENV_WRAPPER3(gen_fcmp_un, gen_helper_fcmp_un)
+ENV_WRAPPER3(gen_fcmp_lt, gen_helper_fcmp_lt)
+ENV_WRAPPER3(gen_fcmp_eq, gen_helper_fcmp_eq)
+ENV_WRAPPER3(gen_fcmp_le, gen_helper_fcmp_le)
+ENV_WRAPPER3(gen_fcmp_gt, gen_helper_fcmp_gt)
+ENV_WRAPPER3(gen_fcmp_ne, gen_helper_fcmp_ne)
+ENV_WRAPPER3(gen_fcmp_ge, gen_helper_fcmp_ge)
+
+DO_TYPEA_CFG(fadd, use_fpu, true, gen_fadd)
+DO_TYPEA_CFG(frsub, use_fpu, true, gen_frsub)
+DO_TYPEA_CFG(fmul, use_fpu, true, gen_fmul)
+DO_TYPEA_CFG(fdiv, use_fpu, true, gen_fdiv)
+DO_TYPEA_CFG(fcmp_un, use_fpu, true, gen_fcmp_un)
+DO_TYPEA_CFG(fcmp_lt, use_fpu, true, gen_fcmp_lt)
+DO_TYPEA_CFG(fcmp_eq, use_fpu, true, gen_fcmp_eq)
+DO_TYPEA_CFG(fcmp_le, use_fpu, true, gen_fcmp_le)
+DO_TYPEA_CFG(fcmp_gt, use_fpu, true, gen_fcmp_gt)
+DO_TYPEA_CFG(fcmp_ne, use_fpu, true, gen_fcmp_ne)
+DO_TYPEA_CFG(fcmp_ge, use_fpu, true, gen_fcmp_ge)
+
+ENV_WRAPPER2(gen_flt, gen_helper_flt)
+ENV_WRAPPER2(gen_fint, gen_helper_fint)
+ENV_WRAPPER2(gen_fsqrt, gen_helper_fsqrt)
+
+DO_TYPEA0_CFG(flt, use_fpu >= 2, true, gen_flt)
+DO_TYPEA0_CFG(fint, use_fpu >= 2, true, gen_fint)
+DO_TYPEA0_CFG(fsqrt, use_fpu >= 2, true, gen_fsqrt)
+
+/* Does not use ENV_WRAPPER3, because arguments are swapped as well. */
+static void gen_idiv(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
{
- tcg_gen_extrl_i64_i32(d, cpu_SR[SR_MSR]);
+ gen_helper_divs(out, cpu_env, inb, ina);
}
-static inline void msr_write(DisasContext *dc, TCGv_i32 v)
+static void gen_idivu(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
{
- TCGv_i64 t;
+ gen_helper_divu(out, cpu_env, inb, ina);
+}
- t = tcg_temp_new_i64();
- dc->cpustate_changed = 1;
- /* PVR bit is not writable. */
- tcg_gen_extu_i32_i64(t, v);
- tcg_gen_andi_i64(t, t, ~MSR_PVR);
- tcg_gen_andi_i64(cpu_SR[SR_MSR], cpu_SR[SR_MSR], MSR_PVR);
- tcg_gen_or_i64(cpu_SR[SR_MSR], cpu_SR[SR_MSR], t);
- tcg_temp_free_i64(t);
-}
-
-static void dec_msr(DisasContext *dc)
-{
- CPUState *cs = CPU(dc->cpu);
- TCGv_i32 t0, t1;
- unsigned int sr, rn;
- bool to, clrset, extended = false;
-
- sr = extract32(dc->imm, 0, 14);
- to = extract32(dc->imm, 14, 1);
- clrset = extract32(dc->imm, 15, 1) == 0;
- dc->type_b = 1;
- if (to) {
- dc->cpustate_changed = 1;
- }
+DO_TYPEA_CFG(idiv, use_div, true, gen_idiv)
+DO_TYPEA_CFG(idivu, use_div, true, gen_idivu)
- /* Extended MSRs are only available if addr_size > 32. */
- if (dc->cpu->cfg.addr_size > 32) {
- /* The E-bit is encoded differently for To/From MSR. */
- static const unsigned int e_bit[] = { 19, 24 };
+static bool trans_imm(DisasContext *dc, arg_imm *arg)
+{
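+    /* imm supplies the high 16 bits for the following type-B immediate. */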
+ dc->ext_imm = arg->imm << 16;
+ tcg_gen_movi_i32(cpu_imm, dc->ext_imm);
+ dc->tb_flags_to_set = IMM_FLAG;
+ return true;
+}
- extended = extract32(dc->imm, e_bit[to], 1);
- }
+static void gen_mulh(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_muls2_i32(tmp, out, ina, inb);
+ tcg_temp_free_i32(tmp);
+}
- /* msrclr and msrset. */
- if (clrset) {
- bool clr = extract32(dc->ir, 16, 1);
+static void gen_mulhu(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_mulu2_i32(tmp, out, ina, inb);
+ tcg_temp_free_i32(tmp);
+}
- LOG_DIS("msr%s r%d imm=%x\n", clr ? "clr" : "set",
- dc->rd, dc->imm);
+static void gen_mulhsu(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_mulsu2_i32(tmp, out, ina, inb);
+ tcg_temp_free_i32(tmp);
+}
- if (!dc->cpu->cfg.use_msr_instr) {
- /* nop??? */
- return;
- }
+DO_TYPEA_CFG(mul, use_hw_mul, false, tcg_gen_mul_i32)
+DO_TYPEA_CFG(mulh, use_hw_mul >= 2, false, gen_mulh)
+DO_TYPEA_CFG(mulhu, use_hw_mul >= 2, false, gen_mulhu)
+DO_TYPEA_CFG(mulhsu, use_hw_mul >= 2, false, gen_mulhsu)
+DO_TYPEBI_CFG(muli, use_hw_mul, false, tcg_gen_muli_i32)
- if (trap_userspace(dc, dc->imm != 4 && dc->imm != 0)) {
- return;
- }
+DO_TYPEA(or, false, tcg_gen_or_i32)
+DO_TYPEBI(ori, false, tcg_gen_ori_i32)
- if (dc->rd)
- msr_read(dc, cpu_R[dc->rd]);
-
- t0 = tcg_temp_new_i32();
- t1 = tcg_temp_new_i32();
- msr_read(dc, t0);
- tcg_gen_mov_i32(t1, *(dec_alu_op_b(dc)));
-
- if (clr) {
- tcg_gen_not_i32(t1, t1);
- tcg_gen_and_i32(t0, t0, t1);
- } else
- tcg_gen_or_i32(t0, t0, t1);
- msr_write(dc, t0);
- tcg_temp_free_i32(t0);
- tcg_temp_free_i32(t1);
- tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc + 4);
- dc->is_jmp = DISAS_UPDATE;
- return;
- }
+static void gen_pcmpeq(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
+{
+ tcg_gen_setcond_i32(TCG_COND_EQ, out, ina, inb);
+}
- if (trap_userspace(dc, to)) {
- return;
- }
+static void gen_pcmpne(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
+{
+ tcg_gen_setcond_i32(TCG_COND_NE, out, ina, inb);
+}
-#if !defined(CONFIG_USER_ONLY)
- /* Catch read/writes to the mmu block. */
- if ((sr & ~0xff) == 0x1000) {
- TCGv_i32 tmp_ext = tcg_const_i32(extended);
- TCGv_i32 tmp_sr;
+DO_TYPEA_CFG(pcmpbf, use_pcmp_instr, false, gen_helper_pcmpbf)
+DO_TYPEA_CFG(pcmpeq, use_pcmp_instr, false, gen_pcmpeq)
+DO_TYPEA_CFG(pcmpne, use_pcmp_instr, false, gen_pcmpne)
- sr &= 7;
- tmp_sr = tcg_const_i32(sr);
- LOG_DIS("m%ss sr%d r%d imm=%x\n", to ? "t" : "f", sr, dc->ra, dc->imm);
- if (to) {
- gen_helper_mmu_write(cpu_env, tmp_ext, tmp_sr, cpu_R[dc->ra]);
- } else {
- gen_helper_mmu_read(cpu_R[dc->rd], cpu_env, tmp_ext, tmp_sr);
- }
- tcg_temp_free_i32(tmp_sr);
- tcg_temp_free_i32(tmp_ext);
- return;
- }
-#endif
+/* No input carry, but output carry. */
+static void gen_rsub(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
+{
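+    /* rsub is inb + ~ina + 1; carry out is set iff inb >= ina (unsigned). */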
+ tcg_gen_setcond_i32(TCG_COND_GEU, cpu_msr_c, inb, ina);
+ tcg_gen_sub_i32(out, inb, ina);
+}
- if (to) {
- LOG_DIS("m%ss sr%x r%d imm=%x\n", to ? "t" : "f", sr, dc->ra, dc->imm);
- switch (sr) {
- case 0:
- break;
- case 1:
- msr_write(dc, cpu_R[dc->ra]);
- break;
- case SR_EAR:
- case SR_ESR:
- case SR_FSR:
- tcg_gen_extu_i32_i64(cpu_SR[sr], cpu_R[dc->ra]);
- break;
- case 0x800:
- tcg_gen_st_i32(cpu_R[dc->ra],
- cpu_env, offsetof(CPUMBState, slr));
- break;
- case 0x802:
- tcg_gen_st_i32(cpu_R[dc->ra],
- cpu_env, offsetof(CPUMBState, shr));
- break;
- default:
- cpu_abort(CPU(dc->cpu), "unknown mts reg %x\n", sr);
- break;
- }
- } else {
- LOG_DIS("m%ss r%d sr%x imm=%x\n", to ? "t" : "f", dc->rd, sr, dc->imm);
-
- switch (sr) {
- case 0:
- tcg_gen_movi_i32(cpu_R[dc->rd], dc->pc);
- break;
- case 1:
- msr_read(dc, cpu_R[dc->rd]);
- break;
- case SR_EAR:
- if (extended) {
- tcg_gen_extrh_i64_i32(cpu_R[dc->rd], cpu_SR[sr]);
- break;
- }
- case SR_ESR:
- case SR_FSR:
- case SR_BTR:
- case SR_EDR:
- tcg_gen_extrl_i64_i32(cpu_R[dc->rd], cpu_SR[sr]);
- break;
- case 0x800:
- tcg_gen_ld_i32(cpu_R[dc->rd],
- cpu_env, offsetof(CPUMBState, slr));
- break;
- case 0x802:
- tcg_gen_ld_i32(cpu_R[dc->rd],
- cpu_env, offsetof(CPUMBState, shr));
- break;
- case 0x2000 ... 0x200c:
- rn = sr & 0xf;
- tcg_gen_ld_i32(cpu_R[dc->rd],
- cpu_env, offsetof(CPUMBState, pvr.regs[rn]));
- break;
- default:
- cpu_abort(cs, "unknown mfs reg %x\n", sr);
- break;
- }
- }
+/* Input and output carry. */
+static void gen_rsubc(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
+{
+ TCGv_i32 zero = tcg_const_i32(0);
+ TCGv_i32 tmp = tcg_temp_new_i32();
- if (dc->rd == 0) {
- tcg_gen_movi_i32(cpu_R[0], 0);
- }
+ tcg_gen_not_i32(tmp, ina);
+ tcg_gen_add2_i32(tmp, cpu_msr_c, tmp, zero, cpu_msr_c, zero);
+ tcg_gen_add2_i32(out, cpu_msr_c, tmp, cpu_msr_c, inb, zero);
+
+ tcg_temp_free_i32(zero);
+ tcg_temp_free_i32(tmp);
}
-/* Multiplier unit. */
-static void dec_mul(DisasContext *dc)
+/* No input or output carry. */
+static void gen_rsubk(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
{
- TCGv_i32 tmp;
- unsigned int subcode;
+ tcg_gen_sub_i32(out, inb, ina);
+}
- if (trap_illegal(dc, !dc->cpu->cfg.use_hw_mul)) {
- return;
- }
+/* Input carry, no output carry. */
+static void gen_rsubkc(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb)
+{
+ TCGv_i32 nota = tcg_temp_new_i32();
- subcode = dc->imm & 3;
+ tcg_gen_not_i32(nota, ina);
+ tcg_gen_add_i32(out, inb, nota);
+ tcg_gen_add_i32(out, out, cpu_msr_c);
- if (dc->type_b) {
- LOG_DIS("muli r%d r%d %x\n", dc->rd, dc->ra, dc->imm);
- tcg_gen_mul_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
- return;
- }
+ tcg_temp_free_i32(nota);
+}
- /* mulh, mulhsu and mulhu are not available if C_USE_HW_MUL is < 2. */
- if (subcode >= 1 && subcode <= 3 && dc->cpu->cfg.use_hw_mul < 2) {
- /* nop??? */
- }
+DO_TYPEA(rsub, true, gen_rsub)
+DO_TYPEA(rsubc, true, gen_rsubc)
+DO_TYPEA(rsubk, false, gen_rsubk)
+DO_TYPEA(rsubkc, true, gen_rsubkc)
+
+DO_TYPEBV(rsubi, true, gen_rsub)
+DO_TYPEBV(rsubic, true, gen_rsubc)
+DO_TYPEBV(rsubik, false, gen_rsubk)
+DO_TYPEBV(rsubikc, true, gen_rsubkc)
+
+DO_TYPEA0(sext8, false, tcg_gen_ext8s_i32)
+DO_TYPEA0(sext16, false, tcg_gen_ext16s_i32)
+
+static void gen_sra(TCGv_i32 out, TCGv_i32 ina)
+{
+ tcg_gen_andi_i32(cpu_msr_c, ina, 1);
+ tcg_gen_sari_i32(out, ina, 1);
+}
+
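+/*
+ * Shift right through carry: the old MSR[C] becomes bit 31 of the result
+ * and bit 0 of ina becomes the new MSR[C].
+ */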
+static void gen_src(TCGv_i32 out, TCGv_i32 ina)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ tcg_gen_mov_i32(tmp, cpu_msr_c);
+ tcg_gen_andi_i32(cpu_msr_c, ina, 1);
+ tcg_gen_extract2_i32(out, ina, tmp, 1);
- tmp = tcg_temp_new_i32();
- switch (subcode) {
- case 0:
- LOG_DIS("mul r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
- tcg_gen_mul_i32(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
- break;
- case 1:
- LOG_DIS("mulh r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
- tcg_gen_muls2_i32(tmp, cpu_R[dc->rd],
- cpu_R[dc->ra], cpu_R[dc->rb]);
- break;
- case 2:
- LOG_DIS("mulhsu r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
- tcg_gen_mulsu2_i32(tmp, cpu_R[dc->rd],
- cpu_R[dc->ra], cpu_R[dc->rb]);
- break;
- case 3:
- LOG_DIS("mulhu r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
- tcg_gen_mulu2_i32(tmp, cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
- break;
- default:
- cpu_abort(CPU(dc->cpu), "unknown MUL insn %x\n", subcode);
- break;
- }
tcg_temp_free_i32(tmp);
}
-/* Div unit. */
-static void dec_div(DisasContext *dc)
+static void gen_srl(TCGv_i32 out, TCGv_i32 ina)
{
- unsigned int u;
+ tcg_gen_andi_i32(cpu_msr_c, ina, 1);
+ tcg_gen_shri_i32(out, ina, 1);
+}
- u = dc->imm & 2;
- LOG_DIS("div\n");
+DO_TYPEA0(sra, false, gen_sra)
+DO_TYPEA0(src, false, gen_src)
+DO_TYPEA0(srl, false, gen_srl)
- if (trap_illegal(dc, !dc->cpu->cfg.use_div)) {
- return;
- }
+static void gen_swaph(TCGv_i32 out, TCGv_i32 ina)
+{
+ tcg_gen_rotri_i32(out, ina, 16);
+}
+
+DO_TYPEA0(swapb, false, tcg_gen_bswap32_i32)
+DO_TYPEA0(swaph, false, gen_swaph)
- if (u)
- gen_helper_divu(cpu_R[dc->rd], cpu_env, *(dec_alu_op_b(dc)),
- cpu_R[dc->ra]);
- else
- gen_helper_divs(cpu_R[dc->rd], cpu_env, *(dec_alu_op_b(dc)),
- cpu_R[dc->ra]);
- if (!dc->rd)
- tcg_gen_movi_i32(cpu_R[dc->rd], 0);
+static bool trans_wdic(DisasContext *dc, arg_wdic *a)
+{
+ /* Cache operations are nops: only check for supervisor mode. */
+ trap_userspace(dc, true);
+ return true;
}
-static void dec_barrel(DisasContext *dc)
+DO_TYPEA(xor, false, tcg_gen_xor_i32)
+DO_TYPEBI(xori, false, tcg_gen_xori_i32)
+
+static TCGv compute_ldst_addr_typea(DisasContext *dc, int ra, int rb)
{
- TCGv_i32 t0;
- unsigned int imm_w, imm_s;
- bool s, t, e = false, i = false;
+ TCGv ret = tcg_temp_new();
- if (trap_illegal(dc, !dc->cpu->cfg.use_barrel)) {
- return;
+    /* If either reg is r0, the address is just the value of the other reg. */
+ if (ra && rb) {
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_add_i32(tmp, cpu_R[ra], cpu_R[rb]);
+ tcg_gen_extu_i32_tl(ret, tmp);
+ tcg_temp_free_i32(tmp);
+ } else if (ra) {
+ tcg_gen_extu_i32_tl(ret, cpu_R[ra]);
+ } else if (rb) {
+ tcg_gen_extu_i32_tl(ret, cpu_R[rb]);
+ } else {
+ tcg_gen_movi_tl(ret, 0);
}
- if (dc->type_b) {
- /* Insert and extract are only available in immediate mode. */
- i = extract32(dc->imm, 15, 1);
- e = extract32(dc->imm, 14, 1);
+ if ((ra == 1 || rb == 1) && dc->cpu->cfg.stackprot) {
+ gen_helper_stackprot(cpu_env, ret);
}
- s = extract32(dc->imm, 10, 1);
- t = extract32(dc->imm, 9, 1);
- imm_w = extract32(dc->imm, 6, 5);
- imm_s = extract32(dc->imm, 0, 5);
+ return ret;
+}
- LOG_DIS("bs%s%s%s r%d r%d r%d\n",
- e ? "e" : "",
- s ? "l" : "r", t ? "a" : "l", dc->rd, dc->ra, dc->rb);
+static TCGv compute_ldst_addr_typeb(DisasContext *dc, int ra, int imm)
+{
+ TCGv ret = tcg_temp_new();
- if (e) {
- if (imm_w + imm_s > 32 || imm_w == 0) {
- /* These inputs have an undefined behavior. */
- qemu_log_mask(LOG_GUEST_ERROR, "bsefi: Bad input w=%d s=%d\n",
- imm_w, imm_s);
- } else {
- tcg_gen_extract_i32(cpu_R[dc->rd], cpu_R[dc->ra], imm_s, imm_w);
- }
- } else if (i) {
- int width = imm_w - imm_s + 1;
+    /* If ra is r0, the address is just the immediate. */
+ if (ra) {
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_addi_i32(tmp, cpu_R[ra], imm);
+ tcg_gen_extu_i32_tl(ret, tmp);
+ tcg_temp_free_i32(tmp);
+ } else {
+ tcg_gen_movi_tl(ret, (uint32_t)imm);
+ }
- if (imm_w < imm_s) {
- /* These inputs have an undefined behavior. */
- qemu_log_mask(LOG_GUEST_ERROR, "bsifi: Bad input w=%d s=%d\n",
- imm_w, imm_s);
+ if (ra == 1 && dc->cpu->cfg.stackprot) {
+ gen_helper_stackprot(cpu_env, ret);
+ }
+ return ret;
+}
+
+#ifndef CONFIG_USER_ONLY
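+/*
+ * Extended-address (*ea) accesses build a wider address from the {ra, rb}
+ * pair, with ra supplying the high word, masked to cfg.addr_size bits.
+ */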
+static TCGv compute_ldst_addr_ea(DisasContext *dc, int ra, int rb)
+{
+ int addr_size = dc->cpu->cfg.addr_size;
+ TCGv ret = tcg_temp_new();
+
+ if (addr_size == 32 || ra == 0) {
+ if (rb) {
+ tcg_gen_extu_i32_tl(ret, cpu_R[rb]);
} else {
- tcg_gen_deposit_i32(cpu_R[dc->rd], cpu_R[dc->rd], cpu_R[dc->ra],
- imm_s, width);
+ tcg_gen_movi_tl(ret, 0);
}
} else {
- t0 = tcg_temp_new_i32();
-
- tcg_gen_mov_i32(t0, *(dec_alu_op_b(dc)));
- tcg_gen_andi_i32(t0, t0, 31);
-
- if (s) {
- tcg_gen_shl_i32(cpu_R[dc->rd], cpu_R[dc->ra], t0);
+ if (rb) {
+ tcg_gen_concat_i32_i64(ret, cpu_R[rb], cpu_R[ra]);
} else {
- if (t) {
- tcg_gen_sar_i32(cpu_R[dc->rd], cpu_R[dc->ra], t0);
- } else {
- tcg_gen_shr_i32(cpu_R[dc->rd], cpu_R[dc->ra], t0);
- }
+ tcg_gen_extu_i32_tl(ret, cpu_R[ra]);
+ tcg_gen_shli_tl(ret, ret, 32);
+ }
+ if (addr_size < 64) {
+ /* Mask off out of range bits. */
+ tcg_gen_andi_i64(ret, ret, MAKE_64BIT_MASK(0, addr_size));
}
- tcg_temp_free_i32(t0);
}
+ return ret;
}
+#endif
-static void dec_bit(DisasContext *dc)
+static void record_unaligned_ess(DisasContext *dc, int rd,
+ MemOp size, bool store)
{
- CPUState *cs = CPU(dc->cpu);
- TCGv_i32 t0;
- unsigned int op;
+ uint32_t iflags = tcg_get_insn_start_param(dc->insn_start, 1);
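+
+    /*
+     * Pack the ESR exception-specific-status fields (destination reg,
+     * store flag, word-size flag) next to the insn start so they can be
+     * recovered if this access raises an unaligned-access exception.
+     */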
- op = dc->ir & ((1 << 9) - 1);
- switch (op) {
- case 0x21:
- /* src. */
- t0 = tcg_temp_new_i32();
+ iflags |= ESR_ESS_FLAG;
+ iflags |= rd << 5;
+ iflags |= store * ESR_S;
+ iflags |= (size == MO_32) * ESR_W;
- LOG_DIS("src r%d r%d\n", dc->rd, dc->ra);
- tcg_gen_extrl_i64_i32(t0, cpu_SR[SR_MSR]);
- tcg_gen_andi_i32(t0, t0, MSR_CC);
- write_carry(dc, cpu_R[dc->ra]);
- if (dc->rd) {
- tcg_gen_shri_i32(cpu_R[dc->rd], cpu_R[dc->ra], 1);
- tcg_gen_or_i32(cpu_R[dc->rd], cpu_R[dc->rd], t0);
- }
- tcg_temp_free_i32(t0);
- break;
-
- case 0x1:
- case 0x41:
- /* srl. */
- LOG_DIS("srl r%d r%d\n", dc->rd, dc->ra);
-
- /* Update carry. Note that write carry only looks at the LSB. */
- write_carry(dc, cpu_R[dc->ra]);
- if (dc->rd) {
- if (op == 0x41)
- tcg_gen_shri_i32(cpu_R[dc->rd], cpu_R[dc->ra], 1);
- else
- tcg_gen_sari_i32(cpu_R[dc->rd], cpu_R[dc->ra], 1);
- }
- break;
- case 0x60:
- LOG_DIS("ext8s r%d r%d\n", dc->rd, dc->ra);
- tcg_gen_ext8s_i32(cpu_R[dc->rd], cpu_R[dc->ra]);
- break;
- case 0x61:
- LOG_DIS("ext16s r%d r%d\n", dc->rd, dc->ra);
- tcg_gen_ext16s_i32(cpu_R[dc->rd], cpu_R[dc->ra]);
- break;
- case 0x64:
- case 0x66:
- case 0x74:
- case 0x76:
- /* wdc. */
- LOG_DIS("wdc r%d\n", dc->ra);
- trap_userspace(dc, true);
- break;
- case 0x68:
- /* wic. */
- LOG_DIS("wic r%d\n", dc->ra);
- trap_userspace(dc, true);
- break;
- case 0xe0:
- if (trap_illegal(dc, !dc->cpu->cfg.use_pcmp_instr)) {
- return;
- }
- if (dc->cpu->cfg.use_pcmp_instr) {
- tcg_gen_clzi_i32(cpu_R[dc->rd], cpu_R[dc->ra], 32);
- }
- break;
- case 0x1e0:
- /* swapb */
- LOG_DIS("swapb r%d r%d\n", dc->rd, dc->ra);
- tcg_gen_bswap32_i32(cpu_R[dc->rd], cpu_R[dc->ra]);
- break;
- case 0x1e2:
- /*swaph */
- LOG_DIS("swaph r%d r%d\n", dc->rd, dc->ra);
- tcg_gen_rotri_i32(cpu_R[dc->rd], cpu_R[dc->ra], 16);
- break;
- default:
- cpu_abort(cs, "unknown bit oc=%x op=%x rd=%d ra=%d rb=%d\n",
- dc->pc, op, dc->rd, dc->ra, dc->rb);
- break;
- }
+ tcg_set_insn_start_param(dc->insn_start, 1, iflags);
}
-static inline void sync_jmpstate(DisasContext *dc)
+static bool do_load(DisasContext *dc, int rd, TCGv addr, MemOp mop,
+ int mem_index, bool rev)
{
- if (dc->jmp == JMP_DIRECT || dc->jmp == JMP_DIRECT_CC) {
- if (dc->jmp == JMP_DIRECT) {
- tcg_gen_movi_i32(env_btaken, 1);
+ MemOp size = mop & MO_SIZE;
+
+ /*
+ * When doing reverse accesses we need to do two things.
+ *
+ * 1. Reverse the address wrt endianness.
+ * 2. Byteswap the data lanes on the way back into the CPU core.
+ */
+ if (rev) {
+ if (size > MO_8) {
+ mop ^= MO_BSWAP;
+ }
+ if (size < MO_32) {
+ tcg_gen_xori_tl(addr, addr, 3 - size);
}
- dc->jmp = JMP_INDIRECT;
- tcg_gen_movi_i64(env_btarget, dc->jmp_pc);
}
+
+ if (size > MO_8 &&
+ (dc->tb_flags & MSR_EE) &&
+ dc->cpu->cfg.unaligned_exceptions) {
+ record_unaligned_ess(dc, rd, size, false);
+ mop |= MO_ALIGN;
+ }
+
+ tcg_gen_qemu_ld_i32(reg_for_write(dc, rd), addr, mem_index, mop);
+
+ tcg_temp_free(addr);
+ return true;
}
-static void dec_imm(DisasContext *dc)
+static bool trans_lbu(DisasContext *dc, arg_typea *arg)
{
- LOG_DIS("imm %x\n", dc->imm << 16);
- tcg_gen_movi_i32(env_imm, (dc->imm << 16));
- dc->tb_flags |= IMM_FLAG;
- dc->clear_imm = 0;
+ TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
+ return do_load(dc, arg->rd, addr, MO_UB, dc->mem_index, false);
}
-static inline void compute_ldst_addr(DisasContext *dc, bool ea, TCGv t)
+static bool trans_lbur(DisasContext *dc, arg_typea *arg)
{
- bool extimm = dc->tb_flags & IMM_FLAG;
- /* Should be set to true if r1 is used by loadstores. */
- bool stackprot = false;
- TCGv_i32 t32;
+ TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
+ return do_load(dc, arg->rd, addr, MO_UB, dc->mem_index, true);
+}
- /* All load/stores use ra. */
- if (dc->ra == 1 && dc->cpu->cfg.stackprot) {
- stackprot = true;
+static bool trans_lbuea(DisasContext *dc, arg_typea *arg)
+{
+ if (trap_userspace(dc, true)) {
+ return true;
}
+#ifdef CONFIG_USER_ONLY
+ return true;
+#else
+ TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb);
+ return do_load(dc, arg->rd, addr, MO_UB, MMU_NOMMU_IDX, false);
+#endif
+}
- /* Treat the common cases first. */
- if (!dc->type_b) {
- if (ea) {
- int addr_size = dc->cpu->cfg.addr_size;
+static bool trans_lbui(DisasContext *dc, arg_typeb *arg)
+{
+ TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm);
+ return do_load(dc, arg->rd, addr, MO_UB, dc->mem_index, false);
+}
- if (addr_size == 32) {
- tcg_gen_extu_i32_tl(t, cpu_R[dc->rb]);
- return;
- }
+static bool trans_lhu(DisasContext *dc, arg_typea *arg)
+{
+ TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
+ return do_load(dc, arg->rd, addr, MO_TEUW, dc->mem_index, false);
+}
- tcg_gen_concat_i32_i64(t, cpu_R[dc->rb], cpu_R[dc->ra]);
- if (addr_size < 64) {
- /* Mask off out of range bits. */
- tcg_gen_andi_i64(t, t, MAKE_64BIT_MASK(0, addr_size));
- }
- return;
- }
+static bool trans_lhur(DisasContext *dc, arg_typea *arg)
+{
+ TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
+ return do_load(dc, arg->rd, addr, MO_TEUW, dc->mem_index, true);
+}
- /* If any of the regs is r0, set t to the value of the other reg. */
- if (dc->ra == 0) {
- tcg_gen_extu_i32_tl(t, cpu_R[dc->rb]);
- return;
- } else if (dc->rb == 0) {
- tcg_gen_extu_i32_tl(t, cpu_R[dc->ra]);
- return;
- }
+static bool trans_lhuea(DisasContext *dc, arg_typea *arg)
+{
+ if (trap_userspace(dc, true)) {
+ return true;
+ }
+#ifdef CONFIG_USER_ONLY
+ return true;
+#else
+ TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb);
+ return do_load(dc, arg->rd, addr, MO_TEUW, MMU_NOMMU_IDX, false);
+#endif
+}
- if (dc->rb == 1 && dc->cpu->cfg.stackprot) {
- stackprot = true;
- }
+static bool trans_lhui(DisasContext *dc, arg_typeb *arg)
+{
+ TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm);
+ return do_load(dc, arg->rd, addr, MO_TEUW, dc->mem_index, false);
+}
- t32 = tcg_temp_new_i32();
- tcg_gen_add_i32(t32, cpu_R[dc->ra], cpu_R[dc->rb]);
- tcg_gen_extu_i32_tl(t, t32);
- tcg_temp_free_i32(t32);
+static bool trans_lw(DisasContext *dc, arg_typea *arg)
+{
+ TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
+ return do_load(dc, arg->rd, addr, MO_TEUL, dc->mem_index, false);
+}
- if (stackprot) {
- gen_helper_stackprot(cpu_env, t);
- }
- return;
- }
- /* Immediate. */
- t32 = tcg_temp_new_i32();
- if (!extimm) {
- tcg_gen_addi_i32(t32, cpu_R[dc->ra], (int16_t)dc->imm);
- } else {
- tcg_gen_add_i32(t32, cpu_R[dc->ra], *(dec_alu_op_b(dc)));
- }
- tcg_gen_extu_i32_tl(t, t32);
- tcg_temp_free_i32(t32);
+static bool trans_lwr(DisasContext *dc, arg_typea *arg)
+{
+ TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
+ return do_load(dc, arg->rd, addr, MO_TEUL, dc->mem_index, true);
+}
- if (stackprot) {
- gen_helper_stackprot(cpu_env, t);
+static bool trans_lwea(DisasContext *dc, arg_typea *arg)
+{
+ if (trap_userspace(dc, true)) {
+ return true;
}
- return;
+#ifdef CONFIG_USER_ONLY
+ return true;
+#else
+ TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb);
+ return do_load(dc, arg->rd, addr, MO_TEUL, MMU_NOMMU_IDX, false);
+#endif
}
-static void dec_load(DisasContext *dc)
+static bool trans_lwi(DisasContext *dc, arg_typeb *arg)
{
- TCGv_i32 v;
- TCGv addr;
- unsigned int size;
- bool rev = false, ex = false, ea = false;
- int mem_index = cpu_mmu_index(&dc->cpu->env, false);
- MemOp mop;
+ TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm);
+ return do_load(dc, arg->rd, addr, MO_TEUL, dc->mem_index, false);
+}
- mop = dc->opcode & 3;
- size = 1 << mop;
- if (!dc->type_b) {
- ea = extract32(dc->ir, 7, 1);
- rev = extract32(dc->ir, 9, 1);
- ex = extract32(dc->ir, 10, 1);
- }
- mop |= MO_TE;
- if (rev) {
- mop ^= MO_BSWAP;
- }
+static bool trans_lwx(DisasContext *dc, arg_typea *arg)
+{
+ TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
- if (trap_illegal(dc, size > 4)) {
- return;
- }
+ /* lwx does not throw unaligned access errors, so force alignment */
+ tcg_gen_andi_tl(addr, addr, ~3);
- if (trap_userspace(dc, ea)) {
- return;
+ tcg_gen_qemu_ld_i32(cpu_res_val, addr, dc->mem_index, MO_TEUL);
+ tcg_gen_mov_tl(cpu_res_addr, addr);
+ tcg_temp_free(addr);
+
+ if (arg->rd) {
+ tcg_gen_mov_i32(cpu_R[arg->rd], cpu_res_val);
}
- LOG_DIS("l%d%s%s%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "",
- ex ? "x" : "",
- ea ? "ea" : "");
+ /* No support for AXI exclusive so always clear C */
+ tcg_gen_movi_i32(cpu_msr_c, 0);
+ return true;
+}
- t_sync_flags(dc);
- addr = tcg_temp_new();
- compute_ldst_addr(dc, ea, addr);
- /* Extended addressing bypasses the MMU. */
- mem_index = ea ? MMU_NOMMU_IDX : mem_index;
+static bool do_store(DisasContext *dc, int rd, TCGv addr, MemOp mop,
+ int mem_index, bool rev)
+{
+ MemOp size = mop & MO_SIZE;
/*
* When doing reverse accesses we need to do two things.
@@ -958,925 +885,1025 @@ static void dec_load(DisasContext *dc)
* 1. Reverse the address wrt endianness.
* 2. Byteswap the data lanes on the way back into the CPU core.
*/
- if (rev && size != 4) {
- /* Endian reverse the address. t is addr. */
- switch (size) {
- case 1:
- {
- tcg_gen_xori_tl(addr, addr, 3);
- break;
- }
-
- case 2:
- /* 00 -> 10
- 10 -> 00. */
- tcg_gen_xori_tl(addr, addr, 2);
- break;
- default:
- cpu_abort(CPU(dc->cpu), "Invalid reverse size\n");
- break;
+ if (rev) {
+ if (size > MO_8) {
+ mop ^= MO_BSWAP;
+ }
+ if (size < MO_32) {
+ tcg_gen_xori_tl(addr, addr, 3 - size);
}
}
- /* lwx does not throw unaligned access errors, so force alignment */
- if (ex) {
- tcg_gen_andi_tl(addr, addr, ~3);
+ if (size > MO_8 &&
+ (dc->tb_flags & MSR_EE) &&
+ dc->cpu->cfg.unaligned_exceptions) {
+ record_unaligned_ess(dc, rd, size, true);
+ mop |= MO_ALIGN;
}
- /* If we get a fault on a dslot, the jmpstate better be in sync. */
- sync_jmpstate(dc);
+ tcg_gen_qemu_st_i32(reg_for_read(dc, rd), addr, mem_index, mop);
- /* Verify alignment if needed. */
- /*
- * Microblaze gives MMU faults priority over faults due to
- * unaligned addresses. That's why we speculatively do the load
- * into v. If the load succeeds, we verify alignment of the
- * address and if that succeeds we write into the destination reg.
- */
- v = tcg_temp_new_i32();
- tcg_gen_qemu_ld_i32(v, addr, mem_index, mop);
+ tcg_temp_free(addr);
+ return true;
+}
- if (dc->cpu->cfg.unaligned_exceptions && size > 1) {
- TCGv_i32 t0 = tcg_const_i32(0);
- TCGv_i32 treg = tcg_const_i32(dc->rd);
- TCGv_i32 tsize = tcg_const_i32(size - 1);
+static bool trans_sb(DisasContext *dc, arg_typea *arg)
+{
+ TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
+ return do_store(dc, arg->rd, addr, MO_UB, dc->mem_index, false);
+}
- tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc);
- gen_helper_memalign(cpu_env, addr, treg, t0, tsize);
+static bool trans_sbr(DisasContext *dc, arg_typea *arg)
+{
+ TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
+ return do_store(dc, arg->rd, addr, MO_UB, dc->mem_index, true);
+}
- tcg_temp_free_i32(t0);
- tcg_temp_free_i32(treg);
- tcg_temp_free_i32(tsize);
+static bool trans_sbea(DisasContext *dc, arg_typea *arg)
+{
+ if (trap_userspace(dc, true)) {
+ return true;
}
+#ifdef CONFIG_USER_ONLY
+ return true;
+#else
+ TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb);
+ return do_store(dc, arg->rd, addr, MO_UB, MMU_NOMMU_IDX, false);
+#endif
+}
- if (ex) {
- tcg_gen_mov_tl(env_res_addr, addr);
- tcg_gen_mov_i32(env_res_val, v);
- }
- if (dc->rd) {
- tcg_gen_mov_i32(cpu_R[dc->rd], v);
- }
- tcg_temp_free_i32(v);
+static bool trans_sbi(DisasContext *dc, arg_typeb *arg)
+{
+ TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm);
+ return do_store(dc, arg->rd, addr, MO_UB, dc->mem_index, false);
+}
- if (ex) { /* lwx */
- /* no support for AXI exclusive so always clear C */
- write_carryi(dc, 0);
+static bool trans_sh(DisasContext *dc, arg_typea *arg)
+{
+ TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
+ return do_store(dc, arg->rd, addr, MO_TEUW, dc->mem_index, false);
+}
+
+static bool trans_shr(DisasContext *dc, arg_typea *arg)
+{
+ TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
+ return do_store(dc, arg->rd, addr, MO_TEUW, dc->mem_index, true);
+}
+
+static bool trans_shea(DisasContext *dc, arg_typea *arg)
+{
+ if (trap_userspace(dc, true)) {
+ return true;
}
+#ifdef CONFIG_USER_ONLY
+ return true;
+#else
+ TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb);
+ return do_store(dc, arg->rd, addr, MO_TEUW, MMU_NOMMU_IDX, false);
+#endif
+}
- tcg_temp_free(addr);
+static bool trans_shi(DisasContext *dc, arg_typeb *arg)
+{
+ TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm);
+ return do_store(dc, arg->rd, addr, MO_TEUW, dc->mem_index, false);
}
-static void dec_store(DisasContext *dc)
+static bool trans_sw(DisasContext *dc, arg_typea *arg)
{
- TCGv addr;
- TCGLabel *swx_skip = NULL;
- unsigned int size;
- bool rev = false, ex = false, ea = false;
- int mem_index = cpu_mmu_index(&dc->cpu->env, false);
- MemOp mop;
+ TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
+ return do_store(dc, arg->rd, addr, MO_TEUL, dc->mem_index, false);
+}
- mop = dc->opcode & 3;
- size = 1 << mop;
- if (!dc->type_b) {
- ea = extract32(dc->ir, 7, 1);
- rev = extract32(dc->ir, 9, 1);
- ex = extract32(dc->ir, 10, 1);
- }
- mop |= MO_TE;
- if (rev) {
- mop ^= MO_BSWAP;
- }
+static bool trans_swr(DisasContext *dc, arg_typea *arg)
+{
+ TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
+ return do_store(dc, arg->rd, addr, MO_TEUL, dc->mem_index, true);
+}
- if (trap_illegal(dc, size > 4)) {
- return;
+static bool trans_swea(DisasContext *dc, arg_typea *arg)
+{
+ if (trap_userspace(dc, true)) {
+ return true;
}
+#ifdef CONFIG_USER_ONLY
+ return true;
+#else
+ TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb);
+ return do_store(dc, arg->rd, addr, MO_TEUL, MMU_NOMMU_IDX, false);
+#endif
+}
+
+static bool trans_swi(DisasContext *dc, arg_typeb *arg)
+{
+ TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm);
+ return do_store(dc, arg->rd, addr, MO_TEUL, dc->mem_index, false);
+}
- trap_userspace(dc, ea);
+static bool trans_swx(DisasContext *dc, arg_typea *arg)
+{
+ TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
+ TCGLabel *swx_done = gen_new_label();
+ TCGLabel *swx_fail = gen_new_label();
+ TCGv_i32 tval;
- LOG_DIS("s%d%s%s%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "",
- ex ? "x" : "",
- ea ? "ea" : "");
- t_sync_flags(dc);
- /* If we get a fault on a dslot, the jmpstate better be in sync. */
- sync_jmpstate(dc);
- /* SWX needs a temp_local. */
- addr = ex ? tcg_temp_local_new() : tcg_temp_new();
- compute_ldst_addr(dc, ea, addr);
- /* Extended addressing bypasses the MMU. */
- mem_index = ea ? MMU_NOMMU_IDX : mem_index;
+ /* swx does not throw unaligned access errors, so force alignment */
+ tcg_gen_andi_tl(addr, addr, ~3);
- if (ex) { /* swx */
- TCGv_i32 tval;
+ /*
+ * Compare the address vs the one we used during lwx.
+ * On mismatch, the operation fails. On match, addr dies at the
+ * branch, but we know we can use the equal version in the global.
+ * In either case, addr is no longer needed.
+ */
+ tcg_gen_brcond_tl(TCG_COND_NE, cpu_res_addr, addr, swx_fail);
+ tcg_temp_free(addr);
- /* swx does not throw unaligned access errors, so force alignment */
- tcg_gen_andi_tl(addr, addr, ~3);
+ /*
+ * Compare the value loaded during lwx with current contents of
+ * the reserved location.
+ */
+ tval = tcg_temp_new_i32();
- write_carryi(dc, 1);
- swx_skip = gen_new_label();
- tcg_gen_brcond_tl(TCG_COND_NE, env_res_addr, addr, swx_skip);
+ tcg_gen_atomic_cmpxchg_i32(tval, cpu_res_addr, cpu_res_val,
+ reg_for_write(dc, arg->rd),
+ dc->mem_index, MO_TEUL);
- /*
- * Compare the value loaded at lwx with current contents of
- * the reserved location.
- */
- tval = tcg_temp_new_i32();
+ tcg_gen_brcond_i32(TCG_COND_NE, cpu_res_val, tval, swx_fail);
+ tcg_temp_free_i32(tval);
- tcg_gen_atomic_cmpxchg_i32(tval, addr, env_res_val,
- cpu_R[dc->rd], mem_index,
- mop);
+ /* Success */
+ tcg_gen_movi_i32(cpu_msr_c, 0);
+ tcg_gen_br(swx_done);
- tcg_gen_brcond_i32(TCG_COND_NE, env_res_val, tval, swx_skip);
- write_carryi(dc, 0);
- tcg_temp_free_i32(tval);
- }
+ /* Failure */
+ gen_set_label(swx_fail);
+ tcg_gen_movi_i32(cpu_msr_c, 1);
- if (rev && size != 4) {
- /* Endian reverse the address. t is addr. */
- switch (size) {
- case 1:
- {
- tcg_gen_xori_tl(addr, addr, 3);
- break;
- }
+ gen_set_label(swx_done);
- case 2:
- /* 00 -> 10
- 10 -> 00. */
- /* Force addr into the temp. */
- tcg_gen_xori_tl(addr, addr, 2);
- break;
- default:
- cpu_abort(CPU(dc->cpu), "Invalid reverse size\n");
- break;
- }
- }
+ /*
+     * Prevent the saved address from working again without another lwx.
+ * Akin to the pseudocode setting reservation = 0.
+ */
+ tcg_gen_movi_tl(cpu_res_addr, -1);
+ return true;
+}
- if (!ex) {
- tcg_gen_qemu_st_i32(cpu_R[dc->rd], addr, mem_index, mop);
+static void setup_dslot(DisasContext *dc, bool type_b)
+{
+ dc->tb_flags_to_set |= D_FLAG;
+ if (type_b && (dc->tb_flags & IMM_FLAG)) {
+ dc->tb_flags_to_set |= BIMM_FLAG;
}
+}
- /* Verify alignment if needed. */
- if (dc->cpu->cfg.unaligned_exceptions && size > 1) {
- TCGv_i32 t1 = tcg_const_i32(1);
- TCGv_i32 treg = tcg_const_i32(dc->rd);
- TCGv_i32 tsize = tcg_const_i32(size - 1);
-
- tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc);
- /* FIXME: if the alignment is wrong, we should restore the value
- * in memory. One possible way to achieve this is to probe
- * the MMU prior to the memaccess, thay way we could put
- * the alignment checks in between the probe and the mem
- * access.
- */
- gen_helper_memalign(cpu_env, addr, treg, t1, tsize);
+static bool do_branch(DisasContext *dc, int dest_rb, int dest_imm,
+ bool delay, bool abs, int link)
+{
+ uint32_t add_pc;
- tcg_temp_free_i32(t1);
- tcg_temp_free_i32(treg);
- tcg_temp_free_i32(tsize);
+ if (delay) {
+ setup_dslot(dc, dest_rb < 0);
}
- if (ex) {
- gen_set_label(swx_skip);
+ if (link) {
+ tcg_gen_movi_i32(cpu_R[link], dc->base.pc_next);
}
- tcg_temp_free(addr);
+ /* Store the branch taken destination into btarget. */
+ add_pc = abs ? 0 : dc->base.pc_next;
+ if (dest_rb > 0) {
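+        /* Register destination: target not known at translate time. */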
+ dc->jmp_dest = -1;
+ tcg_gen_addi_i32(cpu_btarget, cpu_R[dest_rb], add_pc);
+ } else {
+ dc->jmp_dest = add_pc + dest_imm;
+ tcg_gen_movi_i32(cpu_btarget, dc->jmp_dest);
+ }
+ dc->jmp_cond = TCG_COND_ALWAYS;
+ return true;
}
-static inline void eval_cc(DisasContext *dc, unsigned int cc,
- TCGv_i32 d, TCGv_i32 a)
+#define DO_BR(NAME, NAMEI, DELAY, ABS, LINK) \
+ static bool trans_##NAME(DisasContext *dc, arg_typea_br *arg) \
+ { return do_branch(dc, arg->rb, 0, DELAY, ABS, LINK ? arg->rd : 0); } \
+ static bool trans_##NAMEI(DisasContext *dc, arg_typeb_br *arg) \
+ { return do_branch(dc, -1, arg->imm, DELAY, ABS, LINK ? arg->rd : 0); }
+
+DO_BR(br, bri, false, false, false)
+DO_BR(bra, brai, false, true, false)
+DO_BR(brd, brid, true, false, false)
+DO_BR(brad, braid, true, true, false)
+DO_BR(brld, brlid, true, false, true)
+DO_BR(brald, bralid, true, true, true)
+
+static bool do_bcc(DisasContext *dc, int dest_rb, int dest_imm,
+ TCGCond cond, int ra, bool delay)
{
- static const int mb_to_tcg_cc[] = {
- [CC_EQ] = TCG_COND_EQ,
- [CC_NE] = TCG_COND_NE,
- [CC_LT] = TCG_COND_LT,
- [CC_LE] = TCG_COND_LE,
- [CC_GE] = TCG_COND_GE,
- [CC_GT] = TCG_COND_GT,
- };
+ TCGv_i32 zero, next;
- switch (cc) {
- case CC_EQ:
- case CC_NE:
- case CC_LT:
- case CC_LE:
- case CC_GE:
- case CC_GT:
- tcg_gen_setcondi_i32(mb_to_tcg_cc[cc], d, a, 0);
- break;
- default:
- cpu_abort(CPU(dc->cpu), "Unknown condition code %x.\n", cc);
- break;
+ if (delay) {
+ setup_dslot(dc, dest_rb < 0);
}
-}
-static void eval_cond_jmp(DisasContext *dc, TCGv_i64 pc_true, TCGv_i64 pc_false)
-{
- TCGv_i64 tmp_btaken = tcg_temp_new_i64();
- TCGv_i64 tmp_zero = tcg_const_i64(0);
+ dc->jmp_cond = cond;
- tcg_gen_extu_i32_i64(tmp_btaken, env_btaken);
- tcg_gen_movcond_i64(TCG_COND_NE, cpu_SR[SR_PC],
- tmp_btaken, tmp_zero,
- pc_true, pc_false);
+ /* Cache the condition register in cpu_bvalue across any delay slot. */
+ tcg_gen_mov_i32(cpu_bvalue, reg_for_read(dc, ra));
- tcg_temp_free_i64(tmp_btaken);
- tcg_temp_free_i64(tmp_zero);
+ /* Store the branch taken destination into btarget. */
+ if (dest_rb > 0) {
+ dc->jmp_dest = -1;
+ tcg_gen_addi_i32(cpu_btarget, cpu_R[dest_rb], dc->base.pc_next);
+ } else {
+ dc->jmp_dest = dc->base.pc_next + dest_imm;
+ tcg_gen_movi_i32(cpu_btarget, dc->jmp_dest);
+ }
+
+ /* Compute the final destination into btarget. */
+ zero = tcg_const_i32(0);
+ next = tcg_const_i32(dc->base.pc_next + (delay + 1) * 4);
+ tcg_gen_movcond_i32(dc->jmp_cond, cpu_btarget,
+ reg_for_read(dc, ra), zero,
+ cpu_btarget, next);
+ tcg_temp_free_i32(zero);
+ tcg_temp_free_i32(next);
+
+ return true;
}
-static void dec_setup_dslot(DisasContext *dc)
+#define DO_BCC(NAME, COND) \
+ static bool trans_##NAME(DisasContext *dc, arg_typea_bc *arg) \
+ { return do_bcc(dc, arg->rb, 0, COND, arg->ra, false); } \
+ static bool trans_##NAME##d(DisasContext *dc, arg_typea_bc *arg) \
+ { return do_bcc(dc, arg->rb, 0, COND, arg->ra, true); } \
+ static bool trans_##NAME##i(DisasContext *dc, arg_typeb_bc *arg) \
+ { return do_bcc(dc, -1, arg->imm, COND, arg->ra, false); } \
+ static bool trans_##NAME##id(DisasContext *dc, arg_typeb_bc *arg) \
+ { return do_bcc(dc, -1, arg->imm, COND, arg->ra, true); }
+
+DO_BCC(beq, TCG_COND_EQ)
+DO_BCC(bge, TCG_COND_GE)
+DO_BCC(bgt, TCG_COND_GT)
+DO_BCC(ble, TCG_COND_LE)
+DO_BCC(blt, TCG_COND_LT)
+DO_BCC(bne, TCG_COND_NE)
+
+static bool trans_brk(DisasContext *dc, arg_typea_br *arg)
{
- TCGv_i32 tmp = tcg_const_i32(dc->type_b && (dc->tb_flags & IMM_FLAG));
-
- dc->delayed_branch = 2;
- dc->tb_flags |= D_FLAG;
+ if (trap_userspace(dc, true)) {
+ return true;
+ }
+ tcg_gen_mov_i32(cpu_pc, reg_for_read(dc, arg->rb));
+ if (arg->rd) {
+ tcg_gen_movi_i32(cpu_R[arg->rd], dc->base.pc_next);
+ }
+ tcg_gen_ori_i32(cpu_msr, cpu_msr, MSR_BIP);
+ tcg_gen_movi_tl(cpu_res_addr, -1);
- tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUMBState, bimm));
- tcg_temp_free_i32(tmp);
+ dc->base.is_jmp = DISAS_UPDATE;
+ return true;
}
-static void dec_bcc(DisasContext *dc)
+static bool trans_brki(DisasContext *dc, arg_typeb_br *arg)
{
- unsigned int cc;
- unsigned int dslot;
-
- cc = EXTRACT_FIELD(dc->ir, 21, 23);
- dslot = dc->ir & (1 << 25);
- LOG_DIS("bcc%s r%d %x\n", dslot ? "d" : "", dc->ra, dc->imm);
+ uint32_t imm = arg->imm;
- dc->delayed_branch = 1;
- if (dslot) {
- dec_setup_dslot(dc);
+ if (trap_userspace(dc, imm != 0x8 && imm != 0x18)) {
+ return true;
}
+ tcg_gen_movi_i32(cpu_pc, imm);
+ if (arg->rd) {
+ tcg_gen_movi_i32(cpu_R[arg->rd], dc->base.pc_next);
+ }
+ tcg_gen_movi_tl(cpu_res_addr, -1);
- if (dec_alu_op_b_is_small_imm(dc)) {
- int32_t offset = (int32_t)((int16_t)dc->imm); /* sign-extend. */
+#ifdef CONFIG_USER_ONLY
+ switch (imm) {
+ case 0x8: /* syscall trap */
+ gen_raise_exception_sync(dc, EXCP_SYSCALL);
+ break;
+ case 0x18: /* debug trap */
+ gen_raise_exception_sync(dc, EXCP_DEBUG);
+ break;
+ default: /* eliminated with trap_userspace check */
+ g_assert_not_reached();
+ }
+#else
+ uint32_t msr_to_set = 0;
- tcg_gen_movi_i64(env_btarget, dc->pc + offset);
- dc->jmp = JMP_DIRECT_CC;
- dc->jmp_pc = dc->pc + offset;
- } else {
- dc->jmp = JMP_INDIRECT;
- tcg_gen_extu_i32_i64(env_btarget, *(dec_alu_op_b(dc)));
- tcg_gen_addi_i64(env_btarget, env_btarget, dc->pc);
- tcg_gen_andi_i64(env_btarget, env_btarget, UINT32_MAX);
+ if (imm != 0x18) {
+ msr_to_set |= MSR_BIP;
}
- eval_cc(dc, cc, env_btaken, cpu_R[dc->ra]);
+ if (imm == 0x8 || imm == 0x18) {
+ /* MSR_UM and MSR_VM are in tb_flags, so we know their value. */
+ msr_to_set |= (dc->tb_flags & (MSR_UM | MSR_VM)) << 1;
+ tcg_gen_andi_i32(cpu_msr, cpu_msr,
+ ~(MSR_VMS | MSR_UMS | MSR_VM | MSR_UM));
+ }
+ tcg_gen_ori_i32(cpu_msr, cpu_msr, msr_to_set);
+ dc->base.is_jmp = DISAS_UPDATE;
+#endif
+
+ return true;
}
-static void dec_br(DisasContext *dc)
+static bool trans_mbar(DisasContext *dc, arg_mbar *arg)
{
- unsigned int dslot, link, abs, mbar;
-
- dslot = dc->ir & (1 << 20);
- abs = dc->ir & (1 << 19);
- link = dc->ir & (1 << 18);
+ int mbar_imm = arg->imm;
- /* Memory barrier. */
- mbar = (dc->ir >> 16) & 31;
- if (mbar == 2 && dc->imm == 4) {
- uint16_t mbar_imm = dc->rd;
+ /* Data access memory barrier. */
+ if ((mbar_imm & 2) == 0) {
+ tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
+ }
- LOG_DIS("mbar %d\n", mbar_imm);
+ /* Sleep. */
+ if (mbar_imm & 16) {
+ TCGv_i32 tmp_1;
- /* Data access memory barrier. */
- if ((mbar_imm & 2) == 0) {
- tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
+ if (trap_userspace(dc, true)) {
+ /* Sleep is a privileged instruction. */
+ return true;
}
- /* mbar IMM & 16 decodes to sleep. */
- if (mbar_imm & 16) {
- TCGv_i32 tmp_hlt = tcg_const_i32(EXCP_HLT);
- TCGv_i32 tmp_1 = tcg_const_i32(1);
+ t_sync_flags(dc);
- LOG_DIS("sleep\n");
+ tmp_1 = tcg_const_i32(1);
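+        /*
+         * cpu_env points at env within MicroBlazeCPU; step back out to
+         * the enclosing CPUState to reach its halted field.
+         */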
+ tcg_gen_st_i32(tmp_1, cpu_env,
+ -offsetof(MicroBlazeCPU, env)
+ +offsetof(CPUState, halted));
+ tcg_temp_free_i32(tmp_1);
- if (trap_userspace(dc, true)) {
- /* Sleep is a privileged instruction. */
- return;
- }
+ tcg_gen_movi_i32(cpu_pc, dc->base.pc_next + 4);
- t_sync_flags(dc);
- tcg_gen_st_i32(tmp_1, cpu_env,
- -offsetof(MicroBlazeCPU, env)
- +offsetof(CPUState, halted));
- tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc + 4);
- gen_helper_raise_exception(cpu_env, tmp_hlt);
- tcg_temp_free_i32(tmp_hlt);
- tcg_temp_free_i32(tmp_1);
- return;
- }
- /* Break the TB. */
- dc->cpustate_changed = 1;
- return;
+ gen_raise_exception(dc, EXCP_HLT);
}
- LOG_DIS("br%s%s%s%s imm=%x\n",
- abs ? "a" : "", link ? "l" : "",
- dc->type_b ? "i" : "", dslot ? "d" : "",
- dc->imm);
+ /*
+ * If !(mbar_imm & 1), this is an instruction access memory barrier
+ * and we need to end the TB so that we recognize self-modified
+ * code immediately.
+ *
+ * However, there are some data mbars that need the TB break
+ * (and return to main loop) to recognize interrupts right away.
+ * E.g. recognizing a change to an interrupt controller register.
+ *
+ * Therefore, choose to end the TB always.
+ */
+ dc->cpustate_changed = 1;
+ return true;
+}
- dc->delayed_branch = 1;
- if (dslot) {
- dec_setup_dslot(dc);
+static bool do_rts(DisasContext *dc, arg_typeb_bc *arg, int to_set)
+{
+ if (trap_userspace(dc, to_set)) {
+ return true;
}
- if (link && dc->rd)
- tcg_gen_movi_i32(cpu_R[dc->rd], dc->pc);
-
- dc->jmp = JMP_INDIRECT;
- if (abs) {
- tcg_gen_movi_i32(env_btaken, 1);
- tcg_gen_extu_i32_i64(env_btarget, *(dec_alu_op_b(dc)));
- if (link && !dslot) {
- if (!(dc->tb_flags & IMM_FLAG) && (dc->imm == 8 || dc->imm == 0x18))
- t_gen_raise_exception(dc, EXCP_BREAK);
- if (dc->imm == 0) {
- if (trap_userspace(dc, true)) {
- return;
- }
+ dc->tb_flags_to_set |= to_set;
+ setup_dslot(dc, true);
- t_gen_raise_exception(dc, EXCP_DEBUG);
- }
- }
- } else {
- if (dec_alu_op_b_is_small_imm(dc)) {
- dc->jmp = JMP_DIRECT;
- dc->jmp_pc = dc->pc + (int32_t)((int16_t)dc->imm);
- } else {
- tcg_gen_movi_i32(env_btaken, 1);
- tcg_gen_extu_i32_i64(env_btarget, *(dec_alu_op_b(dc)));
- tcg_gen_addi_i64(env_btarget, env_btarget, dc->pc);
- tcg_gen_andi_i64(env_btarget, env_btarget, UINT32_MAX);
- }
- }
+ dc->jmp_cond = TCG_COND_ALWAYS;
+ dc->jmp_dest = -1;
+ tcg_gen_addi_i32(cpu_btarget, reg_for_read(dc, arg->ra), arg->imm);
+ return true;
}
-static inline void do_rti(DisasContext *dc)
-{
- TCGv_i32 t0, t1;
- t0 = tcg_temp_new_i32();
- t1 = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(t1, cpu_SR[SR_MSR]);
- tcg_gen_shri_i32(t0, t1, 1);
- tcg_gen_ori_i32(t1, t1, MSR_IE);
- tcg_gen_andi_i32(t0, t0, (MSR_VM | MSR_UM));
+#define DO_RTS(NAME, IFLAG) \
+ static bool trans_##NAME(DisasContext *dc, arg_typeb_bc *arg) \
+ { return do_rts(dc, arg, IFLAG); }
- tcg_gen_andi_i32(t1, t1, ~(MSR_VM | MSR_UM));
- tcg_gen_or_i32(t1, t1, t0);
- msr_write(dc, t1);
- tcg_temp_free_i32(t1);
- tcg_temp_free_i32(t0);
- dc->tb_flags &= ~DRTI_FLAG;
+DO_RTS(rtbd, DRTB_FLAG)
+DO_RTS(rtid, DRTI_FLAG)
+DO_RTS(rted, DRTE_FLAG)
+DO_RTS(rtsd, 0)
+
+static bool trans_zero(DisasContext *dc, arg_zero *arg)
+{
+ /* If opcode_0_illegal, trap. */
+ if (dc->cpu->cfg.opcode_0_illegal) {
+ trap_illegal(dc, true);
+ return true;
+ }
+ /*
+ * Otherwise, this is "add r0, r0, r0".
+ * Continue to trans_add so that MSR[C] gets cleared.
+ */
+ return false;
}
-static inline void do_rtb(DisasContext *dc)
+static void msr_read(DisasContext *dc, TCGv_i32 d)
{
- TCGv_i32 t0, t1;
- t0 = tcg_temp_new_i32();
- t1 = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(t1, cpu_SR[SR_MSR]);
- tcg_gen_andi_i32(t1, t1, ~MSR_BIP);
- tcg_gen_shri_i32(t0, t1, 1);
- tcg_gen_andi_i32(t0, t0, (MSR_VM | MSR_UM));
+ TCGv_i32 t;
- tcg_gen_andi_i32(t1, t1, ~(MSR_VM | MSR_UM));
- tcg_gen_or_i32(t1, t1, t0);
- msr_write(dc, t1);
- tcg_temp_free_i32(t1);
- tcg_temp_free_i32(t0);
- dc->tb_flags &= ~DRTB_FLAG;
+ /* Replicate the cpu_msr_c boolean into the proper bit and the copy. */
+ t = tcg_temp_new_i32();
+ tcg_gen_muli_i32(t, cpu_msr_c, MSR_C | MSR_CC);
+ tcg_gen_or_i32(d, cpu_msr, t);
+ tcg_temp_free_i32(t);
}
-static inline void do_rte(DisasContext *dc)
+#ifndef CONFIG_USER_ONLY
+static void msr_write(DisasContext *dc, TCGv_i32 v)
{
- TCGv_i32 t0, t1;
- t0 = tcg_temp_new_i32();
- t1 = tcg_temp_new_i32();
+ dc->cpustate_changed = 1;
- tcg_gen_extrl_i64_i32(t1, cpu_SR[SR_MSR]);
- tcg_gen_ori_i32(t1, t1, MSR_EE);
- tcg_gen_andi_i32(t1, t1, ~MSR_EIP);
- tcg_gen_shri_i32(t0, t1, 1);
- tcg_gen_andi_i32(t0, t0, (MSR_VM | MSR_UM));
+ /* Install MSR_C. */
+ tcg_gen_extract_i32(cpu_msr_c, v, 2, 1);
- tcg_gen_andi_i32(t1, t1, ~(MSR_VM | MSR_UM));
- tcg_gen_or_i32(t1, t1, t0);
- msr_write(dc, t1);
- tcg_temp_free_i32(t1);
- tcg_temp_free_i32(t0);
- dc->tb_flags &= ~DRTE_FLAG;
+ /* Clear MSR_C and MSR_CC; MSR_PVR is not writable, and is always clear. */
+ tcg_gen_andi_i32(cpu_msr, v, ~(MSR_C | MSR_CC | MSR_PVR));
}
+#endif
-static void dec_rts(DisasContext *dc)
+static bool do_msrclrset(DisasContext *dc, arg_type_msr *arg, bool set)
{
- unsigned int b_bit, i_bit, e_bit;
- TCGv_i64 tmp64;
+ uint32_t imm = arg->imm;
- i_bit = dc->ir & (1 << 21);
- b_bit = dc->ir & (1 << 22);
- e_bit = dc->ir & (1 << 23);
-
- if (trap_userspace(dc, i_bit || b_bit || e_bit)) {
- return;
+ if (trap_userspace(dc, imm != MSR_C)) {
+ return true;
}
- dec_setup_dslot(dc);
+ if (arg->rd) {
+ msr_read(dc, cpu_R[arg->rd]);
+ }
- if (i_bit) {
- LOG_DIS("rtid ir=%x\n", dc->ir);
- dc->tb_flags |= DRTI_FLAG;
- } else if (b_bit) {
- LOG_DIS("rtbd ir=%x\n", dc->ir);
- dc->tb_flags |= DRTB_FLAG;
- } else if (e_bit) {
- LOG_DIS("rted ir=%x\n", dc->ir);
- dc->tb_flags |= DRTE_FLAG;
- } else
- LOG_DIS("rts ir=%x\n", dc->ir);
+ /*
+ * Handle the carry bit separately.
+ * This is the only bit that userspace can modify.
+ */
+ if (imm & MSR_C) {
+ tcg_gen_movi_i32(cpu_msr_c, set);
+ }
- dc->jmp = JMP_INDIRECT;
- tcg_gen_movi_i32(env_btaken, 1);
+ /*
+     * MSR_C and MSR_CC were handled above.
+     * MSR_PVR is not writable, and is always clear.
+ */
+ imm &= ~(MSR_C | MSR_CC | MSR_PVR);
- tmp64 = tcg_temp_new_i64();
- tcg_gen_extu_i32_i64(env_btarget, *(dec_alu_op_b(dc)));
- tcg_gen_extu_i32_i64(tmp64, cpu_R[dc->ra]);
- tcg_gen_add_i64(env_btarget, env_btarget, tmp64);
- tcg_gen_andi_i64(env_btarget, env_btarget, UINT32_MAX);
- tcg_temp_free_i64(tmp64);
+ if (imm != 0) {
+ if (set) {
+ tcg_gen_ori_i32(cpu_msr, cpu_msr, imm);
+ } else {
+ tcg_gen_andi_i32(cpu_msr, cpu_msr, ~imm);
+ }
+ dc->cpustate_changed = 1;
+ }
+ return true;
}
-static int dec_check_fpuv2(DisasContext *dc)
+static bool trans_msrclr(DisasContext *dc, arg_type_msr *arg)
{
- if ((dc->cpu->cfg.use_fpu != 2) && (dc->tb_flags & MSR_EE_FLAG)) {
- tcg_gen_movi_i64(cpu_SR[SR_ESR], ESR_EC_FPU);
- t_gen_raise_exception(dc, EXCP_HW_EXCP);
- }
- return (dc->cpu->cfg.use_fpu == 2) ? PVR2_USE_FPU2_MASK : 0;
+ return do_msrclrset(dc, arg, false);
}
-static void dec_fpu(DisasContext *dc)
+static bool trans_msrset(DisasContext *dc, arg_type_msr *arg)
{
- unsigned int fpu_insn;
+ return do_msrclrset(dc, arg, true);
+}
- if (trap_illegal(dc, !dc->cpu->cfg.use_fpu)) {
- return;
+static bool trans_mts(DisasContext *dc, arg_mts *arg)
+{
+ if (trap_userspace(dc, true)) {
+ return true;
}
- fpu_insn = (dc->ir >> 7) & 7;
-
- switch (fpu_insn) {
- case 0:
- gen_helper_fadd(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra],
- cpu_R[dc->rb]);
- break;
+#ifdef CONFIG_USER_ONLY
+ g_assert_not_reached();
+#else
+ if (arg->e && arg->rs != 0x1003) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Invalid extended mts reg 0x%x\n", arg->rs);
+ return true;
+ }
- case 1:
- gen_helper_frsub(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra],
- cpu_R[dc->rb]);
- break;
+ TCGv_i32 src = reg_for_read(dc, arg->ra);
+ switch (arg->rs) {
+ case SR_MSR:
+ msr_write(dc, src);
+ break;
+ case SR_FSR:
+ tcg_gen_st_i32(src, cpu_env, offsetof(CPUMBState, fsr));
+ break;
+ case 0x800:
+ tcg_gen_st_i32(src, cpu_env, offsetof(CPUMBState, slr));
+ break;
+ case 0x802:
+ tcg_gen_st_i32(src, cpu_env, offsetof(CPUMBState, shr));
+ break;
- case 2:
- gen_helper_fmul(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra],
- cpu_R[dc->rb]);
- break;
+ case 0x1000: /* PID */
+ case 0x1001: /* ZPR */
+ case 0x1002: /* TLBX */
+ case 0x1003: /* TLBLO */
+ case 0x1004: /* TLBHI */
+ case 0x1005: /* TLBSX */
+ {
+ TCGv_i32 tmp_ext = tcg_const_i32(arg->e);
+ TCGv_i32 tmp_reg = tcg_const_i32(arg->rs & 7);
+
+ gen_helper_mmu_write(cpu_env, tmp_ext, tmp_reg, src);
+ tcg_temp_free_i32(tmp_reg);
+ tcg_temp_free_i32(tmp_ext);
+ }
+ break;
- case 3:
- gen_helper_fdiv(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra],
- cpu_R[dc->rb]);
- break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "Invalid mts reg 0x%x\n", arg->rs);
+ return true;
+ }
+ dc->cpustate_changed = 1;
+ return true;
+#endif
+}
- case 4:
- switch ((dc->ir >> 4) & 7) {
- case 0:
- gen_helper_fcmp_un(cpu_R[dc->rd], cpu_env,
- cpu_R[dc->ra], cpu_R[dc->rb]);
- break;
- case 1:
- gen_helper_fcmp_lt(cpu_R[dc->rd], cpu_env,
- cpu_R[dc->ra], cpu_R[dc->rb]);
- break;
- case 2:
- gen_helper_fcmp_eq(cpu_R[dc->rd], cpu_env,
- cpu_R[dc->ra], cpu_R[dc->rb]);
- break;
- case 3:
- gen_helper_fcmp_le(cpu_R[dc->rd], cpu_env,
- cpu_R[dc->ra], cpu_R[dc->rb]);
- break;
- case 4:
- gen_helper_fcmp_gt(cpu_R[dc->rd], cpu_env,
- cpu_R[dc->ra], cpu_R[dc->rb]);
- break;
- case 5:
- gen_helper_fcmp_ne(cpu_R[dc->rd], cpu_env,
- cpu_R[dc->ra], cpu_R[dc->rb]);
- break;
- case 6:
- gen_helper_fcmp_ge(cpu_R[dc->rd], cpu_env,
- cpu_R[dc->ra], cpu_R[dc->rb]);
- break;
- default:
- qemu_log_mask(LOG_UNIMP,
- "unimplemented fcmp fpu_insn=%x pc=%x"
- " opc=%x\n",
- fpu_insn, dc->pc, dc->opcode);
- dc->abort_at_next_insn = 1;
- break;
- }
- break;
+static bool trans_mfs(DisasContext *dc, arg_mfs *arg)
+{
+ TCGv_i32 dest = reg_for_write(dc, arg->rd);
- case 5:
- if (!dec_check_fpuv2(dc)) {
- return;
+ if (arg->e) {
+ switch (arg->rs) {
+ case SR_EAR:
+ {
+ TCGv_i64 t64 = tcg_temp_new_i64();
+ tcg_gen_ld_i64(t64, cpu_env, offsetof(CPUMBState, ear));
+ tcg_gen_extrh_i64_i32(dest, t64);
+ tcg_temp_free_i64(t64);
}
- gen_helper_flt(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra]);
+ return true;
+#ifndef CONFIG_USER_ONLY
+ case 0x1003: /* TLBLO */
+ /* Handled below. */
break;
+#endif
+ case 0x2006 ... 0x2009:
+ /* High bits of PVR6-9 not implemented. */
+ tcg_gen_movi_i32(dest, 0);
+ return true;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Invalid extended mfs reg 0x%x\n", arg->rs);
+ return true;
+ }
+ }
- case 6:
- if (!dec_check_fpuv2(dc)) {
- return;
- }
- gen_helper_fint(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra]);
- break;
+ switch (arg->rs) {
+ case SR_PC:
+ tcg_gen_movi_i32(dest, dc->base.pc_next);
+ break;
+ case SR_MSR:
+ msr_read(dc, dest);
+ break;
+ case SR_EAR:
+ {
+ TCGv_i64 t64 = tcg_temp_new_i64();
+ tcg_gen_ld_i64(t64, cpu_env, offsetof(CPUMBState, ear));
+ tcg_gen_extrl_i64_i32(dest, t64);
+ tcg_temp_free_i64(t64);
+ }
+ break;
+ case SR_ESR:
+ tcg_gen_ld_i32(dest, cpu_env, offsetof(CPUMBState, esr));
+ break;
+ case SR_FSR:
+ tcg_gen_ld_i32(dest, cpu_env, offsetof(CPUMBState, fsr));
+ break;
+ case SR_BTR:
+ tcg_gen_ld_i32(dest, cpu_env, offsetof(CPUMBState, btr));
+ break;
+ case SR_EDR:
+ tcg_gen_ld_i32(dest, cpu_env, offsetof(CPUMBState, edr));
+ break;
+ case 0x800:
+ tcg_gen_ld_i32(dest, cpu_env, offsetof(CPUMBState, slr));
+ break;
+ case 0x802:
+ tcg_gen_ld_i32(dest, cpu_env, offsetof(CPUMBState, shr));
+ break;
- case 7:
- if (!dec_check_fpuv2(dc)) {
- return;
- }
- gen_helper_fsqrt(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra]);
- break;
+#ifndef CONFIG_USER_ONLY
+ case 0x1000: /* PID */
+ case 0x1001: /* ZPR */
+ case 0x1002: /* TLBX */
+ case 0x1003: /* TLBLO */
+ case 0x1004: /* TLBHI */
+ case 0x1005: /* TLBSX */
+ {
+ TCGv_i32 tmp_ext = tcg_const_i32(arg->e);
+ TCGv_i32 tmp_reg = tcg_const_i32(arg->rs & 7);
+
+ gen_helper_mmu_read(dest, cpu_env, tmp_ext, tmp_reg);
+ tcg_temp_free_i32(tmp_reg);
+ tcg_temp_free_i32(tmp_ext);
+ }
+ break;
+#endif
- default:
- qemu_log_mask(LOG_UNIMP, "unimplemented FPU insn fpu_insn=%x pc=%x"
- " opc=%x\n",
- fpu_insn, dc->pc, dc->opcode);
- dc->abort_at_next_insn = 1;
- break;
+ case 0x2000 ... 0x200c:
+ tcg_gen_ld_i32(dest, cpu_env,
+ offsetof(CPUMBState, pvr.regs[arg->rs - 0x2000]));
+ break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "Invalid mfs reg 0x%x\n", arg->rs);
+ break;
}
+ return true;
}
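Note: trans_mts()/trans_mfs() dispatch on the raw special-register number: the architected SR_* values, 0x800/0x802 for SLR/SHR, 0x1000-0x1005 for the MMU registers (forwarded to the mmu helpers with the low three bits), and 0x2000-0x200c for the PVR bank. A plain-C sketch of the PVR read, for illustration only (the real code loads via tcg_gen_ld_i32):

    if (rs >= 0x2000 && rs <= 0x200c) {
        dest = env->pvr.regs[rs - 0x2000];   /* illustrative, not TCG */
    }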
-static void dec_null(DisasContext *dc)
+static void do_rti(DisasContext *dc)
{
- if (trap_illegal(dc, true)) {
- return;
- }
- qemu_log_mask(LOG_GUEST_ERROR, "unknown insn pc=%x opc=%x\n", dc->pc, dc->opcode);
- dc->abort_at_next_insn = 1;
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ tcg_gen_shri_i32(tmp, cpu_msr, 1);
+ tcg_gen_ori_i32(cpu_msr, cpu_msr, MSR_IE);
+ tcg_gen_andi_i32(tmp, tmp, MSR_VM | MSR_UM);
+ tcg_gen_andi_i32(cpu_msr, cpu_msr, ~(MSR_VM | MSR_UM));
+ tcg_gen_or_i32(cpu_msr, cpu_msr, tmp);
+
+ tcg_temp_free_i32(tmp);
+ dc->tb_flags &= ~DRTI_FLAG;
+}
+
+static void do_rtb(DisasContext *dc)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ tcg_gen_shri_i32(tmp, cpu_msr, 1);
+ tcg_gen_andi_i32(cpu_msr, cpu_msr, ~(MSR_VM | MSR_UM | MSR_BIP));
+ tcg_gen_andi_i32(tmp, tmp, (MSR_VM | MSR_UM));
+ tcg_gen_or_i32(cpu_msr, cpu_msr, tmp);
+
+ tcg_temp_free_i32(tmp);
+ dc->tb_flags &= ~DRTB_FLAG;
+}
+
+static void do_rte(DisasContext *dc)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ tcg_gen_shri_i32(tmp, cpu_msr, 1);
+ tcg_gen_ori_i32(cpu_msr, cpu_msr, MSR_EE);
+ tcg_gen_andi_i32(tmp, tmp, (MSR_VM | MSR_UM));
+ tcg_gen_andi_i32(cpu_msr, cpu_msr, ~(MSR_VM | MSR_UM | MSR_EIP));
+ tcg_gen_or_i32(cpu_msr, cpu_msr, tmp);
+
+ tcg_temp_free_i32(tmp);
+ dc->tb_flags &= ~DRTE_FLAG;
}
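Note: the three return helpers above share one pattern: the saved mode bits (UMS/VMS, kept one position to the left of UM/VM) are copied back into MSR while the flag specific to the return type is updated (IE set for rtid, BIP cleared for rtbd, EE set and EIP cleared for rted). A minimal plain-C sketch of the do_rti() sequence, for illustration only:

    /* Illustrative only: the MSR update performed by do_rti(), in plain C. */
    uint32_t saved = (msr >> 1) & (MSR_VM | MSR_UM);   /* VMS/UMS -> VM/UM */
    msr |= MSR_IE;                                     /* rtid re-enables interrupts */
    msr = (msr & ~(MSR_VM | MSR_UM)) | saved;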
/* Insns connected to FSL or AXI stream attached devices. */
-static void dec_stream(DisasContext *dc)
+static bool do_get(DisasContext *dc, int rd, int rb, int imm, int ctrl)
{
TCGv_i32 t_id, t_ctrl;
- int ctrl;
-
- LOG_DIS("%s%s imm=%x\n", dc->rd ? "get" : "put",
- dc->type_b ? "" : "d", dc->imm);
if (trap_userspace(dc, true)) {
- return;
+ return true;
}
t_id = tcg_temp_new_i32();
- if (dc->type_b) {
- tcg_gen_movi_i32(t_id, dc->imm & 0xf);
- ctrl = dc->imm >> 10;
+ if (rb) {
+ tcg_gen_andi_i32(t_id, cpu_R[rb], 0xf);
} else {
- tcg_gen_andi_i32(t_id, cpu_R[dc->rb], 0xf);
- ctrl = dc->imm >> 5;
+ tcg_gen_movi_i32(t_id, imm);
}
t_ctrl = tcg_const_i32(ctrl);
+ gen_helper_get(reg_for_write(dc, rd), t_id, t_ctrl);
+ tcg_temp_free_i32(t_id);
+ tcg_temp_free_i32(t_ctrl);
+ return true;
+}
- if (dc->rd == 0) {
- gen_helper_put(t_id, t_ctrl, cpu_R[dc->ra]);
+static bool trans_get(DisasContext *dc, arg_get *arg)
+{
+ return do_get(dc, arg->rd, 0, arg->imm, arg->ctrl);
+}
+
+static bool trans_getd(DisasContext *dc, arg_getd *arg)
+{
+ return do_get(dc, arg->rd, arg->rb, 0, arg->ctrl);
+}
+
+static bool do_put(DisasContext *dc, int ra, int rb, int imm, int ctrl)
+{
+ TCGv_i32 t_id, t_ctrl;
+
+ if (trap_userspace(dc, true)) {
+ return true;
+ }
+
+ t_id = tcg_temp_new_i32();
+ if (rb) {
+ tcg_gen_andi_i32(t_id, cpu_R[rb], 0xf);
} else {
- gen_helper_get(cpu_R[dc->rd], t_id, t_ctrl);
+ tcg_gen_movi_i32(t_id, imm);
}
+
+ t_ctrl = tcg_const_i32(ctrl);
+ gen_helper_put(t_id, t_ctrl, reg_for_read(dc, ra));
tcg_temp_free_i32(t_id);
tcg_temp_free_i32(t_ctrl);
+ return true;
}
-static struct decoder_info {
- struct {
- uint32_t bits;
- uint32_t mask;
- };
- void (*dec)(DisasContext *dc);
-} decinfo[] = {
- {DEC_ADD, dec_add},
- {DEC_SUB, dec_sub},
- {DEC_AND, dec_and},
- {DEC_XOR, dec_xor},
- {DEC_OR, dec_or},
- {DEC_BIT, dec_bit},
- {DEC_BARREL, dec_barrel},
- {DEC_LD, dec_load},
- {DEC_ST, dec_store},
- {DEC_IMM, dec_imm},
- {DEC_BR, dec_br},
- {DEC_BCC, dec_bcc},
- {DEC_RTS, dec_rts},
- {DEC_FPU, dec_fpu},
- {DEC_MUL, dec_mul},
- {DEC_DIV, dec_div},
- {DEC_MSR, dec_msr},
- {DEC_STREAM, dec_stream},
- {{0, 0}, dec_null}
-};
+static bool trans_put(DisasContext *dc, arg_put *arg)
+{
+ return do_put(dc, arg->ra, 0, arg->imm, arg->ctrl);
+}
-static inline void decode(DisasContext *dc, uint32_t ir)
+static bool trans_putd(DisasContext *dc, arg_putd *arg)
{
- int i;
+ return do_put(dc, arg->ra, arg->rb, 0, arg->ctrl);
+}
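Note: in both do_get() and do_put() the stream/port id comes either from the low four bits of rB (the dynamic getd/putd forms) or from the immediate (the static forms); only the data direction differs, which is why get uses reg_for_write() for rD while put uses reg_for_read() for rA. A one-line sketch of the id selection, mirroring the TCG sequence above:

    id = rb ? (regs[rb] & 0xf) : imm;   /* illustrative only */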
- dc->ir = ir;
- LOG_DIS("%8.8x\t", dc->ir);
+static void mb_tr_init_disas_context(DisasContextBase *dcb, CPUState *cs)
+{
+ DisasContext *dc = container_of(dcb, DisasContext, base);
+ MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
+ int bound;
- if (ir == 0) {
- trap_illegal(dc, dc->cpu->cfg.opcode_0_illegal);
- /* Don't decode nop/zero instructions any further. */
- return;
- }
+ dc->cpu = cpu;
+ dc->tb_flags = dc->base.tb->flags;
+ dc->cpustate_changed = 0;
+ dc->ext_imm = dc->base.tb->cs_base;
+ dc->r0 = NULL;
+ dc->r0_set = false;
+ dc->mem_index = cpu_mmu_index(&cpu->env, false);
+ dc->jmp_cond = dc->tb_flags & D_FLAG ? TCG_COND_ALWAYS : TCG_COND_NEVER;
+ dc->jmp_dest = -1;
+
+ bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
+ dc->base.max_insns = MIN(dc->base.max_insns, bound);
+}
- /* bit 2 seems to indicate insn type. */
- dc->type_b = ir & (1 << 29);
+static void mb_tr_tb_start(DisasContextBase *dcb, CPUState *cs)
+{
+}
- dc->opcode = EXTRACT_FIELD(ir, 26, 31);
- dc->rd = EXTRACT_FIELD(ir, 21, 25);
- dc->ra = EXTRACT_FIELD(ir, 16, 20);
- dc->rb = EXTRACT_FIELD(ir, 11, 15);
- dc->imm = EXTRACT_FIELD(ir, 0, 15);
+static void mb_tr_insn_start(DisasContextBase *dcb, CPUState *cs)
+{
+ DisasContext *dc = container_of(dcb, DisasContext, base);
- /* Large switch for all insns. */
- for (i = 0; i < ARRAY_SIZE(decinfo); i++) {
- if ((dc->opcode & decinfo[i].mask) == decinfo[i].bits) {
- decinfo[i].dec(dc);
- break;
- }
- }
+ tcg_gen_insn_start(dc->base.pc_next, dc->tb_flags & ~MSR_TB_MASK);
+ dc->insn_start = tcg_last_op();
}
-/* generate intermediate code for basic block 'tb'. */
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+static bool mb_tr_breakpoint_check(DisasContextBase *dcb, CPUState *cs,
+ const CPUBreakpoint *bp)
{
+ DisasContext *dc = container_of(dcb, DisasContext, base);
+
+ gen_raise_exception_sync(dc, EXCP_DEBUG);
+
+ /*
+ * The address covered by the breakpoint must be included in
+     * [tb->pc, tb->pc + tb->size) in order for it to be
+ * properly cleared -- thus we increment the PC here so that
+ * the logic setting tb->size below does the right thing.
+ */
+ dc->base.pc_next += 4;
+ return true;
+}
+
+static void mb_tr_translate_insn(DisasContextBase *dcb, CPUState *cs)
+{
+ DisasContext *dc = container_of(dcb, DisasContext, base);
CPUMBState *env = cs->env_ptr;
- MicroBlazeCPU *cpu = env_archcpu(env);
- uint32_t pc_start;
- struct DisasContext ctx;
- struct DisasContext *dc = &ctx;
- uint32_t page_start, org_flags;
- uint32_t npc;
- int num_insns;
-
- pc_start = tb->pc;
- dc->cpu = cpu;
- dc->tb = tb;
- org_flags = dc->synced_flags = dc->tb_flags = tb->flags;
+ uint32_t ir;
- dc->is_jmp = DISAS_NEXT;
- dc->jmp = 0;
- dc->delayed_branch = !!(dc->tb_flags & D_FLAG);
- if (dc->delayed_branch) {
- dc->jmp = JMP_INDIRECT;
+ /* TODO: This should raise an exception, not terminate qemu. */
+ if (dc->base.pc_next & 3) {
+ cpu_abort(cs, "Microblaze: unaligned PC=%x\n",
+ (uint32_t)dc->base.pc_next);
}
- dc->pc = pc_start;
- dc->singlestep_enabled = cs->singlestep_enabled;
- dc->cpustate_changed = 0;
- dc->abort_at_next_insn = 0;
- if (pc_start & 3) {
- cpu_abort(cs, "Microblaze: unaligned PC=%x\n", pc_start);
+ dc->tb_flags_to_set = 0;
+
+ ir = cpu_ldl_code(env, dc->base.pc_next);
+ if (!decode(dc, ir)) {
+ trap_illegal(dc, true);
}
- page_start = pc_start & TARGET_PAGE_MASK;
- num_insns = 0;
+ if (dc->r0) {
+ tcg_temp_free_i32(dc->r0);
+ dc->r0 = NULL;
+ dc->r0_set = false;
+ }
- gen_tb_start(tb);
- do
- {
- tcg_gen_insn_start(dc->pc);
- num_insns++;
+ /* Discard the imm global when its contents cannot be used. */
+ if ((dc->tb_flags & ~dc->tb_flags_to_set) & IMM_FLAG) {
+ tcg_gen_discard_i32(cpu_imm);
+ }
-#if SIM_COMPAT
- if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
- tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc);
- gen_helper_debug();
- }
-#endif
+ dc->tb_flags &= ~(IMM_FLAG | BIMM_FLAG | D_FLAG);
+ dc->tb_flags |= dc->tb_flags_to_set;
+ dc->base.pc_next += 4;
- if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
- t_gen_raise_exception(dc, EXCP_DEBUG);
- dc->is_jmp = DISAS_UPDATE;
- /* The address covered by the breakpoint must be included in
- [tb->pc, tb->pc + tb->size) in order to for it to be
- properly cleared -- thus we increment the PC here so that
- the logic setting tb->size below does the right thing. */
- dc->pc += 4;
- break;
+ if (dc->jmp_cond != TCG_COND_NEVER && !(dc->tb_flags & D_FLAG)) {
+ if (dc->tb_flags & DRTI_FLAG) {
+ do_rti(dc);
+ } else if (dc->tb_flags & DRTB_FLAG) {
+ do_rtb(dc);
+ } else if (dc->tb_flags & DRTE_FLAG) {
+ do_rte(dc);
}
+ dc->base.is_jmp = DISAS_JUMP;
+ }
- /* Pretty disas. */
- LOG_DIS("%8.8x:\t", dc->pc);
+ /* Force an exit if the per-tb cpu state has changed. */
+ if (dc->base.is_jmp == DISAS_NEXT && dc->cpustate_changed) {
+ dc->base.is_jmp = DISAS_UPDATE;
+ tcg_gen_movi_i32(cpu_pc, dc->base.pc_next);
+ }
+}
- if (num_insns == max_insns && (tb_cflags(tb) & CF_LAST_IO)) {
- gen_io_start();
- }
+static void mb_tr_tb_stop(DisasContextBase *dcb, CPUState *cs)
+{
+ DisasContext *dc = container_of(dcb, DisasContext, base);
- dc->clear_imm = 1;
- decode(dc, cpu_ldl_code(env, dc->pc));
- if (dc->clear_imm)
- dc->tb_flags &= ~IMM_FLAG;
- dc->pc += 4;
-
- if (dc->delayed_branch) {
- dc->delayed_branch--;
- if (!dc->delayed_branch) {
- if (dc->tb_flags & DRTI_FLAG)
- do_rti(dc);
- if (dc->tb_flags & DRTB_FLAG)
- do_rtb(dc);
- if (dc->tb_flags & DRTE_FLAG)
- do_rte(dc);
- /* Clear the delay slot flag. */
- dc->tb_flags &= ~D_FLAG;
- /* If it is a direct jump, try direct chaining. */
- if (dc->jmp == JMP_INDIRECT) {
- TCGv_i64 tmp_pc = tcg_const_i64(dc->pc);
- eval_cond_jmp(dc, env_btarget, tmp_pc);
- tcg_temp_free_i64(tmp_pc);
-
- dc->is_jmp = DISAS_JUMP;
- } else if (dc->jmp == JMP_DIRECT) {
- t_sync_flags(dc);
- gen_goto_tb(dc, 0, dc->jmp_pc);
- dc->is_jmp = DISAS_TB_JUMP;
- } else if (dc->jmp == JMP_DIRECT_CC) {
- TCGLabel *l1 = gen_new_label();
- t_sync_flags(dc);
- /* Conditional jmp. */
- tcg_gen_brcondi_i32(TCG_COND_NE, env_btaken, 0, l1);
- gen_goto_tb(dc, 1, dc->pc);
- gen_set_label(l1);
- gen_goto_tb(dc, 0, dc->jmp_pc);
-
- dc->is_jmp = DISAS_TB_JUMP;
- }
- break;
- }
- }
- if (cs->singlestep_enabled) {
- break;
- }
- } while (!dc->is_jmp && !dc->cpustate_changed
- && !tcg_op_buf_full()
- && !singlestep
- && (dc->pc - page_start < TARGET_PAGE_SIZE)
- && num_insns < max_insns);
-
- npc = dc->pc;
- if (dc->jmp == JMP_DIRECT || dc->jmp == JMP_DIRECT_CC) {
- if (dc->tb_flags & D_FLAG) {
- dc->is_jmp = DISAS_UPDATE;
- tcg_gen_movi_i64(cpu_SR[SR_PC], npc);
- sync_jmpstate(dc);
- } else
- npc = dc->jmp_pc;
- }
-
- /* Force an update if the per-tb cpu state has changed. */
- if (dc->is_jmp == DISAS_NEXT
- && (dc->cpustate_changed || org_flags != dc->tb_flags)) {
- dc->is_jmp = DISAS_UPDATE;
- tcg_gen_movi_i64(cpu_SR[SR_PC], npc);
+ if (dc->base.is_jmp == DISAS_NORETURN) {
+ /* We have already exited the TB. */
+ return;
}
+
t_sync_flags(dc);
- if (unlikely(cs->singlestep_enabled)) {
- TCGv_i32 tmp = tcg_const_i32(EXCP_DEBUG);
+ switch (dc->base.is_jmp) {
+ case DISAS_TOO_MANY:
+ gen_goto_tb(dc, 0, dc->base.pc_next);
+ return;
- if (dc->is_jmp != DISAS_JUMP) {
- tcg_gen_movi_i64(cpu_SR[SR_PC], npc);
+ case DISAS_UPDATE:
+ if (unlikely(cs->singlestep_enabled)) {
+ gen_raise_exception(dc, EXCP_DEBUG);
+ } else {
+ tcg_gen_exit_tb(NULL, 0);
}
- gen_helper_raise_exception(cpu_env, tmp);
- tcg_temp_free_i32(tmp);
- } else {
- switch(dc->is_jmp) {
- case DISAS_NEXT:
- gen_goto_tb(dc, 1, npc);
- break;
- default:
- case DISAS_JUMP:
- case DISAS_UPDATE:
- /* indicate that the hash table must be used
- to find the next TB */
- tcg_gen_exit_tb(NULL, 0);
- break;
- case DISAS_TB_JUMP:
- /* nothing more to generate */
- break;
+ return;
+
+ case DISAS_JUMP:
+ if (dc->jmp_dest != -1 && !cs->singlestep_enabled) {
+ /* Direct jump. */
+ tcg_gen_discard_i32(cpu_btarget);
+
+ if (dc->jmp_cond != TCG_COND_ALWAYS) {
+ /* Conditional direct jump. */
+ TCGLabel *taken = gen_new_label();
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ /*
+ * Copy bvalue to a temp now, so we can discard bvalue.
+ * This can avoid writing bvalue to memory when the
+ * delay slot cannot raise an exception.
+ */
+ tcg_gen_mov_i32(tmp, cpu_bvalue);
+ tcg_gen_discard_i32(cpu_bvalue);
+
+ tcg_gen_brcondi_i32(dc->jmp_cond, tmp, 0, taken);
+ gen_goto_tb(dc, 1, dc->base.pc_next);
+ gen_set_label(taken);
+ }
+ gen_goto_tb(dc, 0, dc->jmp_dest);
+ return;
}
- }
- gen_tb_end(tb, num_insns);
- tb->size = dc->pc - pc_start;
- tb->icount = num_insns;
+ /* Indirect jump (or direct jump w/ singlestep) */
+ tcg_gen_mov_i32(cpu_pc, cpu_btarget);
+ tcg_gen_discard_i32(cpu_btarget);
+
+ if (unlikely(cs->singlestep_enabled)) {
+ gen_raise_exception(dc, EXCP_DEBUG);
+ } else {
+ tcg_gen_exit_tb(NULL, 0);
+ }
+ return;
-#ifdef DEBUG_DISAS
-#if !SIM_COMPAT
- if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
- && qemu_log_in_addr_range(pc_start)) {
- FILE *logfile = qemu_log_lock();
- qemu_log("--------------\n");
- log_target_disas(cs, pc_start, dc->pc - pc_start);
- qemu_log_unlock(logfile);
+ default:
+ g_assert_not_reached();
}
-#endif
-#endif
- assert(!dc->abort_at_next_insn);
+}
+
+static void mb_tr_disas_log(const DisasContextBase *dcb, CPUState *cs)
+{
+ qemu_log("IN: %s\n", lookup_symbol(dcb->pc_first));
+ log_target_disas(cs, dcb->pc_first, dcb->tb->size);
+}
+
+static const TranslatorOps mb_tr_ops = {
+ .init_disas_context = mb_tr_init_disas_context,
+ .tb_start = mb_tr_tb_start,
+ .insn_start = mb_tr_insn_start,
+ .breakpoint_check = mb_tr_breakpoint_check,
+ .translate_insn = mb_tr_translate_insn,
+ .tb_stop = mb_tr_tb_stop,
+ .disas_log = mb_tr_disas_log,
+};
+
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
+{
+ DisasContext dc;
+ translator_loop(&mb_tr_ops, &dc.base, cpu, tb, max_insns);
}
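Note: gen_intermediate_code() now just hands the TranslatorOps table to the generic translator_loop(); the hand-rolled per-TB loop is gone. Roughly (a simplified sketch that ignores icount, breakpoints and TCG buffer limits), the generic loop drives the callbacks like this:

    ops->init_disas_context(db, cs);
    ops->tb_start(db, cs);
    do {
        ops->insn_start(db, cs);
        ops->translate_insn(db, cs);      /* advances db->pc_next */
    } while (db->is_jmp == DISAS_NEXT && db->num_insns < db->max_insns);
    ops->tb_stop(db, cs);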
void mb_cpu_dump_state(CPUState *cs, FILE *f, int flags)
{
MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
CPUMBState *env = &cpu->env;
+ uint32_t iflags;
int i;
- if (!env) {
- return;
+ qemu_fprintf(f, "pc=0x%08x msr=0x%05x mode=%s(saved=%s) eip=%d ie=%d\n",
+ env->pc, env->msr,
+ (env->msr & MSR_UM) ? "user" : "kernel",
+ (env->msr & MSR_UMS) ? "user" : "kernel",
+ (bool)(env->msr & MSR_EIP),
+ (bool)(env->msr & MSR_IE));
+
+ iflags = env->iflags;
+ qemu_fprintf(f, "iflags: 0x%08x", iflags);
+ if (iflags & IMM_FLAG) {
+ qemu_fprintf(f, " IMM(0x%08x)", env->imm);
+ }
+ if (iflags & BIMM_FLAG) {
+ qemu_fprintf(f, " BIMM");
+ }
+ if (iflags & D_FLAG) {
+ qemu_fprintf(f, " D(btarget=0x%08x)", env->btarget);
}
+ if (iflags & DRTI_FLAG) {
+ qemu_fprintf(f, " DRTI");
+ }
+ if (iflags & DRTE_FLAG) {
+ qemu_fprintf(f, " DRTE");
+ }
+ if (iflags & DRTB_FLAG) {
+ qemu_fprintf(f, " DRTB");
+ }
+ if (iflags & ESR_ESS_FLAG) {
+ qemu_fprintf(f, " ESR_ESS(0x%04x)", iflags & ESR_ESS_MASK);
+ }
+
+ qemu_fprintf(f, "\nesr=0x%04x fsr=0x%02x btr=0x%08x edr=0x%x\n"
+ "ear=0x" TARGET_FMT_lx " slr=0x%x shr=0x%x\n",
+ env->esr, env->fsr, env->btr, env->edr,
+ env->ear, env->slr, env->shr);
- qemu_fprintf(f, "IN: PC=%" PRIx64 " %s\n",
- env->sregs[SR_PC], lookup_symbol(env->sregs[SR_PC]));
- qemu_fprintf(f, "rmsr=%" PRIx64 " resr=%" PRIx64 " rear=%" PRIx64 " "
- "debug=%x imm=%x iflags=%x fsr=%" PRIx64 " "
- "rbtr=%" PRIx64 "\n",
- env->sregs[SR_MSR], env->sregs[SR_ESR], env->sregs[SR_EAR],
- env->debug, env->imm, env->iflags, env->sregs[SR_FSR],
- env->sregs[SR_BTR]);
- qemu_fprintf(f, "btaken=%d btarget=%" PRIx64 " mode=%s(saved=%s) "
- "eip=%d ie=%d\n",
- env->btaken, env->btarget,
- (env->sregs[SR_MSR] & MSR_UM) ? "user" : "kernel",
- (env->sregs[SR_MSR] & MSR_UMS) ? "user" : "kernel",
- (bool)(env->sregs[SR_MSR] & MSR_EIP),
- (bool)(env->sregs[SR_MSR] & MSR_IE));
for (i = 0; i < 12; i++) {
- qemu_fprintf(f, "rpvr%2.2d=%8.8x ", i, env->pvr.regs[i]);
- if ((i + 1) % 4 == 0) {
- qemu_fprintf(f, "\n");
- }
+ qemu_fprintf(f, "rpvr%-2d=%08x%c",
+ i, env->pvr.regs[i], i % 4 == 3 ? '\n' : ' ');
}
- /* Registers that aren't modeled are reported as 0 */
- qemu_fprintf(f, "redr=%" PRIx64 " rpid=0 rzpr=0 rtlbx=0 rtlbsx=0 "
- "rtlblo=0 rtlbhi=0\n", env->sregs[SR_EDR]);
- qemu_fprintf(f, "slr=%x shr=%x\n", env->slr, env->shr);
for (i = 0; i < 32; i++) {
- qemu_fprintf(f, "r%2.2d=%8.8x ", i, env->regs[i]);
- if ((i + 1) % 4 == 0)
- qemu_fprintf(f, "\n");
- }
- qemu_fprintf(f, "\n\n");
+ qemu_fprintf(f, "r%2.2d=%08x%c",
+ i, env->regs[i], i % 4 == 3 ? '\n' : ' ');
+ }
+ qemu_fprintf(f, "\n");
}
void mb_tcg_init(void)
{
- int i;
+#define R(X) { &cpu_R[X], offsetof(CPUMBState, regs[X]), "r" #X }
+#define SP(X) { &cpu_##X, offsetof(CPUMBState, X), #X }
+
+ static const struct {
+ TCGv_i32 *var; int ofs; char name[8];
+ } i32s[] = {
+ /*
+ * Note that r0 is handled specially in reg_for_read
+ * and reg_for_write. Nothing should touch cpu_R[0].
+ * Leave that element NULL, which will assert quickly
+ * inside the tcg generator functions.
+ */
+ R(1), R(2), R(3), R(4), R(5), R(6), R(7),
+ R(8), R(9), R(10), R(11), R(12), R(13), R(14), R(15),
+ R(16), R(17), R(18), R(19), R(20), R(21), R(22), R(23),
+ R(24), R(25), R(26), R(27), R(28), R(29), R(30), R(31),
+
+ SP(pc),
+ SP(msr),
+ SP(msr_c),
+ SP(imm),
+ SP(iflags),
+ SP(bvalue),
+ SP(btarget),
+ SP(res_val),
+ };
- env_debug = tcg_global_mem_new_i32(cpu_env,
- offsetof(CPUMBState, debug),
- "debug0");
- env_iflags = tcg_global_mem_new_i32(cpu_env,
- offsetof(CPUMBState, iflags),
- "iflags");
- env_imm = tcg_global_mem_new_i32(cpu_env,
- offsetof(CPUMBState, imm),
- "imm");
- env_btarget = tcg_global_mem_new_i64(cpu_env,
- offsetof(CPUMBState, btarget),
- "btarget");
- env_btaken = tcg_global_mem_new_i32(cpu_env,
- offsetof(CPUMBState, btaken),
- "btaken");
- env_res_addr = tcg_global_mem_new(cpu_env,
- offsetof(CPUMBState, res_addr),
- "res_addr");
- env_res_val = tcg_global_mem_new_i32(cpu_env,
- offsetof(CPUMBState, res_val),
- "res_val");
- for (i = 0; i < ARRAY_SIZE(cpu_R); i++) {
- cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
- offsetof(CPUMBState, regs[i]),
- regnames[i]);
- }
- for (i = 0; i < ARRAY_SIZE(cpu_SR); i++) {
- cpu_SR[i] = tcg_global_mem_new_i64(cpu_env,
- offsetof(CPUMBState, sregs[i]),
- special_regnames[i]);
+#undef R
+#undef SP
+
+ for (int i = 0; i < ARRAY_SIZE(i32s); ++i) {
+ *i32s[i].var =
+ tcg_global_mem_new_i32(cpu_env, i32s[i].ofs, i32s[i].name);
}
+
+ cpu_res_addr =
+ tcg_global_mem_new(cpu_env, offsetof(CPUMBState, res_addr), "res_addr");
}
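Note: the R() and SP() macros above just build initializer entries pairing a TCG global pointer with its CPUMBState offset and name; for example, based on the macro definitions shown, R(1) and SP(pc) expand to:

    { &cpu_R[1], offsetof(CPUMBState, regs[1]), "r1" }
    { &cpu_pc,   offsetof(CPUMBState, pc),      "pc" }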
void restore_state_to_opc(CPUMBState *env, TranslationBlock *tb,
target_ulong *data)
{
- env->sregs[SR_PC] = data[0];
+ env->pc = data[0];
+ env->iflags = data[1];
}
diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
index 55b68d1246..e43a3b4686 100644
--- a/target/ppc/misc_helper.c
+++ b/target/ppc/misc_helper.c
@@ -234,25 +234,20 @@ target_ulong helper_clcs(CPUPPCState *env, uint32_t arg)
case 0x0CUL:
/* Instruction cache line size */
return env->icache_line_size;
- break;
case 0x0DUL:
/* Data cache line size */
return env->dcache_line_size;
- break;
case 0x0EUL:
/* Minimum cache line size */
return (env->icache_line_size < env->dcache_line_size) ?
env->icache_line_size : env->dcache_line_size;
- break;
case 0x0FUL:
/* Maximum cache line size */
return (env->icache_line_size > env->dcache_line_size) ?
env->icache_line_size : env->dcache_line_size;
- break;
default:
/* Undefined */
return 0;
- break;
}
}
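Note: each break removed here followed a return in the same case, so it was unreachable; control flow is unchanged. In miniature:

    case 0x0CUL:
        return env->icache_line_size;   /* the old "break;" after this line could never execute */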
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 793af99067..a156573d28 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -3955,12 +3955,6 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
} \
}
-static bool float16_eq_quiet(uint16_t a, uint16_t b, float_status *s)
-{
- FloatRelation compare = float16_compare_quiet(a, b, s);
- return compare == float_relation_equal;
-}
-
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
@@ -4017,12 +4011,6 @@ GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
-static bool float16_lt(uint16_t a, uint16_t b, float_status *s)
-{
- FloatRelation compare = float16_compare(a, b, s);
- return compare == float_relation_less;
-}
-
GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
@@ -4030,13 +4018,6 @@ GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
-static bool float16_le(uint16_t a, uint16_t b, float_status *s)
-{
- FloatRelation compare = float16_compare(a, b, s);
- return compare == float_relation_less ||
- compare == float_relation_equal;
-}
-
GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
@@ -4091,12 +4072,6 @@ GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
-static bool float16_unordered_quiet(uint16_t a, uint16_t b, float_status *s)
-{
- FloatRelation compare = float16_compare_quiet(a, b, s);
- return compare == float_relation_unordered;
-}
-
GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet)
GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet)
GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet)
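Note: the deleted float16_* wrappers were thin one-liners over float16_compare()/float16_compare_quiet(); the hunks do not show where their replacements live, but presumably equivalents are now available from a shared header, making the local copies redundant. For reference, the removed float16_eq_quiet() behaved like:

    static bool float16_eq_quiet(uint16_t a, uint16_t b, float_status *s)
    {
        return float16_compare_quiet(a, b, s) == float_relation_equal;
    }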
diff --git a/target/rx/cpu-qom.h b/target/rx/cpu-qom.h
index 3e81856ef5..9054762326 100644
--- a/target/rx/cpu-qom.h
+++ b/target/rx/cpu-qom.h
@@ -25,11 +25,12 @@
#define TYPE_RX62N_CPU RX_CPU_TYPE_NAME("rx62n")
-#define RXCPU_CLASS(klass) \
+typedef struct RXCPU RXCPU;
+#define RX_CPU_CLASS(klass) \
OBJECT_CLASS_CHECK(RXCPUClass, (klass), TYPE_RX_CPU)
-#define RXCPU(obj) \
+#define RX_CPU(obj) \
OBJECT_CHECK(RXCPU, (obj), TYPE_RX_CPU)
-#define RXCPU_GET_CLASS(obj) \
+#define RX_CPU_GET_CLASS(obj) \
OBJECT_GET_CLASS(RXCPUClass, (obj), TYPE_RX_CPU)
/*
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
index 219e05397b..23ee17a701 100644
--- a/target/rx/cpu.c
+++ b/target/rx/cpu.c
@@ -28,14 +28,14 @@
static void rx_cpu_set_pc(CPUState *cs, vaddr value)
{
- RXCPU *cpu = RXCPU(cs);
+ RXCPU *cpu = RX_CPU(cs);
cpu->env.pc = value;
}
static void rx_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb)
{
- RXCPU *cpu = RXCPU(cs);
+ RXCPU *cpu = RX_CPU(cs);
cpu->env.pc = tb->pc;
}
@@ -48,8 +48,8 @@ static bool rx_cpu_has_work(CPUState *cs)
static void rx_cpu_reset(DeviceState *dev)
{
- RXCPU *cpu = RXCPU(dev);
- RXCPUClass *rcc = RXCPU_GET_CLASS(cpu);
+ RXCPU *cpu = RX_CPU(dev);
+ RXCPUClass *rcc = RX_CPU_GET_CLASS(cpu);
CPURXState *env = &cpu->env;
uint32_t *resetvec;
@@ -108,7 +108,7 @@ static ObjectClass *rx_cpu_class_by_name(const char *cpu_model)
static void rx_cpu_realize(DeviceState *dev, Error **errp)
{
CPUState *cs = CPU(dev);
- RXCPUClass *rcc = RXCPU_GET_CLASS(dev);
+ RXCPUClass *rcc = RX_CPU_GET_CLASS(dev);
Error *local_err = NULL;
cpu_exec_realizefn(cs, &local_err);
@@ -164,7 +164,7 @@ static bool rx_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
static void rx_cpu_init(Object *obj)
{
CPUState *cs = CPU(obj);
- RXCPU *cpu = RXCPU(obj);
+ RXCPU *cpu = RX_CPU(obj);
CPURXState *env = &cpu->env;
cpu_set_cpustate_pointers(cpu);
@@ -176,7 +176,7 @@ static void rx_cpu_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
CPUClass *cc = CPU_CLASS(klass);
- RXCPUClass *rcc = RXCPU_CLASS(klass);
+ RXCPUClass *rcc = RX_CPU_CLASS(klass);
device_class_set_parent_realize(dc, rx_cpu_realize,
&rcc->parent_realize);
diff --git a/target/rx/cpu.h b/target/rx/cpu.h
index d1fb1ef3ca..0b4b998c7b 100644
--- a/target/rx/cpu.h
+++ b/target/rx/cpu.h
@@ -115,7 +115,6 @@ struct RXCPU {
CPURXState env;
};
-typedef struct RXCPU RXCPU;
typedef RXCPU ArchCPU;
#define ENV_OFFSET offsetof(RXCPU, env)
diff --git a/target/rx/gdbstub.c b/target/rx/gdbstub.c
index 9391e8151e..c811d4810b 100644
--- a/target/rx/gdbstub.c
+++ b/target/rx/gdbstub.c
@@ -22,7 +22,7 @@
int rx_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
{
- RXCPU *cpu = RXCPU(cs);
+ RXCPU *cpu = RX_CPU(cs);
CPURXState *env = &cpu->env;
switch (n) {
@@ -54,7 +54,7 @@ int rx_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
int rx_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
{
- RXCPU *cpu = RXCPU(cs);
+ RXCPU *cpu = RX_CPU(cs);
CPURXState *env = &cpu->env;
uint32_t psw;
switch (n) {
diff --git a/target/rx/helper.c b/target/rx/helper.c
index a6a337a311..3e380a94fe 100644
--- a/target/rx/helper.c
+++ b/target/rx/helper.c
@@ -44,7 +44,7 @@ void rx_cpu_unpack_psw(CPURXState *env, uint32_t psw, int rte)
#define INT_FLAGS (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIR)
void rx_cpu_do_interrupt(CPUState *cs)
{
- RXCPU *cpu = RXCPU(cs);
+ RXCPU *cpu = RX_CPU(cs);
CPURXState *env = &cpu->env;
int do_irq = cs->interrupt_request & INT_FLAGS;
uint32_t save_psw;
@@ -121,7 +121,7 @@ void rx_cpu_do_interrupt(CPUState *cs)
bool rx_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
{
- RXCPU *cpu = RXCPU(cs);
+ RXCPU *cpu = RX_CPU(cs);
CPURXState *env = &cpu->env;
int accept = 0;
/* hardware interrupt (Normal) */
diff --git a/target/rx/translate.c b/target/rx/translate.c
index da9713d362..482278edd2 100644
--- a/target/rx/translate.c
+++ b/target/rx/translate.c
@@ -128,7 +128,7 @@ static int bdsp_s(DisasContext *ctx, int d)
void rx_cpu_dump_state(CPUState *cs, FILE *f, int flags)
{
- RXCPU *cpu = RXCPU(cs);
+ RXCPU *cpu = RX_CPU(cs);
CPURXState *env = &cpu->env;
int i;
uint32_t psw;
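Note: the RXCPU()/RXCPU_CLASS()/RXCPU_GET_CLASS() to RX_CPU()/RX_CPU_CLASS()/RX_CPU_GET_CLASS() rename across the rx files above is mechanical; every cast site changes the same way, e.g.:

    RXCPU *cpu = RX_CPU(cs);                   /* was: RXCPU(cs) */
    RXCPUClass *rcc = RX_CPU_GET_CLASS(cpu);   /* was: RXCPU_GET_CLASS(cpu) */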
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 6192d83e8c..60c863d9e1 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -1542,7 +1542,6 @@ static void _decode_opc(DisasContext * ctx)
tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
MO_TEUL | MO_UNALN);
return;
- break;
case 0x40e9: /* movua.l @Rm+,R0 */
CHECK_SH4A
/* Load non-boundary-aligned data */
@@ -1550,7 +1549,6 @@ static void _decode_opc(DisasContext * ctx)
MO_TEUL | MO_UNALN);
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
return;
- break;
case 0x0029: /* movt Rn */
tcg_gen_mov_i32(REG(B11_8), cpu_sr_t);
return;
@@ -1638,7 +1636,6 @@ static void _decode_opc(DisasContext * ctx)
CHECK_SH4A
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
return;
- break;
case 0x4024: /* rotcl Rn */
{
TCGv tmp = tcg_temp_new();