From 30038fd81808f7c3bca92be2369e74c8ca7b3d69 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 17 Oct 2011 10:42:49 -0700 Subject: target-sparc: Change fpr representation to doubles. This allows a more efficient representation for 64-bit hosts. It should be about the same for 32-bit hosts, as we can still access the individual pieces of the double. Signed-off-by: Richard Henderson --- target-sparc/cpu.h | 7 ++- target-sparc/cpu_init.c | 6 +- target-sparc/ldst_helper.c | 71 +++++++++------------ target-sparc/machine.c | 20 ++---- target-sparc/translate.c | 150 +++++++++++++++++++++------------------------ 5 files changed, 114 insertions(+), 140 deletions(-) (limited to 'target-sparc') diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h index 4eace33c47..38a707466c 100644 --- a/target-sparc/cpu.h +++ b/target-sparc/cpu.h @@ -3,16 +3,17 @@ #include "config.h" #include "qemu-common.h" +#include "bswap.h" #if !defined(TARGET_SPARC64) #define TARGET_LONG_BITS 32 -#define TARGET_FPREGS 32 +#define TARGET_DPREGS 16 #define TARGET_PAGE_BITS 12 /* 4k */ #define TARGET_PHYS_ADDR_SPACE_BITS 36 #define TARGET_VIRT_ADDR_SPACE_BITS 32 #else #define TARGET_LONG_BITS 64 -#define TARGET_FPREGS 64 +#define TARGET_DPREGS 32 #define TARGET_PAGE_BITS 13 /* 8k */ #define TARGET_PHYS_ADDR_SPACE_BITS 41 # ifdef TARGET_ABI32 @@ -395,7 +396,7 @@ typedef struct CPUSPARCState { uint32_t psr; /* processor state register */ target_ulong fsr; /* FPU state register */ - float32 fpr[TARGET_FPREGS]; /* floating point registers */ + CPU_DoubleU fpr[TARGET_DPREGS]; /* floating point registers */ uint32_t cwp; /* index of current register window (extracted from PSR) */ #if !defined(TARGET_SPARC64) || defined(TARGET_ABI32) diff --git a/target-sparc/cpu_init.c b/target-sparc/cpu_init.c index 6954800af0..c7269b54a8 100644 --- a/target-sparc/cpu_init.c +++ b/target-sparc/cpu_init.c @@ -813,11 +813,11 @@ void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf, } } cpu_fprintf(f, 
"\nFloating Point Registers:\n"); - for (i = 0; i < TARGET_FPREGS; i++) { + for (i = 0; i < TARGET_DPREGS; i++) { if ((i & 3) == 0) { - cpu_fprintf(f, "%%f%02d:", i); + cpu_fprintf(f, "%%f%02d:", i * 2); } - cpu_fprintf(f, " %016f", *(float *)&env->fpr[i]); + cpu_fprintf(f, " %016" PRIx64, env->fpr[i].ll); if ((i & 3) == 3) { cpu_fprintf(f, "\n"); } diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c index 80e540818f..b59707ecd2 100644 --- a/target-sparc/ldst_helper.c +++ b/target-sparc/ldst_helper.c @@ -2045,7 +2045,7 @@ void helper_ldda_asi(target_ulong addr, int asi, int rd) void helper_ldf_asi(target_ulong addr, int asi, int size, int rd) { unsigned int i; - CPU_DoubleU u; + target_ulong val; helper_check_align(addr, 3); addr = asi_address_mask(env, asi, addr); @@ -2060,13 +2060,11 @@ void helper_ldf_asi(target_ulong addr, int asi, int size, int rd) return; } helper_check_align(addr, 0x3f); - for (i = 0; i < 16; i++) { - *(uint32_t *)&env->fpr[rd++] = helper_ld_asi(addr, asi & 0x8f, 4, - 0); - addr += 4; + for (i = 0; i < 8; i++, rd += 2, addr += 8) { + env->fpr[rd/2].ll = helper_ld_asi(addr, asi & 0x8f, 8, 0); } - return; + case 0x16: /* UA2007 Block load primary, user privilege */ case 0x17: /* UA2007 Block load secondary, user privilege */ case 0x1e: /* UA2007 Block load primary LE, user privilege */ @@ -2080,13 +2078,11 @@ void helper_ldf_asi(target_ulong addr, int asi, int size, int rd) return; } helper_check_align(addr, 0x3f); - for (i = 0; i < 16; i++) { - *(uint32_t *)&env->fpr[rd++] = helper_ld_asi(addr, asi & 0x19, 4, - 0); - addr += 4; + for (i = 0; i < 8; i++, rd += 2, addr += 8) { + env->fpr[rd/2].ll = helper_ld_asi(addr, asi & 0x19, 8, 0); } - return; + default: break; } @@ -2094,20 +2090,19 @@ void helper_ldf_asi(target_ulong addr, int asi, int size, int rd) switch (size) { default: case 4: - *((uint32_t *)&env->fpr[rd]) = helper_ld_asi(addr, asi, size, 0); + val = helper_ld_asi(addr, asi, size, 0); + if (rd & 1) { + 
env->fpr[rd/2].l.lower = val; + } else { + env->fpr[rd/2].l.upper = val; + } break; case 8: - u.ll = helper_ld_asi(addr, asi, size, 0); - *((uint32_t *)&env->fpr[rd++]) = u.l.upper; - *((uint32_t *)&env->fpr[rd++]) = u.l.lower; + env->fpr[rd/2].ll = helper_ld_asi(addr, asi, size, 0); break; case 16: - u.ll = helper_ld_asi(addr, asi, 8, 0); - *((uint32_t *)&env->fpr[rd++]) = u.l.upper; - *((uint32_t *)&env->fpr[rd++]) = u.l.lower; - u.ll = helper_ld_asi(addr + 8, asi, 8, 0); - *((uint32_t *)&env->fpr[rd++]) = u.l.upper; - *((uint32_t *)&env->fpr[rd++]) = u.l.lower; + env->fpr[rd/2].ll = helper_ld_asi(addr, asi, 8, 0); + env->fpr[rd/2 + 1].ll = helper_ld_asi(addr + 8, asi, 8, 0); break; } } @@ -2115,8 +2110,7 @@ void helper_ldf_asi(target_ulong addr, int asi, int size, int rd) void helper_stf_asi(target_ulong addr, int asi, int size, int rd) { unsigned int i; - target_ulong val = 0; - CPU_DoubleU u; + target_ulong val; helper_check_align(addr, 3); addr = asi_address_mask(env, asi, addr); @@ -2133,10 +2127,8 @@ void helper_stf_asi(target_ulong addr, int asi, int size, int rd) return; } helper_check_align(addr, 0x3f); - for (i = 0; i < 16; i++) { - val = *(uint32_t *)&env->fpr[rd++]; - helper_st_asi(addr, val, asi & 0x8f, 4); - addr += 4; + for (i = 0; i < 8; i++, rd += 2, addr += 8) { + helper_st_asi(addr, env->fpr[rd/2].ll, asi & 0x8f, 8); } return; @@ -2153,10 +2145,8 @@ void helper_stf_asi(target_ulong addr, int asi, int size, int rd) return; } helper_check_align(addr, 0x3f); - for (i = 0; i < 16; i++) { - val = *(uint32_t *)&env->fpr[rd++]; - helper_st_asi(addr, val, asi & 0x19, 4); - addr += 4; + for (i = 0; i < 8; i++, rd += 2, addr += 8) { + helper_st_asi(addr, env->fpr[rd/2].ll, asi & 0x19, 8); } return; @@ -2167,20 +2157,19 @@ void helper_stf_asi(target_ulong addr, int asi, int size, int rd) switch (size) { default: case 4: - helper_st_asi(addr, *(uint32_t *)&env->fpr[rd], asi, size); + if (rd & 1) { + val = env->fpr[rd/2].l.lower; + } else { + val = 
env->fpr[rd/2].l.upper; + } + helper_st_asi(addr, val, asi, size); break; case 8: - u.l.upper = *(uint32_t *)&env->fpr[rd++]; - u.l.lower = *(uint32_t *)&env->fpr[rd++]; - helper_st_asi(addr, u.ll, asi, size); + helper_st_asi(addr, env->fpr[rd/2].ll, asi, size); break; case 16: - u.l.upper = *(uint32_t *)&env->fpr[rd++]; - u.l.lower = *(uint32_t *)&env->fpr[rd++]; - helper_st_asi(addr, u.ll, asi, 8); - u.l.upper = *(uint32_t *)&env->fpr[rd++]; - u.l.lower = *(uint32_t *)&env->fpr[rd++]; - helper_st_asi(addr + 8, u.ll, asi, 8); + helper_st_asi(addr, env->fpr[rd/2].ll, asi, 8); + helper_st_asi(addr + 8, env->fpr[rd/2 + 1].ll, asi, 8); break; } } diff --git a/target-sparc/machine.c b/target-sparc/machine.c index 56ae0412cd..235b088a45 100644 --- a/target-sparc/machine.c +++ b/target-sparc/machine.c @@ -21,13 +21,9 @@ void cpu_save(QEMUFile *f, void *opaque) qemu_put_betls(f, &env->regbase[i]); /* FPU */ - for(i = 0; i < TARGET_FPREGS; i++) { - union { - float32 f; - uint32_t i; - } u; - u.f = env->fpr[i]; - qemu_put_be32(f, u.i); + for (i = 0; i < TARGET_DPREGS; i++) { + qemu_put_be32(f, env->fpr[i].l.upper); + qemu_put_be32(f, env->fpr[i].l.lower); } qemu_put_betls(f, &env->pc); @@ -128,13 +124,9 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) qemu_get_betls(f, &env->regbase[i]); /* FPU */ - for(i = 0; i < TARGET_FPREGS; i++) { - union { - float32 f; - uint32_t i; - } u; - u.i = qemu_get_be32(f); - env->fpr[i] = u.f; + for (i = 0; i < TARGET_DPREGS; i++) { + env->fpr[i].l.upper = qemu_get_be32(f); + env->fpr[i].l.lower = qemu_get_be32(f); } qemu_get_betls(f, &env->pc); diff --git a/target-sparc/translate.c b/target-sparc/translate.c index 0b95b64ca4..2c123b1e09 100644 --- a/target-sparc/translate.c +++ b/target-sparc/translate.c @@ -63,7 +63,7 @@ static TCGv cpu_tmp0; static TCGv_i32 cpu_tmp32; static TCGv_i64 cpu_tmp64; /* Floating point registers */ -static TCGv_i32 cpu_fpr[TARGET_FPREGS]; +static TCGv_i64 cpu_fpr[TARGET_DPREGS]; static target_ulong 
gen_opc_npc[OPC_BUF_SIZE]; static target_ulong gen_opc_jump_pc[2]; @@ -82,8 +82,8 @@ typedef struct DisasContext { uint32_t cc_op; /* current CC operation */ struct TranslationBlock *tb; sparc_def_t *def; - TCGv_i64 t64[3]; - int n_t64; + TCGv_i32 t32[3]; + int n_t32; } DisasContext; // This function uses non-native bit order @@ -126,12 +126,44 @@ static inline void gen_update_fprs_dirty(int rd) /* floating point registers moves */ static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src) { - return cpu_fpr[src]; +#if TCG_TARGET_REG_BITS == 32 + if (src & 1) { + return TCGV_LOW(cpu_fpr[src / 2]); + } else { + return TCGV_HIGH(cpu_fpr[src / 2]); + } +#else + if (src & 1) { + return MAKE_TCGV_I32(GET_TCGV_I64(cpu_fpr[src / 2])); + } else { + TCGv_i32 ret = tcg_temp_local_new_i32(); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, cpu_fpr[src / 2], 32); + tcg_gen_trunc_i64_i32(ret, t); + tcg_temp_free_i64(t); + + dc->t32[dc->n_t32++] = ret; + assert(dc->n_t32 <= ARRAY_SIZE(dc->t32)); + + return ret; + } +#endif } static void gen_store_fpr_F(DisasContext *dc, unsigned int dst, TCGv_i32 v) { - tcg_gen_mov_i32(cpu_fpr[dst], v); +#if TCG_TARGET_REG_BITS == 32 + if (dst & 1) { + tcg_gen_mov_i32(TCGV_LOW(cpu_fpr[dst / 2]), v); + } else { + tcg_gen_mov_i32(TCGV_HIGH(cpu_fpr[dst / 2]), v); + } +#else + TCGv_i64 t = MAKE_TCGV_I64(GET_TCGV_I32(v)); + tcg_gen_deposit_i64(cpu_fpr[dst / 2], cpu_fpr[dst / 2], t, + (dst & 1 ? 
0 : 32), 32); +#endif gen_update_fprs_dirty(dst); } @@ -142,42 +174,14 @@ static TCGv_i32 gen_dest_fpr_F(void) static TCGv_i64 gen_load_fpr_D(DisasContext *dc, unsigned int src) { - TCGv_i64 ret = tcg_temp_new_i64(); src = DFPREG(src); - -#if TCG_TARGET_REG_BITS == 32 - tcg_gen_mov_i32(TCGV_HIGH(ret), cpu_fpr[src]); - tcg_gen_mov_i32(TCGV_LOW(ret), cpu_fpr[src + 1]); -#else - { - TCGv_i64 t = tcg_temp_new_i64(); - tcg_gen_extu_i32_i64(ret, cpu_fpr[src]); - tcg_gen_extu_i32_i64(t, cpu_fpr[src + 1]); - tcg_gen_shli_i64(ret, ret, 32); - tcg_gen_or_i64(ret, ret, t); - tcg_temp_free_i64(t); - } -#endif - - dc->t64[dc->n_t64++] = ret; - assert(dc->n_t64 <= ARRAY_SIZE(dc->t64)); - - return ret; + return cpu_fpr[src / 2]; } static void gen_store_fpr_D(DisasContext *dc, unsigned int dst, TCGv_i64 v) { dst = DFPREG(dst); - -#if TCG_TARGET_REG_BITS == 32 - tcg_gen_mov_i32(cpu__fpu[dst], TCGV_HIGH(v)); - tcg_gen_mov_i32(cpu__fpu[dst + 1], TCGV_LOW(v)); -#else - tcg_gen_trunc_i64_i32(cpu_fpr[dst + 1], v); - tcg_gen_shri_i64(v, v, 32); - tcg_gen_trunc_i64_i32(cpu_fpr[dst], v); -#endif - + tcg_gen_mov_i64(cpu_fpr[dst / 2], v); gen_update_fprs_dirty(dst); } @@ -188,50 +192,36 @@ static TCGv_i64 gen_dest_fpr_D(void) static void gen_op_load_fpr_QT0(unsigned int src) { - tcg_gen_st_i32(cpu_fpr[src], cpu_env, offsetof(CPUSPARCState, qt0) + - offsetof(CPU_QuadU, l.upmost)); - tcg_gen_st_i32(cpu_fpr[src + 1], cpu_env, offsetof(CPUSPARCState, qt0) + - offsetof(CPU_QuadU, l.upper)); - tcg_gen_st_i32(cpu_fpr[src + 2], cpu_env, offsetof(CPUSPARCState, qt0) + - offsetof(CPU_QuadU, l.lower)); - tcg_gen_st_i32(cpu_fpr[src + 3], cpu_env, offsetof(CPUSPARCState, qt0) + - offsetof(CPU_QuadU, l.lowest)); + tcg_gen_st_i64(cpu_fpr[src / 2], cpu_env, offsetof(CPUSPARCState, qt0) + + offsetof(CPU_QuadU, ll.upper)); + tcg_gen_st_i64(cpu_fpr[src/2 + 1], cpu_env, offsetof(CPUSPARCState, qt0) + + offsetof(CPU_QuadU, ll.lower)); } static void gen_op_load_fpr_QT1(unsigned int src) { - 
tcg_gen_st_i32(cpu_fpr[src], cpu_env, offsetof(CPUSPARCState, qt1) + - offsetof(CPU_QuadU, l.upmost)); - tcg_gen_st_i32(cpu_fpr[src + 1], cpu_env, offsetof(CPUSPARCState, qt1) + - offsetof(CPU_QuadU, l.upper)); - tcg_gen_st_i32(cpu_fpr[src + 2], cpu_env, offsetof(CPUSPARCState, qt1) + - offsetof(CPU_QuadU, l.lower)); - tcg_gen_st_i32(cpu_fpr[src + 3], cpu_env, offsetof(CPUSPARCState, qt1) + - offsetof(CPU_QuadU, l.lowest)); + tcg_gen_st_i64(cpu_fpr[src / 2], cpu_env, offsetof(CPUSPARCState, qt1) + + offsetof(CPU_QuadU, ll.upper)); + tcg_gen_st_i64(cpu_fpr[src/2 + 1], cpu_env, offsetof(CPUSPARCState, qt1) + + offsetof(CPU_QuadU, ll.lower)); } static void gen_op_store_QT0_fpr(unsigned int dst) { - tcg_gen_ld_i32(cpu_fpr[dst], cpu_env, offsetof(CPUSPARCState, qt0) + - offsetof(CPU_QuadU, l.upmost)); - tcg_gen_ld_i32(cpu_fpr[dst + 1], cpu_env, offsetof(CPUSPARCState, qt0) + - offsetof(CPU_QuadU, l.upper)); - tcg_gen_ld_i32(cpu_fpr[dst + 2], cpu_env, offsetof(CPUSPARCState, qt0) + - offsetof(CPU_QuadU, l.lower)); - tcg_gen_ld_i32(cpu_fpr[dst + 3], cpu_env, offsetof(CPUSPARCState, qt0) + - offsetof(CPU_QuadU, l.lowest)); + tcg_gen_ld_i64(cpu_fpr[dst / 2], cpu_env, offsetof(CPUSPARCState, qt0) + + offsetof(CPU_QuadU, ll.upper)); + tcg_gen_ld_i64(cpu_fpr[dst/2 + 1], cpu_env, offsetof(CPUSPARCState, qt0) + + offsetof(CPU_QuadU, ll.lower)); } #ifdef TARGET_SPARC64 -static void gen_move_Q(int rd, int rs) +static void gen_move_Q(unsigned int rd, unsigned int rs) { rd = QFPREG(rd); rs = QFPREG(rs); - tcg_gen_mov_i32(cpu_fpr[rd], cpu_fpr[rs]); - tcg_gen_mov_i32(cpu_fpr[rd + 1], cpu_fpr[rs + 1]); - tcg_gen_mov_i32(cpu_fpr[rd + 2], cpu_fpr[rs + 2]); - tcg_gen_mov_i32(cpu_fpr[rd + 3], cpu_fpr[rs + 3]); + tcg_gen_mov_i64(cpu_fpr[rd / 2], cpu_fpr[rs / 2]); + tcg_gen_mov_i64(cpu_fpr[rd / 2 + 1], cpu_fpr[rs / 2 + 1]); gen_update_fprs_dirty(rd); } #endif @@ -5001,6 +4991,13 @@ static void disas_sparc_insn(DisasContext * dc) egress: tcg_temp_free(cpu_tmp1); tcg_temp_free(cpu_tmp2); + if 
(dc->n_t32 != 0) { + int i; + for (i = dc->n_t32 - 1; i >= 0; --i) { + tcg_temp_free_i32(dc->t32[i]); + } + dc->n_t32 = 0; + } } static inline void gen_intermediate_code_internal(TranslationBlock * tb, @@ -5100,9 +5097,6 @@ static inline void gen_intermediate_code_internal(TranslationBlock * tb, tcg_temp_free_i64(cpu_tmp64); tcg_temp_free_i32(cpu_tmp32); tcg_temp_free(cpu_tmp0); - for (j = dc->n_t64 - 1; j >= 0; --j) { - tcg_temp_free_i64(dc->t64[j]); - } if (tb->cflags & CF_LAST_IO) gen_io_end(); @@ -5168,15 +5162,11 @@ void gen_intermediate_code_init(CPUSPARCState *env) "g6", "g7", }; - static const char * const fregnames[64] = { - "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", - "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", - "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", - "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", - "f32", "f33", "f34", "f35", "f36", "f37", "f38", "f39", - "f40", "f41", "f42", "f43", "f44", "f45", "f46", "f47", - "f48", "f49", "f50", "f51", "f52", "f53", "f54", "f55", - "f56", "f57", "f58", "f59", "f60", "f61", "f62", "f63", + static const char * const fregnames[32] = { + "f0", "f2", "f4", "f6", "f8", "f10", "f12", "f14", + "f16", "f18", "f20", "f22", "f24", "f26", "f28", "f30", + "f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46", + "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62", }; /* init various static tables */ @@ -5246,14 +5236,16 @@ void gen_intermediate_code_init(CPUSPARCState *env) cpu_tbr = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, tbr), "tbr"); #endif - for (i = 1; i < 8; i++) + for (i = 1; i < 8; i++) { cpu_gregs[i] = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, gregs[i]), gregnames[i]); - for (i = 0; i < TARGET_FPREGS; i++) - cpu_fpr[i] = tcg_global_mem_new_i32(TCG_AREG0, + } + for (i = 0; i < TARGET_DPREGS; i++) { + cpu_fpr[i] = tcg_global_mem_new_i64(TCG_AREG0, offsetof(CPUState, fpr[i]), fregnames[i]); + } /* register helpers */ -- cgit v1.2.3