From 091d5677713d5e8e48ad670655d6bf1bac0b064d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 12 Sep 2013 15:06:23 -0700 Subject: tcg-arm: Improve GUEST_BASE qemu_ld/st If we pull the code to emit the actual load/store into a subroutine, we can share the reg+reg addressing mode code between softmmu and usermode. This lets us load GUEST_BASE into a temporary register rather than attempting to add it piece-wise to the address. Which lets us use movw+movt for armv7, rather than (up to) 4 adds. Code size for pre-armv7 stays the same. Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c | 220 +++++++++++++++++++++++++++------------------------ 1 file changed, 116 insertions(+), 104 deletions(-) diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index c3fd2b0528..e93a4a237b 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -1367,33 +1367,11 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) } #endif /* SOFTMMU */ -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) +static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo, TCGReg addend) { - TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); - TCGMemOp opc, bswap; -#ifdef CONFIG_SOFTMMU - TCGMemOp s_bits; - int mem_index; - TCGReg addend; - uint8_t *label_ptr; -#endif - - datalo = *args++; - datahi = (is64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); - opc = *args++; - bswap = opc & MO_BSWAP; - -#ifdef CONFIG_SOFTMMU - s_bits = opc & MO_SIZE; - mem_index = *args; - addend = tcg_out_tlb_read(s, addrlo, addrhi, s_bits, mem_index, 1); - - /* This a conditional BL only to load a pointer within this opcode into LR - for the slow path. We will not be using the value for a tail call. */ - label_ptr = s->code_ptr; - tcg_out_bl_noaddr(s, COND_NE); + TCGMemOp bswap = opc & MO_BSWAP; switch (opc & MO_SSIZE) { case MO_UB: @@ -1425,8 +1403,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) break; case MO_Q: { - /* Be careful not to modify datalo and datahi - for the slow path below. */ TCGReg dl = (bswap ? datahi : datalo); TCGReg dh = (bswap ? datalo : datahi); @@ -1442,30 +1418,20 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4); } if (bswap) { - tcg_out_bswap32(s, COND_AL, dh, dh); tcg_out_bswap32(s, COND_AL, dl, dl); + tcg_out_bswap32(s, COND_AL, dh, dh); } } break; } +} - add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi, - mem_index, s->code_ptr, label_ptr); -#else /* !CONFIG_SOFTMMU */ - if (GUEST_BASE) { - uint32_t offset = GUEST_BASE; - int i, rot; - - while (offset) { - i = ctz32(offset) & ~1; - rot = ((32 - i) << 7) & 0xf00; +static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo) +{ + TCGMemOp bswap = opc & MO_BSWAP; - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_TMP, addrlo, - ((offset >> i) & 0xff) | rot); - addrlo = TCG_REG_TMP; - offset &= ~(0xff << i); - } - } switch (opc & MO_SSIZE) { case MO_UB: tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0); @@ -1495,32 +1461,32 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) } break; case MO_Q: - if (use_armv6_instructions && !bswap - && (datalo & 1) == 0 && datahi == datalo + 1) { - tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0); - } else if (use_armv6_instructions && bswap - && (datahi & 1) == 0 && datalo == datahi + 1) { - tcg_out_ldrd_8(s, COND_AL, datahi, addrlo, 0); - } else if (datalo == addrlo) { - tcg_out_ld32_12(s, COND_AL, datahi, addrlo, bswap ? 0 : 4); - tcg_out_ld32_12(s, COND_AL, datalo, addrlo, bswap ? 4 : 0); - } else { - tcg_out_ld32_12(s, COND_AL, datalo, addrlo, bswap ? 4 : 0); - tcg_out_ld32_12(s, COND_AL, datahi, addrlo, bswap ? 0 : 4); - } - if (bswap) { - tcg_out_bswap32(s, COND_AL, datalo, datalo); - tcg_out_bswap32(s, COND_AL, datahi, datahi); + { + TCGReg dl = (bswap ? datahi : datalo); + TCGReg dh = (bswap ? datalo : datahi); + + if (use_armv6_instructions && (dl & 1) == 0 && dh == dl + 1) { + tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0); + } else if (dl == addrlo) { + tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4); + tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0); + } else { + tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0); + tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4); + } + if (bswap) { + tcg_out_bswap32(s, COND_AL, dl, dl); + tcg_out_bswap32(s, COND_AL, dh, dh); + } } break; } -#endif } -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) { TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); - TCGMemOp opc, bswap, s_bits; + TCGMemOp opc; #ifdef CONFIG_SOFTMMU int mem_index; TCGReg addend; @@ -1532,73 +1498,81 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) addrlo = *args++; addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); opc = *args++; - bswap = opc & MO_BSWAP; - s_bits = opc & MO_SIZE; #ifdef CONFIG_SOFTMMU mem_index = *args; - addend = tcg_out_tlb_read(s, addrlo, addrhi, s_bits, mem_index, 0); + addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1); + + /* This a conditional BL only to load a pointer within this opcode into LR + for the slow path. We will not be using the value for a tail call. */ + label_ptr = s->code_ptr; + tcg_out_bl_noaddr(s, COND_NE); + + tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend); - switch (s_bits) { + add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi, + mem_index, s->code_ptr, label_ptr); +#else /* !CONFIG_SOFTMMU */ + if (GUEST_BASE) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE); + tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP); + } else { + tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo); + } +#endif +} + +static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo, TCGReg addend) +{ + TCGMemOp bswap = opc & MO_BSWAP; + + switch (opc & MO_SIZE) { case MO_8: - tcg_out_st8_r(s, COND_EQ, datalo, addrlo, addend); + tcg_out_st8_r(s, cond, datalo, addrlo, addend); break; case MO_16: if (bswap) { - tcg_out_bswap16st(s, COND_EQ, TCG_REG_R0, datalo); - tcg_out_st16_r(s, COND_EQ, TCG_REG_R0, addrlo, addend); + tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo); + tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend); } else { - tcg_out_st16_r(s, COND_EQ, datalo, addrlo, addend); + tcg_out_st16_r(s, cond, datalo, addrlo, addend); } break; case MO_32: default: if (bswap) { - tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, datalo); - tcg_out_st32_r(s, COND_EQ, TCG_REG_R0, addrlo, addend); + tcg_out_bswap32(s, cond, TCG_REG_R0, datalo); + tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend); } else { - tcg_out_st32_r(s, COND_EQ, datalo, addrlo, addend); + tcg_out_st32_r(s, cond, datalo, addrlo, addend); } break; case MO_64: if (bswap) { - tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, datahi); - tcg_out_st32_rwb(s, COND_EQ, TCG_REG_R0, addend, addrlo); - tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, datalo); - tcg_out_st32_12(s, COND_EQ, TCG_REG_R0, addend, 4); + tcg_out_bswap32(s, cond, TCG_REG_R0, datahi); + tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo); + tcg_out_bswap32(s, cond, TCG_REG_R0, datalo); + tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4); } else if (use_armv6_instructions && (datalo & 1) == 0 && datahi == datalo + 1) { - tcg_out_strd_r(s, COND_EQ, datalo, addrlo, addend); + tcg_out_strd_r(s, cond, datalo, addrlo, addend); } else { - tcg_out_st32_rwb(s, COND_EQ, datalo, addend, addrlo); - tcg_out_st32_12(s, COND_EQ, datahi, addend, 4); + tcg_out_st32_rwb(s, cond, datalo, addend, addrlo); + tcg_out_st32_12(s, cond, datahi, addend, 4); } break; } +} - /* The conditional call must come last, as we're going to return here. */ - label_ptr = s->code_ptr; - tcg_out_bl_noaddr(s, COND_NE); +static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo) +{ + TCGMemOp bswap = opc & MO_BSWAP; - add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi, - mem_index, s->code_ptr, label_ptr); -#else /* !CONFIG_SOFTMMU */ - if (GUEST_BASE) { - uint32_t offset = GUEST_BASE; - int i; - int rot; - - while (offset) { - i = ctz32(offset) & ~1; - rot = ((32 - i) << 7) & 0xf00; - - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R1, addrlo, - ((offset >> i) & 0xff) | rot); - addrlo = TCG_REG_R1; - offset &= ~(0xff << i); - } - } - switch (s_bits) { + switch (opc & MO_SIZE) { case MO_8: tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0); break; @@ -1634,6 +1608,44 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) } break; } +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) +{ + TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); + TCGMemOp opc; +#ifdef CONFIG_SOFTMMU + int mem_index; + TCGReg addend; + uint8_t *label_ptr; +#endif + + datalo = *args++; + datahi = (is64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); + opc = *args++; + +#ifdef CONFIG_SOFTMMU + mem_index = *args; + addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0); + + tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend); + + /* The conditional call must come last, as we're going to return here. */ + label_ptr = s->code_ptr; + tcg_out_bl_noaddr(s, COND_NE); + + add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi, + mem_index, s->code_ptr, label_ptr); +#else /* !CONFIG_SOFTMMU */ + if (GUEST_BASE) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE); + tcg_out_qemu_st_index(s, COND_AL, opc, datalo, + datahi, addrlo, TCG_REG_TMP); + } else { + tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo); + } #endif } -- cgit v1.2.3