diff options
author | Richard Henderson <rth@twiddle.net> | 2013-08-29 09:32:20 -0700 |
---|---|---|
committer | Richard Henderson <rth@twiddle.net> | 2013-09-25 07:46:31 -0700 |
commit | 8f50c841b374dc90ea604888ca92c37f469c428a (patch) | |
tree | 716b4e36c8dd41f8060ce8d291f9787bc20c8c02 | |
parent | 5b1c985b7e4d3f430769925c1775c9e8836272df (diff) |
tcg-ppc: Fix and cleanup tcg_out_tlb_check
The fix is that sparc has so many mmu modes that the last one overflowed
the 16-bit signed offset we assumed would fit. Handle this, and check
the new assumption at compile time.
Load the tlb addend earlier for the fast path.
Remove the explicit address + addend and make use of index addressing.
Adjust constraints for qemu_ld64 such that we don't clobber the address
register or tlb addend before loading both values.
Signed-off-by: Richard Henderson <rth@twiddle.net>
-rw-r--r-- | tcg/ppc/tcg-target.c | 302 |
1 files changed, 127 insertions, 175 deletions
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 516d28f879..97e33edcfd 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -575,42 +575,72 @@ static const void * const qemu_st_helpers[4] = { static void *ld_trampolines[4]; static void *st_trampolines[4]; -static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2, - int addr_reg, int addr_reg2, int s_bits, - int offset1, int offset2, uint8_t **label_ptr) +/* Perform the TLB load and compare. Branches to the slow path, placing the + address of the branch in *LABEL_PTR. Loads the addend of the TLB into R0. + Clobbers R1 and R2. */ + +static void tcg_out_tlb_check(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2, + TCGReg addrlo, TCGReg addrhi, int s_bits, + int mem_index, int is_load, uint8_t **label_ptr) { + int cmp_off = + (is_load + ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); uint16_t retranst; + TCGReg base = TCG_AREG0; + + /* Extract the page index, shifted into place for tlb index. */ + tcg_out32(s, (RLWINM + | RA(r0) + | RS(addrlo) + | SH(32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)) + | MB(32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS)) + | ME(31 - CPU_TLB_ENTRY_BITS))); + + /* Compensate for very large offsets. */ + if (add_off >= 0x8000) { + /* Most target env are smaller than 32k; none are larger than 64k. + Simplify the logic here merely to offset by 0x7ff0, giving us a + range just shy of 64k. Check this assumption. */ + QEMU_BUILD_BUG_ON(offsetof(CPUArchState, + tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ff0 + 0x7fff); + tcg_out32(s, ADDI | RT(r1) | RA(base) | 0x7ff0); + base = r1; + cmp_off -= 0x7ff0; + add_off -= 0x7ff0; + } - tcg_out32 (s, (RLWINM - | RA (r0) - | RS (addr_reg) - | SH (32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)) - | MB (32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS)) - | ME (31 - CPU_TLB_ENTRY_BITS) - ) - ); - tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (TCG_AREG0)); - tcg_out32 (s, (LWZU - | RT (r1) - | RA (r0) - | offset1 - ) - ); - tcg_out32 (s, (RLWINM - | RA (r2) - | RS (addr_reg) - | SH (0) - | MB ((32 - s_bits) & 31) - | ME (31 - TARGET_PAGE_BITS) - ) - ); + /* Clear the non-page, non-alignment bits from the address. */ + tcg_out32(s, (RLWINM + | RA(r2) + | RS(addrlo) + | SH(0) + | MB((32 - s_bits) & 31) + | ME(31 - TARGET_PAGE_BITS))); - tcg_out32 (s, CMP | BF (7) | RA (r2) | RB (r1)); -#if TARGET_LONG_BITS == 64 - tcg_out32 (s, LWZ | RT (r1) | RA (r0) | 4); - tcg_out32 (s, CMP | BF (6) | RA (addr_reg2) | RB (r1)); - tcg_out32 (s, CRAND | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ)); -#endif + tcg_out32(s, ADD | RT(r0) | RA(r0) | RB(base)); + base = r0; + + /* Load the tlb comparator. */ + tcg_out32(s, LWZ | RT(r1) | RA(base) | (cmp_off & 0xffff)); + + tcg_out32(s, CMP | BF(7) | RA(r2) | RB(r1)); + + if (TARGET_LONG_BITS == 64) { + tcg_out32(s, LWZ | RT(r1) | RA(base) | ((cmp_off + 4) & 0xffff)); + } + + /* Load the tlb addend for use on the fast path. + Do this asap to minimize load delay. */ + tcg_out32(s, LWZ | RT(r0) | RA(base) | (add_off & 0xffff)); + + if (TARGET_LONG_BITS == 64) { + tcg_out32(s, CMP | BF(6) | RA(addrhi) | RB(r1)); + tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); + } /* Use a conditional branch-and-link so that we load a pointer to somewhere within the current opcode, for passing on to the helper. @@ -619,58 +649,31 @@ static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2, *label_ptr = s->code_ptr; retranst = ((uint16_t *) s->code_ptr)[1] & ~3; tcg_out32(s, BC | BI(7, CR_EQ) | retranst | BO_COND_FALSE | LK); - - /* r0 now contains &env->tlb_table[mem_index][index].addr_x */ - tcg_out32 (s, (LWZ - | RT (r0) - | RA (r0) - | offset2 - ) - ); - /* r0 = env->tlb_table[mem_index][index].addend */ - tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (addr_reg)); - /* r0 = env->tlb_table[mem_index][index].addend + addr */ - } #endif static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) { - int addr_reg, data_reg, data_reg2, r0, r1, rbase, bswap; + TCGReg addrlo, datalo, datahi, rbase; + int bswap; #ifdef CONFIG_SOFTMMU - int mem_index, s_bits, r2, addr_reg2; + int mem_index; + TCGReg addrhi; uint8_t *label_ptr; #endif - data_reg = *args++; - if (opc == 3) - data_reg2 = *args++; - else - data_reg2 = 0; - addr_reg = *args++; + datalo = *args++; + datahi = (opc == 3 ? *args++ : 0); + addrlo = *args++; #ifdef CONFIG_SOFTMMU -#if TARGET_LONG_BITS == 64 - addr_reg2 = *args++; -#else - addr_reg2 = 0; -#endif + addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); mem_index = *args; - s_bits = opc & 3; - r0 = 3; - r1 = 4; - r2 = 0; - rbase = 0; - - tcg_out_tlb_check ( - s, r0, r1, r2, addr_reg, addr_reg2, s_bits, - offsetof (CPUArchState, tlb_table[mem_index][0].addr_read), - offsetof (CPUTLBEntry, addend) - offsetof (CPUTLBEntry, addr_read), - &label_ptr - ); + + tcg_out_tlb_check(s, TCG_REG_R3, TCG_REG_R4, TCG_REG_R0, addrlo, + addrhi, opc & 3, mem_index, 0, &label_ptr); + rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ - r0 = addr_reg; - r1 = 3; rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; #endif @@ -683,106 +686,72 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) switch (opc) { default: case 0: - tcg_out32 (s, LBZX | TAB (data_reg, rbase, r0)); + tcg_out32(s, LBZX | TAB(datalo, rbase, addrlo)); break; case 0|4: - tcg_out32 (s, LBZX | TAB (data_reg, rbase, r0)); - tcg_out32 (s, EXTSB | RA (data_reg) | RS (data_reg)); + tcg_out32(s, LBZX | TAB(datalo, rbase, addrlo)); + tcg_out32(s, EXTSB | RA(datalo) | RS(datalo)); break; case 1: - if (bswap) - tcg_out32 (s, LHBRX | TAB (data_reg, rbase, r0)); - else - tcg_out32 (s, LHZX | TAB (data_reg, rbase, r0)); + tcg_out32(s, (bswap ? LHBRX : LHZX) | TAB(datalo, rbase, addrlo)); break; case 1|4: if (bswap) { - tcg_out32 (s, LHBRX | TAB (data_reg, rbase, r0)); - tcg_out32 (s, EXTSH | RA (data_reg) | RS (data_reg)); + tcg_out32(s, LHBRX | TAB(datalo, rbase, addrlo)); + tcg_out32(s, EXTSH | RA(datalo) | RS(datalo)); + } else { + tcg_out32(s, LHAX | TAB(datalo, rbase, addrlo)); } - else tcg_out32 (s, LHAX | TAB (data_reg, rbase, r0)); break; case 2: - if (bswap) - tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0)); - else - tcg_out32 (s, LWZX | TAB (data_reg, rbase, r0)); + tcg_out32(s, (bswap ? LWBRX : LWZX) | TAB(datalo, rbase, addrlo)); break; case 3: if (bswap) { - tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4); - tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0)); - tcg_out32 (s, LWBRX | TAB (data_reg2, rbase, r1)); - } - else { -#ifdef CONFIG_USE_GUEST_BASE - tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4); - tcg_out32 (s, LWZX | TAB (data_reg2, rbase, r0)); - tcg_out32 (s, LWZX | TAB (data_reg, rbase, r1)); -#else - if (r0 == data_reg2) { - tcg_out32 (s, LWZ | RT (0) | RA (r0)); - tcg_out32 (s, LWZ | RT (data_reg) | RA (r0) | 4); - tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 0); - } - else { - tcg_out32 (s, LWZ | RT (data_reg2) | RA (r0)); - tcg_out32 (s, LWZ | RT (data_reg) | RA (r0) | 4); - } -#endif + tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4); + tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); + tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0)); + } else if (rbase != 0) { + tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4); + tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo)); + tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0)); + } else if (addrlo == datahi) { + tcg_out32(s, LWZ | RT(datalo) | RA(addrlo) | 4); + tcg_out32(s, LWZ | RT(datahi) | RA(addrlo)); + } else { + tcg_out32(s, LWZ | RT(datahi) | RA(addrlo)); + tcg_out32(s, LWZ | RT(datalo) | RA(addrlo) | 4); } break; } #ifdef CONFIG_SOFTMMU - add_qemu_ldst_label (s, - 1, - opc, - data_reg, - data_reg2, - addr_reg, - addr_reg2, - mem_index, - s->code_ptr, - label_ptr); + add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, + addrhi, mem_index, s->code_ptr, label_ptr); #endif } static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) { - int addr_reg, r0, r1, data_reg, data_reg2, bswap, rbase; + TCGReg addrlo, datalo, datahi, rbase; + int bswap; #ifdef CONFIG_SOFTMMU - int mem_index, r2, addr_reg2; + int mem_index; + TCGReg addrhi; uint8_t *label_ptr; #endif - data_reg = *args++; - if (opc == 3) - data_reg2 = *args++; - else - data_reg2 = 0; - addr_reg = *args++; + datalo = *args++; + datahi = (opc == 3 ? *args++ : 0); + addrlo = *args++; #ifdef CONFIG_SOFTMMU -#if TARGET_LONG_BITS == 64 - addr_reg2 = *args++; -#else - addr_reg2 = 0; -#endif + addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); mem_index = *args; - r0 = 3; - r1 = 4; - r2 = 0; - rbase = 0; - - tcg_out_tlb_check ( - s, r0, r1, r2, addr_reg, addr_reg2, opc & 3, - offsetof (CPUArchState, tlb_table[mem_index][0].addr_write), - offsetof (CPUTLBEntry, addend) - offsetof (CPUTLBEntry, addr_write), - &label_ptr - ); + + tcg_out_tlb_check(s, TCG_REG_R3, TCG_REG_R4, TCG_REG_R0, addrlo, + addrhi, opc & 3, mem_index, 0, &label_ptr); + rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ - r0 = addr_reg; - r1 = 3; rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; #endif @@ -793,50 +762,33 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) #endif switch (opc) { case 0: - tcg_out32 (s, STBX | SAB (data_reg, rbase, r0)); + tcg_out32(s, STBX | SAB(datalo, rbase, addrlo)); break; case 1: - if (bswap) - tcg_out32 (s, STHBRX | SAB (data_reg, rbase, r0)); - else - tcg_out32 (s, STHX | SAB (data_reg, rbase, r0)); + tcg_out32(s, (bswap ? STHBRX : STHX) | SAB(datalo, rbase, addrlo)); break; case 2: - if (bswap) - tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0)); - else - tcg_out32 (s, STWX | SAB (data_reg, rbase, r0)); + tcg_out32(s, (bswap ? STWBRX : STWX) | SAB(datalo, rbase, addrlo)); break; case 3: if (bswap) { - tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4); - tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0)); - tcg_out32 (s, STWBRX | SAB (data_reg2, rbase, r1)); - } - else { -#ifdef CONFIG_USE_GUEST_BASE - tcg_out32 (s, STWX | SAB (data_reg2, rbase, r0)); - tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4); - tcg_out32 (s, STWX | SAB (data_reg, rbase, r1)); -#else - tcg_out32 (s, STW | RS (data_reg2) | RA (r0)); - tcg_out32 (s, STW | RS (data_reg) | RA (r0) | 4); -#endif + tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4); + tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); + tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0)); + } else if (rbase != 0) { + tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4); + tcg_out32(s, STWX | SAB(datahi, rbase, addrlo)); + tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0)); + } else { + tcg_out32(s, STW | RS(datahi) | RA(addrlo)); + tcg_out32(s, STW | RS(datalo) | RA(addrlo) | 4); } break; } #ifdef CONFIG_SOFTMMU - add_qemu_ldst_label (s, - 0, - opc, - data_reg, - data_reg2, - addr_reg, - addr_reg2, - mem_index, - s->code_ptr, - label_ptr); + add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi, + mem_index, s->code_ptr, label_ptr); #endif } @@ -1994,7 +1946,7 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_qemu_ld16u, { "r", "L" } }, { INDEX_op_qemu_ld16s, { "r", "L" } }, { INDEX_op_qemu_ld32, { "r", "L" } }, - { INDEX_op_qemu_ld64, { "r", "r", "L" } }, + { INDEX_op_qemu_ld64, { "L", "L", "L" } }, { INDEX_op_qemu_st8, { "K", "K" } }, { INDEX_op_qemu_st16, { "K", "K" } }, @@ -2006,7 +1958,7 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_qemu_ld16u, { "r", "L", "L" } }, { INDEX_op_qemu_ld16s, { "r", "L", "L" } }, { INDEX_op_qemu_ld32, { "r", "L", "L" } }, - { INDEX_op_qemu_ld64, { "r", "L", "L", "L" } }, + { INDEX_op_qemu_ld64, { "L", "L", "L", "L" } }, { INDEX_op_qemu_st8, { "K", "K", "K" } }, { INDEX_op_qemu_st16, { "K", "K", "K" } }, |