From 647ab96aaf5defeb138e48d610f7f633c587b40d Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Thu, 27 Jul 2017 13:16:16 -0700
Subject: tcg/arm: Improve tlb load for armv7

Use UBFX to avoid limitation on CPU_TLB_BITS. Since we're dropping
the initial shift, we need to replace the page masking. We can use
MOVW+BIC to do this without shifting. The result is the same size
as the armv6 path with one less conditional instruction.

Signed-off-by: Richard Henderson
---
 tcg/arm/tcg-target.inc.c | 72 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 52 insertions(+), 20 deletions(-)

(limited to 'tcg/arm')

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 81ea900852..66c369c239 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -1173,18 +1173,33 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
     unsigned s_bits = opc & MO_SIZE;
     unsigned a_bits = get_alignment_bits(opc);
 
-    /* Should generate something like the following:
-     *   shr    tmp, addrlo, #TARGET_PAGE_BITS                    (1)
+    /* V7 generates the following:
+     *   ubfx   r0, addrlo, #TARGET_PAGE_BITS, #CPU_TLB_BITS
      *   add    r2, env, #high
-     *   and    r0, tmp, #(CPU_TLB_SIZE - 1)                      (2)
-     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS               (3)
-     *   ldr    r0, [r2, #cmp]                                    (4)
+     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS
+     *   ldr    r0, [r2, #cmp]
+     *   ldr    r2, [r2, #add]
+     *   movw   tmp, #page_align_mask
+     *   bic    tmp, addrlo, tmp
+     *   cmp    r0, tmp
+     *
+     * Otherwise we generate:
+     *   shr    tmp, addrlo, #TARGET_PAGE_BITS
+     *   add    r2, env, #high
+     *   and    r0, tmp, #(CPU_TLB_SIZE - 1)
+     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS
+     *   ldr    r0, [r2, #cmp]
+     *   ldr    r2, [r2, #add]
      *   tst    addrlo, #s_mask
-     *   ldr    r2, [r2, #add]                                    (5)
      *   cmpeq  r0, tmp, lsl #TARGET_PAGE_BITS
      */
-    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
-                    0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
+    if (use_armv7_instructions) {
+        tcg_out_extract(s, COND_AL, TCG_REG_R0, addrlo,
+                        TARGET_PAGE_BITS, CPU_TLB_BITS);
+    } else {
+        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
+                        0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
+    }
 
     /* We checked that the offset is contained within 16 bits above. */
     if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
@@ -1194,9 +1209,10 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
         add_off -= cmp_off & 0xff00;
         cmp_off &= 0xff;
     }
-
-    tcg_out_dat_imm(s, COND_AL, ARITH_AND,
-                    TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
+    if (!use_armv7_instructions) {
+        tcg_out_dat_imm(s, COND_AL, ARITH_AND,
+                        TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
+    }
     tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
                     TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
 
@@ -1212,24 +1228,40 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
         }
     }
 
+    /* Load the tlb addend. */
+    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);
+
     /* Check alignment. We don't support inline unaligned acceses,
        but we can easily support overalignment checks. */
     if (a_bits < s_bits) {
         a_bits = s_bits;
     }
-    if (a_bits) {
-        tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, (1 << a_bits) - 1);
-    }
 
-    /* Load the tlb addend. */
-    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);
+    if (use_armv7_instructions) {
+        tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1));
+        int rot = encode_imm(mask);
 
-    tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
-                    TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
+        if (rot >= 0) {
+            tcg_out_dat_imm(s, COND_AL, ARITH_BIC, TCG_REG_TMP, addrlo,
+                            rotl(mask, rot) | (rot << 7));
+        } else {
+            tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask);
+            tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
+                            addrlo, TCG_REG_TMP, 0);
+        }
+        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R0, TCG_REG_TMP, 0);
+    } else {
+        if (a_bits) {
+            tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
+                            (1 << a_bits) - 1);
+        }
+        tcg_out_dat_reg(s, (a_bits ?
COND_EQ : COND_AL), ARITH_CMP,
+                        0, TCG_REG_R0, TCG_REG_TMP,
+                        SHIFT_IMM_LSL(TARGET_PAGE_BITS));
+    }
 
     if (TARGET_LONG_BITS == 64) {
-        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
-                        TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
+        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R1, addrhi, 0);
     }
 
     return TCG_REG_R2;
--
cgit v1.2.3