author     Peter Maydell <peter.maydell@linaro.org>    2019-06-10 16:09:19 +0100
committer  Peter Maydell <peter.maydell@linaro.org>    2019-06-10 16:09:19 +0100
commit     a578cdfbdd8f9beff5ced52b7826ddb1669abbbf (patch)
tree       90697278e6aefd0b91858c403ddb5670f6bdf053 /tcg/arm/tcg-target.inc.c
parent     19735c837ae2056b4651720290eda59498eca65a (diff)
parent     43b3952dea0f763ceeaa2f119c473b5cc6d29c90 (diff)
Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20190610' into staging
Move softmmu tlb into CPUNegativeOffsetState
# gpg: Signature made Mon 10 Jun 2019 15:07:55 BST
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F
* remotes/rth/tags/pull-tcg-20190610: (39 commits)
tcg/arm: Remove mostly unreachable tlb special case
tcg/arm: Use LDRD to load tlb mask+table
tcg/aarch64: Use LDP to load tlb mask+table
cpu: Remove CPU_COMMON
cpu: Move the softmmu tlb to CPUNegativeOffsetState
cpu: Move icount_decr to CPUNegativeOffsetState
cpu: Introduce CPUNegativeOffsetState
cpu: Introduce cpu_set_cpustate_pointers
cpu: Move ENV_OFFSET to exec/gen-icount.h
target/xtensa: Use env_cpu, env_archcpu
target/unicore32: Use env_cpu, env_archcpu
target/tricore: Use env_cpu
target/tilegx: Use env_cpu
target/sparc: Use env_cpu, env_archcpu
target/sh4: Use env_cpu, env_archcpu
target/s390x: Use env_cpu, env_archcpu
target/riscv: Use env_cpu, env_archcpu
target/ppc: Use env_cpu, env_archcpu
target/openrisc: Use env_cpu, env_archcpu
target/nios2: Use env_cpu, env_archcpu
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
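
For readers skimming the series, the idea behind CPUNegativeOffsetState is worth spelling out: the data the TCG fast path touches on every guest memory access (the softmmu TLB fast-path descriptors, and icount_decr) is placed immediately before the architectural env inside ArchCPU, so it sits at small negative offsets from the env pointer that TCG pins in a register (TCG_AREG0). Keeping each mmu index's mask/table pair adjacent is what lets a single pair load (LDRD on arm, LDP on aarch64) fetch both. Below is a minimal sketch of that layout; the field names, the nesting, and NB_MMU_MODES = 4 are simplifications invented for the example, not the real definitions from the QEMU tree:

#include <stddef.h>
#include <stdint.h>

#define NB_MMU_MODES 4              /* assumption for this sketch */

/* One TLB fast-path descriptor per mmu_idx.  The mask/table pair is
 * kept adjacent so one 64-bit pair load can fetch both fields. */
typedef struct CPUTLBDescFast {
    uintptr_t mask;                 /* (n_entries - 1) << CPU_TLB_ENTRY_BITS */
    void *table;                    /* -> array of CPUTLBEntry */
} CPUTLBDescFast;

/* Laid out immediately *before* the architectural env in ArchCPU, so
 * every field is reachable at a small negative offset from the env
 * pointer held in TCG_AREG0.  (Simplified: the real struct nests the
 * descriptors inside a CPUTLB and uses an IcountDecr union.) */
typedef struct CPUNegativeOffsetState {
    CPUTLBDescFast tlb_f[NB_MMU_MODES];
    int32_t icount_decr;
} CPUNegativeOffsetState;

typedef struct ArchCPU {
    /* ... common CPUState fields ... */
    CPUNegativeOffsetState neg;     /* must directly precede env */
    char env[1];                    /* stand-in for CPUArchState */
} ArchCPU;

/* The negative offset the backend build-bugs check: for mmu_idx 0 it
 * must fit LDRD's 9-bit sign-magnitude immediate, i.e. lie in [-256, 0]. */
#define TLB_MASK_TABLE_OFS(idx) \
    ((int)(offsetof(ArchCPU, neg.tlb_f[idx]) - offsetof(ArchCPU, env)))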
Diffstat (limited to 'tcg/arm/tcg-target.inc.c')
-rw-r--r-- | tcg/arm/tcg-target.inc.c | 121
1 file changed, 54 insertions(+), 67 deletions(-)
diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 7316504c9d..ece88dc2eb 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -267,6 +267,7 @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
 #endif
         break;
@@ -1220,13 +1221,13 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
 
 #define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
 
-/* We expect tlb_mask to be before tlb_table.  */
-QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
-                  offsetof(CPUArchState, tlb_mask));
+/* We expect to use a 9-bit sign-magnitude negative offset from ENV.  */
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
 
-/* We expect to use a 20-bit unsigned offset from ENV.  */
-QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1])
-                  > 0xfffff);
+/* These offsets are built into the LDRD below.  */
+QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
+QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
 
 /* Load and compare a TLB entry, leaving the flags set.  Returns the register
    containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */
@@ -1236,105 +1237,91 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
 {
     int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
                    : offsetof(CPUTLBEntry, addr_write));
-    int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]);
-    int table_off = offsetof(CPUArchState, tlb_table[mem_index]);
-    TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
+    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
+    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
+    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
     unsigned s_bits = opc & MO_SIZE;
     unsigned a_bits = get_alignment_bits(opc);
 
-    if (table_off > 0xfff) {
-        int mask_hi = mask_off & ~0xfff;
-        int table_hi = table_off & ~0xfff;
-        int rot;
-
-        table_base = TCG_REG_R2;
-        if (mask_hi == table_hi) {
-            mask_base = table_base;
-        } else if (mask_hi) {
-            mask_base = TCG_REG_TMP;
-            rot = encode_imm(mask_hi);
-            assert(rot >= 0);
-            tcg_out_dat_imm(s, COND_AL, ARITH_ADD, mask_base, TCG_AREG0,
-                            rotl(mask_hi, rot) | (rot << 7));
-        }
-        rot = encode_imm(table_hi);
-        assert(rot >= 0);
-        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, table_base, TCG_AREG0,
-                        rotl(table_hi, rot) | (rot << 7));
-
-        mask_off -= mask_hi;
-        table_off -= table_hi;
+    /*
+     * We don't support inline unaligned accesses, but we can easily
+     * support overalignment checks.
+     */
+    if (a_bits < s_bits) {
+        a_bits = s_bits;
     }
 
-    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
-    tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP, mask_base, mask_off);
-    tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R2, table_base, table_off);
+    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}.  */
+    if (use_armv6_instructions) {
+        tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
+    } else {
+        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R0, TCG_AREG0, mask_off);
+        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R1, TCG_AREG0, table_off);
+    }
 
-    /* Extract the tlb index from the address into TMP.  */
-    tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, addrlo,
+    /* Extract the tlb index from the address into R0.  */
+    tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
                     SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
 
     /*
-     * Add the tlb_table pointer, creating the CPUTLBEntry address in R2.
-     * Load the tlb comparator into R0/R1 and the fast path addend into R2.
+     * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
+     * Load the tlb comparator into R2/R3 and the fast path addend into R1.
      */
     if (cmp_off == 0) {
-        if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
-            tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R2, TCG_REG_TMP);
+        if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+            tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
         } else {
-            tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R2, TCG_REG_TMP);
+            tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
         }
     } else {
         tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
-                        TCG_REG_R2, TCG_REG_R2, TCG_REG_TMP, 0);
+                        TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
         if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
-            tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
+            tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
         } else {
-            tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
-        }
+            tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
+        }
     }
     if (!use_armv6_instructions && TARGET_LONG_BITS == 64) {
-        tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
+        tcg_out_ld32_12(s, COND_AL, TCG_REG_R3, TCG_REG_R1, cmp_off + 4);
     }
 
     /* Load the tlb addend.  */
-    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2,
+    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
                     offsetof(CPUTLBEntry, addend));
 
-    /* Check alignment.  We don't support inline unaligned accesses,
-       but we can easily support overalignment checks.  */
-    if (a_bits < s_bits) {
-        a_bits = s_bits;
-    }
-
-    if (use_armv7_instructions) {
+    /*
+     * Check alignment, check comparators.
+     * Do this in no more than 3 insns.  Use MOVW for v7, if possible,
+     * to reduce the number of sequential conditional instructions.
+     * Almost all guests have at least 4k pages, which means that we need
+     * to clear at least 9 bits even for an 8-byte memory, which means it
+     * isn't worth checking for an immediate operand for BIC.
+     */
+    if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
         tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1));
-        int rot = encode_imm(mask);
 
-        if (rot >= 0) {
-            tcg_out_dat_imm(s, COND_AL, ARITH_BIC, TCG_REG_TMP, addrlo,
-                            rotl(mask, rot) | (rot << 7));
-        } else {
-            tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask);
-            tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
-                            addrlo, TCG_REG_TMP, 0);
-        }
-        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R0, TCG_REG_TMP, 0);
+        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask);
+        tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
+                        addrlo, TCG_REG_TMP, 0);
+        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
     } else {
         if (a_bits) {
            tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
                            (1 << a_bits) - 1);
        }
+        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, addrlo,
+                        SHIFT_IMM_LSR(TARGET_PAGE_BITS));
        tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP,
-                        0, TCG_REG_R0, TCG_REG_TMP,
+                        0, TCG_REG_R2, TCG_REG_TMP,
                        SHIFT_IMM_LSL(TARGET_PAGE_BITS));
    }
 
    if (TARGET_LONG_BITS == 64) {
-        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R1, addrhi, 0);
+        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
    }
 
-    return TCG_REG_R2;
+    return TCG_REG_R1;
 }
 
 /* Record the context of a call to the out of line helper code for the slow
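
As a reading aid for the new fast path above, here is the computation the emitted ARM code performs, expressed as plain C. This is an illustrative sketch only: the constants, the target_ulong width, and the tlb_hit_fast_path helper are assumptions invented for the example, and the flag bits QEMU stores in the comparator are ignored.

#include <stddef.h>
#include <stdint.h>

#define TARGET_PAGE_BITS   12   /* assumed 4 KiB guest pages */
#define CPU_TLB_ENTRY_BITS 5    /* assumed log2(sizeof(CPUTLBEntry)) on a 32-bit host */

typedef uint32_t target_ulong;  /* 32-bit guest assumed */

typedef struct CPUTLBEntry {
    target_ulong addr_read;     /* comparator: page address of the cached translation */
    target_ulong addr_write;
    target_ulong addr_code;
    uintptr_t addend;           /* host_addr - guest_addr for this page */
} CPUTLBEntry;

/* Mirrors the emitted sequence: R0 = index, R1 = entry then addend,
 * R2 = comparator, TMP = masked address.  Returns the host address on
 * a TLB hit, or NULL where the real code branches to the slow path. */
static void *tlb_hit_fast_path(uintptr_t mask, void *table,
                               target_ulong addr, unsigned a_bits)
{
    /* AND R0, R0, addrlo LSR #(PAGE_BITS - ENTRY_BITS): byte offset of
     * the entry; mask is pre-scaled by the entry size. */
    uintptr_t off = (addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)) & mask;

    /* ADD R1, R1, R0: address of the CPUTLBEntry; the comparator and
     * the addend are then loaded from it. */
    CPUTLBEntry *ent = (CPUTLBEntry *)((uintptr_t)table + off);

    /* BIC TMP, addrlo, #mask: keep the page number and any low
     * alignment bits, so a misaligned address can never match. */
    target_ulong page_mask = ~(target_ulong)((1u << TARGET_PAGE_BITS) - 1);
    target_ulong tmp = addr & (page_mask | ((1u << a_bits) - 1));

    /* CMP R2, TMP: hit only if the page matches and alignment bits are 0. */
    if (ent->addr_read != tmp) {
        return NULL;            /* slow path: call the load/store helper */
    }
    return (void *)(ent->addend + addr);
}

The two QEMU_BUILD_BUG_ON checks on CPUTLBDescFast in the hunk above exist precisely because the armv6 LDRD loads mask and table as a single 64-bit pair into r0/r1, which is only correct if mask sits at offset 0 and table at offset 4.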