aboutsummaryrefslogtreecommitdiff
path: root/tcg/arm/tcg-target.inc.c
diff options
context:
space:
mode:
Diffstat (limited to 'tcg/arm/tcg-target.inc.c')
-rw-r--r--tcg/arm/tcg-target.inc.c121
1 files changed, 54 insertions, 67 deletions
diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 7316504c9d..ece88dc2eb 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -267,6 +267,7 @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
#endif
break;
@@ -1220,13 +1221,13 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
-/* We expect tlb_mask to be before tlb_table. */
-QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
- offsetof(CPUArchState, tlb_mask));
+/* We expect to use a 9-bit sign-magnitude negative offset from ENV. */
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
-/* We expect to use a 20-bit unsigned offset from ENV. */
-QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1])
- > 0xfffff);
+/* These offsets are built into the LDRD below. */
+QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
+QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
/* Load and compare a TLB entry, leaving the flags set. Returns the register
containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
@@ -1236,105 +1237,91 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
{
int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write));
- int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]);
- int table_off = offsetof(CPUArchState, tlb_table[mem_index]);
- TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc);
- if (table_off > 0xfff) {
- int mask_hi = mask_off & ~0xfff;
- int table_hi = table_off & ~0xfff;
- int rot;
-
- table_base = TCG_REG_R2;
- if (mask_hi == table_hi) {
- mask_base = table_base;
- } else if (mask_hi) {
- mask_base = TCG_REG_TMP;
- rot = encode_imm(mask_hi);
- assert(rot >= 0);
- tcg_out_dat_imm(s, COND_AL, ARITH_ADD, mask_base, TCG_AREG0,
- rotl(mask_hi, rot) | (rot << 7));
- }
- rot = encode_imm(table_hi);
- assert(rot >= 0);
- tcg_out_dat_imm(s, COND_AL, ARITH_ADD, table_base, TCG_AREG0,
- rotl(table_hi, rot) | (rot << 7));
-
- mask_off -= mask_hi;
- table_off -= table_hi;
+ /*
+ * We don't support inline unaligned accesses, but we can easily
+ * support overalignment checks.
+ */
+ if (a_bits < s_bits) {
+ a_bits = s_bits;
}
- /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP, mask_base, mask_off);
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R2, table_base, table_off);
+ /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
+ if (use_armv6_instructions) {
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
+ } else {
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R0, TCG_AREG0, mask_off);
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R1, TCG_AREG0, table_off);
+ }
- /* Extract the tlb index from the address into TMP. */
- tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, addrlo,
+ /* Extract the tlb index from the address into R0. */
+ tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
/*
- * Add the tlb_table pointer, creating the CPUTLBEntry address in R2.
- * Load the tlb comparator into R0/R1 and the fast path addend into R2.
+ * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
+ * Load the tlb comparator into R2/R3 and the fast path addend into R1.
*/
if (cmp_off == 0) {
- if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
- tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R2, TCG_REG_TMP);
+ if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+ tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
} else {
- tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R2, TCG_REG_TMP);
+ tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
}
} else {
tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
- TCG_REG_R2, TCG_REG_R2, TCG_REG_TMP, 0);
+ TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
} else {
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
- }
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
+ }
}
if (!use_armv6_instructions && TARGET_LONG_BITS == 64) {
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R3, TCG_REG_R1, cmp_off + 4);
}
/* Load the tlb addend. */
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2,
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
offsetof(CPUTLBEntry, addend));
- /* Check alignment. We don't support inline unaligned acceses,
- but we can easily support overalignment checks. */
- if (a_bits < s_bits) {
- a_bits = s_bits;
- }
-
- if (use_armv7_instructions) {
+ /*
+ * Check alignment, check comparators.
+ * Do this in no more than 3 insns. Use MOVW for v7, if possible,
+ * to reduce the number of sequential conditional instructions.
+ * Almost all guests have at least 4k pages, which means that we need
+ * to clear at least 9 bits even for an 8-byte memory, which means it
+ * isn't worth checking for an immediate operand for BIC.
+ */
+ if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1));
- int rot = encode_imm(mask);
- if (rot >= 0) {
- tcg_out_dat_imm(s, COND_AL, ARITH_BIC, TCG_REG_TMP, addrlo,
- rotl(mask, rot) | (rot << 7));
- } else {
- tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask);
- tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
- addrlo, TCG_REG_TMP, 0);
- }
- tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R0, TCG_REG_TMP, 0);
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask);
+ tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
+ addrlo, TCG_REG_TMP, 0);
+ tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
} else {
if (a_bits) {
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
(1 << a_bits) - 1);
}
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, addrlo,
+ SHIFT_IMM_LSR(TARGET_PAGE_BITS));
tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP,
- 0, TCG_REG_R0, TCG_REG_TMP,
+ 0, TCG_REG_R2, TCG_REG_TMP,
SHIFT_IMM_LSL(TARGET_PAGE_BITS));
}
if (TARGET_LONG_BITS == 64) {
- tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R1, addrhi, 0);
+ tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
}
- return TCG_REG_R2;
+ return TCG_REG_R1;
}
/* Record the context of a call to the out of line helper code for the slow