diff options
author | Richard Henderson <rth@twiddle.net> | 2014-04-25 13:24:23 -0400 |
---|---|---|
committer | Richard Henderson <rth@twiddle.net> | 2014-05-12 10:06:58 -0700 |
commit | 5588ff29210666234bb046034595fd0d53be90ef (patch) | |
tree | e4571df7901e3866485acd64b2d36cffae200fb1 /tcg/ia64 | |
parent | 8c081b18025407fd5fa7c0d25a69398594ea5b2c (diff) |
tcg-ia64: Define TCG_TARGET_INSN_UNIT_SIZE
Using a 16-byte aligned structure achieves the best results, both for code
cleanliness and for compiled code size. However, this means that we can no
longer use the trick of encoding the slot number into the low 2 bits of the
code pointer. Thankfully, we only ever relocate branches in slot 2, so make
that explicit in the names of the relocation functions, and drop the code
for the other slots.
Signed-off-by: Richard Henderson <rth@twiddle.net>
Diffstat (limited to 'tcg/ia64')
-rw-r--r-- | tcg/ia64/tcg-target.c | 217 | ||||
-rw-r--r-- | tcg/ia64/tcg-target.h | 6 |
2 files changed, 78 insertions, 145 deletions
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 1f523d6466..90dd9cd36a 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -692,112 +692,32 @@ static inline uint64_t tcg_opc_x4(int qp, uint64_t opc, int b1, uint64_t imm) /* - * Relocations + * Relocations - Note that we never encode branches elsewhere than slot 2. */ -static inline void reloc_pcrel21b(void *pc, intptr_t target) +static void reloc_pcrel21b_slot2(tcg_insn_unit *pc, tcg_insn_unit *target) { - uint64_t imm; - int64_t disp; - int slot; - - slot = (intptr_t)pc & 3; - pc = (void *)((intptr_t)pc & ~3); - - disp = target - (intptr_t)pc; - imm = (uint64_t) disp >> 4; - - switch(slot) { - case 0: - *(uint64_t *)(pc + 0) = (*(uint64_t *)(pc + 8) & 0xfffffdc00003ffffull) - | ((imm & 0x100000) << 21) /* s */ - | ((imm & 0x0fffff) << 18); /* imm20b */ - break; - case 1: - *(uint64_t *)(pc + 8) = (*(uint64_t *)(pc + 8) & 0xfffffffffffb8000ull) - | ((imm & 0x100000) >> 2) /* s */ - | ((imm & 0x0fffe0) >> 5); /* imm20b */ - *(uint64_t *)(pc + 0) = (*(uint64_t *)(pc + 0) & 0x07ffffffffffffffull) - | ((imm & 0x00001f) << 59); /* imm20b */ - break; - case 2: - *(uint64_t *)(pc + 8) = (*(uint64_t *)(pc + 8) & 0xf700000fffffffffull) - | ((imm & 0x100000) << 39) /* s */ - | ((imm & 0x0fffff) << 36); /* imm20b */ - break; - } -} - -static inline uint64_t get_reloc_pcrel21b (void *pc) -{ - int64_t low, high; - int slot; - - slot = (tcg_target_long) pc & 3; - pc = (void *)((tcg_target_long) pc & ~3); - - low = (*(uint64_t *)(pc + 0)); - high = (*(uint64_t *)(pc + 8)); + uint64_t imm = target - pc; - switch(slot) { - case 0: - return ((low >> 21) & 0x100000) + /* s */ - ((low >> 18) & 0x0fffff); /* imm20b */ - case 1: - return ((high << 2) & 0x100000) + /* s */ - ((high << 5) & 0x0fffe0) + /* imm20b */ - ((low >> 59) & 0x00001f); /* imm20b */ - case 2: - return ((high >> 39) & 0x100000) + /* s */ - ((high >> 36) & 0x0fffff); /* imm20b */ - default: - tcg_abort(); - } + pc->hi = (pc->hi & 
0xf700000fffffffffull) + | ((imm & 0x100000) << 39) /* s */ + | ((imm & 0x0fffff) << 36); /* imm20b */ } -static inline void reloc_pcrel60b(void *pc, intptr_t target) +static uint64_t get_reloc_pcrel21b_slot2(tcg_insn_unit *pc) { - int64_t disp; - uint64_t imm; + int64_t high = pc->hi; - disp = target - (intptr_t)pc; - imm = (uint64_t) disp >> 4; - - *(uint64_t *)(pc + 8) = (*(uint64_t *)(pc + 8) & 0xf700000fff800000ull) - | (imm & 0x0800000000000000ull) /* s */ - | ((imm & 0x07fffff000000000ull) >> 36) /* imm39 */ - | ((imm & 0x00000000000fffffull) << 36); /* imm20b */ - *(uint64_t *)(pc + 0) = (*(uint64_t *)(pc + 0) & 0x00003fffffffffffull) - | ((imm & 0x0000000ffff00000ull) << 28); /* imm39 */ + return ((high >> 39) & 0x100000) + /* s */ + ((high >> 36) & 0x0fffff); /* imm20b */ } -static inline uint64_t get_reloc_pcrel60b (void *pc) -{ - int64_t low, high; - - low = (*(uint64_t *)(pc + 0)); - high = (*(uint64_t *)(pc + 8)); - - return ((high) & 0x0800000000000000ull) + /* s */ - ((high >> 36) & 0x00000000000fffffull) + /* imm20b */ - ((high << 36) & 0x07fffff000000000ull) + /* imm39 */ - ((low >> 28) & 0x0000000ffff00000ull); /* imm39 */ -} - - -static void patch_reloc(uint8_t *code_ptr, int type, +static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { - value += addend; - switch (type) { - case R_IA64_PCREL21B: - reloc_pcrel21b(code_ptr, value); - break; - case R_IA64_PCREL60B: - reloc_pcrel60b(code_ptr, value); - default: - tcg_abort(); - } + assert(addend == 0); + assert(type == R_IA64_PCREL21B); + reloc_pcrel21b_slot2(code_ptr, (tcg_insn_unit *)value); } /* @@ -861,7 +781,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, * Code generation */ -static uint8_t *tb_ret_addr; +static tcg_insn_unit *tb_ret_addr; static inline void tcg_out_bundle(TCGContext *s, int template, uint64_t slot0, uint64_t slot1, @@ -872,9 +792,10 @@ static inline void tcg_out_bundle(TCGContext *s, int template, slot1 
&= 0x1ffffffffffull; /* 41 bits */ slot2 &= 0x1ffffffffffull; /* 41 bits */ - *(uint64_t *)(s->code_ptr + 0) = (slot1 << 46) | (slot0 << 5) | template; - *(uint64_t *)(s->code_ptr + 8) = (slot2 << 23) | (slot1 >> 18); - s->code_ptr += 16; + *s->code_ptr++ = (tcg_insn_unit){ + (slot1 << 46) | (slot0 << 5) | template, + (slot2 << 23) | (slot1 >> 18) + }; } static inline uint64_t tcg_opc_mov_a(int qp, TCGReg dst, TCGReg src) @@ -909,33 +830,34 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, static void tcg_out_br(TCGContext *s, int label_index) { TCGLabel *l = &s->labels[label_index]; + uint64_t imm; /* We pay attention here to not modify the branch target by reading the existing value and using it again. This ensure that caches and memory are kept coherent during retranslation. */ - tcg_out_bundle(s, mmB, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_b1 (TCG_REG_P0, OPC_BR_SPTK_MANY_B1, - get_reloc_pcrel21b(s->code_ptr + 2))); - if (l->has_value) { - reloc_pcrel21b((s->code_ptr - 16) + 2, l->u.value); + imm = l->u.value_ptr - s->code_ptr; } else { - tcg_out_reloc(s, (s->code_ptr - 16) + 2, - R_IA64_PCREL21B, label_index, 0); + imm = get_reloc_pcrel21b_slot2(s->code_ptr); + tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, label_index, 0); } + + tcg_out_bundle(s, mmB, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_b1(TCG_REG_P0, OPC_BR_SPTK_MANY_B1, imm)); } -static inline void tcg_out_calli(TCGContext *s, uintptr_t addr) +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *desc) { + uintptr_t func = desc->lo, gp = desc->hi, disp; + /* Look through the function descriptor. 
*/ - uintptr_t disp, *desc = (uintptr_t *)addr; tcg_out_bundle(s, mlx, INSN_NOP_M, - tcg_opc_l2 (desc[1]), - tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, desc[1])); - disp = (desc[0] - (uintptr_t)s->code_ptr) >> 4; + tcg_opc_l2 (gp), + tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, gp)); + disp = (tcg_insn_unit *)func - s->code_ptr; tcg_out_bundle(s, mLX, INSN_NOP_M, tcg_opc_l4 (disp), @@ -959,7 +881,6 @@ static inline void tcg_out_callr(TCGContext *s, TCGReg addr) static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg) { - int64_t disp; uint64_t imm, opc1; /* At least arg == 0 is a common operation. */ @@ -970,8 +891,7 @@ static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg) opc1 = INSN_NOP_M; } - disp = tb_ret_addr - s->code_ptr; - imm = (uint64_t)disp >> 4; + imm = tb_ret_addr - s->code_ptr; tcg_out_bundle(s, mLX, opc1, @@ -1000,7 +920,7 @@ static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg) tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); } - s->tb_next_offset[arg] = s->code_ptr - s->code_buf; + s->tb_next_offset[arg] = tcg_current_code_size(s); } static inline void tcg_out_jmp(TCGContext *s, TCGArg addr) @@ -1521,19 +1441,22 @@ static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, TCGReg arg2, int label_index, int cmp4) { TCGLabel *l = &s->labels[label_index]; + uint64_t imm; - tcg_out_bundle(s, miB, - INSN_NOP_M, - tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4), - tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1, - get_reloc_pcrel21b(s->code_ptr + 2))); - + /* We pay attention here to not modify the branch target by reading + the existing value and using it again. This ensure that caches and + memory are kept coherent during retranslation. 
*/ if (l->has_value) { - reloc_pcrel21b((s->code_ptr - 16) + 2, l->u.value); + imm = l->u.value_ptr - s->code_ptr; } else { - tcg_out_reloc(s, (s->code_ptr - 16) + 2, - R_IA64_PCREL21B, label_index, 0); + imm = get_reloc_pcrel21b_slot2(s->code_ptr); + tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, label_index, 0); } + + tcg_out_bundle(s, miB, + INSN_NOP_M, + tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4), + tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1, imm)); } static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret, @@ -1646,7 +1569,7 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg, typedef struct TCGLabelQemuLdst { bool is_ld; TCGMemOp size; - uint8_t *label_ptr; /* label pointers to be updated */ + tcg_insn_unit *label_ptr; /* label pointers to be updated */ } TCGLabelQemuLdst; typedef struct TCGBackendData { @@ -1660,7 +1583,7 @@ static inline void tcg_out_tb_init(TCGContext *s) } static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, - uint8_t *label_ptr) + tcg_insn_unit *label_ptr) { TCGBackendData *be = s->be; TCGLabelQemuLdst *l = &be->ldst_labels[be->nb_ldst_labels++]; @@ -1683,43 +1606,44 @@ static void tcg_out_tb_finalize(TCGContext *s) helper_le_ldul_mmu, helper_le_ldq_mmu, }; - uintptr_t thunks[8] = { }; + tcg_insn_unit *thunks[8] = { }; TCGBackendData *be = s->be; size_t i, n = be->nb_ldst_labels; for (i = 0; i < n; i++) { TCGLabelQemuLdst *l = &be->ldst_labels[i]; long x = l->is_ld * 4 + l->size; - uintptr_t dest = thunks[x]; + tcg_insn_unit *dest = thunks[x]; /* The out-of-line thunks are all the same; load the return address from B0, load the GP, and branch to the code. Note that we are always post-call, so the register window has rolled, so we're using incomming parameter register numbers, not outgoing. 
*/ - if (dest == 0) { - uintptr_t disp, *desc = (uintptr_t *)helpers[x]; + if (dest == NULL) { + uintptr_t *desc = (uintptr_t *)helpers[x]; + uintptr_t func = desc[0], gp = desc[1], disp; - thunks[x] = dest = (uintptr_t)s->code_ptr; + thunks[x] = dest = s->code_ptr; tcg_out_bundle(s, mlx, INSN_NOP_M, - tcg_opc_l2 (desc[1]), + tcg_opc_l2 (gp), tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, - TCG_REG_R1, desc[1])); + TCG_REG_R1, gp)); tcg_out_bundle(s, mii, INSN_NOP_M, INSN_NOP_I, tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22, l->is_ld ? TCG_REG_R35 : TCG_REG_R36, TCG_REG_B0)); - disp = (desc[0] - (uintptr_t)s->code_ptr) >> 4; + disp = (tcg_insn_unit *)func - s->code_ptr; tcg_out_bundle(s, mLX, INSN_NOP_M, tcg_opc_l3 (disp), tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, disp)); } - reloc_pcrel21b(l->label_ptr, dest); + reloc_pcrel21b_slot2(l->label_ptr, dest); } } @@ -1731,7 +1655,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) int addr_reg, data_reg, mem_index; TCGMemOp opc, s_bits; uint64_t fin1, fin2; - uint8_t *label_ptr; + tcg_insn_unit *label_ptr; data_reg = args[0]; addr_reg = args[1]; @@ -1765,13 +1689,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, TCG_REG_R2, TCG_REG_R57), tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index)); - label_ptr = s->code_ptr + 2; + label_ptr = s->code_ptr; tcg_out_bundle(s, miB, tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], TCG_REG_R8, TCG_REG_R2), INSN_NOP_I, tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, - get_reloc_pcrel21b(label_ptr))); + get_reloc_pcrel21b_slot2(label_ptr))); add_qemu_ldst_label(s, 1, opc, label_ptr); @@ -1792,7 +1716,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) int mem_index; uint64_t pre1, pre2; TCGMemOp opc, s_bits; - uint8_t *label_ptr; + tcg_insn_unit *label_ptr; data_reg = args[0]; addr_reg = args[1]; @@ -1827,13 +1751,13 @@ static inline void tcg_out_qemu_st(TCGContext *s, const 
TCGArg *args) tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, TCG_REG_R2, TCG_REG_R57), tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index)); - label_ptr = s->code_ptr + 2; + label_ptr = s->code_ptr; tcg_out_bundle(s, miB, tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits], TCG_REG_R58, TCG_REG_R2), INSN_NOP_I, tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, - get_reloc_pcrel21b(label_ptr))); + get_reloc_pcrel21b_slot2(label_ptr))); add_qemu_ldst_label(s, 0, opc, label_ptr); } @@ -2087,7 +2011,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_call: if (likely(const_args[0])) { - tcg_out_calli(s, args[0]); + tcg_out_call(s, (tcg_insn_unit *)(intptr_t)args[0]); } else { tcg_out_callr(s, args[0]); } @@ -2442,8 +2366,11 @@ static void tcg_target_qemu_prologue(TCGContext *s) CPU_TEMP_BUF_NLONGS * sizeof(long)); /* First emit adhoc function descriptor */ - *(uint64_t *)(s->code_ptr) = (uint64_t)s->code_ptr + 16; /* entry point */ - s->code_ptr += 16; /* skip GP */ + *s->code_ptr = (tcg_insn_unit){ + (uint64_t)(s->code_ptr + 1), /* entry point */ + 0 /* skip gp */ + }; + s->code_ptr++; /* prologue */ tcg_out_bundle(s, miI, diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index d834beb323..3a59b50349 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -25,6 +25,12 @@ #ifndef TCG_TARGET_IA64 #define TCG_TARGET_IA64 1 +#define TCG_TARGET_INSN_UNIT_SIZE 16 +typedef struct { + uint64_t lo __attribute__((aligned(16))); + uint64_t hi; +} tcg_insn_unit; + /* We only map the first 64 registers */ #define TCG_TARGET_NB_REGS 64 typedef enum { |