diff options
-rwxr-xr-x | configure | 4 | ||||
-rw-r--r-- | include/exec/exec-all.h | 9 | ||||
-rw-r--r-- | tcg/s390/tcg-target.c | 752 | ||||
-rw-r--r-- | tcg/s390/tcg-target.h | 2 |
4 files changed, 455 insertions, 312 deletions
@@ -1137,11 +1137,11 @@ case "$cpu" in CPU_CFLAGS="-m64 -mcpu=ultrasparc" ;; s390) - CPU_CFLAGS="-m31 -march=z990" + CPU_CFLAGS="-m31" LDFLAGS="-m31 $LDFLAGS" ;; s390x) - CPU_CFLAGS="-m64 -march=z990" + CPU_CFLAGS="-m64" LDFLAGS="-m64 $LDFLAGS" ;; i386) diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 444b4d9a1d..8bc2eb663e 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -131,6 +131,7 @@ static inline void tlb_flush(CPUState *cpu, int flush_global) #if defined(__arm__) || defined(_ARCH_PPC) \ || defined(__x86_64__) || defined(__i386__) \ || defined(__sparc__) || defined(__aarch64__) \ + || defined(__s390x__) \ || defined(CONFIG_TCG_INTERPRETER) #define USE_DIRECT_JUMP #endif @@ -232,6 +233,14 @@ static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) stl_le_p((void*)jmp_addr, addr - (jmp_addr + 4)); /* no need to flush icache explicitly */ } +#elif defined(__s390x__) +static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) +{ + /* patch the branch destination */ + intptr_t disp = addr - (jmp_addr - 2); + stl_be_p((void*)jmp_addr, disp / 2); + /* no need to flush icache explicitly */ +} #elif defined(__aarch64__) void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr); #define tb_set_jmp_target1 aarch64_tb_set_jmp_target diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index ebdd0743cf..07164e544d 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -24,7 +24,7 @@ * THE SOFTWARE. */ -#include "tcg-be-null.h" +#include "tcg-be-ldst.h" /* We only support generating code for 64-bit mode. */ #if TCG_TARGET_REG_BITS != 64 @@ -42,6 +42,7 @@ #define TCG_CT_CONST_ORI 0x200 #define TCG_CT_CONST_XORI 0x400 #define TCG_CT_CONST_CMPI 0x800 +#define TCG_CT_CONST_ADLI 0x1000 /* Several places within the instruction set 0 means "no register" rather than TCG_REG_R0. 
*/ @@ -227,16 +228,6 @@ typedef enum S390Opcode { RX_STH = 0x40, } S390Opcode; -#define LD_SIGNED 0x04 -#define LD_UINT8 0x00 -#define LD_INT8 (LD_UINT8 | LD_SIGNED) -#define LD_UINT16 0x01 -#define LD_INT16 (LD_UINT16 | LD_SIGNED) -#define LD_UINT32 0x02 -#define LD_INT32 (LD_UINT32 | LD_SIGNED) -#define LD_UINT64 0x03 -#define LD_INT64 (LD_UINT64 | LD_SIGNED) - #ifndef NDEBUG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", @@ -248,6 +239,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { call-saved registers. Likewise prefer the call-clobbered registers in reverse order to maximize the chance of avoiding the arguments. */ static const int tcg_target_reg_alloc_order[] = { + /* Call saved registers. */ TCG_REG_R13, TCG_REG_R12, TCG_REG_R11, @@ -256,9 +248,11 @@ static const int tcg_target_reg_alloc_order[] = { TCG_REG_R8, TCG_REG_R7, TCG_REG_R6, + /* Call clobbered registers. */ TCG_REG_R14, TCG_REG_R0, TCG_REG_R1, + /* Argument registers, in reverse order of allocation. 
*/ TCG_REG_R5, TCG_REG_R4, TCG_REG_R3, @@ -318,22 +312,29 @@ static const uint8_t tcg_cond_to_ltr_cond[] = { }; #ifdef CONFIG_SOFTMMU -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ -static void * const qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LESL] = helper_le_ldsl_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BESL] = helper_be_ldsl_mmu, + [MO_BEQ] = helper_be_ldq_mmu, }; -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ -static void * const qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, }; #endif @@ -403,6 +404,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) tcg_regset_clear(ct->u.regs); tcg_regset_set_reg(ct->u.regs, TCG_REG_R3); break; + case 'A': + ct->ct |= TCG_CT_CONST_ADLI; + break; case 'K': ct->ct |= TCG_CT_CONST_MULI; break; @@ -507,6 +511,20 @@ static int tcg_match_cmpi(TCGType type, tcg_target_long val) } } +/* Immediates to be used with add2/sub2. */ + +static int tcg_match_add2i(TCGType type, tcg_target_long val) +{ + if (facilities & FACILITY_EXT_IMM) { + if (type == TCG_TYPE_I32) { + return 1; + } else if (val >= -0xffffffffll && val <= 0xffffffffll) { + return 1; + } + } + return 0; +} + /* Test if a constant matches the constraint. 
*/ static int tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct) @@ -532,6 +550,8 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, } else { return val == (int16_t)val; } + } else if (ct & TCG_CT_CONST_ADLI) { + return tcg_match_add2i(type, val); } else if (ct & TCG_CT_CONST_ORI) { return tcg_match_ori(type, val); } else if (ct & TCG_CT_CONST_XORI) { @@ -933,6 +953,20 @@ static inline bool risbg_mask(uint64_t c) return c == -lsb; } +static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val) +{ + int msb, lsb; + if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) { + /* Achieve wraparound by swapping msb and lsb. */ + msb = 64 - ctz64(~val); + lsb = clz64(~val) - 1; + } else { + msb = clz64(val); + lsb = 63 - ctz64(val); + } + tcg_out_risbg(s, out, in, msb, lsb, 0, 1); +} + static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) { static const S390Opcode ni_insns[4] = { @@ -980,16 +1014,7 @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) } } if ((facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) { - int msb, lsb; - if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) { - /* Achieve wraparound by swapping msb and lsb. 
*/ - msb = 63 - ctz64(~val); - lsb = clz64(~val) + 1; - } else { - msb = clz64(val); - lsb = 63 - ctz64(val); - } - tcg_out_risbg(s, dest, dest, msb, lsb, 0, 1); + tgen_andi_risbg(s, dest, dest, val); return; } @@ -1110,15 +1135,100 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, return tcg_cond_to_s390_cond[c]; } -static void tgen_setcond(TCGContext *s, TCGType type, TCGCond c, +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, TCGReg dest, TCGReg c1, TCGArg c2, int c2const) { - int cc = tgen_cmp(s, type, c, c1, c2, c2const); + int cc; + + switch (cond) { + case TCG_COND_GTU: + case TCG_COND_GT: + do_greater: + /* The result of a compare has CC=2 for GT and CC=3 unused. + ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit. */ + tgen_cmp(s, type, cond, c1, c2, c2const); + tcg_out_movi(s, type, dest, 0); + tcg_out_insn(s, RRE, ALCGR, dest, dest); + return; + + case TCG_COND_GEU: + do_geu: + /* We need "real" carry semantics, so use SUBTRACT LOGICAL + instead of COMPARE LOGICAL. This needs an extra move. */ + tcg_out_mov(s, type, TCG_TMP0, c1); + if (c2const) { + tcg_out_movi(s, TCG_TYPE_I64, dest, 0); + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, SLFI, TCG_TMP0, c2); + } else { + tcg_out_insn(s, RIL, SLGFI, TCG_TMP0, c2); + } + } else { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, SLR, TCG_TMP0, c2); + } else { + tcg_out_insn(s, RRE, SLGR, TCG_TMP0, c2); + } + tcg_out_movi(s, TCG_TYPE_I64, dest, 0); + } + tcg_out_insn(s, RRE, ALCGR, dest, dest); + return; + + case TCG_COND_LEU: + case TCG_COND_LTU: + case TCG_COND_LT: + /* Swap operands so that we can use GEU/GTU/GT. */ + if (c2const) { + tcg_out_movi(s, type, TCG_TMP0, c2); + c2 = c1; + c2const = 0; + c1 = TCG_TMP0; + } else { + TCGReg t = c1; + c1 = c2; + c2 = t; + } + if (cond == TCG_COND_LEU) { + goto do_geu; + } + cond = tcg_swap_cond(cond); + goto do_greater; + + case TCG_COND_NE: + /* X != 0 is X > 0. 
*/ + if (c2const && c2 == 0) { + cond = TCG_COND_GTU; + goto do_greater; + } + break; + + case TCG_COND_EQ: + /* X == 0 is X <= 0 is 0 >= X. */ + if (c2const && c2 == 0) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0); + c2 = c1; + c2const = 0; + c1 = TCG_TMP0; + goto do_geu; + } + break; + + default: + break; + } - /* Emit: r1 = 1; if (cc) goto over; r1 = 0; over: */ - tcg_out_movi(s, type, dest, 1); - tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1); - tcg_out_movi(s, type, dest, 0); + cc = tgen_cmp(s, type, cond, c1, c2, c2const); + if (facilities & FACILITY_LOAD_ON_COND) { + /* Emit: d = 0, t = 1, d = (cc ? t : d). */ + tcg_out_movi(s, TCG_TYPE_I64, dest, 0); + tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1); + tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc); + } else { + /* Emit: d = 1; if (cc) goto over; d = 0; over: */ + tcg_out_movi(s, type, dest, 1); + tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1); + tcg_out_movi(s, type, dest, 0); + } } static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest, @@ -1280,230 +1390,231 @@ static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) } } -static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data, +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, TCGReg data, TCGReg base, TCGReg index, int disp) { -#ifdef TARGET_WORDS_BIGENDIAN - const int bswap = 0; -#else - const int bswap = 1; -#endif switch (opc) { - case LD_UINT8: + case MO_UB: tcg_out_insn(s, RXY, LLGC, data, base, index, disp); break; - case LD_INT8: + case MO_SB: tcg_out_insn(s, RXY, LGB, data, base, index, disp); break; - case LD_UINT16: - if (bswap) { - /* swapped unsigned halfword load with upper bits zeroed */ - tcg_out_insn(s, RXY, LRVH, data, base, index, disp); - tgen_ext16u(s, TCG_TYPE_I64, data, data); - } else { - tcg_out_insn(s, RXY, LLGH, data, base, index, disp); - } + + case MO_UW | MO_BSWAP: + /* swapped unsigned halfword load with upper bits zeroed */ + tcg_out_insn(s, RXY, LRVH, data, base, index, disp); 
+ tgen_ext16u(s, TCG_TYPE_I64, data, data); break; - case LD_INT16: - if (bswap) { - /* swapped sign-extended halfword load */ - tcg_out_insn(s, RXY, LRVH, data, base, index, disp); - tgen_ext16s(s, TCG_TYPE_I64, data, data); - } else { - tcg_out_insn(s, RXY, LGH, data, base, index, disp); - } + case MO_UW: + tcg_out_insn(s, RXY, LLGH, data, base, index, disp); break; - case LD_UINT32: - if (bswap) { - /* swapped unsigned int load with upper bits zeroed */ - tcg_out_insn(s, RXY, LRV, data, base, index, disp); - tgen_ext32u(s, data, data); - } else { - tcg_out_insn(s, RXY, LLGF, data, base, index, disp); - } + + case MO_SW | MO_BSWAP: + /* swapped sign-extended halfword load */ + tcg_out_insn(s, RXY, LRVH, data, base, index, disp); + tgen_ext16s(s, TCG_TYPE_I64, data, data); break; - case LD_INT32: - if (bswap) { - /* swapped sign-extended int load */ - tcg_out_insn(s, RXY, LRV, data, base, index, disp); - tgen_ext32s(s, data, data); - } else { - tcg_out_insn(s, RXY, LGF, data, base, index, disp); - } + case MO_SW: + tcg_out_insn(s, RXY, LGH, data, base, index, disp); break; - case LD_UINT64: - if (bswap) { - tcg_out_insn(s, RXY, LRVG, data, base, index, disp); - } else { - tcg_out_insn(s, RXY, LG, data, base, index, disp); - } + + case MO_UL | MO_BSWAP: + /* swapped unsigned int load with upper bits zeroed */ + tcg_out_insn(s, RXY, LRV, data, base, index, disp); + tgen_ext32u(s, data, data); + break; + case MO_UL: + tcg_out_insn(s, RXY, LLGF, data, base, index, disp); break; + + case MO_SL | MO_BSWAP: + /* swapped sign-extended int load */ + tcg_out_insn(s, RXY, LRV, data, base, index, disp); + tgen_ext32s(s, data, data); + break; + case MO_SL: + tcg_out_insn(s, RXY, LGF, data, base, index, disp); + break; + + case MO_Q | MO_BSWAP: + tcg_out_insn(s, RXY, LRVG, data, base, index, disp); + break; + case MO_Q: + tcg_out_insn(s, RXY, LG, data, base, index, disp); + break; + default: tcg_abort(); } } -static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg 
data, +static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, TCGReg data, TCGReg base, TCGReg index, int disp) { -#ifdef TARGET_WORDS_BIGENDIAN - const int bswap = 0; -#else - const int bswap = 1; -#endif switch (opc) { - case LD_UINT8: + case MO_UB: if (disp >= 0 && disp < 0x1000) { tcg_out_insn(s, RX, STC, data, base, index, disp); } else { tcg_out_insn(s, RXY, STCY, data, base, index, disp); } break; - case LD_UINT16: - if (bswap) { - tcg_out_insn(s, RXY, STRVH, data, base, index, disp); - } else if (disp >= 0 && disp < 0x1000) { + + case MO_UW | MO_BSWAP: + tcg_out_insn(s, RXY, STRVH, data, base, index, disp); + break; + case MO_UW: + if (disp >= 0 && disp < 0x1000) { tcg_out_insn(s, RX, STH, data, base, index, disp); } else { tcg_out_insn(s, RXY, STHY, data, base, index, disp); } break; - case LD_UINT32: - if (bswap) { - tcg_out_insn(s, RXY, STRV, data, base, index, disp); - } else if (disp >= 0 && disp < 0x1000) { + + case MO_UL | MO_BSWAP: + tcg_out_insn(s, RXY, STRV, data, base, index, disp); + break; + case MO_UL: + if (disp >= 0 && disp < 0x1000) { tcg_out_insn(s, RX, ST, data, base, index, disp); } else { tcg_out_insn(s, RXY, STY, data, base, index, disp); } break; - case LD_UINT64: - if (bswap) { - tcg_out_insn(s, RXY, STRVG, data, base, index, disp); - } else { - tcg_out_insn(s, RXY, STG, data, base, index, disp); - } + + case MO_Q | MO_BSWAP: + tcg_out_insn(s, RXY, STRVG, data, base, index, disp); + break; + case MO_Q: + tcg_out_insn(s, RXY, STG, data, base, index, disp); break; + default: tcg_abort(); } } #if defined(CONFIG_SOFTMMU) -static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg, - TCGReg addr_reg, int mem_index, int opc, - tcg_insn_unit **label2_ptr_p, int is_store) +/* We're expecting to use a 20-bit signed offset on the tlb memory ops. + Using the offset of the second entry in the last tlb table ensures + that we can index all of the elements of the first entry. 
*/ +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ffff); + +/* Load and compare a TLB entry, leaving the flags set. Loads the TLB + addend into R2. Returns a register with the sanitized guest address. */ +static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc, + int mem_index, bool is_ld) { - const TCGReg arg0 = tcg_target_call_iarg_regs[0]; - const TCGReg arg1 = tcg_target_call_iarg_regs[1]; - const TCGReg arg2 = tcg_target_call_iarg_regs[2]; - const TCGReg arg3 = tcg_target_call_iarg_regs[3]; - int s_bits = opc & 3; - tcg_insn_unit *label1_ptr; - tcg_target_long ofs; + TCGMemOp s_bits = opc & MO_SIZE; + uint64_t tlb_mask = TARGET_PAGE_MASK | ((1 << s_bits) - 1); + int ofs; - if (TARGET_LONG_BITS == 32) { - tgen_ext32u(s, arg1, addr_reg); + if (facilities & FACILITY_GEN_INST_EXT) { + tcg_out_risbg(s, TCG_REG_R2, addr_reg, + 64 - CPU_TLB_BITS - CPU_TLB_ENTRY_BITS, + 63 - CPU_TLB_ENTRY_BITS, + 64 + CPU_TLB_ENTRY_BITS - TARGET_PAGE_BITS, 1); + tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask); } else { - tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg); + tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_R3, addr_reg); + tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2, + (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); + tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); } - tcg_out_sh64(s, RSY_SRLG, arg2, addr_reg, TCG_REG_NONE, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - - tgen_andi(s, TCG_TYPE_I64, arg1, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - tgen_andi(s, TCG_TYPE_I64, arg2, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); - - if (is_store) { - ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); - } else { + if (is_ld) { ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read); + } else { + ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); } - assert(ofs < 0x80000); - if (TARGET_LONG_BITS == 32) { - 
tcg_out_mem(s, RX_C, RXY_CY, arg1, arg2, TCG_AREG0, ofs); + tcg_out_mem(s, RX_C, RXY_CY, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs); } else { - tcg_out_mem(s, 0, RXY_CG, arg1, arg2, TCG_AREG0, ofs); + tcg_out_mem(s, 0, RXY_CG, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs); } - if (TARGET_LONG_BITS == 32) { - tgen_ext32u(s, arg1, addr_reg); - } else { - tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg); - } - - label1_ptr = s->code_ptr; - - /* je label1 (offset will be patched in later) */ - tcg_out_insn(s, RI, BRC, S390_CC_EQ, 0); - - /* call load/store helper */ - if (is_store) { - /* Make sure to zero-extend the value to the full register - for the calling convention. */ - switch (opc) { - case LD_UINT8: - tgen_ext8u(s, TCG_TYPE_I64, arg2, data_reg); - break; - case LD_UINT16: - tgen_ext16u(s, TCG_TYPE_I64, arg2, data_reg); - break; - case LD_UINT32: - tgen_ext32u(s, arg2, data_reg); - break; - case LD_UINT64: - tcg_out_mov(s, TCG_TYPE_I64, arg2, data_reg); - break; - default: - tcg_abort(); - } - tcg_out_movi(s, TCG_TYPE_I32, arg3, mem_index); - tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0); - tcg_out_call(s, qemu_st_helpers[s_bits]); - } else { - tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index); - tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0); - tcg_out_call(s, qemu_ld_helpers[s_bits]); + ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + tcg_out_mem(s, 0, RXY_LG, TCG_REG_R2, TCG_REG_R2, TCG_AREG0, ofs); - /* sign extension */ - switch (opc) { - case LD_INT8: - tgen_ext8s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); - break; - case LD_INT16: - tgen_ext16s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); - break; - case LD_INT32: - tgen_ext32s(s, data_reg, TCG_REG_R2); - break; - default: - /* unsigned -> just copy */ - tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); - break; - } + if (TARGET_LONG_BITS == 32) { + tgen_ext32u(s, TCG_REG_R3, addr_reg); + return TCG_REG_R3; } + return addr_reg; +} - /* jump to label2 (end) */ - *label2_ptr_p = s->code_ptr; - - tcg_out_insn(s, 
RI, BRC, S390_CC_ALWAYS, 0); +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, + TCGReg data, TCGReg addr, int mem_index, + tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->opc = opc; + label->datalo_reg = data; + label->addrlo_reg = addr; + label->mem_index = mem_index; + label->raddr = raddr; + label->label_ptr[0] = label_ptr; +} - /* this is label1, patch branch */ - label1_ptr[1] = s->code_ptr - label1_ptr; +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGReg addr_reg = lb->addrlo_reg; + TCGReg data_reg = lb->datalo_reg; + TCGMemOp opc = lb->opc; - ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend); - assert(ofs < 0x80000); + patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2); - tcg_out_mem(s, 0, RXY_AG, arg1, arg2, TCG_AREG0, ofs); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); + } + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, lb->mem_index); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr); + tcg_out_call(s, qemu_ld_helpers[opc]); + tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); - return arg1; + tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); } -static void tcg_finish_qemu_ldst(TCGContext* s, tcg_insn_unit *label2_ptr) +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - /* patch branch */ - label2_ptr[1] = s->code_ptr - label2_ptr; + TCGReg addr_reg = lb->addrlo_reg; + TCGReg data_reg = lb->datalo_reg; + TCGMemOp opc = lb->opc; + + patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2); + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); + } + switch (opc & MO_SIZE) { + case MO_UB: + tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); + break; + case 
MO_UW: + tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); + break; + case MO_UL: + tgen_ext32u(s, TCG_REG_R4, data_reg); + break; + case MO_Q: + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); + break; + default: + tcg_abort(); + } + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, lb->mem_index); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr); + tcg_out_call(s, qemu_st_helpers[opc]); + + tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); } #else static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, @@ -1523,61 +1634,51 @@ static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, } #endif /* CONFIG_SOFTMMU */ -/* load data with address translation (if applicable) - and endianness conversion */ -static void tcg_out_qemu_ld(TCGContext* s, const TCGArg* args, int opc) +static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOp opc, int mem_index) { - TCGReg addr_reg, data_reg; -#if defined(CONFIG_SOFTMMU) - int mem_index; - tcg_insn_unit *label2_ptr; -#else - TCGReg index_reg; - tcg_target_long disp; -#endif - - data_reg = *args++; - addr_reg = *args++; +#ifdef CONFIG_SOFTMMU + tcg_insn_unit *label_ptr; + TCGReg base_reg; -#if defined(CONFIG_SOFTMMU) - mem_index = *args; + base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1); - addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index, - opc, &label2_ptr, 0); + label_ptr = s->code_ptr + 1; + tcg_out_insn(s, RI, BRC, S390_CC_NE, 0); - tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0); + tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); - tcg_finish_qemu_ldst(s, label2_ptr); + add_qemu_ldst_label(s, 1, opc, data_reg, addr_reg, mem_index, + s->code_ptr, label_ptr); #else + TCGReg index_reg; + tcg_target_long disp; + tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp); #endif } -static void tcg_out_qemu_st(TCGContext* s, const TCGArg* args, 
int opc) +static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOp opc, int mem_index) { - TCGReg addr_reg, data_reg; -#if defined(CONFIG_SOFTMMU) - int mem_index; - tcg_insn_unit *label2_ptr; -#else - TCGReg index_reg; - tcg_target_long disp; -#endif - - data_reg = *args++; - addr_reg = *args++; +#ifdef CONFIG_SOFTMMU + tcg_insn_unit *label_ptr; + TCGReg base_reg; -#if defined(CONFIG_SOFTMMU) - mem_index = *args; + base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0); - addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index, - opc, &label2_ptr, 1); + label_ptr = s->code_ptr + 1; + tcg_out_insn(s, RI, BRC, S390_CC_NE, 0); - tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0); + tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); - tcg_finish_qemu_ldst(s, label2_ptr); + add_qemu_ldst_label(s, 0, opc, data_reg, addr_reg, mem_index, + s->code_ptr, label_ptr); #else + TCGReg index_reg; + tcg_target_long disp; + tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp); #endif @@ -1602,7 +1703,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_goto_tb: if (s->tb_jmp_offset) { - tcg_abort(); + tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4)); + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + s->code_ptr += 2; } else { /* load address stored at s->tb_next + args[0] */ tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_TMP0, s->tb_next + args[0]); @@ -1784,13 +1887,19 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_add2_i32: - /* ??? Make use of ALFI. */ - tcg_out_insn(s, RR, ALR, args[0], args[4]); + if (const_args[4]) { + tcg_out_insn(s, RIL, ALFI, args[0], args[4]); + } else { + tcg_out_insn(s, RR, ALR, args[0], args[4]); + } tcg_out_insn(s, RRE, ALCR, args[1], args[5]); break; case INDEX_op_sub2_i32: - /* ??? Make use of SLFI. 
*/ - tcg_out_insn(s, RR, SLR, args[0], args[4]); + if (const_args[4]) { + tcg_out_insn(s, RIL, SLFI, args[0], args[4]); + } else { + tcg_out_insn(s, RR, SLR, args[0], args[4]); + } tcg_out_insn(s, RRE, SLBR, args[1], args[5]); break; @@ -1811,37 +1920,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, args[2], const_args[2], args[3]); break; - case INDEX_op_qemu_ld8u: - tcg_out_qemu_ld(s, args, LD_UINT8); - break; - case INDEX_op_qemu_ld8s: - tcg_out_qemu_ld(s, args, LD_INT8); - break; - case INDEX_op_qemu_ld16u: - tcg_out_qemu_ld(s, args, LD_UINT16); - break; - case INDEX_op_qemu_ld16s: - tcg_out_qemu_ld(s, args, LD_INT16); - break; - case INDEX_op_qemu_ld32: + case INDEX_op_qemu_ld_i32: /* ??? Technically we can use a non-extending instruction. */ - tcg_out_qemu_ld(s, args, LD_UINT32); - break; - case INDEX_op_qemu_ld64: - tcg_out_qemu_ld(s, args, LD_UINT64); - break; - - case INDEX_op_qemu_st8: - tcg_out_qemu_st(s, args, LD_UINT8); - break; - case INDEX_op_qemu_st16: - tcg_out_qemu_st(s, args, LD_UINT16); - break; - case INDEX_op_qemu_st32: - tcg_out_qemu_st(s, args, LD_UINT32); + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3]); break; - case INDEX_op_qemu_st64: - tcg_out_qemu_st(s, args, LD_UINT64); + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args[0], args[1], args[2], args[3]); break; case INDEX_op_ld16s_i64: @@ -2014,13 +2100,27 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_add2_i64: - /* ??? Make use of ALGFI and SLGFI. */ - tcg_out_insn(s, RRE, ALGR, args[0], args[4]); + if (const_args[4]) { + if ((int64_t)args[4] >= 0) { + tcg_out_insn(s, RIL, ALGFI, args[0], args[4]); + } else { + tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]); + } + } else { + tcg_out_insn(s, RRE, ALGR, args[0], args[4]); + } tcg_out_insn(s, RRE, ALCGR, args[1], args[5]); break; case INDEX_op_sub2_i64: - /* ??? Make use of ALGFI and SLGFI. 
*/ - tcg_out_insn(s, RRE, SLGR, args[0], args[4]); + if (const_args[4]) { + if ((int64_t)args[4] >= 0) { + tcg_out_insn(s, RIL, SLGFI, args[0], args[4]); + } else { + tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]); + } + } else { + tcg_out_insn(s, RRE, SLGR, args[0], args[4]); + } tcg_out_insn(s, RRE, SLBGR, args[1], args[5]); break; @@ -2037,13 +2137,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, args[2], const_args[2], args[3]); break; - case INDEX_op_qemu_ld32u: - tcg_out_qemu_ld(s, args, LD_UINT32); - break; - case INDEX_op_qemu_ld32s: - tcg_out_qemu_ld(s, args, LD_INT32); - break; - OP_32_64(deposit): tgen_deposit(s, args[0], args[2], args[3], args[4]); break; @@ -2100,25 +2193,18 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_bswap16_i32, { "r", "r" } }, { INDEX_op_bswap32_i32, { "r", "r" } }, - { INDEX_op_add2_i32, { "r", "r", "0", "1", "r", "r" } }, - { INDEX_op_sub2_i32, { "r", "r", "0", "1", "r", "r" } }, + { INDEX_op_add2_i32, { "r", "r", "0", "1", "rA", "r" } }, + { INDEX_op_sub2_i32, { "r", "r", "0", "1", "rA", "r" } }, { INDEX_op_brcond_i32, { "r", "rC" } }, { INDEX_op_setcond_i32, { "r", "r", "rC" } }, { INDEX_op_movcond_i32, { "r", "r", "rC", "r", "0" } }, { INDEX_op_deposit_i32, { "r", "0", "r" } }, - { INDEX_op_qemu_ld8u, { "r", "L" } }, - { INDEX_op_qemu_ld8s, { "r", "L" } }, - { INDEX_op_qemu_ld16u, { "r", "L" } }, - { INDEX_op_qemu_ld16s, { "r", "L" } }, - { INDEX_op_qemu_ld32, { "r", "L" } }, - { INDEX_op_qemu_ld64, { "r", "L" } }, - - { INDEX_op_qemu_st8, { "L", "L" } }, - { INDEX_op_qemu_st16, { "L", "L" } }, - { INDEX_op_qemu_st32, { "L", "L" } }, - { INDEX_op_qemu_st64, { "L", "L" } }, + { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_ld_i64, { "r", "L" } }, + { INDEX_op_qemu_st_i32, { "L", "L" } }, + { INDEX_op_qemu_st_i64, { "L", "L" } }, { INDEX_op_ld8u_i64, { "r", "r" } }, { INDEX_op_ld8s_i64, { "r", "r" } }, @@ -2165,17 +2251,14 @@ static const TCGTargetOpDef s390_op_defs[] = { { 
INDEX_op_bswap32_i64, { "r", "r" } }, { INDEX_op_bswap64_i64, { "r", "r" } }, - { INDEX_op_add2_i64, { "r", "r", "0", "1", "r", "r" } }, - { INDEX_op_sub2_i64, { "r", "r", "0", "1", "r", "r" } }, + { INDEX_op_add2_i64, { "r", "r", "0", "1", "rA", "r" } }, + { INDEX_op_sub2_i64, { "r", "r", "0", "1", "rA", "r" } }, { INDEX_op_brcond_i64, { "r", "rC" } }, { INDEX_op_setcond_i64, { "r", "r", "rC" } }, { INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } }, { INDEX_op_deposit_i64, { "r", "0", "r" } }, - { INDEX_op_qemu_ld32u, { "r", "L" } }, - { INDEX_op_qemu_ld32s, { "r", "L" } }, - { -1 }, }; @@ -2210,6 +2293,9 @@ static void tcg_target_init(TCGContext *s) tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3); tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4); tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5); + /* The r6 register is technically call-saved, but it's also a parameter + register, so it can get killed by setup for the qemu_st helper. */ + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6); /* The return register can be considered call-clobbered. 
*/ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14); @@ -2222,18 +2308,17 @@ static void tcg_target_init(TCGContext *s) tcg_add_target_add_op_defs(s390_op_defs); } +#define FRAME_SIZE ((int)(TCG_TARGET_CALL_STACK_OFFSET \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_NLONGS * sizeof(long))) + static void tcg_target_qemu_prologue(TCGContext *s) { - tcg_target_long frame_size; - /* stmg %r6,%r15,48(%r15) (save registers) */ tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48); /* aghi %r15,-frame_size */ - frame_size = TCG_TARGET_CALL_STACK_OFFSET; - frame_size += TCG_STATIC_CALL_ARGS_SIZE; - frame_size += CPU_TEMP_BUF_NLONGS * sizeof(long); - tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -frame_size); + tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE); tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET, @@ -2252,8 +2337,57 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* lmg %r6,%r15,fs+48(%r15) (restore registers) */ tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, - frame_size + 48); + FRAME_SIZE + 48); /* br %r14 (return) */ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14); } + +typedef struct { + DebugFrameCIE cie; + DebugFrameFDEHeader fde; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[18]; +} DebugFrame; + +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +#define ELF_HOST_MACHINE EM_S390 + +static DebugFrame debug_frame = { + .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .cie.id = -1, + .cie.version = 1, + .cie.code_align = 1, + .cie.data_align = 8, /* sleb128 8 */ + .cie.return_column = TCG_REG_R14, + + /* Total FDE size does not include the "len" member. */ + .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), + + .fde_def_cfa = { + 12, TCG_REG_CALL_STACK, /* DW_CFA_def_cfa %r15, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... 
uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + 0x86, 6, /* DW_CFA_offset, %r6, 48 */ + 0x87, 7, /* DW_CFA_offset, %r7, 56 */ + 0x88, 8, /* DW_CFA_offset, %r8, 64 */ + 0x89, 9, /* DW_CFA_offset, %r9, 72 */ + 0x8a, 10, /* DW_CFA_offset, %r10, 80 */ + 0x8b, 11, /* DW_CFA_offset, %r11, 88 */ + 0x8c, 12, /* DW_CFA_offset, %r12, 96 */ + 0x8d, 13, /* DW_CFA_offset, %r13, 104 */ + 0x8e, 14, /* DW_CFA_offset, %r14, 112 */ + } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + debug_frame.fde.func_start = (uintptr_t)buf; + debug_frame.fde.func_len = buf_size; + + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 5bf733eb3d..ad2c6ddaf4 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -100,7 +100,7 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i64 0 -#define TCG_TARGET_HAS_new_ldst 0 +#define TCG_TARGET_HAS_new_ldst 1 extern bool tcg_target_deposit_valid(int ofs, int len); #define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid |