diff options
-rw-r--r-- | cpu-exec.c | 3 | ||||
-rw-r--r-- | disas/mips.c | 2 | ||||
-rw-r--r-- | include/qemu/log.h | 1 | ||||
-rw-r--r-- | qemu-log.c | 3 | ||||
-rw-r--r-- | tcg/mips/tcg-target.c | 90 | ||||
-rw-r--r-- | tcg/mips/tcg-target.h | 11 | ||||
-rw-r--r-- | tcg/ppc/tcg-target.c | 79 | ||||
-rw-r--r-- | tcg/tcg-opc.h | 13 | ||||
-rw-r--r-- | translate-all.c | 2 |
9 files changed, 158 insertions, 46 deletions
diff --git a/cpu-exec.c b/cpu-exec.c index 8fd56a69e0..7eef0830fe 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -477,7 +477,8 @@ int cpu_exec(CPUState *cpu) /* see if we can patch the calling TB. When the TB spans two pages, we cannot safely do a direct jump. */ - if (next_tb != 0 && tb->page_addr[1] == -1) { + if (next_tb != 0 && tb->page_addr[1] == -1 + && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK), next_tb & TB_EXIT_MASK, tb); } diff --git a/disas/mips.c b/disas/mips.c index 01336a8385..bf0bbaf86a 100644 --- a/disas/mips.c +++ b/disas/mips.c @@ -2420,9 +2420,11 @@ const struct mips_opcode mips_builtin_opcodes[] = {"hibernate","", 0x42000023, 0xffffffff, 0, 0, V1 }, {"ins", "t,r,+A,+B", 0x7c000004, 0xfc00003f, WR_t|RD_s, 0, I33 }, {"jr", "s", 0x00000008, 0xfc1fffff, UBD|RD_s, 0, I1 }, +{"jr", "s", 0x00000009, 0xfc1fffff, UBD|RD_s, 0, I32R6 }, /* jalr */ /* jr.hb is officially MIPS{32,64}R2, but it works on R1 as jr with the same hazard barrier effect. */ {"jr.hb", "s", 0x00000408, 0xfc1fffff, UBD|RD_s, 0, I32 }, +{"jr.hb", "s", 0x00000409, 0xfc1fffff, UBD|RD_s, 0, I32R6 }, /* jalr.hb */ {"j", "s", 0x00000008, 0xfc1fffff, UBD|RD_s, 0, I1 }, /* jr */ /* SVR4 PIC code requires special handling for j, so it must be a macro. */ diff --git a/include/qemu/log.h b/include/qemu/log.h index f880e66dbc..7de45001f2 100644 --- a/include/qemu/log.h +++ b/include/qemu/log.h @@ -41,6 +41,7 @@ static inline bool qemu_log_enabled(void) #define LOG_UNIMP (1 << 10) #define LOG_GUEST_ERROR (1 << 11) #define CPU_LOG_MMU (1 << 12) +#define CPU_LOG_TB_NOCHAIN (1 << 13) /* Returns true if a bit is set in the current loglevel mask */ diff --git a/qemu-log.c b/qemu-log.c index 13f3813f68..efd07c81ea 100644 --- a/qemu-log.c +++ b/qemu-log.c @@ -119,6 +119,9 @@ const QEMULogItem qemu_log_items[] = { { LOG_GUEST_ERROR, "guest_errors", "log when the guest OS does something invalid (eg accessing a\n" "non-existent register)" }, + { CPU_LOG_TB_NOCHAIN, "nochain", + "do not chain compiled TBs so that \"exec\" and \"cpu\" show\n" + "complete traces" }, { 0, NULL, NULL }, }; diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 4305af9673..79e052ff7a 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -288,16 +288,24 @@ typedef enum { OPC_SRLV = OPC_SPECIAL | 0x06, OPC_ROTRV = OPC_SPECIAL | (0x01 << 6) | 0x06, OPC_SRAV = OPC_SPECIAL | 0x07, - OPC_JR = OPC_SPECIAL | 0x08, + OPC_JR_R5 = OPC_SPECIAL | 0x08, OPC_JALR = OPC_SPECIAL | 0x09, OPC_MOVZ = OPC_SPECIAL | 0x0A, OPC_MOVN = OPC_SPECIAL | 0x0B, OPC_MFHI = OPC_SPECIAL | 0x10, OPC_MFLO = OPC_SPECIAL | 0x12, OPC_MULT = OPC_SPECIAL | 0x18, + OPC_MUL_R6 = OPC_SPECIAL | (0x02 << 6) | 0x18, + OPC_MUH = OPC_SPECIAL | (0x03 << 6) | 0x18, OPC_MULTU = OPC_SPECIAL | 0x19, + OPC_MULU = OPC_SPECIAL | (0x02 << 6) | 0x19, + OPC_MUHU = OPC_SPECIAL | (0x03 << 6) | 0x19, OPC_DIV = OPC_SPECIAL | 0x1A, + OPC_DIV_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1A, + OPC_MOD = OPC_SPECIAL | (0x03 << 6) | 0x1A, OPC_DIVU = OPC_SPECIAL | 0x1B, + OPC_DIVU_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1B, + OPC_MODU = OPC_SPECIAL | (0x03 << 6) | 0x1B, OPC_ADDU = OPC_SPECIAL | 0x21, OPC_SUBU = OPC_SPECIAL | 0x23, OPC_AND = OPC_SPECIAL | 0x24, @@ -306,13 +314,15 @@ typedef enum { OPC_NOR = OPC_SPECIAL | 0x27, OPC_SLT = OPC_SPECIAL | 0x2A, OPC_SLTU = OPC_SPECIAL | 0x2B, + OPC_SELEQZ = OPC_SPECIAL | 0x35, + OPC_SELNEZ = OPC_SPECIAL | 0x37, OPC_REGIMM = 0x01 << 26, OPC_BLTZ = OPC_REGIMM | (0x00 << 16), OPC_BGEZ = OPC_REGIMM | (0x01 << 16), OPC_SPECIAL2 = 0x1c << 26, - OPC_MUL = OPC_SPECIAL2 | 0x002, + OPC_MUL_R5 = OPC_SPECIAL2 | 0x002, OPC_SPECIAL3 = 0x1f << 26, OPC_EXT = OPC_SPECIAL3 | 0x000, @@ -320,6 +330,15 @@ typedef enum { OPC_WSBH = OPC_SPECIAL3 | 0x0a0, OPC_SEB = OPC_SPECIAL3 | 0x420, OPC_SEH = OPC_SPECIAL3 | 0x620, + + /* MIPS r6 doesn't have JR, JALR should be used instead */ + OPC_JR = use_mips32r6_instructions ? OPC_JALR : OPC_JR_R5, + + /* + * MIPS r6 replaces MUL with an alternative encoding which is + * backwards-compatible at the assembly level. + */ + OPC_MUL = use_mips32r6_instructions ? OPC_MUL_R6 : OPC_MUL_R5, } MIPSInsn; /* @@ -841,13 +860,20 @@ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, } static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, TCGReg c2, TCGReg v) + TCGReg c1, TCGReg c2, TCGReg v1, TCGReg v2) { - MIPSInsn m_opc = OPC_MOVN; + bool eqz = false; + + /* If one of the values is zero, put it last to match SEL*Z instructions */ + if (use_mips32r6_instructions && v1 == 0) { + v1 = v2; + v2 = 0; + cond = tcg_invert_cond(cond); + } switch (cond) { case TCG_COND_EQ: - m_opc = OPC_MOVZ; + eqz = true; /* FALLTHRU */ case TCG_COND_NE: if (c2 != 0) { @@ -860,14 +886,32 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, /* Minimize code size by preferring a compare not requiring INV. */ if (mips_cmp_map[cond] & MIPS_CMP_INV) { cond = tcg_invert_cond(cond); - m_opc = OPC_MOVZ; + eqz = true; } tcg_out_setcond(s, cond, TCG_TMP0, c1, c2); c1 = TCG_TMP0; break; } - tcg_out_opc_reg(s, m_opc, ret, v, c1); + if (use_mips32r6_instructions) { + MIPSInsn m_opc_t = eqz ? OPC_SELEQZ : OPC_SELNEZ; + MIPSInsn m_opc_f = eqz ? OPC_SELNEZ : OPC_SELEQZ; + + if (v2 != 0) { + tcg_out_opc_reg(s, m_opc_f, TCG_TMP1, v2, c1); + } + tcg_out_opc_reg(s, m_opc_t, ret, v1, c1); + if (v2 != 0) { + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1); + } + } else { + MIPSInsn m_opc = eqz ? OPC_MOVZ : OPC_MOVN; + + tcg_out_opc_reg(s, m_opc, ret, v1, c1); + + /* This should be guaranteed via constraints */ + tcg_debug_assert(v2 == ret); + } } static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) @@ -1445,21 +1489,45 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, i1 = OPC_MULT, i2 = OPC_MFLO; goto do_hilo1; case INDEX_op_mulsh_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MUH, a0, a1, a2); + break; + } i1 = OPC_MULT, i2 = OPC_MFHI; goto do_hilo1; case INDEX_op_muluh_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MUHU, a0, a1, a2); + break; + } i1 = OPC_MULTU, i2 = OPC_MFHI; goto do_hilo1; case INDEX_op_div_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_DIV_R6, a0, a1, a2); + break; + } i1 = OPC_DIV, i2 = OPC_MFLO; goto do_hilo1; case INDEX_op_divu_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_DIVU_R6, a0, a1, a2); + break; + } i1 = OPC_DIVU, i2 = OPC_MFLO; goto do_hilo1; case INDEX_op_rem_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MOD, a0, a1, a2); + break; + } i1 = OPC_DIV, i2 = OPC_MFHI; goto do_hilo1; case INDEX_op_remu_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MODU, a0, a1, a2); + break; + } i1 = OPC_DIVU, i2 = OPC_MFHI; do_hilo1: tcg_out_opc_reg(s, i1, 0, a1, a2); @@ -1536,7 +1604,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_movcond_i32: - tcg_out_movcond(s, args[5], a0, a1, a2, args[3]); + tcg_out_movcond(s, args[5], a0, a1, a2, args[3], args[4]); break; case INDEX_op_setcond_i32: @@ -1592,8 +1660,10 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_add_i32, { "r", "rZ", "rJ" } }, { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, +#if !use_mips32r6_instructions { INDEX_op_muls2_i32, { "r", "r", "rZ", "rZ" } }, { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } }, +#endif { INDEX_op_mulsh_i32, { "r", "rZ", "rZ" } }, { INDEX_op_muluh_i32, { "r", "rZ", "rZ" } }, { INDEX_op_div_i32, { "r", "rZ", "rZ" } }, @@ -1623,7 +1693,11 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, { INDEX_op_brcond_i32, { "rZ", "rZ" } }, +#if use_mips32r6_instructions + { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, +#else { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } }, +#endif { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index f5ba52cacf..b1cda37b66 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -96,6 +96,13 @@ extern bool use_mips32_instructions; extern bool use_mips32r2_instructions; #endif +/* MIPS32R6 instruction set detection */ +#if defined(__mips_isa_rev) && (__mips_isa_rev >= 6) +#define use_mips32r6_instructions 1 +#else +#define use_mips32r6_instructions 0 +#endif + /* optional instructions */ #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 @@ -105,8 +112,8 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 -#define TCG_TARGET_HAS_mulu2_i32 1 -#define TCG_TARGET_HAS_muls2_i32 1 +#define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions) +#define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 92ef719e40..2c72565fb9 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -700,14 +700,14 @@ static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) { int mb, me; - if ((c & 0xffff) == c) { + if (mask_operand(c, &mb, &me)) { + tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); + } else if ((c & 0xffff) == c) { tcg_out32(s, ANDI | SAI(src, dst, c)); return; } else if ((c & 0xffff0000) == c) { tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); return; - } else if (mask_operand(c, &mb, &me)) { - tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); } else { tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c); tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); @@ -719,18 +719,18 @@ static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c) int mb, me; assert(TCG_TARGET_REG_BITS == 64); - if ((c & 0xffff) == c) { - tcg_out32(s, ANDI | SAI(src, dst, c)); - return; - } else if ((c & 0xffff0000) == c) { - tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); - return; - } else if (mask64_operand(c, &mb, &me)) { + if (mask64_operand(c, &mb, &me)) { if (mb == 0) { tcg_out_rld(s, RLDICR, dst, src, 0, me); } else { tcg_out_rld(s, RLDICL, dst, src, 0, mb); } + } else if ((c & 0xffff) == c) { + tcg_out32(s, ANDI | SAI(src, dst, c)); + return; + } else if ((c & 0xffff0000) == c) { + tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); + return; } else { tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c); tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); @@ -1239,11 +1239,36 @@ static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args, void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) { - TCGContext s; + tcg_insn_unit i1, i2; + uint64_t pair; + intptr_t diff = addr - jmp_addr; + + if (in_range_b(diff)) { + i1 = B | (diff & 0x3fffffc); + i2 = NOP; + } else if (USE_REG_RA) { + intptr_t lo, hi; + diff = addr - (uintptr_t)tb_ret_addr; + lo = (int16_t)diff; + hi = (int32_t)(diff - lo); + assert(diff == hi + lo); + i1 = ADDIS | TAI(TCG_REG_TMP1, TCG_REG_RA, hi >> 16); + i2 = ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, lo); + } else { + assert(TCG_TARGET_REG_BITS == 32 || addr == (int32_t)addr); + i1 = ADDIS | TAI(TCG_REG_TMP1, 0, addr >> 16); + i2 = ORI | SAI(TCG_REG_TMP1, TCG_REG_TMP1, addr); + } +#ifdef HOST_WORDS_BIGENDIAN + pair = (uint64_t)i1 << 32 | i2; +#else + pair = (uint64_t)i2 << 32 | i1; +#endif - s.code_buf = s.code_ptr = (tcg_insn_unit *)jmp_addr; - tcg_out_b(&s, 0, (tcg_insn_unit *)addr); - flush_icache_range(jmp_addr, jmp_addr + tcg_current_code_size(&s)); + /* ??? __atomic_store_8, presuming there's some way to do that + for 32-bit, otherwise this is good enough for 64-bit. */ + *(uint64_t *)jmp_addr = pair; + flush_icache_range(jmp_addr, jmp_addr + 8); } static void tcg_out_call(TCGContext *s, tcg_insn_unit *target) @@ -1855,12 +1880,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, if (USE_REG_RA) { ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr); - /* If we can use a direct branch, otherwise use the value in RA. - Note that the direct branch is always forward. If it's in - range now, it'll still be in range after the movi. Don't - bother about the 20 bytes where the test here fails but it - would succeed below. */ - if (!in_range_b(disp)) { + /* Use a direct branch if we can, otherwise use the value in RA. + Note that the direct branch is always backward, thus we need + to account for the possibility of 5 insns from the movi. */ + if (!in_range_b(disp - 20)) { tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR); tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); tcg_out32(s, BCCTR | BO_ALWAYS); @@ -1871,14 +1894,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out_b(s, 0, tb_ret_addr); break; case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { - /* Direct jump method. */ - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); - s->code_ptr += 7; - } else { - /* Indirect jump method. */ - tcg_abort(); + tcg_debug_assert(s->tb_jmp_offset); + /* Direct jump. Ensure the next insns are 8-byte aligned. */ + if ((uintptr_t)s->code_ptr & 7) { + tcg_out32(s, NOP); } + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + /* To be replaced by either a branch+nop or a load into TMP1. */ + s->code_ptr += 2; + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); s->tb_next_offset[args[0]] = tcg_current_code_size(s); break; case INDEX_op_br: diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index c6f95703eb..6d0410c4b9 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -173,18 +173,15 @@ DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64)) DEF(muluh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i64)) DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64)) +#define TLADDR_ARGS (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2) +#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2) + /* QEMU specific */ -#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS -DEF(insn_start, 0, 0, 2 * TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT) -#else -DEF(insn_start, 0, 0, TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT) -#endif +DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS, + TCG_OPF_NOT_PRESENT) DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END) DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END) -#define TLADDR_ARGS (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2) -#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2) - DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 1, diff --git a/translate-all.c b/translate-all.c index 333eba4f5d..20ce40ec28 100644 --- a/translate-all.c +++ b/translate-all.c @@ -468,6 +468,8 @@ static inline PageDesc *page_find(tb_page_addr_t index) # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) #elif defined(__sparc__) # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) +#elif defined(__powerpc64__) +# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) #elif defined(__aarch64__) # define MAX_CODE_GEN_BUFFER_SIZE (128ul * 1024 * 1024) #elif defined(__arm__) |