diff options
-rw-r--r-- | tcg/aarch64/tcg-target.c | 4 | ||||
-rw-r--r-- | tcg/arm/tcg-target.c | 6 | ||||
-rw-r--r-- | tcg/i386/tcg-target.c | 4 | ||||
-rw-r--r-- | tcg/mips/tcg-target.c | 4 | ||||
-rw-r--r-- | tcg/optimize.c | 149 | ||||
-rw-r--r-- | tcg/ppc/tcg-target.c | 8 | ||||
-rw-r--r-- | tcg/s390/tcg-target.c | 4 | ||||
-rw-r--r-- | tcg/sparc/tcg-target.c | 30 | ||||
-rw-r--r-- | tcg/tcg.c | 39 | ||||
-rw-r--r-- | tci.c | 8 |
10 files changed, 126 insertions, 130 deletions
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c index b3be6f3177..fe44ad709c 100644 --- a/tcg/aarch64/tcg-target.c +++ b/tcg/aarch64/tcg-target.c @@ -1004,7 +1004,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); tcg_out_adr(s, TCG_REG_X3, lb->raddr); - tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]); + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); if (opc & MO_SIGN) { tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); } else { @@ -1027,7 +1027,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); tcg_out_adr(s, TCG_REG_X4, lb->raddr); - tcg_out_call(s, qemu_st_helpers[opc]); + tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tcg_out_goto(s, lb->raddr); } diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index 06a8064a9f..ae2ec7a922 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -1260,9 +1260,9 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) icache usage. For pre-armv6, use the signed helpers since we do not have a single insn sign-extend. */ if (use_armv6_instructions) { - func = qemu_ld_helpers[opc & ~MO_SIGN]; + func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]; } else { - func = qemu_ld_helpers[opc]; + func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]; if (opc & MO_SIGN) { opc = MO_UL; } @@ -1337,7 +1337,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); /* Tail-call to the helper, which will return to the fast path. */ - tcg_out_goto(s, COND_AL, qemu_st_helpers[opc]); + tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); } #endif /* SOFTMMU */ diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 2e4bf52aae..ff4d9cfec7 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -1307,7 +1307,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) (uintptr_t)l->raddr); } - tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]); + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); data_reg = l->datalo_reg; switch (opc & MO_SSIZE) { @@ -1413,7 +1413,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) /* "Tail call" to the helper, with the return address back inline. */ tcg_out_push(s, retaddr); - tcg_out_jmp(s, qemu_st_helpers[opc]); + tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); } #elif defined(__x86_64__) && defined(__linux__) # include <asm/prctl.h> diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index f64c89c3c0..f643eca3df 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -1031,7 +1031,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } i = tcg_out_call_iarg_imm(s, i, oi); i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr); - tcg_out_call_int(s, qemu_ld_helpers[opc], false); + tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false); /* delay slot */ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); @@ -1094,7 +1094,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) computation to take place in the return address register. */ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr); i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA); - tcg_out_call_int(s, qemu_st_helpers[opc], true); + tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true); /* delay slot */ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); } diff --git a/tcg/optimize.c b/tcg/optimize.c index 585f1ed7bb..0f6f7008da 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -193,10 +193,42 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2) return false; } +static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args, + TCGArg dst, TCGArg val) +{ + TCGOpcode new_op = op_to_movi(op->opc); + tcg_target_ulong mask; + + op->opc = new_op; + + reset_temp(dst); + temps[dst].state = TCG_TEMP_CONST; + temps[dst].val = val; + mask = val; + if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) { + /* High bits of the destination are now garbage. */ + mask |= ~0xffffffffull; + } + temps[dst].mask = mask; + + args[0] = dst; + args[1] = val; +} + static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, - TCGOpcode old_op, TCGArg dst, TCGArg src) + TCGArg dst, TCGArg src) { - TCGOpcode new_op = op_to_mov(old_op); + if (temps_are_copies(dst, src)) { + tcg_op_remove(s, op); + return; + } + + if (temps[src].state == TCG_TEMP_CONST) { + tcg_opt_gen_movi(s, op, args, dst, temps[src].val); + return; + } + + TCGOpcode new_op = op_to_mov(op->opc); tcg_target_ulong mask; op->opc = new_op; @@ -228,28 +260,6 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, args[1] = src; } -static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args, - TCGOpcode old_op, TCGArg dst, TCGArg val) -{ - TCGOpcode new_op = op_to_movi(old_op); - tcg_target_ulong mask; - - op->opc = new_op; - - reset_temp(dst); - temps[dst].state = TCG_TEMP_CONST; - temps[dst].val = val; - mask = val; - if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) { - /* High bits of the destination are now garbage. */ - mask |= ~0xffffffffull; - } - temps[dst].mask = mask; - - args[0] = dst; - args[1] = val; -} - static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) { uint64_t l64, h64; @@ -564,7 +574,7 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2) } /* Propagate constants and copies, fold constant expressions. */ -static void tcg_constant_folding(TCGContext *s) +void tcg_optimize(TCGContext *s) { int oi, oi_next, nb_temps, nb_globals; @@ -670,7 +680,7 @@ static void tcg_constant_folding(TCGContext *s) CASE_OP_32_64(rotr): if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[1]].val == 0) { - tcg_opt_gen_movi(s, op, args, opc, args[0], 0); + tcg_opt_gen_movi(s, op, args, args[0], 0); continue; } break; @@ -775,7 +785,8 @@ static void tcg_constant_folding(TCGContext *s) if (temps[args[1]].state != TCG_TEMP_CONST && temps[args[2]].state == TCG_TEMP_CONST && temps[args[2]].val == 0) { - goto do_mov3; + tcg_opt_gen_mov(s, op, args, args[0], args[1]); + continue; } break; CASE_OP_32_64(and): @@ -784,16 +795,10 @@ static void tcg_constant_folding(TCGContext *s) if (temps[args[1]].state != TCG_TEMP_CONST && temps[args[2]].state == TCG_TEMP_CONST && temps[args[2]].val == -1) { - goto do_mov3; + tcg_opt_gen_mov(s, op, args, args[0], args[1]); + continue; } break; - do_mov3: - if (temps_are_copies(args[0], args[1])) { - tcg_op_remove(s, op); - } else { - tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]); - } - continue; default: break; } @@ -942,19 +947,12 @@ static void tcg_constant_folding(TCGContext *s) if (partmask == 0) { assert(nb_oargs == 1); - tcg_opt_gen_movi(s, op, args, opc, args[0], 0); + tcg_opt_gen_movi(s, op, args, args[0], 0); continue; } if (affected == 0) { assert(nb_oargs == 1); - if (temps_are_copies(args[0], args[1])) { - tcg_op_remove(s, op); - } else if (temps[args[1]].state != TCG_TEMP_CONST) { - tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]); - } else { - tcg_opt_gen_movi(s, op, args, opc, - args[0], temps[args[1]].val); - } + tcg_opt_gen_mov(s, op, args, args[0], args[1]); continue; } @@ -966,7 +964,7 @@ static void tcg_constant_folding(TCGContext *s) CASE_OP_32_64(mulsh): if ((temps[args[2]].state == TCG_TEMP_CONST && temps[args[2]].val == 0)) { - tcg_opt_gen_movi(s, op, args, opc, args[0], 0); + tcg_opt_gen_movi(s, op, args, args[0], 0); continue; } break; @@ -979,14 +977,7 @@ static void tcg_constant_folding(TCGContext *s) CASE_OP_32_64(or): CASE_OP_32_64(and): if (temps_are_copies(args[1], args[2])) { - if (temps_are_copies(args[0], args[1])) { - tcg_op_remove(s, op); - } else if (temps[args[1]].state != TCG_TEMP_CONST) { - tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]); - } else { - tcg_opt_gen_movi(s, op, args, opc, - args[0], temps[args[1]].val); - } + tcg_opt_gen_mov(s, op, args, args[0], args[1]); continue; } break; @@ -1000,7 +991,7 @@ static void tcg_constant_folding(TCGContext *s) CASE_OP_32_64(sub): CASE_OP_32_64(xor): if (temps_are_copies(args[1], args[2])) { - tcg_opt_gen_movi(s, op, args, opc, args[0], 0); + tcg_opt_gen_movi(s, op, args, args[0], 0); continue; } break; @@ -1013,20 +1004,10 @@ static void tcg_constant_folding(TCGContext *s) allocator where needed and possible. Also detect copies. */ switch (opc) { CASE_OP_32_64(mov): - if (temps_are_copies(args[0], args[1])) { - tcg_op_remove(s, op); - break; - } - if (temps[args[1]].state != TCG_TEMP_CONST) { - tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]); - break; - } - /* Source argument is constant. Rewrite the operation and - let movi case handle it. */ - args[1] = temps[args[1]].val; - /* fallthrough */ + tcg_opt_gen_mov(s, op, args, args[0], args[1]); + break; CASE_OP_32_64(movi): - tcg_opt_gen_movi(s, op, args, opc, args[0], args[1]); + tcg_opt_gen_movi(s, op, args, args[0], args[1]); break; CASE_OP_32_64(not): @@ -1039,7 +1020,7 @@ static void tcg_constant_folding(TCGContext *s) case INDEX_op_ext32u_i64: if (temps[args[1]].state == TCG_TEMP_CONST) { tmp = do_constant_folding(opc, temps[args[1]].val, 0); - tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); + tcg_opt_gen_movi(s, op, args, args[0], tmp); break; } goto do_default; @@ -1047,7 +1028,7 @@ static void tcg_constant_folding(TCGContext *s) case INDEX_op_trunc_shr_i32: if (temps[args[1]].state == TCG_TEMP_CONST) { tmp = do_constant_folding(opc, temps[args[1]].val, args[2]); - tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); + tcg_opt_gen_movi(s, op, args, args[0], tmp); break; } goto do_default; @@ -1078,7 +1059,7 @@ static void tcg_constant_folding(TCGContext *s) && temps[args[2]].state == TCG_TEMP_CONST) { tmp = do_constant_folding(opc, temps[args[1]].val, temps[args[2]].val); - tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); + tcg_opt_gen_movi(s, op, args, args[0], tmp); break; } goto do_default; @@ -1088,7 +1069,7 @@ static void tcg_constant_folding(TCGContext *s) && temps[args[2]].state == TCG_TEMP_CONST) { tmp = deposit64(temps[args[1]].val, args[3], args[4], temps[args[2]].val); - tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); + tcg_opt_gen_movi(s, op, args, args[0], tmp); break; } goto do_default; @@ -1096,7 +1077,7 @@ static void tcg_constant_folding(TCGContext *s) CASE_OP_32_64(setcond): tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]); if (tmp != 2) { - tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); + tcg_opt_gen_movi(s, op, args, args[0], tmp); break; } goto do_default; @@ -1118,14 +1099,7 @@ static void tcg_constant_folding(TCGContext *s) CASE_OP_32_64(movcond): tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]); if (tmp != 2) { - if (temps_are_copies(args[0], args[4-tmp])) { - tcg_op_remove(s, op); - } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) { - tcg_opt_gen_movi(s, op, args, opc, - args[0], temps[args[4-tmp]].val); - } else { - tcg_opt_gen_mov(s, op, args, opc, args[0], args[4-tmp]); - } + tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]); break; } goto do_default; @@ -1154,8 +1128,8 @@ static void tcg_constant_folding(TCGContext *s) rl = args[0]; rh = args[1]; - tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)a); - tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(a >> 32)); + tcg_opt_gen_movi(s, op, args, rl, (uint32_t)a); + tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(a >> 32)); /* We've done all we need to do with the movi. Skip it. */ oi_next = op2->next; @@ -1175,8 +1149,8 @@ static void tcg_constant_folding(TCGContext *s) rl = args[0]; rh = args[1]; - tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)r); - tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(r >> 32)); + tcg_opt_gen_movi(s, op, args, rl, (uint32_t)r); + tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(r >> 32)); /* We've done all we need to do with the movi. Skip it. */ oi_next = op2->next; @@ -1260,7 +1234,7 @@ static void tcg_constant_folding(TCGContext *s) tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]); if (tmp != 2) { do_setcond_const: - tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); + tcg_opt_gen_movi(s, op, args, args[0], tmp); } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE) && temps[args[3]].state == TCG_TEMP_CONST && temps[args[4]].state == TCG_TEMP_CONST @@ -1354,8 +1328,3 @@ static void tcg_constant_folding(TCGContext *s) } } } - -void tcg_optimize(TCGContext *s) -{ - tcg_constant_folding(s); -} diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index d49c7d925f..2b6eafa03c 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -1495,7 +1495,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); tcg_out32(s, MFSPR | RT(arg) | LR); - tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]); + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); lo = lb->datalo_reg; hi = lb->datahi_reg; @@ -1565,7 +1565,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); tcg_out32(s, MFSPR | RT(arg) | LR); - tcg_out_call(s, qemu_st_helpers[opc]); + tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tcg_out_b(s, 0, lb->raddr); } @@ -1624,7 +1624,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) tcg_out32(s, LWZ | TAI(datalo, addrlo, 4)); } } else { - uint32_t insn = qemu_ldx_opc[opc]; + uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; if (!HAVE_ISA_2_06 && insn == LDBRX) { tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); @@ -1696,7 +1696,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) tcg_out32(s, STW | TAI(datalo, addrlo, 4)); } } else { - uint32_t insn = qemu_stx_opc[opc]; + uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; if (!HAVE_ISA_2_06 && insn == STDBRX) { tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4)); diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index 46dedc9f82..669fafe24f 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -1573,7 +1573,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) } tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi); tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr); - tcg_out_call(s, qemu_ld_helpers[opc]); + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); @@ -1610,7 +1610,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) } tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi); tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr); - tcg_out_call(s, qemu_st_helpers[opc]); + tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); } diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index c1794a33ed..1a870a81d7 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -1075,12 +1075,11 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, TCGMemOp memop = get_memop(oi); #ifdef CONFIG_SOFTMMU unsigned memi = get_mmuidx(oi); - TCGMemOp s_bits = memop & MO_SIZE; TCGReg addrz, param; tcg_insn_unit *func; tcg_insn_unit *label_ptr; - addrz = tcg_out_tlb_load(s, addr, memi, s_bits, + addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE, offsetof(CPUTLBEntry, addr_read)); /* The fast path is exactly one insn. Thus we can perform the @@ -1092,7 +1091,8 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); /* delay slot */ - tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, qemu_ld_opc[memop]); + tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, + qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); /* TLB Miss. */ @@ -1105,10 +1105,10 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, /* We use the helpers to extend SB and SW data, leaving the case of SL needing explicit extending below. */ - if ((memop & ~MO_BSWAP) == MO_SL) { - func = qemu_ld_trampoline[memop & ~MO_SIGN]; + if ((memop & MO_SSIZE) == MO_SL) { + func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)]; } else { - func = qemu_ld_trampoline[memop]; + func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)]; } assert(func != NULL); tcg_out_call_nodelay(s, func); @@ -1119,13 +1119,13 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, Which complicates things for sparcv8plus. */ if (SPARC64) { /* We let the helper sign-extend SB and SW, but leave SL for here. */ - if (is_64 && (memop & ~MO_BSWAP) == MO_SL) { + if (is_64 && (memop & MO_SSIZE) == MO_SL) { tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); } else { tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); } } else { - if (s_bits == MO_64) { + if ((memop & MO_SIZE) == MO_64) { tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX); tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL); tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR); @@ -1147,7 +1147,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, } tcg_out_ldst_rr(s, data, addr, (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), - qemu_ld_opc[memop]); + qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); #endif /* CONFIG_SOFTMMU */ } @@ -1157,12 +1157,11 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, TCGMemOp memop = get_memop(oi); #ifdef CONFIG_SOFTMMU unsigned memi = get_mmuidx(oi); - TCGMemOp s_bits = memop & MO_SIZE; TCGReg addrz, param; tcg_insn_unit *func; tcg_insn_unit *label_ptr; - addrz = tcg_out_tlb_load(s, addr, memi, s_bits, + addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE, offsetof(CPUTLBEntry, addr_write)); /* The fast path is exactly one insn. Thus we can perform the entire @@ -1172,7 +1171,8 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); /* delay slot */ - tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, qemu_st_opc[memop]); + tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, + qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); /* TLB Miss. */ @@ -1182,13 +1182,13 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, param++; } tcg_out_mov(s, TCG_TYPE_REG, param++, addr); - if (!SPARC64 && s_bits == MO_64) { + if (!SPARC64 && (memop & MO_SIZE) == MO_64) { /* Skip the high-part; we'll perform the extract in the trampoline. */ param++; } tcg_out_mov(s, TCG_TYPE_REG, param++, data); - func = qemu_st_trampoline[memop]; + func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)]; assert(func != NULL); tcg_out_call_nodelay(s, func); /* delay slot */ @@ -1202,7 +1202,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, } tcg_out_ldst_rr(s, data, addr, (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), - qemu_st_opc[memop]); + qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); #endif /* CONFIG_SOFTMMU */ } @@ -1076,10 +1076,19 @@ void tcg_dump_ops(TCGContext *s) TCGMemOp op = get_memop(oi); unsigned ix = get_mmuidx(oi); - if (op < ARRAY_SIZE(ldst_name) && ldst_name[op]) { - qemu_log(",%s,%u", ldst_name[op], ix); - } else { + if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { qemu_log(",$0x%x,%u", op, ix); + } else { + const char *s_al = "", *s_op; + if (op & MO_AMASK) { + if ((op & MO_AMASK) == MO_ALIGN) { + s_al = "al+"; + } else { + s_al = "un+"; + } + } + s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; + qemu_log(",%s%s,%u", s_al, s_op, ix); } i = 1; } @@ -1378,16 +1387,20 @@ static void tcg_liveness_analysis(TCGContext *s) memset(dead_temps, 1, s->nb_globals); } - /* input args are live */ + /* record arguments that die in this helper */ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { arg = args[i]; if (arg != TCG_CALL_DUMMY_ARG) { if (dead_temps[arg]) { dead_args |= (1 << i); } - dead_temps[arg] = 0; } } + /* input arguments are live for preceeding opcodes */ + for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + arg = args[i]; + dead_temps[arg] = 0; + } s->op_dead_args[oi] = dead_args; s->op_sync_args[oi] = sync_args; } @@ -1522,12 +1535,16 @@ static void tcg_liveness_analysis(TCGContext *s) memset(mem_temps, 1, s->nb_globals); } - /* input args are live */ + /* record arguments that die in this opcode */ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { arg = args[i]; if (dead_temps[arg]) { dead_args |= (1 << i); } + } + /* input arguments are live for preceeding opcodes */ + for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + arg = args[i]; dead_temps[arg] = 0; } s->op_dead_args[oi] = dead_args; @@ -1998,6 +2015,16 @@ static void tcg_reg_alloc_op(TCGContext *s, if (!IS_DEAD_ARG(i)) { goto allocate_in_reg; } + /* check if the current register has already been allocated + for another input aliased to an output */ + int k2, i2; + for (k2 = 0 ; k2 < k ; k2++) { + i2 = def->sorted_args[nb_oargs + k2]; + if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && + (new_args[i2] == ts->reg)) { + goto allocate_in_reg; + } + } } } reg = ts->reg; @@ -1107,7 +1107,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) t0 = *tb_ptr++; taddr = tci_read_ulong(&tb_ptr); oi = tci_read_i(&tb_ptr); - switch (get_memop(oi)) { + switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) { case MO_UB: tmp32 = qemu_ld_ub; break; @@ -1144,7 +1144,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) } taddr = tci_read_ulong(&tb_ptr); oi = tci_read_i(&tb_ptr); - switch (get_memop(oi)) { + switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) { case MO_UB: tmp64 = qemu_ld_ub; break; @@ -1193,7 +1193,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) t0 = tci_read_r(&tb_ptr); taddr = tci_read_ulong(&tb_ptr); oi = tci_read_i(&tb_ptr); - switch (get_memop(oi)) { + switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) { case MO_UB: qemu_st_b(t0); break; @@ -1217,7 +1217,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) tmp64 = tci_read_r64(&tb_ptr); taddr = tci_read_ulong(&tb_ptr); oi = tci_read_i(&tb_ptr); - switch (get_memop(oi)) { + switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) { case MO_UB: qemu_st_b(tmp64); break; |