diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2019-04-28 11:43:09 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2019-04-28 11:43:10 +0100 |
commit | e0fb2c3d89aa77057ac4aa073e01f4ca484449b0 (patch) | |
tree | f28672f770bfe0b95165c10cad34dbaf214fdbea | |
parent | 9ec34ecc97bcd5df04b0f67a774d79ffcd6b0a11 (diff) | |
parent | ef5dae6805cce7b59d129d801bdc5db71bcbd60d (diff) |
Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20190426' into staging
Add tcg_gen_extract2_*.
Deal with overflow of TranslationBlocks.
Respect access_type in io_readx.
# gpg: Signature made Fri 26 Apr 2019 18:17:01 BST
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F
* remotes/rth/tags/pull-tcg-20190426:
cputlb: Fix io_readx() to respect the access_type
tcg/arm: Restrict constant pool displacement to 12 bits
tcg/ppc: Allow the constant pool to overflow at 32k
tcg: Restart TB generation after out-of-line ldst overflow
tcg: Restart TB generation after constant pool overflow
tcg: Restart TB generation after relocation overflow
tcg: Restart after TB code generation overflow
tcg: Hoist max_insns computation to tb_gen_code
tcg/aarch64: Support INDEX_op_extract2_{i32,i64}
tcg/arm: Support INDEX_op_extract2_i32
tcg/i386: Support INDEX_op_extract2_{i32,i64}
tcg: Use extract2 in tcg_gen_deposit_{i32,i64}
tcg: Use deposit and extract2 in tcg_gen_shifti_i64
tcg: Add INDEX_op_extract2_{i32,i64}
tcg: Implement tcg_gen_extract2_{i32,i64}
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
51 files changed, 450 insertions, 308 deletions
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 88cc8389e9..f2f618217d 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -878,10 +878,11 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, CPUTLBEntry *entry; target_ulong tlb_addr; - tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); + tlb_fill(cpu, addr, size, access_type, mmu_idx, retaddr); entry = tlb_entry(env, mmu_idx, addr); - tlb_addr = entry->addr_read; + tlb_addr = (access_type == MMU_DATA_LOAD ? + entry->addr_read : entry->addr_code); if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { /* RAM access */ uintptr_t haddr = addr + entry->addend; diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 75a6cf49f1..20b59f93f4 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1674,7 +1674,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb_page_addr_t phys_pc, phys_page2; target_ulong virt_page2; tcg_insn_unit *gen_code_buf; - int gen_code_size, search_size; + int gen_code_size, search_size, max_insns; #ifdef CONFIG_PROFILER TCGProfile *prof = &tcg_ctx->prof; int64_t ti; @@ -1692,6 +1692,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu, cflags &= ~CF_CLUSTER_MASK; cflags |= cpu->cluster_index << CF_CLUSTER_SHIFT; + max_insns = cflags & CF_COUNT_MASK; + if (max_insns == 0) { + max_insns = CF_COUNT_MASK; + } + if (max_insns > TCG_MAX_INSNS) { + max_insns = TCG_MAX_INSNS; + } + if (cpu->singlestep_enabled || singlestep) { + max_insns = 1; + } + buffer_overflow: tb = tb_alloc(pc); if (unlikely(!tb)) { @@ -1711,6 +1722,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb->cflags = cflags; tb->trace_vcpu_dstate = *cpu->trace_dstate; tcg_ctx->tb_cflags = cflags; + tb_overflow: #ifdef CONFIG_PROFILER /* includes aborted translations because of exceptions */ @@ -1721,7 +1733,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tcg_func_start(tcg_ctx); tcg_ctx->cpu = ENV_GET_CPU(env); - gen_intermediate_code(cpu, tb); + gen_intermediate_code(cpu, tb, max_insns); tcg_ctx->cpu = NULL; trace_translate_block(tb, tb->pc, tb->tc.ptr); @@ -1744,14 +1756,39 @@ TranslationBlock *tb_gen_code(CPUState *cpu, ti = profile_getclock(); #endif - /* ??? Overflow could be handled better here. In particular, we - don't need to re-do gen_intermediate_code, nor should we re-do - the tcg optimization currently hidden inside tcg_gen_code. All - that should be required is to flush the TBs, allocate a new TB, - re-initialize it per above, and re-do the actual code generation. */ gen_code_size = tcg_gen_code(tcg_ctx, tb); if (unlikely(gen_code_size < 0)) { - goto buffer_overflow; + switch (gen_code_size) { + case -1: + /* + * Overflow of code_gen_buffer, or the current slice of it. + * + * TODO: We don't need to re-do gen_intermediate_code, nor + * should we re-do the tcg optimization currently hidden + * inside tcg_gen_code. All that should be required is to + * flush the TBs, allocate a new TB, re-initialize it per + * above, and re-do the actual code generation. + */ + goto buffer_overflow; + + case -2: + /* + * The code generated for the TranslationBlock is too large. + * The maximum size allowed by the unwind info is 64k. + * There may be stricter constraints from relocations + * in the tcg backend. + * + * Try again with half as many insns as we attempted this time. + * If a single insn overflows, there's a bug somewhere... + */ + max_insns = tb->icount; + assert(max_insns > 1); + max_insns /= 2; + goto tb_overflow; + + default: + g_assert_not_reached(); + } } search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size); if (unlikely(search_size < 0)) { diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index afd0a49ea6..8d65ead708 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -32,7 +32,7 @@ void translator_loop_temp_check(DisasContextBase *db) } void translator_loop(const TranslatorOps *ops, DisasContextBase *db, - CPUState *cpu, TranslationBlock *tb) + CPUState *cpu, TranslationBlock *tb, int max_insns) { int bp_insn = 0; @@ -42,20 +42,9 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, db->pc_next = db->pc_first; db->is_jmp = DISAS_NEXT; db->num_insns = 0; + db->max_insns = max_insns; db->singlestep_enabled = cpu->singlestep_enabled; - /* Instruction counting */ - db->max_insns = tb_cflags(db->tb) & CF_COUNT_MASK; - if (db->max_insns == 0) { - db->max_insns = CF_COUNT_MASK; - } - if (db->max_insns > TCG_MAX_INSNS) { - db->max_insns = TCG_MAX_INSNS; - } - if (db->singlestep_enabled || singlestep) { - db->max_insns = 1; - } - ops->init_disas_context(db, cpu); tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 97b90cb0db..58e988b3b1 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -40,8 +40,8 @@ typedef ram_addr_t tb_page_addr_t; #include "qemu/log.h" -void gen_intermediate_code(CPUState *cpu, struct TranslationBlock *tb); -void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb, +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns); +void restore_state_to_opc(CPUArchState *env, TranslationBlock *tb, target_ulong *data); void cpu_gen_init(void); diff --git a/include/exec/translator.h b/include/exec/translator.h index 71e7b2c347..66dfe906c4 100644 --- a/include/exec/translator.h +++ b/include/exec/translator.h @@ -123,6 +123,7 @@ typedef struct TranslatorOps { * @db: Disassembly context. * @cpu: Target vCPU. * @tb: Translation block. + * @max_insns: Maximum number of insns to translate. * * Generic translator loop. * @@ -137,7 +138,7 @@ typedef struct TranslatorOps { * - When too many instructions have been translated. */ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, - CPUState *cpu, TranslationBlock *tb); + CPUState *cpu, TranslationBlock *tb, int max_insns); void translator_loop_temp_check(DisasContextBase *db); diff --git a/target/alpha/translate.c b/target/alpha/translate.c index 9d8f9b3eea..2c9cccf6c1 100644 --- a/target/alpha/translate.c +++ b/target/alpha/translate.c @@ -3049,10 +3049,10 @@ static const TranslatorOps alpha_tr_ops = { .disas_log = alpha_tr_disas_log, }; -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) { DisasContext dc; - translator_loop(&alpha_tr_ops, &dc.base, cpu, tb); + translator_loop(&alpha_tr_ops, &dc.base, cpu, tb, max_insns); } void restore_state_to_opc(CPUAlphaState *env, TranslationBlock *tb, diff --git a/target/arm/translate.c b/target/arm/translate.c index d9e7bb737a..4ea4018e2b 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -13756,7 +13756,7 @@ static const TranslatorOps thumb_translator_ops = { }; /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) { DisasContext dc; const TranslatorOps *ops = &arm_translator_ops; @@ -13770,7 +13770,7 @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) } #endif - translator_loop(ops, &dc.base, cpu, tb); + translator_loop(ops, &dc.base, cpu, tb, max_insns); } void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags) diff --git a/target/cris/translate.c b/target/cris/translate.c index 96359c0d7d..b005a5c20e 100644 --- a/target/cris/translate.c +++ b/target/cris/translate.c @@ -3081,7 +3081,7 @@ static unsigned int crisv32_decoder(CPUCRISState *env, DisasContext *dc) */ /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { CPUCRISState *env = cs->env_ptr; uint32_t pc_start; @@ -3091,7 +3091,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) uint32_t page_start; target_ulong npc; int num_insns; - int max_insns; if (env->pregs[PR_VR] == 32) { dc->decoder = crisv32_decoder; @@ -3137,13 +3136,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) page_start = pc_start & TARGET_PAGE_MASK; num_insns = 0; - max_insns = tb_cflags(tb) & CF_COUNT_MASK; - if (max_insns == 0) { - max_insns = CF_COUNT_MASK; - } - if (max_insns > TCG_MAX_INSNS) { - max_insns = TCG_MAX_INSNS; - } gen_tb_start(tb); do { diff --git a/target/hppa/translate.c b/target/hppa/translate.c index 43b74367ea..7c03c62768 100644 --- a/target/hppa/translate.c +++ b/target/hppa/translate.c @@ -4312,11 +4312,10 @@ static const TranslatorOps hppa_tr_ops = { .disas_log = hppa_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) - +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { DisasContext ctx; - translator_loop(&hppa_tr_ops, &ctx.base, cs, tb); + translator_loop(&hppa_tr_ops, &ctx.base, cs, tb, max_insns); } void restore_state_to_opc(CPUHPPAState *env, TranslationBlock *tb, diff --git a/target/i386/translate.c b/target/i386/translate.c index b725bec37c..77d6b73e42 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -8590,11 +8590,11 @@ static const TranslatorOps i386_tr_ops = { }; /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) { DisasContext dc; - translator_loop(&i386_tr_ops, &dc.base, cpu, tb); + translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns); } void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, diff --git a/target/lm32/translate.c b/target/lm32/translate.c index b8b5e12e63..f0e0e7058e 100644 --- a/target/lm32/translate.c +++ b/target/lm32/translate.c @@ -1050,7 +1050,7 @@ static inline void decode(DisasContext *dc, uint32_t ir) } /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { CPULM32State *env = cs->env_ptr; LM32CPU *cpu = lm32_env_get_cpu(env); @@ -1058,7 +1058,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) uint32_t pc_start; uint32_t page_start; int num_insns; - int max_insns; pc_start = tb->pc; dc->features = cpu->features; @@ -1078,13 +1077,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) page_start = pc_start & TARGET_PAGE_MASK; num_insns = 0; - max_insns = tb_cflags(tb) & CF_COUNT_MASK; - if (max_insns == 0) { - max_insns = CF_COUNT_MASK; - } - if (max_insns > TCG_MAX_INSNS) { - max_insns = TCG_MAX_INSNS; - } gen_tb_start(tb); do { diff --git a/target/m68k/translate.c b/target/m68k/translate.c index 3b2280b48b..58596278c2 100644 --- a/target/m68k/translate.c +++ b/target/m68k/translate.c @@ -6170,10 +6170,10 @@ static const TranslatorOps m68k_tr_ops = { .disas_log = m68k_tr_disas_log, }; -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) { DisasContext dc; - translator_loop(&m68k_tr_ops, &dc.base, cpu, tb); + translator_loop(&m68k_tr_ops, &dc.base, cpu, tb, max_insns); } static double floatx80_to_double(CPUM68KState *env, uint16_t high, uint64_t low) diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c index bc2712ddbd..885fc44b51 100644 --- a/target/microblaze/translate.c +++ b/target/microblaze/translate.c @@ -1601,7 +1601,7 @@ static inline void decode(DisasContext *dc, uint32_t ir) } /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { CPUMBState *env = cs->env_ptr; MicroBlazeCPU *cpu = mb_env_get_cpu(env); @@ -1611,7 +1611,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) uint32_t page_start, org_flags; uint32_t npc; int num_insns; - int max_insns; pc_start = tb->pc; dc->cpu = cpu; @@ -1635,13 +1634,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) page_start = pc_start & TARGET_PAGE_MASK; num_insns = 0; - max_insns = tb_cflags(tb) & CF_COUNT_MASK; - if (max_insns == 0) { - max_insns = CF_COUNT_MASK; - } - if (max_insns > TCG_MAX_INSNS) { - max_insns = TCG_MAX_INSNS; - } gen_tb_start(tb); do diff --git a/target/mips/translate.c b/target/mips/translate.c index 7849d53977..f96c0d01ef 100644 --- a/target/mips/translate.c +++ b/target/mips/translate.c @@ -29721,11 +29721,11 @@ static const TranslatorOps mips_tr_ops = { .disas_log = mips_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { DisasContext ctx; - translator_loop(&mips_tr_ops, &ctx.base, cs, tb); + translator_loop(&mips_tr_ops, &ctx.base, cs, tb, max_insns); } static void fpu_dump_state(CPUMIPSState *env, FILE *f, int flags) diff --git a/target/moxie/translate.c b/target/moxie/translate.c index dd055c4ca5..c668178f2c 100644 --- a/target/moxie/translate.c +++ b/target/moxie/translate.c @@ -813,13 +813,13 @@ static int decode_opc(MoxieCPU *cpu, DisasContext *ctx) } /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { CPUMoxieState *env = cs->env_ptr; MoxieCPU *cpu = moxie_env_get_cpu(env); DisasContext ctx; target_ulong pc_start; - int num_insns, max_insns; + int num_insns; pc_start = tb->pc; ctx.pc = pc_start; @@ -829,13 +829,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) ctx.singlestep_enabled = 0; ctx.bstate = BS_NONE; num_insns = 0; - max_insns = tb_cflags(tb) & CF_COUNT_MASK; - if (max_insns == 0) { - max_insns = CF_COUNT_MASK; - } - if (max_insns > TCG_MAX_INSNS) { - max_insns = TCG_MAX_INSNS; - } gen_tb_start(tb); do { diff --git a/target/nios2/translate.c b/target/nios2/translate.c index f0bbf78a32..17d8f1877c 100644 --- a/target/nios2/translate.c +++ b/target/nios2/translate.c @@ -806,12 +806,11 @@ static void gen_exception(DisasContext *dc, uint32_t excp) } /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { CPUNios2State *env = cs->env_ptr; DisasContext dc1, *dc = &dc1; int num_insns; - int max_insns; /* Initialize DC */ dc->cpu_env = cpu_env; @@ -824,20 +823,11 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) /* Set up instruction counts */ num_insns = 0; - if (cs->singlestep_enabled || singlestep) { - max_insns = 1; - } else { + if (max_insns > 1) { int page_insns = (TARGET_PAGE_SIZE - (tb->pc & TARGET_PAGE_MASK)) / 4; - max_insns = tb_cflags(tb) & CF_COUNT_MASK; - if (max_insns == 0) { - max_insns = CF_COUNT_MASK; - } if (max_insns > page_insns) { max_insns = page_insns; } - if (max_insns > TCG_MAX_INSNS) { - max_insns = TCG_MAX_INSNS; - } } gen_tb_start(tb); diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c index a88502fdc1..36821948c0 100644 --- a/target/openrisc/translate.c +++ b/target/openrisc/translate.c @@ -1409,11 +1409,11 @@ static const TranslatorOps openrisc_tr_ops = { .disas_log = openrisc_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { DisasContext ctx; - translator_loop(&openrisc_tr_ops, &ctx.base, cs, tb); + translator_loop(&openrisc_tr_ops, &ctx.base, cs, tb, max_insns); } void openrisc_cpu_dump_state(CPUState *cs, FILE *f, int flags) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index c280e0d306..8d08625c33 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -7984,11 +7984,11 @@ static const TranslatorOps ppc_tr_ops = { .disas_log = ppc_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { DisasContext ctx; - translator_loop(&ppc_tr_ops, &ctx.base, cs, tb); + translator_loop(&ppc_tr_ops, &ctx.base, cs, tb, max_insns); } void restore_state_to_opc(CPUPPCState *env, TranslationBlock *tb, diff --git a/target/riscv/translate.c b/target/riscv/translate.c index dd763647ea..967eac7bc3 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -783,11 +783,11 @@ static const TranslatorOps riscv_tr_ops = { .disas_log = riscv_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { DisasContext ctx; - translator_loop(&riscv_tr_ops, &ctx.base, cs, tb); + translator_loop(&riscv_tr_ops, &ctx.base, cs, tb, max_insns); } void riscv_translate_init(void) diff --git a/target/s390x/translate.c b/target/s390x/translate.c index 0afa8f7ca5..d4951836ad 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -6552,11 +6552,11 @@ static const TranslatorOps s390x_tr_ops = { .disas_log = s390x_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { DisasContext dc; - translator_loop(&s390x_tr_ops, &dc.base, cs, tb); + translator_loop(&s390x_tr_ops, &dc.base, cs, tb, max_insns); } void restore_state_to_opc(CPUS390XState *env, TranslationBlock *tb, diff --git a/target/sh4/translate.c b/target/sh4/translate.c index cffc6919d0..cdf0888490 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -2383,11 +2383,11 @@ static const TranslatorOps sh4_tr_ops = { .disas_log = sh4_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { DisasContext ctx; - translator_loop(&sh4_tr_ops, &ctx.base, cs, tb); + translator_loop(&sh4_tr_ops, &ctx.base, cs, tb, max_insns); } void restore_state_to_opc(CPUSH4State *env, TranslationBlock *tb, diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 74315cdf09..091bab53af 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -5962,11 +5962,11 @@ static const TranslatorOps sparc_tr_ops = { .disas_log = sparc_tr_disas_log, }; -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { DisasContext dc = {}; - translator_loop(&sparc_tr_ops, &dc.base, cs, tb); + translator_loop(&sparc_tr_ops, &dc.base, cs, tb, max_insns); } void sparc_tcg_init(void) diff --git a/target/tilegx/translate.c b/target/tilegx/translate.c index df1e4d0fef..c46a4ab151 100644 --- a/target/tilegx/translate.c +++ b/target/tilegx/translate.c @@ -2369,7 +2369,7 @@ static void translate_one_bundle(DisasContext *dc, uint64_t bundle) } } -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { CPUTLGState *env = cs->env_ptr; DisasContext ctx; @@ -2377,7 +2377,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) uint64_t pc_start = tb->pc; uint64_t page_start = pc_start & TARGET_PAGE_MASK; int num_insns = 0; - int max_insns = tb_cflags(tb) & CF_COUNT_MASK; dc->pc = pc_start; dc->mmuidx = 0; @@ -2392,15 +2391,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) qemu_log_lock(); qemu_log("IN: %s\n", lookup_symbol(pc_start)); } - if (!max_insns) { - max_insns = CF_COUNT_MASK; - } - if (cs->singlestep_enabled || singlestep) { - max_insns = 1; - } - if (max_insns > TCG_MAX_INSNS) { - max_insns = TCG_MAX_INSNS; - } gen_tb_start(tb); while (1) { diff --git a/target/tricore/translate.c b/target/tricore/translate.c index 352f52bb4a..8f6416144e 100644 --- a/target/tricore/translate.c +++ b/target/tricore/translate.c @@ -8807,24 +8807,12 @@ static void decode_opc(CPUTriCoreState *env, DisasContext *ctx, int *is_branch) } } -void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { CPUTriCoreState *env = cs->env_ptr; DisasContext ctx; target_ulong pc_start; - int num_insns, max_insns; - - num_insns = 0; - max_insns = tb_cflags(tb) & CF_COUNT_MASK; - if (max_insns == 0) { - max_insns = CF_COUNT_MASK; - } - if (singlestep) { - max_insns = 1; - } - if (max_insns > TCG_MAX_INSNS) { - max_insns = TCG_MAX_INSNS; - } + int num_insns = 0; pc_start = tb->pc; ctx.pc = pc_start; diff --git a/target/unicore32/translate.c b/target/unicore32/translate.c index dfe41c9069..89b02d1c3c 100644 --- a/target/unicore32/translate.c +++ b/target/unicore32/translate.c @@ -1871,14 +1871,13 @@ static void disas_uc32_insn(CPUUniCore32State *env, DisasContext *s) } /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) { CPUUniCore32State *env = cs->env_ptr; DisasContext dc1, *dc = &dc1; target_ulong pc_start; uint32_t page_start; int num_insns; - int max_insns; /* generate intermediate code */ num_temps = 0; @@ -1897,13 +1896,6 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) cpu_F1d = tcg_temp_new_i64(); page_start = pc_start & TARGET_PAGE_MASK; num_insns = 0; - max_insns = tb_cflags(tb) & CF_COUNT_MASK; - if (max_insns == 0) { - max_insns = CF_COUNT_MASK; - } - if (max_insns > TCG_MAX_INSNS) { - max_insns = TCG_MAX_INSNS; - } #ifndef CONFIG_USER_ONLY if ((env->uncached_asr & ASR_M) == ASR_MODE_USER) { diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c index 43a5e94daa..301c8e3161 100644 --- a/target/xtensa/translate.c +++ b/target/xtensa/translate.c @@ -1635,10 +1635,10 @@ static const TranslatorOps xtensa_translator_ops = { .disas_log = xtensa_tr_disas_log, }; -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) { DisasContext dc = {}; - translator_loop(&xtensa_translator_ops, &dc.base, cpu, tb); + translator_loop(&xtensa_translator_ops, &dc.base, cpu, tb, max_insns); } void xtensa_cpu_dump_state(CPUState *cs, FILE *f, int flags) diff --git a/tcg/README b/tcg/README index 603f4df659..c30e5418a6 100644 --- a/tcg/README +++ b/tcg/README @@ -343,6 +343,13 @@ at bit 8. This operation would be equivalent to (using an arithmetic right shift). +* extract2_i32/i64 dest, t1, t2, pos + +For N = {32,64}, extract an N-bit quantity from the concatenation +of t2:t1, beginning at pos. The tcg_gen_extract2_{i32,i64} expander +accepts 0 <= pos <= N as inputs. The backend code generator will +not see either 0 or N as inputs for these opcodes. + * extrl_i64_i32 t0, t1 For 64-bit hosts only, extract the low 32-bits of input T1 and place it diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 2d93cf404e..ce2bb1f90b 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -77,6 +77,7 @@ typedef enum { #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 1 +#define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -113,6 +114,7 @@ typedef enum { #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 1 +#define TCG_TARGET_HAS_extract2_i64 1 #define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index d57f9e500f..eefa929948 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -1395,14 +1395,15 @@ static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target) tcg_out_insn(s, 3406, ADR, rd, offset); } -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); TCGMemOp size = opc & MO_SIZE; - bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr); - tcg_debug_assert(ok); + if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) { + return false; + } tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); @@ -1416,16 +1417,18 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) } tcg_out_goto(s, lb->raddr); + return true; } -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); TCGMemOp size = opc & MO_SIZE; - bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr); - tcg_debug_assert(ok); + if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) { + return false; + } tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); @@ -1434,6 +1437,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_adr(s, TCG_REG_X4, lb->raddr); tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tcg_out_goto(s, lb->raddr); + return true; } static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, @@ -2058,6 +2062,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1); break; + case INDEX_op_extract2_i64: + case INDEX_op_extract2_i32: + tcg_out_extr(s, ext, a0, a1, a2, args[3]); + break; + case INDEX_op_add2_i32: tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), (int32_t)args[4], args[5], const_args[4], @@ -2300,6 +2309,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) = { .args_ct_str = { "r", "r", "rAL" } }; static const TCGTargetOpDef dep = { .args_ct_str = { "r", "0", "rZ" } }; + static const TCGTargetOpDef ext2 + = { .args_ct_str = { "r", "rZ", "rZ" } }; static const TCGTargetOpDef movc = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } }; static const TCGTargetOpDef add2 @@ -2430,6 +2441,10 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_deposit_i64: return &dep; + case INDEX_op_extract2_i32: + case INDEX_op_extract2_i64: + return &ext2; + case INDEX_op_add2_i32: case INDEX_op_add2_i64: case INDEX_op_sub2_i32: diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 16172f73a3..17e771374d 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -116,6 +116,7 @@ extern bool use_idiv_instructions; #define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions #define TCG_TARGET_HAS_extract_i32 use_armv7_instructions #define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions +#define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c index 2245a8aeb9..abf0c444b4 100644 --- a/tcg/arm/tcg-target.inc.c +++ b/tcg/arm/tcg-target.inc.c @@ -197,6 +197,24 @@ static inline bool reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target) return false; } +static inline bool reloc_pc13(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ + ptrdiff_t offset = tcg_ptr_byte_diff(target, code_ptr) - 8; + + if (offset >= -0xfff && offset <= 0xfff) { + tcg_insn_unit insn = *code_ptr; + bool u = (offset >= 0); + if (!u) { + offset = -offset; + } + insn = deposit32(insn, 23, 1, u); + insn = deposit32(insn, 0, 12, offset); + *code_ptr = insn; + return true; + } + return false; +} + static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { @@ -205,39 +223,10 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, if (type == R_ARM_PC24) { return reloc_pc24(code_ptr, (tcg_insn_unit *)value); } else if (type == R_ARM_PC13) { - intptr_t diff = value - (uintptr_t)(code_ptr + 2); - tcg_insn_unit insn = *code_ptr; - bool u; - - if (diff >= -0xfff && diff <= 0xfff) { - u = (diff >= 0); - if (!u) { - diff = -diff; - } - } else { - int rd = extract32(insn, 12, 4); - int rt = rd == TCG_REG_PC ? TCG_REG_TMP : rd; - - if (diff < 0x1000 || diff >= 0x100000) { - return false; - } - - /* add rt, pc, #high */ - *code_ptr++ = ((insn & 0xf0000000) | (1 << 25) | ARITH_ADD - | (TCG_REG_PC << 16) | (rt << 12) - | (20 << 7) | (diff >> 12)); - /* ldr rd, [rt, #low] */ - insn = deposit32(insn, 12, 4, rt); - diff &= 0xfff; - u = 1; - } - insn = deposit32(insn, 23, 1, u); - insn = deposit32(insn, 0, 12, diff); - *code_ptr = insn; + return reloc_pc13(code_ptr, (tcg_insn_unit *)value); } else { g_assert_not_reached(); } - return true; } #define TCG_CT_CONST_ARM 0x100 @@ -605,12 +594,8 @@ static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt, static void tcg_out_movi_pool(TCGContext *s, int cond, int rd, uint32_t arg) { - /* The 12-bit range on the ldr insn is sometimes a bit too small. - In order to get around that we require two insns, one of which - will usually be a nop, but may be replaced in patch_reloc. */ new_pool_label(s, arg, R_ARM_PC13, s->code_ptr, 0); tcg_out_ld32_12(s, cond, rd, TCG_REG_PC, 0); - tcg_out_nop(s); } static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg) @@ -1069,8 +1054,8 @@ static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr) tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri); tcg_out_blx(s, COND_AL, TCG_REG_TMP); } else { - /* ??? Know that movi_pool emits exactly 2 insns. */ - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4); + /* ??? Know that movi_pool emits exactly 1 insn. */ + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 0); tcg_out_movi_pool(s, COND_AL, TCG_REG_PC, addri); } } @@ -1372,15 +1357,16 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, label->label_ptr[0] = label_ptr; } -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGReg argreg, datalo, datahi; TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); void *func; - bool ok = reloc_pc24(lb->label_ptr[0], s->code_ptr); - tcg_debug_assert(ok); + if (!reloc_pc24(lb->label_ptr[0], s->code_ptr)) { + return false; + } argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0); if (TARGET_LONG_BITS == 64) { @@ -1432,16 +1418,18 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) } tcg_out_goto(s, COND_AL, lb->raddr); + return true; } -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGReg argreg, datalo, datahi; TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); - bool ok = reloc_pc24(lb->label_ptr[0], s->code_ptr); - tcg_debug_assert(ok); + if (!reloc_pc24(lb->label_ptr[0], s->code_ptr)) { + return false; + } argreg = TCG_REG_R0; argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); @@ -1474,6 +1462,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) /* Tail-call to the helper, which will return to the fast path. */ tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + return true; } #endif /* SOFTMMU */ @@ -2064,6 +2053,27 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_sextract_i32: tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]); break; + case INDEX_op_extract2_i32: + /* ??? These optimization vs zero should be generic. */ + /* ??? But we can't substitute 2 for 1 in the opcode stream yet. */ + if (const_args[1]) { + if (const_args[2]) { + tcg_out_movi(s, TCG_TYPE_REG, args[0], 0); + } else { + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, + args[2], SHIFT_IMM_LSL(32 - args[3])); + } + } else if (const_args[2]) { + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, + args[1], SHIFT_IMM_LSR(args[3])); + } else { + /* We can do extract2 in 2 insns, vs the 3 required otherwise. */ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, + args[2], SHIFT_IMM_LSL(32 - args[3])); + tcg_out_dat_reg(s, COND_AL, ARITH_ORR, args[0], TCG_REG_TMP, + args[1], SHIFT_IMM_LSR(args[3])); + } + break; case INDEX_op_div_i32: tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]); @@ -2108,6 +2118,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) = { .args_ct_str = { "s", "s", "s", "s" } }; static const TCGTargetOpDef br = { .args_ct_str = { "r", "rIN" } }; + static const TCGTargetOpDef ext2 + = { .args_ct_str = { "r", "rZ", "rZ" } }; static const TCGTargetOpDef dep = { .args_ct_str = { "r", "0", "rZ" } }; static const TCGTargetOpDef movc @@ -2174,6 +2186,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) return &br; case INDEX_op_deposit_i32: return &dep; + case INDEX_op_extract2_i32: + return &ext2; case INDEX_op_movcond_i32: return &movc; case INDEX_op_add2_i32: diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 7995fe3eab..241bf19413 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -124,6 +124,7 @@ extern bool have_avx2; #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 1 +#define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -162,6 +163,7 @@ extern bool have_avx2; #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 1 #define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index e0670e5098..d5ed9f1ffd 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -452,6 +452,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_SHUFPS (0xc6 | P_EXT) #define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16) #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) +#define OPC_SHRD_Ib (0xac | P_EXT) #define OPC_TESTL (0x85) #define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3) #define OPC_UD2 (0x0b | P_EXT) @@ -1729,7 +1730,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, bool is_64, /* * Generate code for the slow path for a load at the end of block */ -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { TCGMemOpIdx oi = l->oi; TCGMemOp opc = get_memop(oi); @@ -1808,12 +1809,13 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) /* Jump to the code corresponding to next IR of qemu_st */ tcg_out_jmp(s, l->raddr); + return true; } /* * Generate code for the slow path for a store at the end of block */ -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { TCGMemOpIdx oi = l->oi; TCGMemOp opc = get_memop(oi); @@ -1876,6 +1878,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) /* "Tail call" to the helper, with the return address back inline. */ tcg_out_push(s, retaddr); tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + return true; } #elif TCG_TARGET_REG_BITS == 32 # define x86_guest_base_seg 0 @@ -2587,6 +2590,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; + OP_32_64(extract2): + /* Note that SHRD outputs to the r/m operand. */ + tcg_out_modrm(s, OPC_SHRD_Ib + rexw, a2, a0); + tcg_out8(s, args[3]); + break; + case INDEX_op_mb: tcg_out_mb(s, a0); break; @@ -2845,6 +2854,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) static const TCGTargetOpDef r_0 = { .args_ct_str = { "r", "0" } }; static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } }; static const TCGTargetOpDef r_r_re = { .args_ct_str = { "r", "r", "re" } }; + static const TCGTargetOpDef r_0_r = { .args_ct_str = { "r", "0", "r" } }; static const TCGTargetOpDef r_0_re = { .args_ct_str = { "r", "0", "re" } }; static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } }; static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } }; @@ -2970,6 +2980,9 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_ctpop_i32: case INDEX_op_ctpop_i64: return &r_r; + case INDEX_op_extract2_i32: + case INDEX_op_extract2_i64: + return &r_0_r; case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 5cb8672470..c6b091d849 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -162,6 +162,7 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_extract_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions @@ -177,6 +178,7 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_extract_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_rot_i64 use_mips32r2_instructions diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c index 8a92e916dd..412cacdcb9 100644 --- a/tcg/mips/tcg-target.inc.c +++ b/tcg/mips/tcg-target.inc.c @@ -1338,7 +1338,7 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi, } } -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { TCGMemOpIdx oi = l->oi; TCGMemOp opc = get_memop(oi); @@ -1385,9 +1385,10 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } else { tcg_out_opc_reg(s, OPC_OR, v0, TCG_REG_V0, TCG_REG_ZERO); } + return true; } -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { TCGMemOpIdx oi = l->oi; TCGMemOp opc = get_memop(oi); @@ -1435,6 +1436,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true); /* delay slot */ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + return true; } #endif diff --git a/tcg/optimize.c b/tcg/optimize.c index 01e80c3e46..5150c38a25 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1202,6 +1202,22 @@ void tcg_optimize(TCGContext *s) } goto do_default; + CASE_OP_32_64(extract2): + if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { + TCGArg v1 = arg_info(op->args[1])->val; + TCGArg v2 = arg_info(op->args[2])->val; + + if (opc == INDEX_op_extract2_i64) { + tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3])); + } else { + tmp = (v1 >> op->args[3]) | (v2 << (32 - op->args[3])); + tmp = (int32_t)tmp; + } + tcg_opt_gen_movi(s, op, op->args[0], tmp); + break; + } + goto do_default; + CASE_OP_32_64(setcond): tmp = do_constant_folding_cond(opc, op->args[1], op->args[2], op->args[3]); diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 52c1bb04b1..7627fb62d3 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -77,6 +77,7 @@ extern bool have_isa_3_00; #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 @@ -115,6 +116,7 @@ extern bool have_isa_3_00; #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c index 773690f1d9..36b4791707 100644 --- a/tcg/ppc/tcg-target.inc.c +++ b/tcg/ppc/tcg-target.inc.c @@ -529,7 +529,6 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { tcg_insn_unit *target; - tcg_insn_unit old; value += addend; target = (tcg_insn_unit *)value; @@ -540,22 +539,16 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, case R_PPC_REL24: return reloc_pc24(code_ptr, target); case R_PPC_ADDR16: - /* We are abusing this relocation type. This points to a pair - of insns, addis + load. If the displacement is small, we - can nop out the addis. */ - if (value == (int16_t)value) { - code_ptr[0] = NOP; - old = deposit32(code_ptr[1], 0, 16, value); - code_ptr[1] = deposit32(old, 16, 5, TCG_REG_TB); - } else { - int16_t lo = value; - int hi = value - lo; - if (hi + lo != value) { - return false; - } - code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16); - code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo); + /* + * We are (slightly) abusing this relocation type. In particular, + * assert that the low 2 bits are zero, and do not modify them. + * That way we can use this with LD et al that have opcode bits + * in the low 2 bits of the insn. + */ + if ((value & 3) || value != (int16_t)value) { + return false; } + *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc); break; default: g_assert_not_reached(); @@ -701,8 +694,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, if (!in_prologue && USE_REG_TB) { new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr, -(intptr_t)s->code_gen_ptr); - tcg_out32(s, ADDIS | TAI(ret, TCG_REG_TB, 0)); - tcg_out32(s, LD | TAI(ret, ret, 0)); + tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0)); return; } @@ -1653,13 +1645,15 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, label->label_ptr[0] = lptr; } -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); TCGReg hi, lo, arg = TCG_REG_R3; - **lb->label_ptr |= reloc_pc14_val(*lb->label_ptr, s->code_ptr); + if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) { + return false; + } tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); @@ -1695,16 +1689,19 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) } tcg_out_b(s, 0, lb->raddr); + return true; } -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); TCGMemOp s_bits = opc & MO_SIZE; TCGReg hi, lo, arg = TCG_REG_R3; - **lb->label_ptr |= reloc_pc14_val(*lb->label_ptr, s->code_ptr); + if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) { + return false; + } tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); @@ -1753,6 +1750,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tcg_out_b(s, 0, lb->raddr); + return true; } #endif /* SOFTMMU */ diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index 60918cacb4..032439d806 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -93,6 +93,7 @@ typedef enum { #define TCG_TARGET_HAS_deposit_i32 0 #define TCG_TARGET_HAS_extract_i32 0 #define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 @@ -128,6 +129,7 @@ typedef enum { #define TCG_TARGET_HAS_deposit_i64 0 #define TCG_TARGET_HAS_extract_i64 0 #define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extrl_i64_i32 1 #define TCG_TARGET_HAS_extrh_i64_i32 1 #define TCG_TARGET_HAS_ext8s_i64 1 diff --git a/tcg/riscv/tcg-target.inc.c b/tcg/riscv/tcg-target.inc.c index b785f4acb7..2932505094 100644 --- a/tcg/riscv/tcg-target.inc.c +++ b/tcg/riscv/tcg-target.inc.c @@ -1065,7 +1065,7 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi, label->label_ptr[0] = label_ptr[0]; } -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { TCGMemOpIdx oi = l->oi; TCGMemOp opc = get_memop(oi); @@ -1080,7 +1080,10 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } /* resolve label address */ - patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, (intptr_t) s->code_ptr, 0); + if (!patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, + (intptr_t) s->code_ptr, 0)) { + return false; + } /* call load helper */ tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0); @@ -1092,9 +1095,10 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0); tcg_out_goto(s, l->raddr); + return true; } -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { TCGMemOpIdx oi = l->oi; TCGMemOp opc = get_memop(oi); @@ -1111,7 +1115,10 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } /* resolve label address */ - patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, (intptr_t) s->code_ptr, 0); + if (!patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, + (intptr_t) s->code_ptr, 0)) { + return false; + } /* call store helper */ tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0); @@ -1133,6 +1140,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SSIZE)]); tcg_out_goto(s, l->raddr); + return true; } #endif /* CONFIG_SOFTMMU */ diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 853ed6e7aa..07accabbd1 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -85,6 +85,7 @@ extern uint64_t s390_facilities; #define TCG_TARGET_HAS_deposit_i32 (s390_facilities & FACILITY_GEN_INST_EXT) #define TCG_TARGET_HAS_extract_i32 (s390_facilities & FACILITY_GEN_INST_EXT) #define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -121,6 +122,7 @@ extern uint64_t s390_facilities; #define TCG_TARGET_HAS_deposit_i64 (s390_facilities & FACILITY_GEN_INST_EXT) #define TCG_TARGET_HAS_extract_i64 (s390_facilities & FACILITY_GEN_INST_EXT) #define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c index 7db90b3bae..3d6150b10e 100644 --- a/tcg/s390/tcg-target.inc.c +++ b/tcg/s390/tcg-target.inc.c @@ -1609,16 +1609,17 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, label->label_ptr[0] = label_ptr; } -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGReg addr_reg = lb->addrlo_reg; TCGReg data_reg = lb->datalo_reg; TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); - bool ok = patch_reloc(lb->label_ptr[0], R_390_PC16DBL, - (intptr_t)s->code_ptr, 2); - tcg_debug_assert(ok); + if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL, + (intptr_t)s->code_ptr, 2)) { + return false; + } tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); if (TARGET_LONG_BITS == 64) { @@ -1630,18 +1631,20 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); + return true; } -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGReg addr_reg = lb->addrlo_reg; TCGReg data_reg = lb->datalo_reg; TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); - bool ok = patch_reloc(lb->label_ptr[0], R_390_PC16DBL, - (intptr_t)s->code_ptr, 2); - tcg_debug_assert(ok); + if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL, + (intptr_t)s->code_ptr, 2)) { + return false; + } tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); if (TARGET_LONG_BITS == 64) { @@ -1668,6 +1671,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); + return true; } #else static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index a0ed2a3342..633841ebf2 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -116,6 +116,7 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_deposit_i32 0 #define TCG_TARGET_HAS_extract_i32 0 #define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -153,6 +154,7 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_deposit_i64 0 #define TCG_TARGET_HAS_extract_i64 0 #define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/tcg-ldst.inc.c b/tcg/tcg-ldst.inc.c index 47f41b921b..05f9b3ccd6 100644 --- a/tcg/tcg-ldst.inc.c +++ b/tcg/tcg-ldst.inc.c @@ -38,19 +38,19 @@ typedef struct TCGLabelQemuLdst { * Generate TB finalization at the end of block */ -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l); -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l); +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l); +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l); -static bool tcg_out_ldst_finalize(TCGContext *s) +static int tcg_out_ldst_finalize(TCGContext *s) { TCGLabelQemuLdst *lb; /* qemu_ld/st slow paths */ QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) { - if (lb->is_ld) { - tcg_out_qemu_ld_slow_path(s, lb); - } else { - tcg_out_qemu_st_slow_path(s, lb); + if (lb->is_ld + ? !tcg_out_qemu_ld_slow_path(s, lb) + : !tcg_out_qemu_st_slow_path(s, lb)) { + return -2; } /* Test for (pending) buffer overflow. The assumption is that any @@ -58,10 +58,10 @@ static bool tcg_out_ldst_finalize(TCGContext *s) the buffer completely. Thus we can test for overflow after generating code without having to check during generation. */ if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { - return false; + return -1; } } - return true; + return 0; } /* diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 1bd7ef24af..a00d1df37e 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -611,9 +611,22 @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, return; } - mask = (1u << len) - 1; t1 = tcg_temp_new_i32(); + if (TCG_TARGET_HAS_extract2_i32) { + if (ofs + len == 32) { + tcg_gen_shli_i32(t1, arg1, len); + tcg_gen_extract2_i32(ret, t1, arg2, len); + goto done; + } + if (ofs == 0) { + tcg_gen_extract2_i32(ret, arg1, arg2, len); + tcg_gen_rotli_i32(ret, ret, len); + goto done; + } + } + + mask = (1u << len) - 1; if (ofs + len < 32) { tcg_gen_andi_i32(t1, arg2, mask); tcg_gen_shli_i32(t1, t1, ofs); @@ -622,7 +635,7 @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, } tcg_gen_andi_i32(ret, arg1, ~(mask << ofs)); tcg_gen_or_i32(ret, ret, t1); - + done: tcg_temp_free_i32(t1); } @@ -809,6 +822,30 @@ void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg, tcg_gen_sari_i32(ret, ret, 32 - len); } +/* + * Extract 32-bits from a 64-bit input, ah:al, starting from ofs. + * Unlike tcg_gen_extract_i32 above, len is fixed at 32. + */ +void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah, + unsigned int ofs) +{ + tcg_debug_assert(ofs <= 32); + if (ofs == 0) { + tcg_gen_mov_i32(ret, al); + } else if (ofs == 32) { + tcg_gen_mov_i32(ret, ah); + } else if (al == ah) { + tcg_gen_rotri_i32(ret, al, ofs); + } else if (TCG_TARGET_HAS_extract2_i32) { + tcg_gen_op4i_i32(INDEX_op_extract2_i32, ret, al, ah, ofs); + } else { + TCGv_i32 t0 = tcg_temp_new_i32(); + tcg_gen_shri_i32(t0, al, ofs); + tcg_gen_deposit_i32(ret, t0, ah, 32 - ofs, ofs); + tcg_temp_free_i32(t0); + } +} + void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1, TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2) { @@ -1331,31 +1368,32 @@ static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1, tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c); tcg_gen_movi_i32(TCGV_LOW(ret), 0); } - } else { - TCGv_i32 t0, t1; - - t0 = tcg_temp_new_i32(); - t1 = tcg_temp_new_i32(); - if (right) { - tcg_gen_shli_i32(t0, TCGV_HIGH(arg1), 32 - c); - if (arith) { - tcg_gen_sari_i32(t1, TCGV_HIGH(arg1), c); - } else { - tcg_gen_shri_i32(t1, TCGV_HIGH(arg1), c); - } + } else if (right) { + if (TCG_TARGET_HAS_extract2_i32) { + tcg_gen_extract2_i32(TCGV_LOW(ret), + TCGV_LOW(arg1), TCGV_HIGH(arg1), c); + } else { tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c); - tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t0); - tcg_gen_mov_i32(TCGV_HIGH(ret), t1); + tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(ret), + TCGV_HIGH(arg1), 32 - c, c); + } + if (arith) { + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c); } else { + tcg_gen_shri_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c); + } + } else { + if (TCG_TARGET_HAS_extract2_i32) { + tcg_gen_extract2_i32(TCGV_HIGH(ret), + TCGV_LOW(arg1), TCGV_HIGH(arg1), 32 - c); + } else { + TCGv_i32 t0 = tcg_temp_new_i32(); tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c); - /* Note: ret can be the same as arg1, so we use t1 */ - tcg_gen_shli_i32(t1, TCGV_LOW(arg1), c); - tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c); - tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t0); - tcg_gen_mov_i32(TCGV_LOW(ret), t1); + tcg_gen_deposit_i32(TCGV_HIGH(ret), t0, + TCGV_HIGH(arg1), c, 32 - c); + tcg_temp_free_i32(t0); } - tcg_temp_free_i32(t0); - tcg_temp_free_i32(t1); + tcg_gen_shli_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c); } } @@ -1999,9 +2037,22 @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, } } - mask = (1ull << len) - 1; t1 = tcg_temp_new_i64(); + if (TCG_TARGET_HAS_extract2_i64) { + if (ofs + len == 64) { + tcg_gen_shli_i64(t1, arg1, len); + tcg_gen_extract2_i64(ret, t1, arg2, len); + goto done; + } + if (ofs == 0) { + tcg_gen_extract2_i64(ret, arg1, arg2, len); + tcg_gen_rotli_i64(ret, ret, len); + goto done; + } + } + + mask = (1ull << len) - 1; if (ofs + len < 64) { tcg_gen_andi_i64(t1, arg2, mask); tcg_gen_shli_i64(t1, t1, ofs); @@ -2010,7 +2061,7 @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, } tcg_gen_andi_i64(ret, arg1, ~(mask << ofs)); tcg_gen_or_i64(ret, ret, t1); - + done: tcg_temp_free_i64(t1); } @@ -2297,6 +2348,30 @@ void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg, tcg_gen_sari_i64(ret, ret, 64 - len); } +/* + * Extract 64 bits from a 128-bit input, ah:al, starting from ofs. + * Unlike tcg_gen_extract_i64 above, len is fixed at 64. + */ +void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah, + unsigned int ofs) +{ + tcg_debug_assert(ofs <= 64); + if (ofs == 0) { + tcg_gen_mov_i64(ret, al); + } else if (ofs == 64) { + tcg_gen_mov_i64(ret, ah); + } else if (al == ah) { + tcg_gen_rotri_i64(ret, al, ofs); + } else if (TCG_TARGET_HAS_extract2_i64) { + tcg_gen_op4i_i64(INDEX_op_extract2_i64, ret, al, ah, ofs); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + tcg_gen_shri_i64(t0, al, ofs); + tcg_gen_deposit_i64(ret, t0, ah, 64 - ofs, ofs); + tcg_temp_free_i64(t0); + } +} + void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1, TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2) { diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index d3e51b15af..1f1824c30a 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -308,6 +308,8 @@ void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg, unsigned int ofs, unsigned int len); void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg, unsigned int ofs, unsigned int len); +void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah, + unsigned int ofs); void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *); void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *); void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret, @@ -501,6 +503,8 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg, unsigned int ofs, unsigned int len); void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg, unsigned int ofs, unsigned int len); +void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah, + unsigned int ofs); void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *); void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *); void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret, @@ -1068,6 +1072,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_deposit_z_tl tcg_gen_deposit_z_i64 #define tcg_gen_extract_tl tcg_gen_extract_i64 #define tcg_gen_sextract_tl tcg_gen_sextract_i64 +#define tcg_gen_extract2_tl tcg_gen_extract2_i64 #define tcg_const_tl tcg_const_i64 #define tcg_const_local_tl tcg_const_local_i64 #define tcg_gen_movcond_tl tcg_gen_movcond_i64 @@ -1178,6 +1183,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_deposit_z_tl tcg_gen_deposit_z_i32 #define tcg_gen_extract_tl tcg_gen_extract_i32 #define tcg_gen_sextract_tl tcg_gen_sextract_i32 +#define tcg_gen_extract2_tl tcg_gen_extract2_i32 #define tcg_const_tl tcg_const_i32 #define tcg_const_local_tl tcg_const_local_i32 #define tcg_gen_movcond_tl tcg_gen_movcond_i32 diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 4e0238ad1a..1bad6e4208 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -79,6 +79,7 @@ DEF(rotr_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32)) DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32)) DEF(extract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_extract_i32)) DEF(sextract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_sextract_i32)) +DEF(extract2_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_extract2_i32)) DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END) @@ -146,6 +147,7 @@ DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64)) DEF(extract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_extract_i64)) DEF(sextract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_sextract_i64)) +DEF(extract2_i64, 1, 2, 1, IMPL64 | IMPL(TCG_TARGET_HAS_extract2_i64)) /* size changing ops */ DEF(ext_i32_i64, 1, 1, 0, IMPL64) diff --git a/tcg/tcg-pool.inc.c b/tcg/tcg-pool.inc.c index 7af5513ff3..4eaa84b631 100644 --- a/tcg/tcg-pool.inc.c +++ b/tcg/tcg-pool.inc.c @@ -121,14 +121,14 @@ static inline void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label, /* To be provided by cpu/tcg-target.inc.c. */ static void tcg_out_nop_fill(tcg_insn_unit *p, int count); -static bool tcg_out_pool_finalize(TCGContext *s) +static int tcg_out_pool_finalize(TCGContext *s) { TCGLabelPoolData *p = s->pool_labels; TCGLabelPoolData *l = NULL; void *a; if (p == NULL) { - return true; + return 0; } /* ??? Round up to qemu_icache_linesize, but then do not round @@ -142,15 +142,17 @@ static bool tcg_out_pool_finalize(TCGContext *s) size_t size = sizeof(tcg_target_ulong) * p->nlong; if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) { if (unlikely(a > s->code_gen_highwater)) { - return false; + return -1; } memcpy(a, p->data, size); a += size; l = p; } - patch_reloc(p->label, p->rtype, (intptr_t)a - size, p->addend); + if (!patch_reloc(p->label, p->rtype, (intptr_t)a - size, p->addend)) { + return -2; + } } s->code_ptr = a; - return true; + return 0; } @@ -128,7 +128,7 @@ static void tcg_out_call(TCGContext *s, tcg_insn_unit *target); static int tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct); #ifdef TCG_TARGET_NEED_LDST_LABELS -static bool tcg_out_ldst_finalize(TCGContext *s); +static int tcg_out_ldst_finalize(TCGContext *s); #endif #define TCG_HIGHWATER 1024 @@ -263,37 +263,17 @@ static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, TCGLabel *l, intptr_t addend) { - TCGRelocation *r; - - if (l->has_value) { - /* FIXME: This may break relocations on RISC targets that - modify instruction fields in place. The caller may not have - written the initial value. */ - bool ok = patch_reloc(code_ptr, type, l->u.value, addend); - tcg_debug_assert(ok); - } else { - /* add a new relocation entry */ - r = tcg_malloc(sizeof(TCGRelocation)); - r->type = type; - r->ptr = code_ptr; - r->addend = addend; - r->next = l->u.first_reloc; - l->u.first_reloc = r; - } + TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation)); + + r->type = type; + r->ptr = code_ptr; + r->addend = addend; + QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next); } static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr) { - intptr_t value = (intptr_t)ptr; - TCGRelocation *r; - tcg_debug_assert(!l->has_value); - - for (r = l->u.first_reloc; r != NULL; r = r->next) { - bool ok = patch_reloc(r->ptr, r->type, value, r->addend); - tcg_debug_assert(ok); - } - l->has_value = 1; l->u.value_ptr = ptr; } @@ -303,16 +283,32 @@ TCGLabel *gen_new_label(void) TCGContext *s = tcg_ctx; TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); - *l = (TCGLabel){ - .id = s->nb_labels++ - }; -#ifdef CONFIG_DEBUG_TCG + memset(l, 0, sizeof(TCGLabel)); + l->id = s->nb_labels++; + QSIMPLEQ_INIT(&l->relocs); + QSIMPLEQ_INSERT_TAIL(&s->labels, l, next); -#endif return l; } +static bool tcg_resolve_relocs(TCGContext *s) +{ + TCGLabel *l; + + QSIMPLEQ_FOREACH(l, &s->labels, next) { + TCGRelocation *r; + uintptr_t value = l->u.value; + + QSIMPLEQ_FOREACH(r, &l->relocs, next) { + if (!patch_reloc(r->ptr, r->type, value, r->addend)) { + return false; + } + } + } + return true; +} + static void set_jmp_reset_offset(TCGContext *s, int which) { size_t off = tcg_current_code_size(s); @@ -1023,8 +1019,8 @@ void tcg_prologue_init(TCGContext *s) #ifdef TCG_TARGET_NEED_POOL_LABELS /* Allow the prologue to put e.g. guest_base into a pool entry. */ { - bool ok = tcg_out_pool_finalize(s); - tcg_debug_assert(ok); + int result = tcg_out_pool_finalize(s); + tcg_debug_assert(result == 0); } #endif @@ -1096,9 +1092,7 @@ void tcg_func_start(TCGContext *s) QTAILQ_INIT(&s->ops); QTAILQ_INIT(&s->free_ops); -#ifdef CONFIG_DEBUG_TCG QSIMPLEQ_INIT(&s->labels); -#endif } static inline TCGTemp *tcg_temp_alloc(TCGContext *s) @@ -1426,6 +1420,8 @@ bool tcg_op_supported(TCGOpcode op) return TCG_TARGET_HAS_extract_i32; case INDEX_op_sextract_i32: return TCG_TARGET_HAS_sextract_i32; + case INDEX_op_extract2_i32: + return TCG_TARGET_HAS_extract2_i32; case INDEX_op_add2_i32: return TCG_TARGET_HAS_add2_i32; case INDEX_op_sub2_i32: @@ -1523,6 +1519,8 @@ bool tcg_op_supported(TCGOpcode op) return TCG_TARGET_HAS_extract_i64; case INDEX_op_sextract_i64: return TCG_TARGET_HAS_sextract_i64; + case INDEX_op_extract2_i64: + return TCG_TARGET_HAS_extract2_i64; case INDEX_op_extrl_i64_i32: return TCG_TARGET_HAS_extrl_i64_i32; case INDEX_op_extrh_i64_i32: @@ -3992,21 +3990,30 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { return -1; } + /* Test for TB overflow, as seen by gen_insn_end_off. */ + if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { + return -2; + } } tcg_debug_assert(num_insns >= 0); s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); /* Generate TB finalization at the end of block */ #ifdef TCG_TARGET_NEED_LDST_LABELS - if (!tcg_out_ldst_finalize(s)) { - return -1; + i = tcg_out_ldst_finalize(s); + if (i < 0) { + return i; } #endif #ifdef TCG_TARGET_NEED_POOL_LABELS - if (!tcg_out_pool_finalize(s)) { - return -1; + i = tcg_out_pool_finalize(s); + if (i < 0) { + return i; } #endif + if (!tcg_resolve_relocs(s)) { + return -2; + } /* flush instruction cache */ flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); @@ -125,6 +125,7 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_deposit_i64 0 #define TCG_TARGET_HAS_extract_i64 0 #define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_movcond_i64 0 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 @@ -237,12 +238,13 @@ typedef uint64_t tcg_insn_unit; do { if (!(X)) { __builtin_unreachable(); } } while (0) #endif -typedef struct TCGRelocation { - struct TCGRelocation *next; - int type; +typedef struct TCGRelocation TCGRelocation; +struct TCGRelocation { + QSIMPLEQ_ENTRY(TCGRelocation) next; tcg_insn_unit *ptr; intptr_t addend; -} TCGRelocation; + int type; +}; typedef struct TCGLabel TCGLabel; struct TCGLabel { @@ -253,11 +255,9 @@ struct TCGLabel { union { uintptr_t value; tcg_insn_unit *value_ptr; - TCGRelocation *first_reloc; } u; -#ifdef CONFIG_DEBUG_TCG + QSIMPLEQ_HEAD(, TCGRelocation) relocs; QSIMPLEQ_ENTRY(TCGLabel) next; -#endif }; typedef struct TCGPool { @@ -690,7 +690,6 @@ struct TCGContext { #endif #ifdef CONFIG_DEBUG_TCG - QSIMPLEQ_HEAD(, TCGLabel) labels; int temps_in_use; int goto_tb_issue_mask; #endif @@ -728,6 +727,7 @@ struct TCGContext { TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */ QTAILQ_HEAD(, TCGOp) ops, free_ops; + QSIMPLEQ_HEAD(, TCGLabel) labels; /* Tells which temporary holds a given register. It does not take into account fixed registers */ diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 086f34e69a..8b90ab71cb 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -71,6 +71,7 @@ #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 0 #define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 @@ -97,6 +98,7 @@ #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 0 #define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_div_i64 0 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_ext8s_i64 1 |