diff options
Diffstat (limited to 'tcg/tcg.c')
-rw-r--r-- | tcg/tcg.c | 488 |
1 files changed, 376 insertions, 112 deletions
@@ -117,8 +117,8 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg dst, TCGReg src); static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg dst, TCGReg base, intptr_t offset); -static void tcg_out_dupi_vec(TCGContext *s, TCGType type, - TCGReg dst, tcg_target_long arg); +static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, + TCGReg dst, int64_t arg); static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned vece, const TCGArg *args, const int *const_args); @@ -133,8 +133,8 @@ static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, { g_assert_not_reached(); } -static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, - TCGReg dst, tcg_target_long arg) +static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, + TCGReg dst, int64_t arg) { g_assert_not_reached(); } @@ -1184,6 +1184,13 @@ void tcg_func_start(TCGContext *s) /* No temps have been previously allocated for size or locality. */ memset(s->free_temps, 0, sizeof(s->free_temps)); + /* No constant temps have been previously allocated. */ + for (int i = 0; i < TCG_TYPE_COUNT; ++i) { + if (s->const_table[i]) { + g_hash_table_remove_all(s->const_table[i]); + } + } + s->nb_ops = 0; s->nb_labels = 0; s->current_frame_offset = s->frame_start; @@ -1211,7 +1218,7 @@ static inline TCGTemp *tcg_global_alloc(TCGContext *s) tcg_debug_assert(s->nb_globals == s->nb_temps); s->nb_globals++; ts = tcg_temp_alloc(s); - ts->temp_global = 1; + ts->kind = TEMP_GLOBAL; return ts; } @@ -1228,7 +1235,7 @@ static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, ts = tcg_global_alloc(s); ts->base_type = type; ts->type = type; - ts->fixed_reg = 1; + ts->kind = TEMP_FIXED; ts->reg = reg; ts->name = name; tcg_regset_set_reg(s->reserved_regs, reg); @@ -1255,13 +1262,19 @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, bigendian = 1; #endif - if (!base_ts->fixed_reg) { + switch (base_ts->kind) { + case TEMP_FIXED: + break; + case TEMP_GLOBAL: /* We do not support double-indirect registers. */ tcg_debug_assert(!base_ts->indirect_reg); base_ts->indirect_base = 1; s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 ? 2 : 1); indirect_reg = 1; + break; + default: + g_assert_not_reached(); } if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { @@ -1303,6 +1316,7 @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local) { TCGContext *s = tcg_ctx; + TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL; TCGTemp *ts; int idx, k; @@ -1315,7 +1329,7 @@ TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local) ts = &s->temps[idx]; ts->temp_allocated = 1; tcg_debug_assert(ts->base_type == type); - tcg_debug_assert(ts->temp_local == temp_local); + tcg_debug_assert(ts->kind == kind); } else { ts = tcg_temp_alloc(s); if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { @@ -1324,18 +1338,18 @@ TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local) ts->base_type = type; ts->type = TCG_TYPE_I32; ts->temp_allocated = 1; - ts->temp_local = temp_local; + ts->kind = kind; tcg_debug_assert(ts2 == ts + 1); ts2->base_type = TCG_TYPE_I64; ts2->type = TCG_TYPE_I32; ts2->temp_allocated = 1; - ts2->temp_local = temp_local; + ts2->kind = kind; } else { ts->base_type = type; ts->type = type; ts->temp_allocated = 1; - ts->temp_local = temp_local; + ts->kind = kind; } } @@ -1385,6 +1399,11 @@ void tcg_temp_free_internal(TCGTemp *ts) TCGContext *s = tcg_ctx; int k, idx; + /* In order to simplify users of tcg_constant_*, silently ignore free. */ + if (ts->kind == TEMP_CONST) { + return; + } + #if defined(CONFIG_DEBUG_TCG) s->temps_in_use--; if (s->temps_in_use < 0) { @@ -1392,15 +1411,77 @@ void tcg_temp_free_internal(TCGTemp *ts) } #endif - tcg_debug_assert(ts->temp_global == 0); + tcg_debug_assert(ts->kind < TEMP_GLOBAL); tcg_debug_assert(ts->temp_allocated != 0); ts->temp_allocated = 0; idx = temp_idx(ts); - k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0); + k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT); set_bit(idx, s->free_temps[k].l); } +TCGTemp *tcg_constant_internal(TCGType type, int64_t val) +{ + TCGContext *s = tcg_ctx; + GHashTable *h = s->const_table[type]; + TCGTemp *ts; + + if (h == NULL) { + h = g_hash_table_new(g_int64_hash, g_int64_equal); + s->const_table[type] = h; + } + + ts = g_hash_table_lookup(h, &val); + if (ts == NULL) { + ts = tcg_temp_alloc(s); + + if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { + TCGTemp *ts2 = tcg_temp_alloc(s); + + ts->base_type = TCG_TYPE_I64; + ts->type = TCG_TYPE_I32; + ts->kind = TEMP_CONST; + ts->temp_allocated = 1; + /* + * Retain the full value of the 64-bit constant in the low + * part, so that the hash table works. Actual uses will + * truncate the value to the low part. + */ + ts->val = val; + + tcg_debug_assert(ts2 == ts + 1); + ts2->base_type = TCG_TYPE_I64; + ts2->type = TCG_TYPE_I32; + ts2->kind = TEMP_CONST; + ts2->temp_allocated = 1; + ts2->val = val >> 32; + } else { + ts->base_type = type; + ts->type = type; + ts->kind = TEMP_CONST; + ts->temp_allocated = 1; + ts->val = val; + } + g_hash_table_insert(h, &ts->val, ts); + } + + return ts; +} + +TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val) +{ + val = dup_const(vece, val); + return temp_tcgv_vec(tcg_constant_internal(type, val)); +} + +TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val) +{ + TCGTemp *t = tcgv_vec_temp(match); + + tcg_debug_assert(t->temp_allocated != 0); + return tcg_constant_vec(t->base_type, vece, val); +} + TCGv_i32 tcg_const_i32(int32_t val) { TCGv_i32 t0; @@ -1483,7 +1564,6 @@ bool tcg_op_supported(TCGOpcode op) return TCG_TARGET_HAS_goto_ptr; case INDEX_op_mov_i32: - case INDEX_op_movi_i32: case INDEX_op_setcond_i32: case INDEX_op_brcond_i32: case INDEX_op_ld8u_i32: @@ -1577,7 +1657,6 @@ bool tcg_op_supported(TCGOpcode op) return TCG_TARGET_REG_BITS == 32; case INDEX_op_mov_i64: - case INDEX_op_movi_i64: case INDEX_op_setcond_i64: case INDEX_op_brcond_i64: case INDEX_op_ld8u_i64: @@ -1683,7 +1762,6 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_mov_vec: case INDEX_op_dup_vec: - case INDEX_op_dupi_vec: case INDEX_op_dupm_vec: case INDEX_op_ld_vec: case INDEX_op_st_vec: @@ -1930,17 +2008,30 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) static void tcg_reg_alloc_start(TCGContext *s) { int i, n; - TCGTemp *ts; - for (i = 0, n = s->nb_globals; i < n; i++) { - ts = &s->temps[i]; - ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM); - } - for (n = s->nb_temps; i < n; i++) { - ts = &s->temps[i]; - ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD); - ts->mem_allocated = 0; - ts->fixed_reg = 0; + for (i = 0, n = s->nb_temps; i < n; i++) { + TCGTemp *ts = &s->temps[i]; + TCGTempVal val = TEMP_VAL_MEM; + + switch (ts->kind) { + case TEMP_CONST: + val = TEMP_VAL_CONST; + break; + case TEMP_FIXED: + val = TEMP_VAL_REG; + break; + case TEMP_GLOBAL: + break; + case TEMP_NORMAL: + val = TEMP_VAL_DEAD; + /* fall through */ + case TEMP_LOCAL: + ts->mem_allocated = 0; + break; + default: + g_assert_not_reached(); + } + ts->val_type = val; } memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); @@ -1951,12 +2042,37 @@ static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, { int idx = temp_idx(ts); - if (ts->temp_global) { + switch (ts->kind) { + case TEMP_FIXED: + case TEMP_GLOBAL: pstrcpy(buf, buf_size, ts->name); - } else if (ts->temp_local) { + break; + case TEMP_LOCAL: snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); - } else { + break; + case TEMP_NORMAL: snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); + break; + case TEMP_CONST: + switch (ts->type) { + case TCG_TYPE_I32: + snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); + break; +#if TCG_TARGET_REG_BITS > 32 + case TCG_TYPE_I64: + snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); + break; +#endif + case TCG_TYPE_V64: + case TCG_TYPE_V128: + case TCG_TYPE_V256: + snprintf(buf, buf_size, "v%d$0x%" PRIx64, + 64 << (ts->type - TCG_TYPE_V64), ts->val); + break; + default: + g_assert_not_reached(); + } + break; } return buf; } @@ -2547,15 +2663,25 @@ static void la_bb_end(TCGContext *s, int ng, int nt) { int i; - for (i = 0; i < ng; ++i) { - s->temps[i].state = TS_DEAD | TS_MEM; - la_reset_pref(&s->temps[i]); - } - for (i = ng; i < nt; ++i) { - s->temps[i].state = (s->temps[i].temp_local - ? TS_DEAD | TS_MEM - : TS_DEAD); - la_reset_pref(&s->temps[i]); + for (i = 0; i < nt; ++i) { + TCGTemp *ts = &s->temps[i]; + int state; + + switch (ts->kind) { + case TEMP_FIXED: + case TEMP_GLOBAL: + case TEMP_LOCAL: + state = TS_DEAD | TS_MEM; + break; + case TEMP_NORMAL: + case TEMP_CONST: + state = TS_DEAD; + break; + default: + g_assert_not_reached(); + } + ts->state = state; + la_reset_pref(ts); } } @@ -2583,14 +2709,24 @@ static void la_bb_sync(TCGContext *s, int ng, int nt) la_global_sync(s, ng); for (int i = ng; i < nt; ++i) { - if (s->temps[i].temp_local) { - int state = s->temps[i].state; - s->temps[i].state = state | TS_MEM; + TCGTemp *ts = &s->temps[i]; + int state; + + switch (ts->kind) { + case TEMP_LOCAL: + state = ts->state; + ts->state = state | TS_MEM; if (state != TS_DEAD) { continue; } - } else { + break; + case TEMP_NORMAL: s->temps[i].state = TS_DEAD; + break; + case TEMP_CONST: + continue; + default: + g_assert_not_reached(); } la_reset_pref(&s->temps[i]); } @@ -3151,7 +3287,7 @@ static void dump_regs(TCGContext *s) tcg_target_reg_names[ts->mem_base->reg]); break; case TEMP_VAL_CONST: - printf("$0x%" TCG_PRIlx, ts->val); + printf("$0x%" PRIx64, ts->val); break; case TEMP_VAL_DEAD: printf("D"); @@ -3191,7 +3327,8 @@ static void check_regs(TCGContext *s) } for (k = 0; k < s->nb_temps; k++) { ts = &s->temps[k]; - if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg + if (ts->val_type == TEMP_VAL_REG + && ts->kind != TEMP_FIXED && s->reg_to_temp[ts->reg] != ts) { printf("Inconsistency for temp %s:\n", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); @@ -3228,16 +3365,28 @@ static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); mark it free; otherwise mark it dead. */ static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) { - if (ts->fixed_reg) { + TCGTempVal new_type; + + switch (ts->kind) { + case TEMP_FIXED: return; + case TEMP_GLOBAL: + case TEMP_LOCAL: + new_type = TEMP_VAL_MEM; + break; + case TEMP_NORMAL: + new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; + break; + case TEMP_CONST: + new_type = TEMP_VAL_CONST; + break; + default: + g_assert_not_reached(); } if (ts->val_type == TEMP_VAL_REG) { s->reg_to_temp[ts->reg] = NULL; } - ts->val_type = (free_or_dead < 0 - || ts->temp_local - || ts->temp_global - ? TEMP_VAL_MEM : TEMP_VAL_DEAD); + ts->val_type = new_type; } /* Mark a temporary as dead. */ @@ -3253,10 +3402,7 @@ static inline void temp_dead(TCGContext *s, TCGTemp *ts) static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, TCGRegSet preferred_regs, int free_or_dead) { - if (ts->fixed_reg) { - return; - } - if (!ts->mem_coherent) { + if (!temp_readonly(ts) && !ts->mem_coherent) { if (!ts->mem_allocated) { temp_allocate_frame(s, ts); } @@ -3387,7 +3533,27 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, case TEMP_VAL_CONST: reg = tcg_reg_alloc(s, desired_regs, allocated_regs, preferred_regs, ts->indirect_base); - tcg_out_movi(s, ts->type, reg, ts->val); + if (ts->type <= TCG_TYPE_I64) { + tcg_out_movi(s, ts->type, reg, ts->val); + } else { + uint64_t val = ts->val; + MemOp vece = MO_64; + + /* + * Find the minimal vector element that matches the constant. + * The targets will, in general, have to do this search anyway, + * do this generically. + */ + if (val == dup_const(MO_8, val)) { + vece = MO_8; + } else if (val == dup_const(MO_16, val)) { + vece = MO_16; + } else if (val == dup_const(MO_32, val)) { + vece = MO_32; + } + + tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val); + } ts->mem_coherent = 0; break; case TEMP_VAL_MEM: @@ -3411,7 +3577,7 @@ static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) { /* The liveness analysis already ensures that globals are back in memory. Keep an tcg_debug_assert for safety. */ - tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg); + tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts)); } /* save globals to their canonical location and assume they can be @@ -3436,7 +3602,7 @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) for (i = 0, n = s->nb_globals; i < n; i++) { TCGTemp *ts = &s->temps[i]; tcg_debug_assert(ts->val_type != TEMP_VAL_REG - || ts->fixed_reg + || ts->kind == TEMP_FIXED || ts->mem_coherent); } } @@ -3449,12 +3615,22 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) for (i = s->nb_globals; i < s->nb_temps; i++) { TCGTemp *ts = &s->temps[i]; - if (ts->temp_local) { + + switch (ts->kind) { + case TEMP_LOCAL: temp_save(s, ts, allocated_regs); - } else { + break; + case TEMP_NORMAL: /* The liveness analysis already ensures that temps are dead. Keep an tcg_debug_assert for safety. */ tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); + break; + case TEMP_CONST: + /* Similarly, we should have freed any allocated register. */ + tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); + break; + default: + g_assert_not_reached(); } } @@ -3475,23 +3651,30 @@ static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) * The liveness analysis already ensures that temps are dead. * Keep tcg_debug_asserts for safety. */ - if (ts->temp_local) { + switch (ts->kind) { + case TEMP_LOCAL: tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); - } else { + break; + case TEMP_NORMAL: tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); + break; + case TEMP_CONST: + break; + default: + g_assert_not_reached(); } } } /* - * Specialized code generation for INDEX_op_movi_*. + * Specialized code generation for INDEX_op_mov_* with a constant. */ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, tcg_target_ulong val, TCGLifeData arg_life, TCGRegSet preferred_regs) { /* ENV should not be modified. */ - tcg_debug_assert(!ots->fixed_reg); + tcg_debug_assert(!temp_readonly(ots)); /* The movi is not explicitly generated here. */ if (ots->val_type == TEMP_VAL_REG) { @@ -3507,14 +3690,6 @@ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, } } -static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op) -{ - TCGTemp *ots = arg_temp(op->args[0]); - tcg_target_ulong val = op->args[1]; - - tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]); -} - /* * Specialized code generation for INDEX_op_mov_*. */ @@ -3531,7 +3706,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) ts = arg_temp(op->args[1]); /* ENV should not be modified. */ - tcg_debug_assert(!ots->fixed_reg); + tcg_debug_assert(!temp_readonly(ots)); /* Note that otype != itype for no-op truncation. */ otype = ots->type; @@ -3570,7 +3745,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) } temp_dead(s, ots); } else { - if (IS_DEAD_ARG(1) && !ts->fixed_reg) { + if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { /* the mov can be suppressed */ if (ots->val_type == TEMP_VAL_REG) { s->reg_to_temp[ots->reg] = NULL; @@ -3592,7 +3767,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) * Store the source register into the destination slot * and leave the destination temp as TEMP_VAL_MEM. */ - assert(!ots->fixed_reg); + assert(!temp_readonly(ots)); if (!ts->mem_allocated) { temp_allocate_frame(s, ots); } @@ -3629,7 +3804,7 @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) its = arg_temp(op->args[1]); /* ENV should not be modified. */ - tcg_debug_assert(!ots->fixed_reg); + tcg_debug_assert(!temp_readonly(ots)); itype = its->type; vece = TCGOP_VECE(op); @@ -3769,45 +3944,42 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) i_preferred_regs = o_preferred_regs = 0; if (arg_ct->ialias) { o_preferred_regs = op->output_pref[arg_ct->alias_index]; - if (ts->fixed_reg) { - /* if fixed register, we must allocate a new register - if the alias is not the same register */ - if (arg != op->args[arg_ct->alias_index]) { - goto allocate_in_reg; - } - } else { - /* if the input is aliased to an output and if it is - not dead after the instruction, we must allocate - a new register and move it */ - if (!IS_DEAD_ARG(i)) { - goto allocate_in_reg; - } - /* check if the current register has already been allocated - for another input aliased to an output */ - if (ts->val_type == TEMP_VAL_REG) { - int k2, i2; - reg = ts->reg; - for (k2 = 0 ; k2 < k ; k2++) { - i2 = def->args_ct[nb_oargs + k2].sort_index; - if (def->args_ct[i2].ialias && reg == new_args[i2]) { - goto allocate_in_reg; - } + /* + * If the input is readonly, then it cannot also be an + * output and aliased to itself. If the input is not + * dead after the instruction, we must allocate a new + * register and move it. + */ + if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { + goto allocate_in_reg; + } + + /* + * Check if the current register has already been allocated + * for another input aliased to an output. + */ + if (ts->val_type == TEMP_VAL_REG) { + reg = ts->reg; + for (int k2 = 0; k2 < k; k2++) { + int i2 = def->args_ct[nb_oargs + k2].sort_index; + if (def->args_ct[i2].ialias && reg == new_args[i2]) { + goto allocate_in_reg; } } - i_preferred_regs = o_preferred_regs; } + i_preferred_regs = o_preferred_regs; } temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs); reg = ts->reg; - if (tcg_regset_test_reg(arg_ct->regs, reg)) { - /* nothing to do : the constraint is satisfied */ - } else { - allocate_in_reg: - /* allocate a new register matching the constraint - and move the temporary register into it */ + if (!tcg_regset_test_reg(arg_ct->regs, reg)) { + allocate_in_reg: + /* + * Allocate a new register matching the constraint + * and move the temporary register into it. + */ temp_load(s, ts, tcg_target_available_regs[ts->type], i_allocated_regs, 0); reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs, @@ -3861,7 +4033,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) ts = arg_temp(arg); /* ENV should not be modified. */ - tcg_debug_assert(!ts->fixed_reg); + tcg_debug_assert(!temp_readonly(ts)); if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { reg = new_args[arg_ct->alias_index]; @@ -3902,7 +4074,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) ts = arg_temp(op->args[i]); /* ENV should not be modified. */ - tcg_debug_assert(!ts->fixed_reg); + tcg_debug_assert(!temp_readonly(ts)); if (NEED_SYNC_ARG(i)) { temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); @@ -3912,6 +4084,98 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } } +static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) +{ + const TCGLifeData arg_life = op->life; + TCGTemp *ots, *itsl, *itsh; + TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64; + + /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ + tcg_debug_assert(TCG_TARGET_REG_BITS == 32); + tcg_debug_assert(TCGOP_VECE(op) == MO_64); + + ots = arg_temp(op->args[0]); + itsl = arg_temp(op->args[1]); + itsh = arg_temp(op->args[2]); + + /* ENV should not be modified. */ + tcg_debug_assert(!temp_readonly(ots)); + + /* Allocate the output register now. */ + if (ots->val_type != TEMP_VAL_REG) { + TCGRegSet allocated_regs = s->reserved_regs; + TCGRegSet dup_out_regs = + tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; + + /* Make sure to not spill the input registers. */ + if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { + tcg_regset_set_reg(allocated_regs, itsl->reg); + } + if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { + tcg_regset_set_reg(allocated_regs, itsh->reg); + } + + ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, + op->output_pref[0], ots->indirect_base); + ots->val_type = TEMP_VAL_REG; + ots->mem_coherent = 0; + s->reg_to_temp[ots->reg] = ots; + } + + /* Promote dup2 of immediates to dupi_vec. */ + if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { + uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); + MemOp vece = MO_64; + + if (val == dup_const(MO_8, val)) { + vece = MO_8; + } else if (val == dup_const(MO_16, val)) { + vece = MO_16; + } else if (val == dup_const(MO_32, val)) { + vece = MO_32; + } + + tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); + goto done; + } + + /* If the two inputs form one 64-bit value, try dupm_vec. */ + if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) { + if (!itsl->mem_coherent) { + temp_sync(s, itsl, s->reserved_regs, 0, 0); + } + if (!itsh->mem_coherent) { + temp_sync(s, itsh, s->reserved_regs, 0, 0); + } +#ifdef HOST_WORDS_BIGENDIAN + TCGTemp *its = itsh; +#else + TCGTemp *its = itsl; +#endif + if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, + its->mem_base->reg, its->mem_offset)) { + goto done; + } + } + + /* Fall back to generic expansion. */ + return false; + + done: + if (IS_DEAD_ARG(1)) { + temp_dead(s, itsl); + } + if (IS_DEAD_ARG(2)) { + temp_dead(s, itsh); + } + if (NEED_SYNC_ARG(0)) { + temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); + } else if (IS_DEAD_ARG(0)) { + temp_dead(s, ots); + } + return true; +} + #ifdef TCG_TARGET_STACK_GROWSUP #define STACK_DIR(x) (-(x)) #else @@ -4034,7 +4298,7 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) ts = arg_temp(arg); /* ENV should not be modified. */ - tcg_debug_assert(!ts->fixed_reg); + tcg_debug_assert(!temp_readonly(ts)); reg = tcg_target_call_oarg_regs[i]; tcg_debug_assert(s->reg_to_temp[reg] == NULL); @@ -4298,11 +4562,6 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) case INDEX_op_mov_vec: tcg_reg_alloc_mov(s, op); break; - case INDEX_op_movi_i32: - case INDEX_op_movi_i64: - case INDEX_op_dupi_vec: - tcg_reg_alloc_movi(s, op); - break; case INDEX_op_dup_vec: tcg_reg_alloc_dup(s, op); break; @@ -4334,6 +4593,11 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) case INDEX_op_call: tcg_reg_alloc_call(s, op); break; + case INDEX_op_dup2_vec: + if (tcg_reg_alloc_dup2(s, op)) { + break; + } + /* fall through */ default: /* Sanity check that we've not introduced any unhandled opcodes. */ tcg_debug_assert(tcg_op_supported(opc)); |