aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Henderson <rth@twiddle.net>2016-06-22 15:46:09 -0700
committerRichard Henderson <rth@twiddle.net>2016-08-05 21:44:17 +0530
commita1b3c48d2b23d6eaeb4529d3e1183d2648731bf8 (patch)
treef75c3f1d950980b386d721fa95183331773ca236
parent51009170d8fc263cfdcd5a60fe3ba213daa3d15b (diff)
tcg: Compress liveness data to 16 bits
This reduces both memory usage and per-insn cacheline usage during code generation.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
-rw-r--r--tcg/tcg.c58
-rw-r--r--tcg/tcg.h16
2 files changed, 32 insertions, 42 deletions
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 0c46c43cfa..4aa1933a3e 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1341,7 +1341,7 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
}
}
-/* Liveness analysis : update the opc_dead_args array to tell if a
+/* Liveness analysis : update the opc_arg_life array to tell if a
given input arguments is dead. Instructions updating dead
temporaries are removed. */
static void tcg_liveness_analysis(TCGContext *s)
@@ -1350,9 +1350,8 @@ static void tcg_liveness_analysis(TCGContext *s)
int oi, oi_prev, nb_ops;
nb_ops = s->gen_next_op_idx;
- s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
- s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
-
+ s->op_arg_life = tcg_malloc(nb_ops * sizeof(TCGLifeData));
+
dead_temps = tcg_malloc(s->nb_temps);
mem_temps = tcg_malloc(s->nb_temps);
tcg_la_func_end(s, dead_temps, mem_temps);
@@ -1361,8 +1360,7 @@ static void tcg_liveness_analysis(TCGContext *s)
int i, nb_iargs, nb_oargs;
TCGOpcode opc_new, opc_new2;
bool have_opc_new2;
- uint16_t dead_args;
- uint8_t sync_args;
+ TCGLifeData arg_life = 0;
TCGArg arg;
TCGOp * const op = &s->gen_op_buf[oi];
@@ -1394,15 +1392,13 @@ static void tcg_liveness_analysis(TCGContext *s)
do_not_remove_call:
/* output args are dead */
- dead_args = 0;
- sync_args = 0;
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
if (dead_temps[arg]) {
- dead_args |= (1 << i);
+ arg_life |= DEAD_ARG << i;
}
if (mem_temps[arg]) {
- sync_args |= (1 << i);
+ arg_life |= SYNC_ARG << i;
}
dead_temps[arg] = 1;
mem_temps[arg] = 0;
@@ -1423,7 +1419,7 @@ static void tcg_liveness_analysis(TCGContext *s)
arg = args[i];
if (arg != TCG_CALL_DUMMY_ARG) {
if (dead_temps[arg]) {
- dead_args |= (1 << i);
+ arg_life |= DEAD_ARG << i;
}
}
}
@@ -1432,8 +1428,6 @@ static void tcg_liveness_analysis(TCGContext *s)
arg = args[i];
dead_temps[arg] = 0;
}
- s->op_dead_args[oi] = dead_args;
- s->op_sync_args[oi] = sync_args;
}
}
break;
@@ -1544,15 +1538,13 @@ static void tcg_liveness_analysis(TCGContext *s)
} else {
do_not_remove:
/* output args are dead */
- dead_args = 0;
- sync_args = 0;
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
if (dead_temps[arg]) {
- dead_args |= (1 << i);
+ arg_life |= DEAD_ARG << i;
}
if (mem_temps[arg]) {
- sync_args |= (1 << i);
+ arg_life |= SYNC_ARG << i;
}
dead_temps[arg] = 1;
mem_temps[arg] = 0;
@@ -1570,7 +1562,7 @@ static void tcg_liveness_analysis(TCGContext *s)
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
arg = args[i];
if (dead_temps[arg]) {
- dead_args |= (1 << i);
+ arg_life |= DEAD_ARG << i;
}
}
/* input arguments are live for preceding opcodes */
@@ -1578,11 +1570,10 @@ static void tcg_liveness_analysis(TCGContext *s)
arg = args[i];
dead_temps[arg] = 0;
}
- s->op_dead_args[oi] = dead_args;
- s->op_sync_args[oi] = sync_args;
}
break;
}
+ s->op_arg_life[oi] = arg_life;
}
}
#else
@@ -1921,11 +1912,11 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
save_globals(s, allocated_regs);
}
-#define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
-#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
+#define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
+#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
- uint16_t dead_args, uint8_t sync_args)
+ TCGLifeData arg_life)
{
TCGTemp *ots;
tcg_target_ulong val;
@@ -1954,8 +1945,7 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
}
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
- const TCGArg *args, uint16_t dead_args,
- uint8_t sync_args)
+ const TCGArg *args, TCGLifeData arg_life)
{
TCGRegSet allocated_regs;
TCGTemp *ts, *ots;
@@ -2040,8 +2030,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
static void tcg_reg_alloc_op(TCGContext *s,
const TCGOpDef *def, TCGOpcode opc,
- const TCGArg *args, uint16_t dead_args,
- uint8_t sync_args)
+ const TCGArg *args, TCGLifeData arg_life)
{
TCGRegSet allocated_regs;
int i, k, nb_iargs, nb_oargs;
@@ -2206,8 +2195,7 @@ static void tcg_reg_alloc_op(TCGContext *s,
#endif
static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
- const TCGArg * const args, uint16_t dead_args,
- uint8_t sync_args)
+ const TCGArg * const args, TCGLifeData arg_life)
{
int flags, nb_regs, i;
TCGReg reg;
@@ -2427,8 +2415,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
TCGArg * const args = &s->gen_opparam_buf[op->args];
TCGOpcode opc = op->opc;
const TCGOpDef *def = &tcg_op_defs[opc];
- uint16_t dead_args = s->op_dead_args[oi];
- uint8_t sync_args = s->op_sync_args[oi];
+ TCGLifeData arg_life = s->op_arg_life[oi];
oi_next = op->next;
#ifdef CONFIG_PROFILER
@@ -2438,11 +2425,11 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
switch (opc) {
case INDEX_op_mov_i32:
case INDEX_op_mov_i64:
- tcg_reg_alloc_mov(s, def, args, dead_args, sync_args);
+ tcg_reg_alloc_mov(s, def, args, arg_life);
break;
case INDEX_op_movi_i32:
case INDEX_op_movi_i64:
- tcg_reg_alloc_movi(s, args, dead_args, sync_args);
+ tcg_reg_alloc_movi(s, args, arg_life);
break;
case INDEX_op_insn_start:
if (num_insns >= 0) {
@@ -2467,8 +2454,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
tcg_out_label(s, arg_label(args[0]), s->code_ptr);
break;
case INDEX_op_call:
- tcg_reg_alloc_call(s, op->callo, op->calli, args,
- dead_args, sync_args);
+ tcg_reg_alloc_call(s, op->callo, op->calli, args, arg_life);
break;
default:
/* Sanity check that we've not introduced any unhandled opcodes. */
@@ -2478,7 +2464,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
/* Note: in order to speed up the code, it would be much
faster to have specialized register allocator functions for
some common argument patterns */
- tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
+ tcg_reg_alloc_op(s, def, opc, args, arg_life);
break;
}
#ifdef CONFIG_DEBUG_TCG
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 6046dcdc89..7c0a138152 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -575,6 +575,14 @@ typedef struct TCGTempSet {
unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
} TCGTempSet;
+/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding,
+ this implies a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands.
+ There are never more than 2 outputs, which means that we can store all
+ dead + sync data within 16 bits. */
+#define DEAD_ARG 4
+#define SYNC_ARG 1
+typedef uint16_t TCGLifeData;
+
typedef struct TCGOp {
TCGOpcode opc : 8;
@@ -608,12 +616,8 @@ struct TCGContext {
uintptr_t *tb_jmp_target_addr; /* tb->jmp_target_addr if !USE_DIRECT_JUMP */
/* liveness analysis */
- uint16_t *op_dead_args; /* for each operation, each bit tells if the
- corresponding argument is dead */
- uint8_t *op_sync_args; /* for each operation, each bit tells if the
- corresponding output argument needs to be
- sync to memory. */
-
+ TCGLifeData *op_arg_life;
+
TCGRegSet reserved_regs;
intptr_t current_frame_offset;
intptr_t frame_start;