diff options
Diffstat (limited to 'tcg/optimize.c')
-rw-r--r-- | tcg/optimize.c | 244 |
1 files changed, 176 insertions, 68 deletions
diff --git a/tcg/optimize.c b/tcg/optimize.c index 3a504a1961..77da2f942a 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -83,6 +83,20 @@ static int op_bits(TCGOpcode op) return def->flags & TCG_OPF_64BIT ? 64 : 32; } +static TCGOpcode op_to_mov(TCGOpcode op) +{ + switch (op_bits(op)) { + case 32: + return INDEX_op_mov_i32; + case 64: + return INDEX_op_mov_i64; + default: + fprintf(stderr, "op_to_mov: unexpected return value of " + "function op_bits.\n"); + tcg_abort(); + } +} + static TCGOpcode op_to_movi(TCGOpcode op) { switch (op_bits(op)) { @@ -148,11 +162,22 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2) return false; } -static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, - TCGArg dst, TCGArg src) +static void tcg_opt_gen_mov(TCGContext *s, int op_index, TCGArg *gen_args, + TCGOpcode old_op, TCGArg dst, TCGArg src) { + TCGOpcode new_op = op_to_mov(old_op); + tcg_target_ulong mask; + + s->gen_opc_buf[op_index] = new_op; + reset_temp(dst); - temps[dst].mask = temps[src].mask; + mask = temps[src].mask; + if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) { + /* High bits of the destination are now garbage. */ + mask |= ~0xffffffffull; + } + temps[dst].mask = mask; + assert(temps[src].state != TCG_TEMP_CONST); if (s->temps[src].type == s->temps[dst].type) { @@ -172,30 +197,28 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, gen_args[1] = src; } -static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val) +static void tcg_opt_gen_movi(TCGContext *s, int op_index, TCGArg *gen_args, + TCGOpcode old_op, TCGArg dst, TCGArg val) { + TCGOpcode new_op = op_to_movi(old_op); + tcg_target_ulong mask; + + s->gen_opc_buf[op_index] = new_op; + reset_temp(dst); temps[dst].state = TCG_TEMP_CONST; temps[dst].val = val; - temps[dst].mask = val; + mask = val; + if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) { + /* High bits of the destination are now garbage. 
*/ + mask |= ~0xffffffffull; + } + temps[dst].mask = mask; + gen_args[0] = dst; gen_args[1] = val; } -static TCGOpcode op_to_mov(TCGOpcode op) -{ - switch (op_bits(op)) { - case 32: - return INDEX_op_mov_i32; - case 64: - return INDEX_op_mov_i64; - default: - fprintf(stderr, "op_to_mov: unexpected return value of " - "function op_bits.\n"); - tcg_abort(); - } -} - static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) { uint64_t l64, h64; @@ -530,7 +553,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, for (op_index = 0; op_index < nb_ops; op_index++) { TCGOpcode op = s->gen_opc_buf[op_index]; const TCGOpDef *def = &tcg_op_defs[op]; - tcg_target_ulong mask, affected; + tcg_target_ulong mask, partmask, affected; int nb_oargs, nb_iargs, nb_args, i; TCGArg tmp; @@ -619,8 +642,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(rotr): if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[1]].val == 0) { - s->gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], 0); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0); args += 3; gen_args += 2; continue; @@ -749,8 +771,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, if (temps_are_copies(args[0], args[1])) { s->gen_opc_buf[op_index] = INDEX_op_nop; } else { - s->gen_opc_buf[op_index] = op_to_mov(op); - tcg_opt_gen_mov(s, gen_args, args[0], args[1]); + tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]); gen_args += 2; } args += 3; @@ -859,6 +880,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, break; CASE_OP_32_64(setcond): + case INDEX_op_setcond2_i32: mask = 1; break; @@ -894,16 +916,20 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, break; } - /* 32-bit ops (non 64-bit ops and non load/store ops) generate 32-bit - results */ + /* 32-bit ops (non 64-bit ops and non load/store ops) generate + 32-bit results. 
For the result is zero test below, we can + ignore high bits, but for further optimizations we need to + record that the high bits contain garbage. */ + partmask = mask; if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_64BIT))) { - mask &= 0xffffffffu; + mask |= ~(tcg_target_ulong)0xffffffffu; + partmask &= 0xffffffffu; + affected &= 0xffffffffu; } - if (mask == 0) { + if (partmask == 0) { assert(nb_oargs == 1); - s->gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], 0); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0); args += nb_args; gen_args += 2; continue; @@ -913,12 +939,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, if (temps_are_copies(args[0], args[1])) { s->gen_opc_buf[op_index] = INDEX_op_nop; } else if (temps[args[1]].state != TCG_TEMP_CONST) { - s->gen_opc_buf[op_index] = op_to_mov(op); - tcg_opt_gen_mov(s, gen_args, args[0], args[1]); + tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]); gen_args += 2; } else { - s->gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val); + tcg_opt_gen_movi(s, op_index, gen_args, op, + args[0], temps[args[1]].val); gen_args += 2; } args += nb_args; @@ -933,8 +958,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(mulsh): if ((temps[args[2]].state == TCG_TEMP_CONST && temps[args[2]].val == 0)) { - s->gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], 0); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0); args += 3; gen_args += 2; continue; @@ -952,8 +976,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, if (temps_are_copies(args[0], args[1])) { s->gen_opc_buf[op_index] = INDEX_op_nop; } else { - s->gen_opc_buf[op_index] = op_to_mov(op); - tcg_opt_gen_mov(s, gen_args, args[0], args[1]); + tcg_opt_gen_mov(s, op_index, gen_args, op, + args[0], args[1]); gen_args += 2; } args += 3; @@ -970,8 +994,7 
@@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(sub): CASE_OP_32_64(xor): if (temps_are_copies(args[1], args[2])) { - s->gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], 0); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0); gen_args += 2; args += 3; continue; @@ -992,19 +1015,17 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, break; } if (temps[args[1]].state != TCG_TEMP_CONST) { - tcg_opt_gen_mov(s, gen_args, args[0], args[1]); + tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]); gen_args += 2; args += 2; break; } /* Source argument is constant. Rewrite the operation and let movi case handle it. */ - op = op_to_movi(op); - s->gen_opc_buf[op_index] = op; args[1] = temps[args[1]].val; /* fallthrough */ CASE_OP_32_64(movi): - tcg_opt_gen_movi(gen_args, args[0], args[1]); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], args[1]); gen_args += 2; args += 2; break; @@ -1018,9 +1039,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, case INDEX_op_ext32s_i64: case INDEX_op_ext32u_i64: if (temps[args[1]].state == TCG_TEMP_CONST) { - s->gen_opc_buf[op_index] = op_to_movi(op); tmp = do_constant_folding(op, temps[args[1]].val, 0); - tcg_opt_gen_movi(gen_args, args[0], tmp); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); gen_args += 2; args += 2; break; @@ -1029,9 +1049,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, case INDEX_op_trunc_shr_i32: if (temps[args[1]].state == TCG_TEMP_CONST) { - s->gen_opc_buf[op_index] = op_to_movi(op); tmp = do_constant_folding(op, temps[args[1]].val, args[2]); - tcg_opt_gen_movi(gen_args, args[0], tmp); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); gen_args += 2; args += 3; break; @@ -1062,10 +1081,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(remu): if (temps[args[1]].state == 
TCG_TEMP_CONST && temps[args[2]].state == TCG_TEMP_CONST) { - s->gen_opc_buf[op_index] = op_to_movi(op); tmp = do_constant_folding(op, temps[args[1]].val, temps[args[2]].val); - tcg_opt_gen_movi(gen_args, args[0], tmp); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); gen_args += 2; args += 3; break; @@ -1075,10 +1093,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(deposit): if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[2]].state == TCG_TEMP_CONST) { - s->gen_opc_buf[op_index] = op_to_movi(op); tmp = deposit64(temps[args[1]].val, args[3], args[4], temps[args[2]].val); - tcg_opt_gen_movi(gen_args, args[0], tmp); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); gen_args += 2; args += 5; break; @@ -1088,8 +1105,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(setcond): tmp = do_constant_folding_cond(op, args[1], args[2], args[3]); if (tmp != 2) { - s->gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], tmp); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); gen_args += 2; args += 4; break; @@ -1118,12 +1134,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, if (temps_are_copies(args[0], args[4-tmp])) { s->gen_opc_buf[op_index] = INDEX_op_nop; } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) { - s->gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val); + tcg_opt_gen_movi(s, op_index, gen_args, op, + args[0], temps[args[4-tmp]].val); gen_args += 2; } else { - s->gen_opc_buf[op_index] = op_to_mov(op); - tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]); + tcg_opt_gen_mov(s, op_index, gen_args, op, + args[0], args[4-tmp]); gen_args += 2; } args += 6; @@ -1156,10 +1172,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, rl = args[0]; rh = args[1]; - s->gen_opc_buf[op_index] = INDEX_op_movi_i32; - 
s->gen_opc_buf[++op_index] = INDEX_op_movi_i32; - tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a); - tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32)); + tcg_opt_gen_movi(s, op_index, &gen_args[0], + op, rl, (uint32_t)a); + tcg_opt_gen_movi(s, ++op_index, &gen_args[2], + op, rh, (uint32_t)(a >> 32)); gen_args += 4; args += 6; break; @@ -1179,10 +1195,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, rl = args[0]; rh = args[1]; - s->gen_opc_buf[op_index] = INDEX_op_movi_i32; - s->gen_opc_buf[++op_index] = INDEX_op_movi_i32; - tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r); - tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32)); + tcg_opt_gen_movi(s, op_index, &gen_args[0], + op, rl, (uint32_t)r); + tcg_opt_gen_movi(s, ++op_index, &gen_args[2], + op, rh, (uint32_t)(r >> 32)); gen_args += 4; args += 4; break; @@ -1193,11 +1209,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]); if (tmp != 2) { if (tmp) { + do_brcond_true: reset_all_temps(nb_temps); s->gen_opc_buf[op_index] = INDEX_op_br; gen_args[0] = args[5]; gen_args += 1; } else { + do_brcond_false: s->gen_opc_buf[op_index] = INDEX_op_nop; } } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE) @@ -1207,6 +1225,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, && temps[args[3]].val == 0) { /* Simplify LT/GE comparisons vs zero to a single compare vs the high word of the input. */ + do_brcond_high: reset_all_temps(nb_temps); s->gen_opc_buf[op_index] = INDEX_op_brcond_i32; gen_args[0] = args[1]; @@ -1214,6 +1233,49 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, gen_args[2] = args[4]; gen_args[3] = args[5]; gen_args += 4; + } else if (args[4] == TCG_COND_EQ) { + /* Simplify EQ comparisons where one of the pairs + can be simplified. 
*/ + tmp = do_constant_folding_cond(INDEX_op_brcond_i32, + args[0], args[2], TCG_COND_EQ); + if (tmp == 0) { + goto do_brcond_false; + } else if (tmp == 1) { + goto do_brcond_high; + } + tmp = do_constant_folding_cond(INDEX_op_brcond_i32, + args[1], args[3], TCG_COND_EQ); + if (tmp == 0) { + goto do_brcond_false; + } else if (tmp != 1) { + goto do_default; + } + do_brcond_low: + reset_all_temps(nb_temps); + s->gen_opc_buf[op_index] = INDEX_op_brcond_i32; + gen_args[0] = args[0]; + gen_args[1] = args[2]; + gen_args[2] = args[4]; + gen_args[3] = args[5]; + gen_args += 4; + } else if (args[4] == TCG_COND_NE) { + /* Simplify NE comparisons where one of the pairs + can be simplified. */ + tmp = do_constant_folding_cond(INDEX_op_brcond_i32, + args[0], args[2], TCG_COND_NE); + if (tmp == 0) { + goto do_brcond_high; + } else if (tmp == 1) { + goto do_brcond_true; + } + tmp = do_constant_folding_cond(INDEX_op_brcond_i32, + args[1], args[3], TCG_COND_NE); + if (tmp == 0) { + goto do_brcond_low; + } else if (tmp == 1) { + goto do_brcond_true; + } + goto do_default; } else { goto do_default; } @@ -1223,8 +1285,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, case INDEX_op_setcond2_i32: tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]); if (tmp != 2) { - s->gen_opc_buf[op_index] = INDEX_op_movi_i32; - tcg_opt_gen_movi(gen_args, args[0], tmp); + do_setcond_const: + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); gen_args += 2; } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE) && temps[args[3]].state == TCG_TEMP_CONST @@ -1233,13 +1295,59 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, && temps[args[4]].val == 0) { /* Simplify LT/GE comparisons vs zero to a single compare vs the high word of the input. 
*/ + do_setcond_high: s->gen_opc_buf[op_index] = INDEX_op_setcond_i32; reset_temp(args[0]); + temps[args[0]].mask = 1; gen_args[0] = args[0]; gen_args[1] = args[2]; gen_args[2] = args[4]; gen_args[3] = args[5]; gen_args += 4; + } else if (args[5] == TCG_COND_EQ) { + /* Simplify EQ comparisons where one of the pairs + can be simplified. */ + tmp = do_constant_folding_cond(INDEX_op_setcond_i32, + args[1], args[3], TCG_COND_EQ); + if (tmp == 0) { + goto do_setcond_const; + } else if (tmp == 1) { + goto do_setcond_high; + } + tmp = do_constant_folding_cond(INDEX_op_setcond_i32, + args[2], args[4], TCG_COND_EQ); + if (tmp == 0) { + goto do_setcond_high; + } else if (tmp != 1) { + goto do_default; + } + do_setcond_low: + reset_temp(args[0]); + temps[args[0]].mask = 1; + s->gen_opc_buf[op_index] = INDEX_op_setcond_i32; + gen_args[0] = args[0]; + gen_args[1] = args[1]; + gen_args[2] = args[3]; + gen_args[3] = args[5]; + gen_args += 4; + } else if (args[5] == TCG_COND_NE) { + /* Simplify NE comparisons where one of the pairs + can be simplified. */ + tmp = do_constant_folding_cond(INDEX_op_setcond_i32, + args[1], args[3], TCG_COND_NE); + if (tmp == 0) { + goto do_setcond_high; + } else if (tmp == 1) { + goto do_setcond_const; + } + tmp = do_constant_folding_cond(INDEX_op_setcond_i32, + args[2], args[4], TCG_COND_NE); + if (tmp == 0) { + goto do_setcond_low; + } else if (tmp == 1) { + goto do_setcond_const; + } + goto do_default; } else { goto do_default; } |