From eb6ca2b4a69325e95526bc0f2897791df04e44dc Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sun, 5 Jul 2015 18:50:09 +0200 Subject: target-sh4: improve cmp/str instruction Instead of testing bytes one by one, we can use the following trick from https://graphics.stanford.edu/~seander/bithacks.html: haszero(v) = (v - 0x01010101) & ~v & 0x80808080 The sub-expression v - 0x01010101 evaluates to a high bit set in any byte whenever the corresponding byte in v is zero or greater than 0x80. The sub-expression ~v & 0x80808080 evaluates to high bits set in bytes where the byte of v doesn't have its high bit set (so the byte was less than 0x80). Finally, by ANDing these two sub-expressions the result is the high bits set where the bytes in v were zero, since the high bits set due to a value greater than 0x80 in the first sub-expression are masked off by the second. A borrow out of a zero byte can set a spurious high bit in a more significant byte, but that only happens when a zero byte is already present, so the result is non-zero exactly when some byte of v is zero. Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target-sh4/translate.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/target-sh4/translate.c b/target-sh4/translate.c index 50043cf5b5..ca6ef5aca7 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -688,18 +688,11 @@ static void _decode_opc(DisasContext * ctx) { TCGv cmp1 = tcg_temp_new(); TCGv cmp2 = tcg_temp_new(); - tcg_gen_xor_i32(cmp1, REG(B7_4), REG(B11_8)); - tcg_gen_andi_i32(cmp2, cmp1, 0xff000000); - tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, cmp2, 0); - tcg_gen_andi_i32(cmp2, cmp1, 0x00ff0000); - tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0); - tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2); - tcg_gen_andi_i32(cmp2, cmp1, 0x0000ff00); - tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0); - tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2); - tcg_gen_andi_i32(cmp2, cmp1, 0x000000ff); - tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0); - tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2); + tcg_gen_xor_i32(cmp2, REG(B7_4), REG(B11_8)); + tcg_gen_subi_i32(cmp1, cmp2, 0x01010101); + tcg_gen_andc_i32(cmp1, 
cmp1, cmp2); + tcg_gen_andi_i32(cmp1, cmp1, 0x80808080); + tcg_gen_setcondi_i32(TCG_COND_NE, cpu_sr_t, cmp1, 0); tcg_temp_free(cmp2); tcg_temp_free(cmp1); } -- cgit v1.2.3