diff options
author | Richard Henderson <rth@twiddle.net> | 2017-07-17 20:16:57 -1000 |
---|---|---|
committer | Richard Henderson <rth@twiddle.net> | 2017-07-19 14:45:15 -0700 |
commit | abb1066df313602ef0ca631126bd342d399d5359 (patch) | |
tree | 5f0878adce2e9a803c7e12d8e6971998f11a7cdf /target | |
parent | d97dd988ecd67f4fd12a80a55f07c1817f19b002 (diff) |
target/arm: Optimize aarch64 rev16
It is much shorter to reverse all 4 half-words in parallel
than extract, reverse, and deposit each in turn.
Suggested-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Diffstat (limited to 'target')
-rw-r--r-- | target/arm/translate-a64.c | 24 |
1 files changed, 6 insertions, 18 deletions
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 3fa39023ca..5bb0f8ef22 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -4043,25 +4043,13 @@ static void handle_rev16(DisasContext *s, unsigned int sf, TCGv_i64 tcg_rd = cpu_reg(s, rd); TCGv_i64 tcg_tmp = tcg_temp_new_i64(); TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); + TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff); - tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff); - tcg_gen_bswap16_i64(tcg_rd, tcg_tmp); - - tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16); - tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff); - tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp); - tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16); - - if (sf) { - tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32); - tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff); - tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp); - tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16); - - tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48); - tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp); - tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16); - } + tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); + tcg_gen_and_i64(tcg_rd, tcg_rn, mask); + tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); + tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); + tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); tcg_temp_free_i64(tcg_tmp); } |