From c69806ab82760a2e5ca880d41e8786276c838152 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Thu, 6 Jan 2011 22:43:13 +0100 Subject: tcg/arm: fix branch target change during code retranslation QEMU uses code retranslation to restore the CPU state when an exception happens. For it to work the retranslation must not modify the generated code. This is what is currently implemented in ARM TCG. However on CPU that don't have icache/dcache/memory synchronised like ARM, this requirement is stronger and code retranslation must not modify the generated code "atomically", as the cache line might be flushed at any moment (interrupt, exception, task switching), even if not triggered by QEMU. The probability for this to happen is very low, and depends on cache size and associativiy, machine load, interrupts, so the symptoms are might happen randomly. This requirement is currently not followed in tcg/arm, for the load/store code, which basically has the following structure: 1) tlb access code is written 2) conditional fast path code is written 3) branch is written with a temporary target 4) slow path code is written 5) branch target is updated The cache lines corresponding to the retranslated code is not flushed after code retranslation as the generated code is supposed to be the same. However if the cache line corresponding to the branch instruction is flushed between step 3 and 5, and is not flushed again before the code is executed again, the branch target is wrong. In the guest, the symptoms are MMU page fault at a random addresses, which leads to kernel page fault or segmentation faults. The patch fixes this issue by avoiding writing the branch target until it is known, that is by writing only the branch instruction first, and later only the offset. This fixes booting linux guests on ARM hosts (tested: arm, i386, mips, mipsel, sh4, sparc). Acked-by: Edgar E. Iglesias Signed-off-by: Aurelien Jarno --- tcg/arm/tcg-target.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'tcg/arm/tcg-target.c') diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index a3af5b222e..9def2e58fb 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -113,12 +113,25 @@ static const int tcg_target_call_oarg_regs[2] = { TCG_REG_R0, TCG_REG_R1 }; +static inline void reloc_abs32(void *code_ptr, tcg_target_long target) +{ + *(uint32_t *) code_ptr = target; +} + +static inline void reloc_pc24(void *code_ptr, tcg_target_long target) +{ + uint32_t offset = ((target - ((tcg_target_long) code_ptr + 8)) >> 2); + + *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & ~0xffffff) + | (offset & 0xffffff); +} + static void patch_reloc(uint8_t *code_ptr, int type, tcg_target_long value, tcg_target_long addend) { switch (type) { case R_ARM_ABS32: - *(uint32_t *) code_ptr = value; + reloc_abs32(code_ptr, value); break; case R_ARM_CALL: @@ -127,8 +140,7 @@ static void patch_reloc(uint8_t *code_ptr, int type, tcg_abort(); case R_ARM_PC24: - *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & 0xff000000) | - (((value - ((tcg_target_long) code_ptr + 8)) >> 2) & 0xffffff); + reloc_pc24(code_ptr, value); break; } } @@ -1031,7 +1043,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) } label_ptr = (void *) s->code_ptr; - tcg_out_b(s, COND_EQ, 8); + tcg_out_b_noaddr(s, COND_EQ); /* TODO: move this code to where the constants pool will be */ if (addr_reg != TCG_REG_R0) { @@ -1076,7 +1088,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) break; } - *label_ptr += ((void *) s->code_ptr - (void *) label_ptr - 8) >> 2; + reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr); #else /* !CONFIG_SOFTMMU */ if (GUEST_BASE) { uint32_t offset = GUEST_BASE; @@ -1245,7 +1257,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) } label_ptr = (void *) s->code_ptr; - tcg_out_b(s, COND_EQ, 8); + tcg_out_b_noaddr(s, COND_EQ); /* TODO: move this code to where the constants pool will be */ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, @@ -1317,7 +1329,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) if (opc == 3) tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10); - *label_ptr += ((void *) s->code_ptr - (void *) label_ptr - 8) >> 2; + reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr); #else /* !CONFIG_SOFTMMU */ if (GUEST_BASE) { uint32_t offset = GUEST_BASE; @@ -1399,7 +1411,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, /* Direct jump method */ #if defined(USE_DIRECT_JUMP) s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; - tcg_out_b(s, COND_AL, 8); + tcg_out_b_noaddr(s, COND_AL); #else tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; -- cgit v1.2.3 From 9a3abc21a61f95799c1f301b0b480a98a29fa0ff Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Thu, 6 Jan 2011 22:43:13 +0100 Subject: tcg/arm: fix qemu_st64 for big endian targets Due to a typo, qemu_st64 doesn't properly byteswap the 32-bit low word of a 64 bit word before saving it. This patch fixes that. Acked-by: Andrzej Zaborowski Signed-off-by: Aurelien Jarno --- tcg/arm/tcg-target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tcg/arm/tcg-target.c') diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index 9def2e58fb..08c44c1123 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -1248,7 +1248,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg2); tcg_out_st32_rwb(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, addr_reg); tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg); - tcg_out_st32_12(s, COND_EQ, data_reg, TCG_REG_R1, 4); + tcg_out_st32_12(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, 4); } else { tcg_out_st32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg); tcg_out_st32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4); -- cgit v1.2.3 From 0f11f25a001f6adca18689192182d415ff3dbb7c Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Thu, 6 Jan 2011 22:43:13 +0100 Subject: tcg/arm: improve constant loading Improve constant loading in two ways: - On all ARM versions, it's possible to load 0xffffff00 = -0x100 using the mvn rd, #0. Fix the conditions. - On <= ARMv6 versions, where movw and movt are not available, load the constants using mov and orr with rotations depending on the constant to load. This is very useful for example to load constants where the low byte is 0. This reduce the generated code size by about 7%. Also fix the coding style at the same time. Cc: Andrzej Zaborowski Signed-off-by: Aurelien Jarno --- tcg/arm/tcg-target.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'tcg/arm/tcg-target.c') diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index 08c44c1123..1eb5605f8c 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -406,35 +406,38 @@ static inline void tcg_out_dat_imm(TCGContext *s, } static inline void tcg_out_movi32(TCGContext *s, - int cond, int rd, int32_t arg) + int cond, int rd, uint32_t arg) { /* TODO: This is very suboptimal, we can easily have a constant * pool somewhere after all the instructions. */ - - if (arg < 0 && arg > -0x100) - return tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, (~arg) & 0xff); - - if (use_armv7_instructions) { + if ((int)arg < 0 && (int)arg >= -0x100) { + tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, (~arg) & 0xff); + } else if (use_armv7_instructions) { /* use movw/movt */ /* movw */ tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12) | ((arg << 4) & 0x000f0000) | (arg & 0xfff)); - if (arg & 0xffff0000) + if (arg & 0xffff0000) { /* movt */ tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12) | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff)); - } else { - tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, arg & 0xff); - if (arg & 0x0000ff00) - tcg_out_dat_imm(s, cond, ARITH_ORR, rd, rd, - ((arg >> 8) & 0xff) | 0xc00); - if (arg & 0x00ff0000) - tcg_out_dat_imm(s, cond, ARITH_ORR, rd, rd, - ((arg >> 16) & 0xff) | 0x800); - if (arg & 0xff000000) - tcg_out_dat_imm(s, cond, ARITH_ORR, rd, rd, - ((arg >> 24) & 0xff) | 0x400); } + } else { + int opc = ARITH_MOV; + int rn = 0; + + do { + int i, rot; + + i = ctz32(arg) & ~1; + rot = ((32 - i) << 7) & 0xf00; + tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot); + arg &= ~(0xff << i); + + opc = ARITH_ORR; + rn = rd; + } while (arg); + } } static inline void tcg_out_mul32(TCGContext *s, -- cgit v1.2.3