From ce151109813e2770fd3cee2f37bfa2cdd01a12b9 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 23 Feb 2016 14:49:41 +0000 Subject: tcg: Rename tcg-target.c to tcg-target.inc.c Rename the per-architecture tcg-target.c files to tcg-target.inc.c. This makes it clearer that they are not intended to be standalone C files, but are instead #included into another source file. Reviewed-by: Eric Blake Signed-off-by: Peter Maydell Message-Id: <1456238983-10160-2-git-send-email-peter.maydell@linaro.org> Signed-off-by: Richard Henderson --- tcg/README | 5 +- tcg/aarch64/tcg-target.c | 1893 ----------------------------- tcg/aarch64/tcg-target.inc.c | 1893 +++++++++++++++++++++++++++++ tcg/arm/tcg-target.c | 2129 -------------------------------- tcg/arm/tcg-target.inc.c | 2129 ++++++++++++++++++++++++++++++++ tcg/i386/tcg-target.c | 2464 ------------------------------------- tcg/i386/tcg-target.inc.c | 2464 +++++++++++++++++++++++++++++++++++++ tcg/ia64/tcg-target.c | 2453 ------------------------------------- tcg/ia64/tcg-target.inc.c | 2453 +++++++++++++++++++++++++++++++++++++ tcg/mips/tcg-target.c | 1892 ----------------------------- tcg/mips/tcg-target.inc.c | 1892 +++++++++++++++++++++++++++++ tcg/ppc/tcg-target.c | 2762 ------------------------------------------ tcg/ppc/tcg-target.inc.c | 2762 ++++++++++++++++++++++++++++++++++++++++++ tcg/s390/tcg-target.c | 2410 ------------------------------------ tcg/s390/tcg-target.inc.c | 2410 ++++++++++++++++++++++++++++++++++++ tcg/sparc/tcg-target.c | 1653 ------------------------- tcg/sparc/tcg-target.inc.c | 1653 +++++++++++++++++++++++++ tcg/tcg.c | 7 +- tcg/tcg.h | 2 +- tcg/tci/README | 4 +- tcg/tci/tcg-target.c | 880 -------------- tcg/tci/tcg-target.inc.c | 880 ++++++++++++++ 22 files changed, 18546 insertions(+), 18544 deletions(-) delete mode 100644 tcg/aarch64/tcg-target.c create mode 100644 tcg/aarch64/tcg-target.inc.c delete mode 100644 tcg/arm/tcg-target.c create mode 100644 tcg/arm/tcg-target.inc.c delete mode 100644 tcg/i386/tcg-target.c create mode 100644 tcg/i386/tcg-target.inc.c delete mode 100644 tcg/ia64/tcg-target.c create mode 100644 tcg/ia64/tcg-target.inc.c delete mode 100644 tcg/mips/tcg-target.c create mode 100644 tcg/mips/tcg-target.inc.c delete mode 100644 tcg/ppc/tcg-target.c create mode 100644 tcg/ppc/tcg-target.inc.c delete mode 100644 tcg/s390/tcg-target.c create mode 100644 tcg/s390/tcg-target.inc.c delete mode 100644 tcg/sparc/tcg-target.c create mode 100644 tcg/sparc/tcg-target.inc.c delete mode 100644 tcg/tci/tcg-target.c create mode 100644 tcg/tci/tcg-target.inc.c (limited to 'tcg') diff --git a/tcg/README b/tcg/README index 34c0775cff..f4a8ac170b 100644 --- a/tcg/README +++ b/tcg/README @@ -460,8 +460,9 @@ function tcg_gen_xxx(args). 4) Backend -tcg-target.h contains the target specific definitions. tcg-target.c -contains the target specific code. +tcg-target.h contains the target specific definitions. tcg-target.inc.c +contains the target specific code; it is #included by tcg/tcg.c, rather +than being a standalone C file. 4.1) Assumptions diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c deleted file mode 100644 index 8467d5d8b9..0000000000 --- a/tcg/aarch64/tcg-target.c +++ /dev/null @@ -1,1893 +0,0 @@ -/* - * Initial TCG Implementation for aarch64 - * - * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH - * Written by Claudio Fontana - * - * This work is licensed under the terms of the GNU GPL, version 2 or - * (at your option) any later version. 
- * - * See the COPYING file in the top-level directory for details. - */ - -#include "qemu/osdep.h" -#include "tcg-be-ldst.h" -#include "qemu/bitops.h" - -/* We're going to re-use TCGType in setting of the SF bit, which controls - the size of the operation performed. If we know the values match, it - makes things much cleaner. */ -QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1); - -#ifndef NDEBUG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7", - "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15", - "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23", - "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp", -}; -#endif /* NDEBUG */ - -static const int tcg_target_reg_alloc_order[] = { - TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, - TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, - TCG_REG_X28, /* we will reserve this for guest_base if configured */ - - TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, - TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, - TCG_REG_X16, TCG_REG_X17, - - TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, - TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, - - /* X18 reserved by system */ - /* X19 reserved for AREG0 */ - /* X29 reserved as fp */ - /* X30 reserved as temporary */ -}; - -static const int tcg_target_call_iarg_regs[8] = { - TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, - TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7 -}; -static const int tcg_target_call_oarg_regs[1] = { - TCG_REG_X0 -}; - -#define TCG_REG_TMP TCG_REG_X30 - -#ifndef CONFIG_SOFTMMU -/* Note that XZR cannot be encoded in the address base register slot, - as that actaully encodes SP. So if we need to zero-extend the guest - address, via the address index register slot, we need to load even - a zero guest base into a register. */ -#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32) -#define TCG_REG_GUEST_BASE TCG_REG_X28 -#endif - -static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target) -{ - ptrdiff_t offset = target - code_ptr; - assert(offset == sextract64(offset, 0, 26)); - /* read instruction, mask away previous PC_REL26 parameter contents, - set the proper offset, then write back the instruction. 
*/ - *code_ptr = deposit32(*code_ptr, 0, 26, offset); -} - -static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target) -{ - ptrdiff_t offset = target - code_ptr; - assert(offset == sextract64(offset, 0, 19)); - *code_ptr = deposit32(*code_ptr, 5, 19, offset); -} - -static inline void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - assert(addend == 0); - switch (type) { - case R_AARCH64_JUMP26: - case R_AARCH64_CALL26: - reloc_pc26(code_ptr, (tcg_insn_unit *)value); - break; - case R_AARCH64_CONDBR19: - reloc_pc19(code_ptr, (tcg_insn_unit *)value); - break; - default: - tcg_abort(); - } -} - -#define TCG_CT_CONST_AIMM 0x100 -#define TCG_CT_CONST_LIMM 0x200 -#define TCG_CT_CONST_ZERO 0x400 -#define TCG_CT_CONST_MONE 0x800 - -/* parse target specific constraints */ -static int target_parse_constraint(TCGArgConstraint *ct, - const char **pct_str) -{ - const char *ct_str = *pct_str; - - switch (ct_str[0]) { - case 'r': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1); - break; - case 'l': /* qemu_ld / qemu_st address, data_reg */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1); -#ifdef CONFIG_SOFTMMU - /* x0 and x1 will be overwritten when reading the tlb entry, - and x2, and x3 for helper args, better to avoid using them. */ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3); -#endif - break; - case 'A': /* Valid for arithmetic immediate (positive or negative). */ - ct->ct |= TCG_CT_CONST_AIMM; - break; - case 'L': /* Valid for logical immediate. */ - ct->ct |= TCG_CT_CONST_LIMM; - break; - case 'M': /* minus one */ - ct->ct |= TCG_CT_CONST_MONE; - break; - case 'Z': /* zero */ - ct->ct |= TCG_CT_CONST_ZERO; - break; - default: - return -1; - } - - ct_str++; - *pct_str = ct_str; - return 0; -} - -static inline bool is_aimm(uint64_t val) -{ - return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; -} - -static inline bool is_limm(uint64_t val) -{ - /* Taking a simplified view of the logical immediates for now, ignoring - the replication that can happen across the field. Match bit patterns - of the forms - 0....01....1 - 0..01..10..0 - and their inverses. */ - - /* Make things easier below, by testing the form with msb clear. 
*/ - if ((int64_t)val < 0) { - val = ~val; - } - if (val == 0) { - return false; - } - val += val & -val; - return (val & (val - 1)) == 0; -} - -static int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - int ct = arg_ct->ct; - - if (ct & TCG_CT_CONST) { - return 1; - } - if (type == TCG_TYPE_I32) { - val = (int32_t)val; - } - if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { - return 1; - } - if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { - return 1; - } - if ((ct & TCG_CT_CONST_ZERO) && val == 0) { - return 1; - } - if ((ct & TCG_CT_CONST_MONE) && val == -1) { - return 1; - } - - return 0; -} - -enum aarch64_cond_code { - COND_EQ = 0x0, - COND_NE = 0x1, - COND_CS = 0x2, /* Unsigned greater or equal */ - COND_HS = COND_CS, /* ALIAS greater or equal */ - COND_CC = 0x3, /* Unsigned less than */ - COND_LO = COND_CC, /* ALIAS Lower */ - COND_MI = 0x4, /* Negative */ - COND_PL = 0x5, /* Zero or greater */ - COND_VS = 0x6, /* Overflow */ - COND_VC = 0x7, /* No overflow */ - COND_HI = 0x8, /* Unsigned greater than */ - COND_LS = 0x9, /* Unsigned less or equal */ - COND_GE = 0xa, - COND_LT = 0xb, - COND_GT = 0xc, - COND_LE = 0xd, - COND_AL = 0xe, - COND_NV = 0xf, /* behaves like COND_AL here */ -}; - -static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { - [TCG_COND_EQ] = COND_EQ, - [TCG_COND_NE] = COND_NE, - [TCG_COND_LT] = COND_LT, - [TCG_COND_GE] = COND_GE, - [TCG_COND_LE] = COND_LE, - [TCG_COND_GT] = COND_GT, - /* unsigned */ - [TCG_COND_LTU] = COND_LO, - [TCG_COND_GTU] = COND_HI, - [TCG_COND_GEU] = COND_HS, - [TCG_COND_LEU] = COND_LS, -}; - -typedef enum { - LDST_ST = 0, /* store */ - LDST_LD = 1, /* load */ - LDST_LD_S_X = 2, /* load and sign-extend into Xt */ - LDST_LD_S_W = 3, /* load and sign-extend into Wt */ -} AArch64LdstType; - -/* We encode the format of the insn into the beginning of the name, so that - we can have the preprocessor help "typecheck" the insn vs the output - function. Arm didn't provide us with nice names for the formats, so we - use the section number of the architecture reference manual in which the - instruction group is described. */ -typedef enum { - /* Compare and branch (immediate). */ - I3201_CBZ = 0x34000000, - I3201_CBNZ = 0x35000000, - - /* Conditional branch (immediate). */ - I3202_B_C = 0x54000000, - - /* Unconditional branch (immediate). */ - I3206_B = 0x14000000, - I3206_BL = 0x94000000, - - /* Unconditional branch (register). */ - I3207_BR = 0xd61f0000, - I3207_BLR = 0xd63f0000, - I3207_RET = 0xd65f0000, - - /* Load/store register. Described here as 3.3.12, but the helper - that emits them can transform to 3.3.10 or 3.3.13. 
*/ - I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, - I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, - I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, - I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, - - I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, - I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, - I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, - I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, - - I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, - I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, - - I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, - I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, - I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, - - I3312_TO_I3310 = 0x00200800, - I3312_TO_I3313 = 0x01000000, - - /* Load/store register pair instructions. */ - I3314_LDP = 0x28400000, - I3314_STP = 0x28000000, - - /* Add/subtract immediate instructions. */ - I3401_ADDI = 0x11000000, - I3401_ADDSI = 0x31000000, - I3401_SUBI = 0x51000000, - I3401_SUBSI = 0x71000000, - - /* Bitfield instructions. */ - I3402_BFM = 0x33000000, - I3402_SBFM = 0x13000000, - I3402_UBFM = 0x53000000, - - /* Extract instruction. */ - I3403_EXTR = 0x13800000, - - /* Logical immediate instructions. */ - I3404_ANDI = 0x12000000, - I3404_ORRI = 0x32000000, - I3404_EORI = 0x52000000, - - /* Move wide immediate instructions. */ - I3405_MOVN = 0x12800000, - I3405_MOVZ = 0x52800000, - I3405_MOVK = 0x72800000, - - /* PC relative addressing instructions. */ - I3406_ADR = 0x10000000, - I3406_ADRP = 0x90000000, - - /* Add/subtract shifted register instructions (without a shift). */ - I3502_ADD = 0x0b000000, - I3502_ADDS = 0x2b000000, - I3502_SUB = 0x4b000000, - I3502_SUBS = 0x6b000000, - - /* Add/subtract shifted register instructions (with a shift). */ - I3502S_ADD_LSL = I3502_ADD, - - /* Add/subtract with carry instructions. */ - I3503_ADC = 0x1a000000, - I3503_SBC = 0x5a000000, - - /* Conditional select instructions. */ - I3506_CSEL = 0x1a800000, - I3506_CSINC = 0x1a800400, - - /* Data-processing (1 source) instructions. */ - I3507_REV16 = 0x5ac00400, - I3507_REV32 = 0x5ac00800, - I3507_REV64 = 0x5ac00c00, - - /* Data-processing (2 source) instructions. */ - I3508_LSLV = 0x1ac02000, - I3508_LSRV = 0x1ac02400, - I3508_ASRV = 0x1ac02800, - I3508_RORV = 0x1ac02c00, - I3508_SMULH = 0x9b407c00, - I3508_UMULH = 0x9bc07c00, - I3508_UDIV = 0x1ac00800, - I3508_SDIV = 0x1ac00c00, - - /* Data-processing (3 source) instructions. */ - I3509_MADD = 0x1b000000, - I3509_MSUB = 0x1b008000, - - /* Logical shifted register instructions (without a shift). */ - I3510_AND = 0x0a000000, - I3510_BIC = 0x0a200000, - I3510_ORR = 0x2a000000, - I3510_ORN = 0x2a200000, - I3510_EOR = 0x4a000000, - I3510_EON = 0x4a200000, - I3510_ANDS = 0x6a000000, -} AArch64Insn; - -static inline uint32_t tcg_in32(TCGContext *s) -{ - uint32_t v = *(uint32_t *)s->code_ptr; - return v; -} - -/* Emit an opcode with "type-checking" of the format. */ -#define tcg_out_insn(S, FMT, OP, ...) 
\ - glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) - -static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rt, int imm19) -{ - tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); -} - -static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, - TCGCond c, int imm19) -{ - tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); -} - -static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) -{ - tcg_out32(s, insn | (imm26 & 0x03ffffff)); -} - -static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) -{ - tcg_out32(s, insn | rn << 5); -} - -static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, - TCGReg r1, TCGReg r2, TCGReg rn, - tcg_target_long ofs, bool pre, bool w) -{ - insn |= 1u << 31; /* ext */ - insn |= pre << 24; - insn |= w << 23; - - assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); - insn |= (ofs & (0x7f << 3)) << (15 - 3); - - tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); -} - -static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, TCGReg rn, uint64_t aimm) -{ - if (aimm > 0xfff) { - assert((aimm & 0xfff) == 0); - aimm >>= 12; - assert(aimm <= 0xfff); - aimm |= 1 << 12; /* apply LSL 12 */ - } - tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); -} - -/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 - (Logical immediate). Both insn groups have N, IMMR and IMMS fields - that feed the DecodeBitMasks pseudo function. */ -static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, TCGReg rn, int n, int immr, int imms) -{ - tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 - | rn << 5 | rd); -} - -#define tcg_out_insn_3404 tcg_out_insn_3402 - -static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, TCGReg rn, TCGReg rm, int imms) -{ - tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 - | rn << 5 | rd); -} - -/* This function is used for the Move (wide immediate) instruction group. - Note that SHIFT is a full shift count, not the 2 bit HW field. */ -static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, uint16_t half, unsigned shift) -{ - assert((shift & ~0x30) == 0); - tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); -} - -static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, - TCGReg rd, int64_t disp) -{ - tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); -} - -/* This function is for both 3.5.2 (Add/Subtract shifted register), for - the rare occasion when we actually want to supply a shift amount. */ -static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn, - TCGType ext, TCGReg rd, TCGReg rn, - TCGReg rm, int imm6) -{ - tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd); -} - -/* This function is for 3.5.2 (Add/subtract shifted register), - and 3.5.10 (Logical shifted register), for the vast majorty of cases - when we don't want to apply a shift. Thus it can also be used for - 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). 
*/ -static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, TCGReg rn, TCGReg rm) -{ - tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd); -} - -#define tcg_out_insn_3503 tcg_out_insn_3502 -#define tcg_out_insn_3508 tcg_out_insn_3502 -#define tcg_out_insn_3510 tcg_out_insn_3502 - -static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c) -{ - tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd - | tcg_cond_to_aarch64[c] << 12); -} - -static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, TCGReg rn) -{ - tcg_out32(s, insn | ext << 31 | rn << 5 | rd); -} - -static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra) -{ - tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd); -} - -static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn, - TCGReg rd, TCGReg base, TCGType ext, - TCGReg regoff) -{ - /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ - tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | - 0x4000 | ext << 13 | base << 5 | rd); -} - -static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, - TCGReg rd, TCGReg rn, intptr_t offset) -{ - tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd); -} - -static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, - TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) -{ - /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ - tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd); -} - -/* Register to register move using ORR (shifted register with no shift). */ -static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) -{ - tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); -} - -/* Register to register move using ADDI (move to/from SP). */ -static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) -{ - tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); -} - -/* This function is used for the Logical (immediate) instruction group. - The value of LIMM must satisfy IS_LIMM. See the comment above about - only supporting simplified logical immediates. */ -static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, TCGReg rn, uint64_t limm) -{ - unsigned h, l, r, c; - - assert(is_limm(limm)); - - h = clz64(limm); - l = ctz64(limm); - if (l == 0) { - r = 0; /* form 0....01....1 */ - c = ctz64(~limm) - 1; - if (h == 0) { - r = clz64(~limm); /* form 1..10..01..1 */ - c += r; - } - } else { - r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ - c = r - h - 1; - } - if (ext == TCG_TYPE_I32) { - r &= 31; - c &= 31; - } - - tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); -} - -static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, - tcg_target_long value) -{ - AArch64Insn insn; - int i, wantinv, shift; - tcg_target_long svalue = value; - tcg_target_long ivalue = ~value; - tcg_target_long imask; - - /* For 32-bit values, discard potential garbage in value. For 64-bit - values within [2**31, 2**32-1], we can create smaller sequences by - interpreting this as a negative 32-bit number, while ensuring that - the high 32 bits are cleared by setting SF=0. 
*/ - if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { - svalue = (int32_t)value; - value = (uint32_t)value; - ivalue = (uint32_t)ivalue; - type = TCG_TYPE_I32; - } - - /* Speed things up by handling the common case of small positive - and negative values specially. */ - if ((value & ~0xffffull) == 0) { - tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); - return; - } else if ((ivalue & ~0xffffull) == 0) { - tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); - return; - } - - /* Check for bitfield immediates. For the benefit of 32-bit quantities, - use the sign-extended value. That lets us match rotated values such - as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */ - if (is_limm(svalue)) { - tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); - return; - } - - /* Look for host pointer values within 4G of the PC. This happens - often when loading pointers to QEMU's own data structures. */ - if (type == TCG_TYPE_I64) { - tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12); - if (disp == sextract64(disp, 0, 21)) { - tcg_out_insn(s, 3406, ADRP, rd, disp); - if (value & 0xfff) { - tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); - } - return; - } - } - - /* Would it take fewer insns to begin with MOVN? For the value and its - inverse, count the number of 16-bit lanes that are 0. */ - for (i = wantinv = imask = 0; i < 64; i += 16) { - tcg_target_long mask = 0xffffull << i; - if ((value & mask) == 0) { - wantinv -= 1; - } - if ((ivalue & mask) == 0) { - wantinv += 1; - imask |= mask; - } - } - - /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */ - insn = I3405_MOVZ; - if (wantinv > 0) { - value = ivalue; - insn = I3405_MOVN; - } - - /* Find the lowest lane that is not 0x0000. */ - shift = ctz64(value) & (63 & -16); - tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift); - - if (wantinv > 0) { - /* Re-invert the value, so MOVK sees non-inverted bits. */ - value = ~value; - /* Clear out all the 0xffff lanes. */ - value ^= imask; - } - /* Clear out the lane that we just set. */ - value &= ~(0xffffUL << shift); - - /* Iterate until all lanes have been set, and thus cleared from VALUE. */ - while (value) { - shift = ctz64(value) & (63 & -16); - tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift); - value &= ~(0xffffUL << shift); - } -} - -/* Define something more legible for general use. */ -#define tcg_out_ldst_r tcg_out_insn_3310 - -static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, - TCGReg rd, TCGReg rn, intptr_t offset) -{ - TCGMemOp size = (uint32_t)insn >> 30; - - /* If the offset is naturally aligned and in range, then we can - use the scaled uimm12 encoding */ - if (offset >= 0 && !(offset & ((1 << size) - 1))) { - uintptr_t scaled_uimm = offset >> size; - if (scaled_uimm <= 0xfff) { - tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); - return; - } - } - - /* Small signed offsets can use the unscaled encoding. */ - if (offset >= -256 && offset < 256) { - tcg_out_insn_3312(s, insn, rd, rn, offset); - return; - } - - /* Worst-case scenario, move offset to temp register, use reg offset. 
*/ - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); - tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); -} - -static inline void tcg_out_mov(TCGContext *s, - TCGType type, TCGReg ret, TCGReg arg) -{ - if (ret != arg) { - tcg_out_movr(s, type, ret, arg); - } -} - -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX, - arg, arg1, arg2); -} - -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX, - arg, arg1, arg2); -} - -static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd, - TCGReg rn, unsigned int a, unsigned int b) -{ - tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b); -} - -static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd, - TCGReg rn, unsigned int a, unsigned int b) -{ - tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b); -} - -static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd, - TCGReg rn, unsigned int a, unsigned int b) -{ - tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b); -} - -static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, - TCGReg rn, TCGReg rm, unsigned int a) -{ - tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); -} - -static inline void tcg_out_shl(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int bits = ext ? 64 : 32; - int max = bits - 1; - tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max)); -} - -static inline void tcg_out_shr(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int max = ext ? 63 : 31; - tcg_out_ubfm(s, ext, rd, rn, m & max, max); -} - -static inline void tcg_out_sar(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int max = ext ? 63 : 31; - tcg_out_sbfm(s, ext, rd, rn, m & max, max); -} - -static inline void tcg_out_rotr(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int max = ext ? 63 : 31; - tcg_out_extr(s, ext, rd, rn, rn, m & max); -} - -static inline void tcg_out_rotl(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int bits = ext ? 64 : 32; - int max = bits - 1; - tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max)); -} - -static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, - TCGReg rn, unsigned lsb, unsigned width) -{ - unsigned size = ext ? 64 : 32; - unsigned a = (size - lsb) & (size - 1); - unsigned b = width - 1; - tcg_out_bfm(s, ext, rd, rn, a, b); -} - -static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, - tcg_target_long b, bool const_b) -{ - if (const_b) { - /* Using CMP or CMN aliases. */ - if (b >= 0) { - tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); - } else { - tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); - } - } else { - /* Using CMP alias SUBS wzr, Wn, Wm */ - tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); - } -} - -static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target) -{ - ptrdiff_t offset = target - s->code_ptr; - assert(offset == sextract64(offset, 0, 26)); - tcg_out_insn(s, 3206, B, offset); -} - -static inline void tcg_out_goto_noaddr(TCGContext *s) -{ - /* We pay attention here to not modify the branch target by reading from - the buffer. This ensure that caches and memory are kept coherent during - retranslation. 
Mask away possible garbage in the high bits for the - first translation, while keeping the offset bits for retranslation. */ - uint32_t old = tcg_in32(s); - tcg_out_insn(s, 3206, B, old); -} - -static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c) -{ - /* See comments in tcg_out_goto_noaddr. */ - uint32_t old = tcg_in32(s) >> 5; - tcg_out_insn(s, 3202, B_C, c, old); -} - -static inline void tcg_out_callr(TCGContext *s, TCGReg reg) -{ - tcg_out_insn(s, 3207, BLR, reg); -} - -static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target) -{ - ptrdiff_t offset = target - s->code_ptr; - if (offset == sextract64(offset, 0, 26)) { - tcg_out_insn(s, 3206, BL, offset); - } else { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); - tcg_out_callr(s, TCG_REG_TMP); - } -} - -void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) -{ - tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr; - tcg_insn_unit *target = (tcg_insn_unit *)addr; - - reloc_pc26(code_ptr, target); - flush_icache_range(jmp_addr, jmp_addr + 4); -} - -static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) -{ - if (!l->has_value) { - tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); - tcg_out_goto_noaddr(s); - } else { - tcg_out_goto(s, l->u.value_ptr); - } -} - -static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a, - TCGArg b, bool b_const, TCGLabel *l) -{ - intptr_t offset; - bool need_cmp; - - if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { - need_cmp = false; - } else { - need_cmp = true; - tcg_out_cmp(s, ext, a, b, b_const); - } - - if (!l->has_value) { - tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); - offset = tcg_in32(s) >> 5; - } else { - offset = l->u.value_ptr - s->code_ptr; - assert(offset == sextract64(offset, 0, 19)); - } - - if (need_cmp) { - tcg_out_insn(s, 3202, B_C, c, offset); - } else if (c == TCG_COND_EQ) { - tcg_out_insn(s, 3201, CBZ, ext, a, offset); - } else { - tcg_out_insn(s, 3201, CBNZ, ext, a, offset); - } -} - -static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn) -{ - tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn); -} - -static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn) -{ - tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn); -} - -static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn) -{ - tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn); -} - -static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits, - TCGReg rd, TCGReg rn) -{ - /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ - int bits = (8 << s_bits) - 1; - tcg_out_sbfm(s, ext, rd, rn, 0, bits); -} - -static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits, - TCGReg rd, TCGReg rn) -{ - /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ - int bits = (8 << s_bits) - 1; - tcg_out_ubfm(s, 0, rd, rn, 0, bits); -} - -static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, - TCGReg rn, int64_t aimm) -{ - if (aimm >= 0) { - tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); - } else { - tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); - } -} - -static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl, - TCGReg rh, TCGReg al, TCGReg ah, - tcg_target_long bl, tcg_target_long bh, - bool const_bl, bool const_bh, bool sub) -{ - TCGReg orig_rl = rl; - AArch64Insn insn; - - if (rl == ah || (!const_bh && rl == bh)) { - rl = TCG_REG_TMP; - } - - if (const_bl) { - insn = I3401_ADDSI; - if ((bl < 0) ^ sub) { - insn = I3401_SUBSI; - bl = 
-bl; - } - tcg_out_insn_3401(s, insn, ext, rl, al, bl); - } else { - tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); - } - - insn = I3503_ADC; - if (const_bh) { - /* Note that the only two constants we support are 0 and -1, and - that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ - if ((bh != 0) ^ sub) { - insn = I3503_SBC; - } - bh = TCG_REG_XZR; - } else if (sub) { - insn = I3503_SBC; - } - tcg_out_insn_3503(s, insn, ext, rh, ah, bh); - - tcg_out_mov(s, ext, orig_rl, rl); -} - -#ifdef CONFIG_SOFTMMU -/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, - * TCGMemOpIdx oi, uintptr_t ra) - */ -static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, -}; - -/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, - * uintxx_t val, TCGMemOpIdx oi, - * uintptr_t ra) - */ -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, -}; - -static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target) -{ - ptrdiff_t offset = tcg_pcrel_diff(s, target); - assert(offset == sextract64(offset, 0, 21)); - tcg_out_insn(s, 3406, ADR, rd, offset); -} - -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGMemOpIdx oi = lb->oi; - TCGMemOp opc = get_memop(oi); - TCGMemOp size = opc & MO_SIZE; - - reloc_pc19(lb->label_ptr[0], s->code_ptr); - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); - tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); - tcg_out_adr(s, TCG_REG_X3, lb->raddr); - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); - if (opc & MO_SIGN) { - tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); - } else { - tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0); - } - - tcg_out_goto(s, lb->raddr); -} - -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGMemOpIdx oi = lb->oi; - TCGMemOp opc = get_memop(oi); - TCGMemOp size = opc & MO_SIZE; - - reloc_pc19(lb->label_ptr[0], s->code_ptr); - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); - tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); - tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); - tcg_out_adr(s, TCG_REG_X4, lb->raddr); - tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); - tcg_out_goto(s, lb->raddr); -} - -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, - TCGType ext, TCGReg data_reg, TCGReg addr_reg, - tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->type = ext; - label->datalo_reg = data_reg; - label->addrlo_reg = addr_reg; - label->raddr = raddr; - label->label_ptr[0] = label_ptr; -} - -/* Load and compare a TLB entry, emitting the conditional jump to the - slow path for the failure case, which will be patched later when finalizing - the slow path. Generated code returns the host addend in X1, - clobbers X0,X2,X3,TMP. 
*/ -static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc, - tcg_insn_unit **label_ptr, int mem_index, - bool is_read) -{ - int tlb_offset = is_read ? - offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) - : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); - int s_mask = (1 << (opc & MO_SIZE)) - 1; - TCGReg base = TCG_AREG0, x3; - uint64_t tlb_mask; - - /* For aligned accesses, we check the first byte and include the alignment - bits within the address. For unaligned access, we check that we don't - cross pages using the address of the last byte of the access. */ - if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) { - tlb_mask = TARGET_PAGE_MASK | s_mask; - x3 = addr_reg; - } else { - tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, - TCG_REG_X3, addr_reg, s_mask); - tlb_mask = TARGET_PAGE_MASK; - x3 = TCG_REG_X3; - } - - /* Extract the TLB index from the address into X0. - X0 = - addr_reg */ - tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg, - TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS); - - /* Store the page mask part of the address into X3. */ - tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, - TCG_REG_X3, x3, tlb_mask); - - /* Add any "high bits" from the tlb offset to the env address into X2, - to take advantage of the LSL12 form of the ADDI instruction. - X2 = env + (tlb_offset & 0xfff000) */ - if (tlb_offset & 0xfff000) { - tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base, - tlb_offset & 0xfff000); - base = TCG_REG_X2; - } - - /* Merge the tlb index contribution into X2. - X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */ - tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base, - TCG_REG_X0, CPU_TLB_ENTRY_BITS); - - /* Merge "low bits" from tlb offset, load the tlb comparator into X0. - X0 = load [X2 + (tlb_offset & 0x000fff)] */ - tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX, - TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff); - - /* Load the tlb addend. Do that early to avoid stalling. - X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */ - tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2, - (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) - - (is_read ? offsetof(CPUTLBEntry, addr_read) - : offsetof(CPUTLBEntry, addr_write))); - - /* Perform the address comparison. */ - tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0); - - /* If not equal, we jump to the slow path. */ - *label_ptr = s->code_ptr; - tcg_out_goto_cond_noaddr(s, TCG_COND_NE); -} - -#endif /* CONFIG_SOFTMMU */ - -static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext, - TCGReg data_r, TCGReg addr_r, - TCGType otype, TCGReg off_r) -{ - const TCGMemOp bswap = memop & MO_BSWAP; - - switch (memop & MO_SSIZE) { - case MO_UB: - tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r); - break; - case MO_SB: - tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW, - data_r, addr_r, otype, off_r); - break; - case MO_UW: - tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); - if (bswap) { - tcg_out_rev16(s, data_r, data_r); - } - break; - case MO_SW: - if (bswap) { - tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); - tcg_out_rev16(s, data_r, data_r); - tcg_out_sxt(s, ext, MO_16, data_r, data_r); - } else { - tcg_out_ldst_r(s, (ext ? 
I3312_LDRSHX : I3312_LDRSHW), - data_r, addr_r, otype, off_r); - } - break; - case MO_UL: - tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); - if (bswap) { - tcg_out_rev32(s, data_r, data_r); - } - break; - case MO_SL: - if (bswap) { - tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); - tcg_out_rev32(s, data_r, data_r); - tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r); - } else { - tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); - } - break; - case MO_Q: - tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r); - if (bswap) { - tcg_out_rev64(s, data_r, data_r); - } - break; - default: - tcg_abort(); - } -} - -static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop, - TCGReg data_r, TCGReg addr_r, - TCGType otype, TCGReg off_r) -{ - const TCGMemOp bswap = memop & MO_BSWAP; - - switch (memop & MO_SIZE) { - case MO_8: - tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r); - break; - case MO_16: - if (bswap && data_r != TCG_REG_XZR) { - tcg_out_rev16(s, TCG_REG_TMP, data_r); - data_r = TCG_REG_TMP; - } - tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r); - break; - case MO_32: - if (bswap && data_r != TCG_REG_XZR) { - tcg_out_rev32(s, TCG_REG_TMP, data_r); - data_r = TCG_REG_TMP; - } - tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r); - break; - case MO_64: - if (bswap && data_r != TCG_REG_XZR) { - tcg_out_rev64(s, TCG_REG_TMP, data_r); - data_r = TCG_REG_TMP; - } - tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r); - break; - default: - tcg_abort(); - } -} - -static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - TCGMemOpIdx oi, TCGType ext) -{ - TCGMemOp memop = get_memop(oi); - const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; -#ifdef CONFIG_SOFTMMU - unsigned mem_index = get_mmuidx(oi); - tcg_insn_unit *label_ptr; - - tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1); - tcg_out_qemu_ld_direct(s, memop, ext, data_reg, - TCG_REG_X1, otype, addr_reg); - add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg, - s->code_ptr, label_ptr); -#else /* !CONFIG_SOFTMMU */ - if (USE_GUEST_BASE) { - tcg_out_qemu_ld_direct(s, memop, ext, data_reg, - TCG_REG_GUEST_BASE, otype, addr_reg); - } else { - tcg_out_qemu_ld_direct(s, memop, ext, data_reg, - addr_reg, TCG_TYPE_I64, TCG_REG_XZR); - } -#endif /* CONFIG_SOFTMMU */ -} - -static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - TCGMemOpIdx oi) -{ - TCGMemOp memop = get_memop(oi); - const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; -#ifdef CONFIG_SOFTMMU - unsigned mem_index = get_mmuidx(oi); - tcg_insn_unit *label_ptr; - - tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0); - tcg_out_qemu_st_direct(s, memop, data_reg, - TCG_REG_X1, otype, addr_reg); - add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64, - data_reg, addr_reg, s->code_ptr, label_ptr); -#else /* !CONFIG_SOFTMMU */ - if (USE_GUEST_BASE) { - tcg_out_qemu_st_direct(s, memop, data_reg, - TCG_REG_GUEST_BASE, otype, addr_reg); - } else { - tcg_out_qemu_st_direct(s, memop, data_reg, - addr_reg, TCG_TYPE_I64, TCG_REG_XZR); - } -#endif /* CONFIG_SOFTMMU */ -} - -static tcg_insn_unit *tb_ret_addr; - -static void tcg_out_op(TCGContext *s, TCGOpcode opc, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) -{ - /* 99% of the time, we can signal the use of extension registers - by looking to see if the opcode handles 64-bit data. 
*/ - TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0; - - /* Hoist the loads of the most common arguments. */ - TCGArg a0 = args[0]; - TCGArg a1 = args[1]; - TCGArg a2 = args[2]; - int c2 = const_args[2]; - - /* Some operands are defined with "rZ" constraint, a register or - the zero register. These need not actually test args[I] == 0. */ -#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) - - switch (opc) { - case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); - tcg_out_goto(s, tb_ret_addr); - break; - - case INDEX_op_goto_tb: -#ifndef USE_DIRECT_JUMP -#error "USE_DIRECT_JUMP required for aarch64" -#endif - assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */ - s->tb_jmp_offset[a0] = tcg_current_code_size(s); - /* actual branch destination will be patched by - aarch64_tb_set_jmp_target later, beware retranslation. */ - tcg_out_goto_noaddr(s); - s->tb_next_offset[a0] = tcg_current_code_size(s); - break; - - case INDEX_op_br: - tcg_out_goto_label(s, arg_label(a0)); - break; - - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - tcg_out_ldst(s, I3312_LDRB, a0, a1, a2); - break; - case INDEX_op_ld8s_i32: - tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2); - break; - case INDEX_op_ld8s_i64: - tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2); - break; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - tcg_out_ldst(s, I3312_LDRH, a0, a1, a2); - break; - case INDEX_op_ld16s_i32: - tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2); - break; - case INDEX_op_ld16s_i64: - tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - tcg_out_ldst(s, I3312_LDRW, a0, a1, a2); - break; - case INDEX_op_ld32s_i64: - tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2); - break; - case INDEX_op_ld_i64: - tcg_out_ldst(s, I3312_LDRX, a0, a1, a2); - break; - - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2); - break; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2); - break; - case INDEX_op_st_i64: - tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2); - break; - - case INDEX_op_add_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_add_i64: - if (c2) { - tcg_out_addsubi(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2); - } - break; - - case INDEX_op_sub_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_sub_i64: - if (c2) { - tcg_out_addsubi(s, ext, a0, a1, -a2); - } else { - tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); - } - break; - - case INDEX_op_neg_i64: - case INDEX_op_neg_i32: - tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); - break; - - case INDEX_op_and_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_and_i64: - if (c2) { - tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3510, AND, ext, a0, a1, a2); - } - break; - - case INDEX_op_andc_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_andc_i64: - if (c2) { - tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2); - } else { - tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); - } - break; - - case INDEX_op_or_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_or_i64: - if (c2) { - tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); - } - break; - - case INDEX_op_orc_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_orc_i64: 
- if (c2) { - tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2); - } else { - tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); - } - break; - - case INDEX_op_xor_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_xor_i64: - if (c2) { - tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); - } - break; - - case INDEX_op_eqv_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_eqv_i64: - if (c2) { - tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); - } else { - tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); - } - break; - - case INDEX_op_not_i64: - case INDEX_op_not_i32: - tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); - break; - - case INDEX_op_mul_i64: - case INDEX_op_mul_i32: - tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); - break; - - case INDEX_op_div_i64: - case INDEX_op_div_i32: - tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); - break; - case INDEX_op_divu_i64: - case INDEX_op_divu_i32: - tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); - break; - - case INDEX_op_rem_i64: - case INDEX_op_rem_i32: - tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2); - tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); - break; - case INDEX_op_remu_i64: - case INDEX_op_remu_i32: - tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2); - tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); - break; - - case INDEX_op_shl_i64: - case INDEX_op_shl_i32: - if (c2) { - tcg_out_shl(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); - } - break; - - case INDEX_op_shr_i64: - case INDEX_op_shr_i32: - if (c2) { - tcg_out_shr(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2); - } - break; - - case INDEX_op_sar_i64: - case INDEX_op_sar_i32: - if (c2) { - tcg_out_sar(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); - } - break; - - case INDEX_op_rotr_i64: - case INDEX_op_rotr_i32: - if (c2) { - tcg_out_rotr(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); - } - break; - - case INDEX_op_rotl_i64: - case INDEX_op_rotl_i32: - if (c2) { - tcg_out_rotl(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2); - tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP); - } - break; - - case INDEX_op_brcond_i32: - a1 = (int32_t)a1; - /* FALLTHRU */ - case INDEX_op_brcond_i64: - tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); - break; - - case INDEX_op_setcond_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_setcond_i64: - tcg_out_cmp(s, ext, a1, a2, c2); - /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). 
*/ - tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, - TCG_REG_XZR, tcg_invert_cond(args[3])); - break; - - case INDEX_op_movcond_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_movcond_i64: - tcg_out_cmp(s, ext, a1, a2, c2); - tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); - break; - - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, a0, a1, a2, ext); - break; - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, REG0(0), a1, a2); - break; - - case INDEX_op_bswap64_i64: - tcg_out_rev64(s, a0, a1); - break; - case INDEX_op_bswap32_i64: - case INDEX_op_bswap32_i32: - tcg_out_rev32(s, a0, a1); - break; - case INDEX_op_bswap16_i64: - case INDEX_op_bswap16_i32: - tcg_out_rev16(s, a0, a1); - break; - - case INDEX_op_ext8s_i64: - case INDEX_op_ext8s_i32: - tcg_out_sxt(s, ext, MO_8, a0, a1); - break; - case INDEX_op_ext16s_i64: - case INDEX_op_ext16s_i32: - tcg_out_sxt(s, ext, MO_16, a0, a1); - break; - case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: - tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1); - break; - case INDEX_op_ext8u_i64: - case INDEX_op_ext8u_i32: - tcg_out_uxt(s, MO_8, a0, a1); - break; - case INDEX_op_ext16u_i64: - case INDEX_op_ext16u_i32: - tcg_out_uxt(s, MO_16, a0, a1); - break; - case INDEX_op_extu_i32_i64: - case INDEX_op_ext32u_i64: - tcg_out_movr(s, TCG_TYPE_I32, a0, a1); - break; - - case INDEX_op_deposit_i64: - case INDEX_op_deposit_i32: - tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]); - break; - - case INDEX_op_add2_i32: - tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), - (int32_t)args[4], args[5], const_args[4], - const_args[5], false); - break; - case INDEX_op_add2_i64: - tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], - args[5], const_args[4], const_args[5], false); - break; - case INDEX_op_sub2_i32: - tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), - (int32_t)args[4], args[5], const_args[4], - const_args[5], true); - break; - case INDEX_op_sub2_i64: - tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], - args[5], const_args[4], const_args[5], true); - break; - - case INDEX_op_muluh_i64: - tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); - break; - case INDEX_op_mulsh_i64: - tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); - break; - - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ - case INDEX_op_movi_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. 
*/ - default: - tcg_abort(); - } - -#undef REG0 -} - -static const TCGTargetOpDef aarch64_op_defs[] = { - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - { INDEX_op_br, { } }, - - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_ld8u_i64, { "r", "r" } }, - { INDEX_op_ld8s_i64, { "r", "r" } }, - { INDEX_op_ld16u_i64, { "r", "r" } }, - { INDEX_op_ld16s_i64, { "r", "r" } }, - { INDEX_op_ld32u_i64, { "r", "r" } }, - { INDEX_op_ld32s_i64, { "r", "r" } }, - { INDEX_op_ld_i64, { "r", "r" } }, - - { INDEX_op_st8_i32, { "rZ", "r" } }, - { INDEX_op_st16_i32, { "rZ", "r" } }, - { INDEX_op_st_i32, { "rZ", "r" } }, - { INDEX_op_st8_i64, { "rZ", "r" } }, - { INDEX_op_st16_i64, { "rZ", "r" } }, - { INDEX_op_st32_i64, { "rZ", "r" } }, - { INDEX_op_st_i64, { "rZ", "r" } }, - - { INDEX_op_add_i32, { "r", "r", "rA" } }, - { INDEX_op_add_i64, { "r", "r", "rA" } }, - { INDEX_op_sub_i32, { "r", "r", "rA" } }, - { INDEX_op_sub_i64, { "r", "r", "rA" } }, - { INDEX_op_mul_i32, { "r", "r", "r" } }, - { INDEX_op_mul_i64, { "r", "r", "r" } }, - { INDEX_op_div_i32, { "r", "r", "r" } }, - { INDEX_op_div_i64, { "r", "r", "r" } }, - { INDEX_op_divu_i32, { "r", "r", "r" } }, - { INDEX_op_divu_i64, { "r", "r", "r" } }, - { INDEX_op_rem_i32, { "r", "r", "r" } }, - { INDEX_op_rem_i64, { "r", "r", "r" } }, - { INDEX_op_remu_i32, { "r", "r", "r" } }, - { INDEX_op_remu_i64, { "r", "r", "r" } }, - { INDEX_op_and_i32, { "r", "r", "rL" } }, - { INDEX_op_and_i64, { "r", "r", "rL" } }, - { INDEX_op_or_i32, { "r", "r", "rL" } }, - { INDEX_op_or_i64, { "r", "r", "rL" } }, - { INDEX_op_xor_i32, { "r", "r", "rL" } }, - { INDEX_op_xor_i64, { "r", "r", "rL" } }, - { INDEX_op_andc_i32, { "r", "r", "rL" } }, - { INDEX_op_andc_i64, { "r", "r", "rL" } }, - { INDEX_op_orc_i32, { "r", "r", "rL" } }, - { INDEX_op_orc_i64, { "r", "r", "rL" } }, - { INDEX_op_eqv_i32, { "r", "r", "rL" } }, - { INDEX_op_eqv_i64, { "r", "r", "rL" } }, - - { INDEX_op_neg_i32, { "r", "r" } }, - { INDEX_op_neg_i64, { "r", "r" } }, - { INDEX_op_not_i32, { "r", "r" } }, - { INDEX_op_not_i64, { "r", "r" } }, - - { INDEX_op_shl_i32, { "r", "r", "ri" } }, - { INDEX_op_shr_i32, { "r", "r", "ri" } }, - { INDEX_op_sar_i32, { "r", "r", "ri" } }, - { INDEX_op_rotl_i32, { "r", "r", "ri" } }, - { INDEX_op_rotr_i32, { "r", "r", "ri" } }, - { INDEX_op_shl_i64, { "r", "r", "ri" } }, - { INDEX_op_shr_i64, { "r", "r", "ri" } }, - { INDEX_op_sar_i64, { "r", "r", "ri" } }, - { INDEX_op_rotl_i64, { "r", "r", "ri" } }, - { INDEX_op_rotr_i64, { "r", "r", "ri" } }, - - { INDEX_op_brcond_i32, { "r", "rA" } }, - { INDEX_op_brcond_i64, { "r", "rA" } }, - { INDEX_op_setcond_i32, { "r", "r", "rA" } }, - { INDEX_op_setcond_i64, { "r", "r", "rA" } }, - { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } }, - { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } }, - - { INDEX_op_qemu_ld_i32, { "r", "l" } }, - { INDEX_op_qemu_ld_i64, { "r", "l" } }, - { INDEX_op_qemu_st_i32, { "lZ", "l" } }, - { INDEX_op_qemu_st_i64, { "lZ", "l" } }, - - { INDEX_op_bswap16_i32, { "r", "r" } }, - { INDEX_op_bswap32_i32, { "r", "r" } }, - { INDEX_op_bswap16_i64, { "r", "r" } }, - { INDEX_op_bswap32_i64, { "r", "r" } }, - { INDEX_op_bswap64_i64, { "r", "r" } }, - - { INDEX_op_ext8s_i32, { "r", "r" } }, - { INDEX_op_ext16s_i32, { "r", "r" } }, - { INDEX_op_ext8u_i32, { "r", "r" } }, - { INDEX_op_ext16u_i32, { "r", "r" } }, - - { INDEX_op_ext8s_i64, { 
"r", "r" } }, - { INDEX_op_ext16s_i64, { "r", "r" } }, - { INDEX_op_ext32s_i64, { "r", "r" } }, - { INDEX_op_ext8u_i64, { "r", "r" } }, - { INDEX_op_ext16u_i64, { "r", "r" } }, - { INDEX_op_ext32u_i64, { "r", "r" } }, - { INDEX_op_ext_i32_i64, { "r", "r" } }, - { INDEX_op_extu_i32_i64, { "r", "r" } }, - - { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, - { INDEX_op_deposit_i64, { "r", "0", "rZ" } }, - - { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, - { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, - { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, - { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, - - { INDEX_op_muluh_i64, { "r", "r", "r" } }, - { INDEX_op_mulsh_i64, { "r", "r", "r" } }, - - { -1 }, -}; - -static void tcg_target_init(TCGContext *s) -{ - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); - - tcg_regset_set32(tcg_target_call_clobber_regs, 0, - (1 << TCG_REG_X0) | (1 << TCG_REG_X1) | - (1 << TCG_REG_X2) | (1 << TCG_REG_X3) | - (1 << TCG_REG_X4) | (1 << TCG_REG_X5) | - (1 << TCG_REG_X6) | (1 << TCG_REG_X7) | - (1 << TCG_REG_X8) | (1 << TCG_REG_X9) | - (1 << TCG_REG_X10) | (1 << TCG_REG_X11) | - (1 << TCG_REG_X12) | (1 << TCG_REG_X13) | - (1 << TCG_REG_X14) | (1 << TCG_REG_X15) | - (1 << TCG_REG_X16) | (1 << TCG_REG_X17) | - (1 << TCG_REG_X18) | (1 << TCG_REG_X30)); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ - - tcg_add_target_add_op_defs(aarch64_op_defs); -} - -/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ -#define PUSH_SIZE ((30 - 19 + 1) * 8) - -#define FRAME_SIZE \ - ((PUSH_SIZE \ - + TCG_STATIC_CALL_ARGS_SIZE \ - + CPU_TEMP_BUF_NLONGS * sizeof(long) \ - + TCG_TARGET_STACK_ALIGN - 1) \ - & ~(TCG_TARGET_STACK_ALIGN - 1)) - -/* We're expecting a 2 byte uleb128 encoded value. */ -QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); - -/* We're expecting to use a single ADDI insn. */ -QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); - -static void tcg_target_qemu_prologue(TCGContext *s) -{ - TCGReg r; - - /* Push (FP, LR) and allocate space for all saved registers. */ - tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, - TCG_REG_SP, -PUSH_SIZE, 1, 1); - - /* Set up frame pointer for canonical unwinding. */ - tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); - - /* Store callee-preserved regs x19..x28. */ - for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { - int ofs = (r - TCG_REG_X19 + 2) * 8; - tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); - } - - /* Make stack space for TCG locals. */ - tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, - FRAME_SIZE - PUSH_SIZE); - - /* Inform TCG about how to find TCG locals with register, offset, size. */ - tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, - CPU_TEMP_BUF_NLONGS * sizeof(long)); - -#if !defined(CONFIG_SOFTMMU) - if (USE_GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); - } -#endif - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); - - tb_ret_addr = s->code_ptr; - - /* Remove TCG locals stack space. 
*/ - tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, - FRAME_SIZE - PUSH_SIZE); - - /* Restore registers x19..x28. */ - for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { - int ofs = (r - TCG_REG_X19 + 2) * 8; - tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); - } - - /* Pop (FP, LR), restore SP to previous frame. */ - tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR, - TCG_REG_SP, PUSH_SIZE, 0, 1); - tcg_out_insn(s, 3207, RET, TCG_REG_LR); -} - -typedef struct { - DebugFrameHeader h; - uint8_t fde_def_cfa[4]; - uint8_t fde_reg_ofs[24]; -} DebugFrame; - -#define ELF_HOST_MACHINE EM_AARCH64 - -static const DebugFrame debug_frame = { - .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .h.cie.id = -1, - .h.cie.version = 1, - .h.cie.code_align = 1, - .h.cie.data_align = 0x78, /* sleb128 -8 */ - .h.cie.return_column = TCG_REG_LR, - - /* Total FDE size does not include the "len" member. */ - .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), - - .fde_def_cfa = { - 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ - (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ - (FRAME_SIZE >> 7) - }, - .fde_reg_ofs = { - 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */ - 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */ - 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */ - 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */ - 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */ - 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */ - 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */ - 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */ - 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */ - 0x80 + 19, 10, /* DW_CFA_offset, x1p, -80 */ - 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */ - 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */ - } -}; - -void tcg_register_jit(void *buf, size_t buf_size) -{ - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); -} diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c new file mode 100644 index 0000000000..8467d5d8b9 --- /dev/null +++ b/tcg/aarch64/tcg-target.inc.c @@ -0,0 +1,1893 @@ +/* + * Initial TCG Implementation for aarch64 + * + * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH + * Written by Claudio Fontana + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. + * + * See the COPYING file in the top-level directory for details. + */ + +#include "qemu/osdep.h" +#include "tcg-be-ldst.h" +#include "qemu/bitops.h" + +/* We're going to re-use TCGType in setting of the SF bit, which controls + the size of the operation performed. If we know the values match, it + makes things much cleaner. 
*/ +QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1); + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7", + "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15", + "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23", + "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp", +}; +#endif /* NDEBUG */ + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, + TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, + TCG_REG_X28, /* we will reserve this for guest_base if configured */ + + TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, + TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, + TCG_REG_X16, TCG_REG_X17, + + TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, + TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, + + /* X18 reserved by system */ + /* X19 reserved for AREG0 */ + /* X29 reserved as fp */ + /* X30 reserved as temporary */ +}; + +static const int tcg_target_call_iarg_regs[8] = { + TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, + TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7 +}; +static const int tcg_target_call_oarg_regs[1] = { + TCG_REG_X0 +}; + +#define TCG_REG_TMP TCG_REG_X30 + +#ifndef CONFIG_SOFTMMU +/* Note that XZR cannot be encoded in the address base register slot, + as that actaully encodes SP. So if we need to zero-extend the guest + address, via the address index register slot, we need to load even + a zero guest base into a register. */ +#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32) +#define TCG_REG_GUEST_BASE TCG_REG_X28 +#endif + +static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ + ptrdiff_t offset = target - code_ptr; + assert(offset == sextract64(offset, 0, 26)); + /* read instruction, mask away previous PC_REL26 parameter contents, + set the proper offset, then write back the instruction. */ + *code_ptr = deposit32(*code_ptr, 0, 26, offset); +} + +static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ + ptrdiff_t offset = target - code_ptr; + assert(offset == sextract64(offset, 0, 19)); + *code_ptr = deposit32(*code_ptr, 5, 19, offset); +} + +static inline void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + assert(addend == 0); + switch (type) { + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + reloc_pc26(code_ptr, (tcg_insn_unit *)value); + break; + case R_AARCH64_CONDBR19: + reloc_pc19(code_ptr, (tcg_insn_unit *)value); + break; + default: + tcg_abort(); + } +} + +#define TCG_CT_CONST_AIMM 0x100 +#define TCG_CT_CONST_LIMM 0x200 +#define TCG_CT_CONST_ZERO 0x400 +#define TCG_CT_CONST_MONE 0x800 + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, + const char **pct_str) +{ + const char *ct_str = *pct_str; + + switch (ct_str[0]) { + case 'r': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1); + break; + case 'l': /* qemu_ld / qemu_st address, data_reg */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1); +#ifdef CONFIG_SOFTMMU + /* x0 and x1 will be overwritten when reading the tlb entry, + and x2, and x3 for helper args, better to avoid using them. 
*/ + tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3); +#endif + break; + case 'A': /* Valid for arithmetic immediate (positive or negative). */ + ct->ct |= TCG_CT_CONST_AIMM; + break; + case 'L': /* Valid for logical immediate. */ + ct->ct |= TCG_CT_CONST_LIMM; + break; + case 'M': /* minus one */ + ct->ct |= TCG_CT_CONST_MONE; + break; + case 'Z': /* zero */ + ct->ct |= TCG_CT_CONST_ZERO; + break; + default: + return -1; + } + + ct_str++; + *pct_str = ct_str; + return 0; +} + +static inline bool is_aimm(uint64_t val) +{ + return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; +} + +static inline bool is_limm(uint64_t val) +{ + /* Taking a simplified view of the logical immediates for now, ignoring + the replication that can happen across the field. Match bit patterns + of the forms + 0....01....1 + 0..01..10..0 + and their inverses. */ + + /* Make things easier below, by testing the form with msb clear. */ + if ((int64_t)val < 0) { + val = ~val; + } + if (val == 0) { + return false; + } + val += val & -val; + return (val & (val - 1)) == 0; +} + +static int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct = arg_ct->ct; + + if (ct & TCG_CT_CONST) { + return 1; + } + if (type == TCG_TYPE_I32) { + val = (int32_t)val; + } + if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { + return 1; + } + if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { + return 1; + } + if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return 1; + } + if ((ct & TCG_CT_CONST_MONE) && val == -1) { + return 1; + } + + return 0; +} + +enum aarch64_cond_code { + COND_EQ = 0x0, + COND_NE = 0x1, + COND_CS = 0x2, /* Unsigned greater or equal */ + COND_HS = COND_CS, /* ALIAS greater or equal */ + COND_CC = 0x3, /* Unsigned less than */ + COND_LO = COND_CC, /* ALIAS Lower */ + COND_MI = 0x4, /* Negative */ + COND_PL = 0x5, /* Zero or greater */ + COND_VS = 0x6, /* Overflow */ + COND_VC = 0x7, /* No overflow */ + COND_HI = 0x8, /* Unsigned greater than */ + COND_LS = 0x9, /* Unsigned less or equal */ + COND_GE = 0xa, + COND_LT = 0xb, + COND_GT = 0xc, + COND_LE = 0xd, + COND_AL = 0xe, + COND_NV = 0xf, /* behaves like COND_AL here */ +}; + +static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { + [TCG_COND_EQ] = COND_EQ, + [TCG_COND_NE] = COND_NE, + [TCG_COND_LT] = COND_LT, + [TCG_COND_GE] = COND_GE, + [TCG_COND_LE] = COND_LE, + [TCG_COND_GT] = COND_GT, + /* unsigned */ + [TCG_COND_LTU] = COND_LO, + [TCG_COND_GTU] = COND_HI, + [TCG_COND_GEU] = COND_HS, + [TCG_COND_LEU] = COND_LS, +}; + +typedef enum { + LDST_ST = 0, /* store */ + LDST_LD = 1, /* load */ + LDST_LD_S_X = 2, /* load and sign-extend into Xt */ + LDST_LD_S_W = 3, /* load and sign-extend into Wt */ +} AArch64LdstType; + +/* We encode the format of the insn into the beginning of the name, so that + we can have the preprocessor help "typecheck" the insn vs the output + function. Arm didn't provide us with nice names for the formats, so we + use the section number of the architecture reference manual in which the + instruction group is described. */ +typedef enum { + /* Compare and branch (immediate). */ + I3201_CBZ = 0x34000000, + I3201_CBNZ = 0x35000000, + + /* Conditional branch (immediate). */ + I3202_B_C = 0x54000000, + + /* Unconditional branch (immediate). */ + I3206_B = 0x14000000, + I3206_BL = 0x94000000, + + /* Unconditional branch (register). 
*/ + I3207_BR = 0xd61f0000, + I3207_BLR = 0xd63f0000, + I3207_RET = 0xd65f0000, + + /* Load/store register. Described here as 3.3.12, but the helper + that emits them can transform to 3.3.10 or 3.3.13. */ + I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, + I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, + I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, + I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, + + I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, + I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, + I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, + I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, + + I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, + I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, + + I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, + I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, + I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, + + I3312_TO_I3310 = 0x00200800, + I3312_TO_I3313 = 0x01000000, + + /* Load/store register pair instructions. */ + I3314_LDP = 0x28400000, + I3314_STP = 0x28000000, + + /* Add/subtract immediate instructions. */ + I3401_ADDI = 0x11000000, + I3401_ADDSI = 0x31000000, + I3401_SUBI = 0x51000000, + I3401_SUBSI = 0x71000000, + + /* Bitfield instructions. */ + I3402_BFM = 0x33000000, + I3402_SBFM = 0x13000000, + I3402_UBFM = 0x53000000, + + /* Extract instruction. */ + I3403_EXTR = 0x13800000, + + /* Logical immediate instructions. */ + I3404_ANDI = 0x12000000, + I3404_ORRI = 0x32000000, + I3404_EORI = 0x52000000, + + /* Move wide immediate instructions. */ + I3405_MOVN = 0x12800000, + I3405_MOVZ = 0x52800000, + I3405_MOVK = 0x72800000, + + /* PC relative addressing instructions. */ + I3406_ADR = 0x10000000, + I3406_ADRP = 0x90000000, + + /* Add/subtract shifted register instructions (without a shift). */ + I3502_ADD = 0x0b000000, + I3502_ADDS = 0x2b000000, + I3502_SUB = 0x4b000000, + I3502_SUBS = 0x6b000000, + + /* Add/subtract shifted register instructions (with a shift). */ + I3502S_ADD_LSL = I3502_ADD, + + /* Add/subtract with carry instructions. */ + I3503_ADC = 0x1a000000, + I3503_SBC = 0x5a000000, + + /* Conditional select instructions. */ + I3506_CSEL = 0x1a800000, + I3506_CSINC = 0x1a800400, + + /* Data-processing (1 source) instructions. */ + I3507_REV16 = 0x5ac00400, + I3507_REV32 = 0x5ac00800, + I3507_REV64 = 0x5ac00c00, + + /* Data-processing (2 source) instructions. */ + I3508_LSLV = 0x1ac02000, + I3508_LSRV = 0x1ac02400, + I3508_ASRV = 0x1ac02800, + I3508_RORV = 0x1ac02c00, + I3508_SMULH = 0x9b407c00, + I3508_UMULH = 0x9bc07c00, + I3508_UDIV = 0x1ac00800, + I3508_SDIV = 0x1ac00c00, + + /* Data-processing (3 source) instructions. */ + I3509_MADD = 0x1b000000, + I3509_MSUB = 0x1b008000, + + /* Logical shifted register instructions (without a shift). */ + I3510_AND = 0x0a000000, + I3510_BIC = 0x0a200000, + I3510_ORR = 0x2a000000, + I3510_ORN = 0x2a200000, + I3510_EOR = 0x4a000000, + I3510_EON = 0x4a200000, + I3510_ANDS = 0x6a000000, +} AArch64Insn; + +static inline uint32_t tcg_in32(TCGContext *s) +{ + uint32_t v = *(uint32_t *)s->code_ptr; + return v; +} + +/* Emit an opcode with "type-checking" of the format. */ +#define tcg_out_insn(S, FMT, OP, ...) 
\ + glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) + +static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rt, int imm19) +{ + tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); +} + +static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, + TCGCond c, int imm19) +{ + tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); +} + +static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) +{ + tcg_out32(s, insn | (imm26 & 0x03ffffff)); +} + +static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) +{ + tcg_out32(s, insn | rn << 5); +} + +static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, + TCGReg r1, TCGReg r2, TCGReg rn, + tcg_target_long ofs, bool pre, bool w) +{ + insn |= 1u << 31; /* ext */ + insn |= pre << 24; + insn |= w << 23; + + assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); + insn |= (ofs & (0x7f << 3)) << (15 - 3); + + tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); +} + +static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn, uint64_t aimm) +{ + if (aimm > 0xfff) { + assert((aimm & 0xfff) == 0); + aimm >>= 12; + assert(aimm <= 0xfff); + aimm |= 1 << 12; /* apply LSL 12 */ + } + tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); +} + +/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 + (Logical immediate). Both insn groups have N, IMMR and IMMS fields + that feed the DecodeBitMasks pseudo function. */ +static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn, int n, int immr, int imms) +{ + tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 + | rn << 5 | rd); +} + +#define tcg_out_insn_3404 tcg_out_insn_3402 + +static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn, TCGReg rm, int imms) +{ + tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 + | rn << 5 | rd); +} + +/* This function is used for the Move (wide immediate) instruction group. + Note that SHIFT is a full shift count, not the 2 bit HW field. */ +static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, uint16_t half, unsigned shift) +{ + assert((shift & ~0x30) == 0); + tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); +} + +static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, + TCGReg rd, int64_t disp) +{ + tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); +} + +/* This function is for both 3.5.2 (Add/Subtract shifted register), for + the rare occasion when we actually want to supply a shift amount. */ +static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn, + TCGType ext, TCGReg rd, TCGReg rn, + TCGReg rm, int imm6) +{ + tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd); +} + +/* This function is for 3.5.2 (Add/subtract shifted register), + and 3.5.10 (Logical shifted register), for the vast majorty of cases + when we don't want to apply a shift. Thus it can also be used for + 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). 
*/ +static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn, TCGReg rm) +{ + tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd); +} + +#define tcg_out_insn_3503 tcg_out_insn_3502 +#define tcg_out_insn_3508 tcg_out_insn_3502 +#define tcg_out_insn_3510 tcg_out_insn_3502 + +static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c) +{ + tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd + | tcg_cond_to_aarch64[c] << 12); +} + +static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn) +{ + tcg_out32(s, insn | ext << 31 | rn << 5 | rd); +} + +static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra) +{ + tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd); +} + +static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn, + TCGReg rd, TCGReg base, TCGType ext, + TCGReg regoff) +{ + /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ + tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | + 0x4000 | ext << 13 | base << 5 | rd); +} + +static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, + TCGReg rd, TCGReg rn, intptr_t offset) +{ + tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd); +} + +static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, + TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) +{ + /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ + tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd); +} + +/* Register to register move using ORR (shifted register with no shift). */ +static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) +{ + tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); +} + +/* Register to register move using ADDI (move to/from SP). */ +static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) +{ + tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); +} + +/* This function is used for the Logical (immediate) instruction group. + The value of LIMM must satisfy IS_LIMM. See the comment above about + only supporting simplified logical immediates. */ +static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn, uint64_t limm) +{ + unsigned h, l, r, c; + + assert(is_limm(limm)); + + h = clz64(limm); + l = ctz64(limm); + if (l == 0) { + r = 0; /* form 0....01....1 */ + c = ctz64(~limm) - 1; + if (h == 0) { + r = clz64(~limm); /* form 1..10..01..1 */ + c += r; + } + } else { + r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ + c = r - h - 1; + } + if (ext == TCG_TYPE_I32) { + r &= 31; + c &= 31; + } + + tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); +} + +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, + tcg_target_long value) +{ + AArch64Insn insn; + int i, wantinv, shift; + tcg_target_long svalue = value; + tcg_target_long ivalue = ~value; + tcg_target_long imask; + + /* For 32-bit values, discard potential garbage in value. For 64-bit + values within [2**31, 2**32-1], we can create smaller sequences by + interpreting this as a negative 32-bit number, while ensuring that + the high 32 bits are cleared by setting SF=0. 
*/ + if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { + svalue = (int32_t)value; + value = (uint32_t)value; + ivalue = (uint32_t)ivalue; + type = TCG_TYPE_I32; + } + + /* Speed things up by handling the common case of small positive + and negative values specially. */ + if ((value & ~0xffffull) == 0) { + tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); + return; + } else if ((ivalue & ~0xffffull) == 0) { + tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); + return; + } + + /* Check for bitfield immediates. For the benefit of 32-bit quantities, + use the sign-extended value. That lets us match rotated values such + as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */ + if (is_limm(svalue)) { + tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); + return; + } + + /* Look for host pointer values within 4G of the PC. This happens + often when loading pointers to QEMU's own data structures. */ + if (type == TCG_TYPE_I64) { + tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12); + if (disp == sextract64(disp, 0, 21)) { + tcg_out_insn(s, 3406, ADRP, rd, disp); + if (value & 0xfff) { + tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); + } + return; + } + } + + /* Would it take fewer insns to begin with MOVN? For the value and its + inverse, count the number of 16-bit lanes that are 0. */ + for (i = wantinv = imask = 0; i < 64; i += 16) { + tcg_target_long mask = 0xffffull << i; + if ((value & mask) == 0) { + wantinv -= 1; + } + if ((ivalue & mask) == 0) { + wantinv += 1; + imask |= mask; + } + } + + /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */ + insn = I3405_MOVZ; + if (wantinv > 0) { + value = ivalue; + insn = I3405_MOVN; + } + + /* Find the lowest lane that is not 0x0000. */ + shift = ctz64(value) & (63 & -16); + tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift); + + if (wantinv > 0) { + /* Re-invert the value, so MOVK sees non-inverted bits. */ + value = ~value; + /* Clear out all the 0xffff lanes. */ + value ^= imask; + } + /* Clear out the lane that we just set. */ + value &= ~(0xffffUL << shift); + + /* Iterate until all lanes have been set, and thus cleared from VALUE. */ + while (value) { + shift = ctz64(value) & (63 & -16); + tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift); + value &= ~(0xffffUL << shift); + } +} + +/* Define something more legible for general use. */ +#define tcg_out_ldst_r tcg_out_insn_3310 + +static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, + TCGReg rd, TCGReg rn, intptr_t offset) +{ + TCGMemOp size = (uint32_t)insn >> 30; + + /* If the offset is naturally aligned and in range, then we can + use the scaled uimm12 encoding */ + if (offset >= 0 && !(offset & ((1 << size) - 1))) { + uintptr_t scaled_uimm = offset >> size; + if (scaled_uimm <= 0xfff) { + tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); + return; + } + } + + /* Small signed offsets can use the unscaled encoding. */ + if (offset >= -256 && offset < 256) { + tcg_out_insn_3312(s, insn, rd, rn, offset); + return; + } + + /* Worst-case scenario, move offset to temp register, use reg offset. 
*/ + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); + tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); +} + +static inline void tcg_out_mov(TCGContext *s, + TCGType type, TCGReg ret, TCGReg arg) +{ + if (ret != arg) { + tcg_out_movr(s, type, ret, arg); + } +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX, + arg, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX, + arg, arg1, arg2); +} + +static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd, + TCGReg rn, unsigned int a, unsigned int b) +{ + tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b); +} + +static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd, + TCGReg rn, unsigned int a, unsigned int b) +{ + tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b); +} + +static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd, + TCGReg rn, unsigned int a, unsigned int b) +{ + tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b); +} + +static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, + TCGReg rn, TCGReg rm, unsigned int a) +{ + tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); +} + +static inline void tcg_out_shl(TCGContext *s, TCGType ext, + TCGReg rd, TCGReg rn, unsigned int m) +{ + int bits = ext ? 64 : 32; + int max = bits - 1; + tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max)); +} + +static inline void tcg_out_shr(TCGContext *s, TCGType ext, + TCGReg rd, TCGReg rn, unsigned int m) +{ + int max = ext ? 63 : 31; + tcg_out_ubfm(s, ext, rd, rn, m & max, max); +} + +static inline void tcg_out_sar(TCGContext *s, TCGType ext, + TCGReg rd, TCGReg rn, unsigned int m) +{ + int max = ext ? 63 : 31; + tcg_out_sbfm(s, ext, rd, rn, m & max, max); +} + +static inline void tcg_out_rotr(TCGContext *s, TCGType ext, + TCGReg rd, TCGReg rn, unsigned int m) +{ + int max = ext ? 63 : 31; + tcg_out_extr(s, ext, rd, rn, rn, m & max); +} + +static inline void tcg_out_rotl(TCGContext *s, TCGType ext, + TCGReg rd, TCGReg rn, unsigned int m) +{ + int bits = ext ? 64 : 32; + int max = bits - 1; + tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max)); +} + +static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, + TCGReg rn, unsigned lsb, unsigned width) +{ + unsigned size = ext ? 64 : 32; + unsigned a = (size - lsb) & (size - 1); + unsigned b = width - 1; + tcg_out_bfm(s, ext, rd, rn, a, b); +} + +static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, + tcg_target_long b, bool const_b) +{ + if (const_b) { + /* Using CMP or CMN aliases. */ + if (b >= 0) { + tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); + } else { + tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); + } + } else { + /* Using CMP alias SUBS wzr, Wn, Wm */ + tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); + } +} + +static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target) +{ + ptrdiff_t offset = target - s->code_ptr; + assert(offset == sextract64(offset, 0, 26)); + tcg_out_insn(s, 3206, B, offset); +} + +static inline void tcg_out_goto_noaddr(TCGContext *s) +{ + /* We pay attention here to not modify the branch target by reading from + the buffer. This ensure that caches and memory are kept coherent during + retranslation. 
Mask away possible garbage in the high bits for the + first translation, while keeping the offset bits for retranslation. */ + uint32_t old = tcg_in32(s); + tcg_out_insn(s, 3206, B, old); +} + +static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c) +{ + /* See comments in tcg_out_goto_noaddr. */ + uint32_t old = tcg_in32(s) >> 5; + tcg_out_insn(s, 3202, B_C, c, old); +} + +static inline void tcg_out_callr(TCGContext *s, TCGReg reg) +{ + tcg_out_insn(s, 3207, BLR, reg); +} + +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target) +{ + ptrdiff_t offset = target - s->code_ptr; + if (offset == sextract64(offset, 0, 26)) { + tcg_out_insn(s, 3206, BL, offset); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); + tcg_out_callr(s, TCG_REG_TMP); + } +} + +void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) +{ + tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr; + tcg_insn_unit *target = (tcg_insn_unit *)addr; + + reloc_pc26(code_ptr, target); + flush_icache_range(jmp_addr, jmp_addr + 4); +} + +static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) +{ + if (!l->has_value) { + tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); + tcg_out_goto_noaddr(s); + } else { + tcg_out_goto(s, l->u.value_ptr); + } +} + +static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a, + TCGArg b, bool b_const, TCGLabel *l) +{ + intptr_t offset; + bool need_cmp; + + if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { + need_cmp = false; + } else { + need_cmp = true; + tcg_out_cmp(s, ext, a, b, b_const); + } + + if (!l->has_value) { + tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); + offset = tcg_in32(s) >> 5; + } else { + offset = l->u.value_ptr - s->code_ptr; + assert(offset == sextract64(offset, 0, 19)); + } + + if (need_cmp) { + tcg_out_insn(s, 3202, B_C, c, offset); + } else if (c == TCG_COND_EQ) { + tcg_out_insn(s, 3201, CBZ, ext, a, offset); + } else { + tcg_out_insn(s, 3201, CBNZ, ext, a, offset); + } +} + +static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn) +{ + tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn); +} + +static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn) +{ + tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn); +} + +static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn) +{ + tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn); +} + +static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits, + TCGReg rd, TCGReg rn) +{ + /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ + int bits = (8 << s_bits) - 1; + tcg_out_sbfm(s, ext, rd, rn, 0, bits); +} + +static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits, + TCGReg rd, TCGReg rn) +{ + /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ + int bits = (8 << s_bits) - 1; + tcg_out_ubfm(s, 0, rd, rn, 0, bits); +} + +static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, + TCGReg rn, int64_t aimm) +{ + if (aimm >= 0) { + tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); + } else { + tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); + } +} + +static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl, + TCGReg rh, TCGReg al, TCGReg ah, + tcg_target_long bl, tcg_target_long bh, + bool const_bl, bool const_bh, bool sub) +{ + TCGReg orig_rl = rl; + AArch64Insn insn; + + if (rl == ah || (!const_bh && rl == bh)) { + rl = TCG_REG_TMP; + } + + if (const_bl) { + insn = I3401_ADDSI; + if ((bl < 0) ^ sub) { + insn = I3401_SUBSI; + bl = 
-bl; + } + tcg_out_insn_3401(s, insn, ext, rl, al, bl); + } else { + tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); + } + + insn = I3503_ADC; + if (const_bh) { + /* Note that the only two constants we support are 0 and -1, and + that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ + if ((bh != 0) ^ sub) { + insn = I3503_SBC; + } + bh = TCG_REG_XZR; + } else if (sub) { + insn = I3503_SBC; + } + tcg_out_insn_3503(s, insn, ext, rh, ah, bh); + + tcg_out_mov(s, ext, orig_rl, rl); +} + +#ifdef CONFIG_SOFTMMU +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, + * TCGMemOpIdx oi, uintptr_t ra) + */ +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, +}; + +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, + * uintxx_t val, TCGMemOpIdx oi, + * uintptr_t ra) + */ +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, +}; + +static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target) +{ + ptrdiff_t offset = tcg_pcrel_diff(s, target); + assert(offset == sextract64(offset, 0, 21)); + tcg_out_insn(s, 3406, ADR, rd, offset); +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGMemOpIdx oi = lb->oi; + TCGMemOp opc = get_memop(oi); + TCGMemOp size = opc & MO_SIZE; + + reloc_pc19(lb->label_ptr[0], s->code_ptr); + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); + tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); + tcg_out_adr(s, TCG_REG_X3, lb->raddr); + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); + if (opc & MO_SIGN) { + tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); + } else { + tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0); + } + + tcg_out_goto(s, lb->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGMemOpIdx oi = lb->oi; + TCGMemOp opc = get_memop(oi); + TCGMemOp size = opc & MO_SIZE; + + reloc_pc19(lb->label_ptr[0], s->code_ptr); + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); + tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); + tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); + tcg_out_adr(s, TCG_REG_X4, lb->raddr); + tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_goto(s, lb->raddr); +} + +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, + TCGType ext, TCGReg data_reg, TCGReg addr_reg, + tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->oi = oi; + label->type = ext; + label->datalo_reg = data_reg; + label->addrlo_reg = addr_reg; + label->raddr = raddr; + label->label_ptr[0] = label_ptr; +} + +/* Load and compare a TLB entry, emitting the conditional jump to the + slow path for the failure case, which will be patched later when finalizing + the slow path. Generated code returns the host addend in X1, + clobbers X0,X2,X3,TMP. 
*/ +static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc, + tcg_insn_unit **label_ptr, int mem_index, + bool is_read) +{ + int tlb_offset = is_read ? + offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); + int s_mask = (1 << (opc & MO_SIZE)) - 1; + TCGReg base = TCG_AREG0, x3; + uint64_t tlb_mask; + + /* For aligned accesses, we check the first byte and include the alignment + bits within the address. For unaligned access, we check that we don't + cross pages using the address of the last byte of the access. */ + if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) { + tlb_mask = TARGET_PAGE_MASK | s_mask; + x3 = addr_reg; + } else { + tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, + TCG_REG_X3, addr_reg, s_mask); + tlb_mask = TARGET_PAGE_MASK; + x3 = TCG_REG_X3; + } + + /* Extract the TLB index from the address into X0. + X0 = + addr_reg */ + tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg, + TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS); + + /* Store the page mask part of the address into X3. */ + tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, + TCG_REG_X3, x3, tlb_mask); + + /* Add any "high bits" from the tlb offset to the env address into X2, + to take advantage of the LSL12 form of the ADDI instruction. + X2 = env + (tlb_offset & 0xfff000) */ + if (tlb_offset & 0xfff000) { + tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base, + tlb_offset & 0xfff000); + base = TCG_REG_X2; + } + + /* Merge the tlb index contribution into X2. + X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */ + tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base, + TCG_REG_X0, CPU_TLB_ENTRY_BITS); + + /* Merge "low bits" from tlb offset, load the tlb comparator into X0. + X0 = load [X2 + (tlb_offset & 0x000fff)] */ + tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX, + TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff); + + /* Load the tlb addend. Do that early to avoid stalling. + X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */ + tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2, + (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) - + (is_read ? offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write))); + + /* Perform the address comparison. */ + tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0); + + /* If not equal, we jump to the slow path. */ + *label_ptr = s->code_ptr; + tcg_out_goto_cond_noaddr(s, TCG_COND_NE); +} + +#endif /* CONFIG_SOFTMMU */ + +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext, + TCGReg data_r, TCGReg addr_r, + TCGType otype, TCGReg off_r) +{ + const TCGMemOp bswap = memop & MO_BSWAP; + + switch (memop & MO_SSIZE) { + case MO_UB: + tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r); + break; + case MO_SB: + tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW, + data_r, addr_r, otype, off_r); + break; + case MO_UW: + tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); + if (bswap) { + tcg_out_rev16(s, data_r, data_r); + } + break; + case MO_SW: + if (bswap) { + tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); + tcg_out_rev16(s, data_r, data_r); + tcg_out_sxt(s, ext, MO_16, data_r, data_r); + } else { + tcg_out_ldst_r(s, (ext ? 
I3312_LDRSHX : I3312_LDRSHW), + data_r, addr_r, otype, off_r); + } + break; + case MO_UL: + tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); + if (bswap) { + tcg_out_rev32(s, data_r, data_r); + } + break; + case MO_SL: + if (bswap) { + tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); + tcg_out_rev32(s, data_r, data_r); + tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r); + } else { + tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); + } + break; + case MO_Q: + tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r); + if (bswap) { + tcg_out_rev64(s, data_r, data_r); + } + break; + default: + tcg_abort(); + } +} + +static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop, + TCGReg data_r, TCGReg addr_r, + TCGType otype, TCGReg off_r) +{ + const TCGMemOp bswap = memop & MO_BSWAP; + + switch (memop & MO_SIZE) { + case MO_8: + tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r); + break; + case MO_16: + if (bswap && data_r != TCG_REG_XZR) { + tcg_out_rev16(s, TCG_REG_TMP, data_r); + data_r = TCG_REG_TMP; + } + tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r); + break; + case MO_32: + if (bswap && data_r != TCG_REG_XZR) { + tcg_out_rev32(s, TCG_REG_TMP, data_r); + data_r = TCG_REG_TMP; + } + tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r); + break; + case MO_64: + if (bswap && data_r != TCG_REG_XZR) { + tcg_out_rev64(s, TCG_REG_TMP, data_r); + data_r = TCG_REG_TMP; + } + tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r); + break; + default: + tcg_abort(); + } +} + +static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOpIdx oi, TCGType ext) +{ + TCGMemOp memop = get_memop(oi); + const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; +#ifdef CONFIG_SOFTMMU + unsigned mem_index = get_mmuidx(oi); + tcg_insn_unit *label_ptr; + + tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1); + tcg_out_qemu_ld_direct(s, memop, ext, data_reg, + TCG_REG_X1, otype, addr_reg); + add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg, + s->code_ptr, label_ptr); +#else /* !CONFIG_SOFTMMU */ + if (USE_GUEST_BASE) { + tcg_out_qemu_ld_direct(s, memop, ext, data_reg, + TCG_REG_GUEST_BASE, otype, addr_reg); + } else { + tcg_out_qemu_ld_direct(s, memop, ext, data_reg, + addr_reg, TCG_TYPE_I64, TCG_REG_XZR); + } +#endif /* CONFIG_SOFTMMU */ +} + +static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOpIdx oi) +{ + TCGMemOp memop = get_memop(oi); + const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; +#ifdef CONFIG_SOFTMMU + unsigned mem_index = get_mmuidx(oi); + tcg_insn_unit *label_ptr; + + tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0); + tcg_out_qemu_st_direct(s, memop, data_reg, + TCG_REG_X1, otype, addr_reg); + add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64, + data_reg, addr_reg, s->code_ptr, label_ptr); +#else /* !CONFIG_SOFTMMU */ + if (USE_GUEST_BASE) { + tcg_out_qemu_st_direct(s, memop, data_reg, + TCG_REG_GUEST_BASE, otype, addr_reg); + } else { + tcg_out_qemu_st_direct(s, memop, data_reg, + addr_reg, TCG_TYPE_I64, TCG_REG_XZR); + } +#endif /* CONFIG_SOFTMMU */ +} + +static tcg_insn_unit *tb_ret_addr; + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg args[TCG_MAX_OP_ARGS], + const int const_args[TCG_MAX_OP_ARGS]) +{ + /* 99% of the time, we can signal the use of extension registers + by looking to see if the opcode handles 64-bit data. 
*/ + TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0; + + /* Hoist the loads of the most common arguments. */ + TCGArg a0 = args[0]; + TCGArg a1 = args[1]; + TCGArg a2 = args[2]; + int c2 = const_args[2]; + + /* Some operands are defined with "rZ" constraint, a register or + the zero register. These need not actually test args[I] == 0. */ +#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) + + switch (opc) { + case INDEX_op_exit_tb: + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); + tcg_out_goto(s, tb_ret_addr); + break; + + case INDEX_op_goto_tb: +#ifndef USE_DIRECT_JUMP +#error "USE_DIRECT_JUMP required for aarch64" +#endif + assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */ + s->tb_jmp_offset[a0] = tcg_current_code_size(s); + /* actual branch destination will be patched by + aarch64_tb_set_jmp_target later, beware retranslation. */ + tcg_out_goto_noaddr(s); + s->tb_next_offset[a0] = tcg_current_code_size(s); + break; + + case INDEX_op_br: + tcg_out_goto_label(s, arg_label(a0)); + break; + + case INDEX_op_ld8u_i32: + case INDEX_op_ld8u_i64: + tcg_out_ldst(s, I3312_LDRB, a0, a1, a2); + break; + case INDEX_op_ld8s_i32: + tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2); + break; + case INDEX_op_ld8s_i64: + tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2); + break; + case INDEX_op_ld16u_i32: + case INDEX_op_ld16u_i64: + tcg_out_ldst(s, I3312_LDRH, a0, a1, a2); + break; + case INDEX_op_ld16s_i32: + tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2); + break; + case INDEX_op_ld16s_i64: + tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2); + break; + case INDEX_op_ld_i32: + case INDEX_op_ld32u_i64: + tcg_out_ldst(s, I3312_LDRW, a0, a1, a2); + break; + case INDEX_op_ld32s_i64: + tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2); + break; + case INDEX_op_ld_i64: + tcg_out_ldst(s, I3312_LDRX, a0, a1, a2); + break; + + case INDEX_op_st8_i32: + case INDEX_op_st8_i64: + tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2); + break; + case INDEX_op_st16_i32: + case INDEX_op_st16_i64: + tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2); + break; + case INDEX_op_st_i32: + case INDEX_op_st32_i64: + tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2); + break; + case INDEX_op_st_i64: + tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2); + break; + + case INDEX_op_add_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_add_i64: + if (c2) { + tcg_out_addsubi(s, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2); + } + break; + + case INDEX_op_sub_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_sub_i64: + if (c2) { + tcg_out_addsubi(s, ext, a0, a1, -a2); + } else { + tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); + } + break; + + case INDEX_op_neg_i64: + case INDEX_op_neg_i32: + tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); + break; + + case INDEX_op_and_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_and_i64: + if (c2) { + tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3510, AND, ext, a0, a1, a2); + } + break; + + case INDEX_op_andc_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_andc_i64: + if (c2) { + tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2); + } else { + tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); + } + break; + + case INDEX_op_or_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_or_i64: + if (c2) { + tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); + } + break; + + case INDEX_op_orc_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_orc_i64: 
+ if (c2) { + tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2); + } else { + tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); + } + break; + + case INDEX_op_xor_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_xor_i64: + if (c2) { + tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); + } + break; + + case INDEX_op_eqv_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_eqv_i64: + if (c2) { + tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); + } else { + tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); + } + break; + + case INDEX_op_not_i64: + case INDEX_op_not_i32: + tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); + break; + + case INDEX_op_mul_i64: + case INDEX_op_mul_i32: + tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); + break; + + case INDEX_op_div_i64: + case INDEX_op_div_i32: + tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); + break; + case INDEX_op_divu_i64: + case INDEX_op_divu_i32: + tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); + break; + + case INDEX_op_rem_i64: + case INDEX_op_rem_i32: + tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2); + tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); + break; + case INDEX_op_remu_i64: + case INDEX_op_remu_i32: + tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2); + tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); + break; + + case INDEX_op_shl_i64: + case INDEX_op_shl_i32: + if (c2) { + tcg_out_shl(s, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); + } + break; + + case INDEX_op_shr_i64: + case INDEX_op_shr_i32: + if (c2) { + tcg_out_shr(s, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2); + } + break; + + case INDEX_op_sar_i64: + case INDEX_op_sar_i32: + if (c2) { + tcg_out_sar(s, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); + } + break; + + case INDEX_op_rotr_i64: + case INDEX_op_rotr_i32: + if (c2) { + tcg_out_rotr(s, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); + } + break; + + case INDEX_op_rotl_i64: + case INDEX_op_rotl_i32: + if (c2) { + tcg_out_rotl(s, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2); + tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP); + } + break; + + case INDEX_op_brcond_i32: + a1 = (int32_t)a1; + /* FALLTHRU */ + case INDEX_op_brcond_i64: + tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); + break; + + case INDEX_op_setcond_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_setcond_i64: + tcg_out_cmp(s, ext, a1, a2, c2); + /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). 
*/ + tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, + TCG_REG_XZR, tcg_invert_cond(args[3])); + break; + + case INDEX_op_movcond_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_movcond_i64: + tcg_out_cmp(s, ext, a1, a2, c2); + tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); + break; + + case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, a0, a1, a2, ext); + break; + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, REG0(0), a1, a2); + break; + + case INDEX_op_bswap64_i64: + tcg_out_rev64(s, a0, a1); + break; + case INDEX_op_bswap32_i64: + case INDEX_op_bswap32_i32: + tcg_out_rev32(s, a0, a1); + break; + case INDEX_op_bswap16_i64: + case INDEX_op_bswap16_i32: + tcg_out_rev16(s, a0, a1); + break; + + case INDEX_op_ext8s_i64: + case INDEX_op_ext8s_i32: + tcg_out_sxt(s, ext, MO_8, a0, a1); + break; + case INDEX_op_ext16s_i64: + case INDEX_op_ext16s_i32: + tcg_out_sxt(s, ext, MO_16, a0, a1); + break; + case INDEX_op_ext_i32_i64: + case INDEX_op_ext32s_i64: + tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1); + break; + case INDEX_op_ext8u_i64: + case INDEX_op_ext8u_i32: + tcg_out_uxt(s, MO_8, a0, a1); + break; + case INDEX_op_ext16u_i64: + case INDEX_op_ext16u_i32: + tcg_out_uxt(s, MO_16, a0, a1); + break; + case INDEX_op_extu_i32_i64: + case INDEX_op_ext32u_i64: + tcg_out_movr(s, TCG_TYPE_I32, a0, a1); + break; + + case INDEX_op_deposit_i64: + case INDEX_op_deposit_i32: + tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]); + break; + + case INDEX_op_add2_i32: + tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), + (int32_t)args[4], args[5], const_args[4], + const_args[5], false); + break; + case INDEX_op_add2_i64: + tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], + args[5], const_args[4], const_args[5], false); + break; + case INDEX_op_sub2_i32: + tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), + (int32_t)args[4], args[5], const_args[4], + const_args[5], true); + break; + case INDEX_op_sub2_i64: + tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], + args[5], const_args[4], const_args[5], true); + break; + + case INDEX_op_muluh_i64: + tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); + break; + case INDEX_op_mulsh_i64: + tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. 
*/ + default: + tcg_abort(); + } + +#undef REG0 +} + +static const TCGTargetOpDef aarch64_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_br, { } }, + + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_ld8u_i64, { "r", "r" } }, + { INDEX_op_ld8s_i64, { "r", "r" } }, + { INDEX_op_ld16u_i64, { "r", "r" } }, + { INDEX_op_ld16s_i64, { "r", "r" } }, + { INDEX_op_ld32u_i64, { "r", "r" } }, + { INDEX_op_ld32s_i64, { "r", "r" } }, + { INDEX_op_ld_i64, { "r", "r" } }, + + { INDEX_op_st8_i32, { "rZ", "r" } }, + { INDEX_op_st16_i32, { "rZ", "r" } }, + { INDEX_op_st_i32, { "rZ", "r" } }, + { INDEX_op_st8_i64, { "rZ", "r" } }, + { INDEX_op_st16_i64, { "rZ", "r" } }, + { INDEX_op_st32_i64, { "rZ", "r" } }, + { INDEX_op_st_i64, { "rZ", "r" } }, + + { INDEX_op_add_i32, { "r", "r", "rA" } }, + { INDEX_op_add_i64, { "r", "r", "rA" } }, + { INDEX_op_sub_i32, { "r", "r", "rA" } }, + { INDEX_op_sub_i64, { "r", "r", "rA" } }, + { INDEX_op_mul_i32, { "r", "r", "r" } }, + { INDEX_op_mul_i64, { "r", "r", "r" } }, + { INDEX_op_div_i32, { "r", "r", "r" } }, + { INDEX_op_div_i64, { "r", "r", "r" } }, + { INDEX_op_divu_i32, { "r", "r", "r" } }, + { INDEX_op_divu_i64, { "r", "r", "r" } }, + { INDEX_op_rem_i32, { "r", "r", "r" } }, + { INDEX_op_rem_i64, { "r", "r", "r" } }, + { INDEX_op_remu_i32, { "r", "r", "r" } }, + { INDEX_op_remu_i64, { "r", "r", "r" } }, + { INDEX_op_and_i32, { "r", "r", "rL" } }, + { INDEX_op_and_i64, { "r", "r", "rL" } }, + { INDEX_op_or_i32, { "r", "r", "rL" } }, + { INDEX_op_or_i64, { "r", "r", "rL" } }, + { INDEX_op_xor_i32, { "r", "r", "rL" } }, + { INDEX_op_xor_i64, { "r", "r", "rL" } }, + { INDEX_op_andc_i32, { "r", "r", "rL" } }, + { INDEX_op_andc_i64, { "r", "r", "rL" } }, + { INDEX_op_orc_i32, { "r", "r", "rL" } }, + { INDEX_op_orc_i64, { "r", "r", "rL" } }, + { INDEX_op_eqv_i32, { "r", "r", "rL" } }, + { INDEX_op_eqv_i64, { "r", "r", "rL" } }, + + { INDEX_op_neg_i32, { "r", "r" } }, + { INDEX_op_neg_i64, { "r", "r" } }, + { INDEX_op_not_i32, { "r", "r" } }, + { INDEX_op_not_i64, { "r", "r" } }, + + { INDEX_op_shl_i32, { "r", "r", "ri" } }, + { INDEX_op_shr_i32, { "r", "r", "ri" } }, + { INDEX_op_sar_i32, { "r", "r", "ri" } }, + { INDEX_op_rotl_i32, { "r", "r", "ri" } }, + { INDEX_op_rotr_i32, { "r", "r", "ri" } }, + { INDEX_op_shl_i64, { "r", "r", "ri" } }, + { INDEX_op_shr_i64, { "r", "r", "ri" } }, + { INDEX_op_sar_i64, { "r", "r", "ri" } }, + { INDEX_op_rotl_i64, { "r", "r", "ri" } }, + { INDEX_op_rotr_i64, { "r", "r", "ri" } }, + + { INDEX_op_brcond_i32, { "r", "rA" } }, + { INDEX_op_brcond_i64, { "r", "rA" } }, + { INDEX_op_setcond_i32, { "r", "r", "rA" } }, + { INDEX_op_setcond_i64, { "r", "r", "rA" } }, + { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } }, + { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } }, + + { INDEX_op_qemu_ld_i32, { "r", "l" } }, + { INDEX_op_qemu_ld_i64, { "r", "l" } }, + { INDEX_op_qemu_st_i32, { "lZ", "l" } }, + { INDEX_op_qemu_st_i64, { "lZ", "l" } }, + + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, + { INDEX_op_bswap16_i64, { "r", "r" } }, + { INDEX_op_bswap32_i64, { "r", "r" } }, + { INDEX_op_bswap64_i64, { "r", "r" } }, + + { INDEX_op_ext8s_i32, { "r", "r" } }, + { INDEX_op_ext16s_i32, { "r", "r" } }, + { INDEX_op_ext8u_i32, { "r", "r" } }, + { INDEX_op_ext16u_i32, { "r", "r" } }, + + { INDEX_op_ext8s_i64, { 
"r", "r" } }, + { INDEX_op_ext16s_i64, { "r", "r" } }, + { INDEX_op_ext32s_i64, { "r", "r" } }, + { INDEX_op_ext8u_i64, { "r", "r" } }, + { INDEX_op_ext16u_i64, { "r", "r" } }, + { INDEX_op_ext32u_i64, { "r", "r" } }, + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, + + { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, + { INDEX_op_deposit_i64, { "r", "0", "rZ" } }, + + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, + { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, + { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, + { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, + + { INDEX_op_muluh_i64, { "r", "r", "r" } }, + { INDEX_op_mulsh_i64, { "r", "r", "r" } }, + + { -1 }, +}; + +static void tcg_target_init(TCGContext *s) +{ + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); + + tcg_regset_set32(tcg_target_call_clobber_regs, 0, + (1 << TCG_REG_X0) | (1 << TCG_REG_X1) | + (1 << TCG_REG_X2) | (1 << TCG_REG_X3) | + (1 << TCG_REG_X4) | (1 << TCG_REG_X5) | + (1 << TCG_REG_X6) | (1 << TCG_REG_X7) | + (1 << TCG_REG_X8) | (1 << TCG_REG_X9) | + (1 << TCG_REG_X10) | (1 << TCG_REG_X11) | + (1 << TCG_REG_X12) | (1 << TCG_REG_X13) | + (1 << TCG_REG_X14) | (1 << TCG_REG_X15) | + (1 << TCG_REG_X16) | (1 << TCG_REG_X17) | + (1 << TCG_REG_X18) | (1 << TCG_REG_X30)); + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ + + tcg_add_target_add_op_defs(aarch64_op_defs); +} + +/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ +#define PUSH_SIZE ((30 - 19 + 1) * 8) + +#define FRAME_SIZE \ + ((PUSH_SIZE \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_NLONGS * sizeof(long) \ + + TCG_TARGET_STACK_ALIGN - 1) \ + & ~(TCG_TARGET_STACK_ALIGN - 1)) + +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +/* We're expecting to use a single ADDI insn. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); + +static void tcg_target_qemu_prologue(TCGContext *s) +{ + TCGReg r; + + /* Push (FP, LR) and allocate space for all saved registers. */ + tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, + TCG_REG_SP, -PUSH_SIZE, 1, 1); + + /* Set up frame pointer for canonical unwinding. */ + tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); + + /* Store callee-preserved regs x19..x28. */ + for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { + int ofs = (r - TCG_REG_X19 + 2) * 8; + tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); + } + + /* Make stack space for TCG locals. */ + tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, + FRAME_SIZE - PUSH_SIZE); + + /* Inform TCG about how to find TCG locals with register, offset, size. */ + tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, + CPU_TEMP_BUF_NLONGS * sizeof(long)); + +#if !defined(CONFIG_SOFTMMU) + if (USE_GUEST_BASE) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); + } +#endif + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); + + tb_ret_addr = s->code_ptr; + + /* Remove TCG locals stack space. 
*/ + tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, + FRAME_SIZE - PUSH_SIZE); + + /* Restore registers x19..x28. */ + for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { + int ofs = (r - TCG_REG_X19 + 2) * 8; + tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); + } + + /* Pop (FP, LR), restore SP to previous frame. */ + tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR, + TCG_REG_SP, PUSH_SIZE, 0, 1); + tcg_out_insn(s, 3207, RET, TCG_REG_LR); +} + +typedef struct { + DebugFrameHeader h; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[24]; +} DebugFrame; + +#define ELF_HOST_MACHINE EM_AARCH64 + +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = 0x78, /* sleb128 -8 */ + .h.cie.return_column = TCG_REG_LR, + + /* Total FDE size does not include the "len" member. */ + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + + .fde_def_cfa = { + 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */ + 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */ + 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */ + 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */ + 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */ + 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */ + 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */ + 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */ + 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */ + 0x80 + 19, 10, /* DW_CFA_offset, x1p, -80 */ + 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */ + 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */ + } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c deleted file mode 100644 index 146ac00f63..0000000000 --- a/tcg/arm/tcg-target.c +++ /dev/null @@ -1,2129 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2008 Andrzej Zaborowski - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "elf.h" -#include "tcg-be-ldst.h" - -/* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. 
*/ -#ifndef __ARM_ARCH -# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ - || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ - || defined(__ARM_ARCH_7EM__) -# define __ARM_ARCH 7 -# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ - || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ - || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) -# define __ARM_ARCH 6 -# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \ - || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \ - || defined(__ARM_ARCH_5TEJ__) -# define __ARM_ARCH 5 -# else -# define __ARM_ARCH 4 -# endif -#endif - -static int arm_arch = __ARM_ARCH; - -#if defined(__ARM_ARCH_5T__) \ - || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__) -# define use_armv5t_instructions 1 -#else -# define use_armv5t_instructions use_armv6_instructions -#endif - -#define use_armv6_instructions (__ARM_ARCH >= 6 || arm_arch >= 6) -#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7) - -#ifndef use_idiv_instructions -bool use_idiv_instructions; -#endif - -/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined. */ -#ifdef CONFIG_SOFTMMU -# define USING_SOFTMMU 1 -#else -# define USING_SOFTMMU 0 -#endif - -#ifndef NDEBUG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "%r0", - "%r1", - "%r2", - "%r3", - "%r4", - "%r5", - "%r6", - "%r7", - "%r8", - "%r9", - "%r10", - "%r11", - "%r12", - "%r13", - "%r14", - "%pc", -}; -#endif - -static const int tcg_target_reg_alloc_order[] = { - TCG_REG_R4, - TCG_REG_R5, - TCG_REG_R6, - TCG_REG_R7, - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10, - TCG_REG_R11, - TCG_REG_R13, - TCG_REG_R0, - TCG_REG_R1, - TCG_REG_R2, - TCG_REG_R3, - TCG_REG_R12, - TCG_REG_R14, -}; - -static const int tcg_target_call_iarg_regs[4] = { - TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3 -}; -static const int tcg_target_call_oarg_regs[2] = { - TCG_REG_R0, TCG_REG_R1 -}; - -#define TCG_REG_TMP TCG_REG_R12 - -static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target) -{ - ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2; - *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff); -} - -static void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - assert(type == R_ARM_PC24); - assert(addend == 0); - reloc_pc24(code_ptr, (tcg_insn_unit *)value); -} - -#define TCG_CT_CONST_ARM 0x100 -#define TCG_CT_CONST_INV 0x200 -#define TCG_CT_CONST_NEG 0x400 -#define TCG_CT_CONST_ZERO 0x800 - -/* parse target specific constraints */ -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str; - - ct_str = *pct_str; - switch (ct_str[0]) { - case 'I': - ct->ct |= TCG_CT_CONST_ARM; - break; - case 'K': - ct->ct |= TCG_CT_CONST_INV; - break; - case 'N': /* The gcc constraint letter is L, already used here. */ - ct->ct |= TCG_CT_CONST_NEG; - break; - case 'Z': - ct->ct |= TCG_CT_CONST_ZERO; - break; - - case 'r': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); - break; - - /* qemu_ld address */ - case 'l': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); -#ifdef CONFIG_SOFTMMU - /* r0-r2,lr will be overwritten when reading the tlb entry, - so don't use these. 
*/ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14); -#endif - break; - - /* qemu_st address & data */ - case 's': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); - /* r0-r2 will be overwritten when reading the tlb entry (softmmu only) - and r0-r1 doing the byte swapping, so don't use these. */ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); -#if defined(CONFIG_SOFTMMU) - /* Avoid clashes with registers being used for helper args */ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); -#if TARGET_LONG_BITS == 64 - /* Avoid clashes with registers being used for helper args */ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); -#endif - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14); -#endif - break; - - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - - return 0; -} - -static inline uint32_t rotl(uint32_t val, int n) -{ - return (val << n) | (val >> (32 - n)); -} - -/* ARM immediates for ALU instructions are made of an unsigned 8-bit - right-rotated by an even amount between 0 and 30. */ -static inline int encode_imm(uint32_t imm) -{ - int shift; - - /* simple case, only lower bits */ - if ((imm & ~0xff) == 0) - return 0; - /* then try a simple even shift */ - shift = ctz32(imm) & ~1; - if (((imm >> shift) & ~0xff) == 0) - return 32 - shift; - /* now try harder with rotations */ - if ((rotl(imm, 2) & ~0xff) == 0) - return 2; - if ((rotl(imm, 4) & ~0xff) == 0) - return 4; - if ((rotl(imm, 6) & ~0xff) == 0) - return 6; - /* imm can't be encoded */ - return -1; -} - -static inline int check_fit_imm(uint32_t imm) -{ - return encode_imm(imm) >= 0; -} - -/* Test if a constant matches the constraint. 
- * TODO: define constraints for: - * - * ldr/str offset: between -0xfff and 0xfff - * ldrh/strh offset: between -0xff and 0xff - * mov operand2: values represented with x << (2 * y), x < 0x100 - * add, sub, eor...: ditto - */ -static inline int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - int ct; - ct = arg_ct->ct; - if (ct & TCG_CT_CONST) { - return 1; - } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) { - return 1; - } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) { - return 1; - } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) { - return 1; - } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { - return 1; - } else { - return 0; - } -} - -#define TO_CPSR (1 << 20) - -typedef enum { - ARITH_AND = 0x0 << 21, - ARITH_EOR = 0x1 << 21, - ARITH_SUB = 0x2 << 21, - ARITH_RSB = 0x3 << 21, - ARITH_ADD = 0x4 << 21, - ARITH_ADC = 0x5 << 21, - ARITH_SBC = 0x6 << 21, - ARITH_RSC = 0x7 << 21, - ARITH_TST = 0x8 << 21 | TO_CPSR, - ARITH_CMP = 0xa << 21 | TO_CPSR, - ARITH_CMN = 0xb << 21 | TO_CPSR, - ARITH_ORR = 0xc << 21, - ARITH_MOV = 0xd << 21, - ARITH_BIC = 0xe << 21, - ARITH_MVN = 0xf << 21, - - INSN_LDR_IMM = 0x04100000, - INSN_LDR_REG = 0x06100000, - INSN_STR_IMM = 0x04000000, - INSN_STR_REG = 0x06000000, - - INSN_LDRH_IMM = 0x005000b0, - INSN_LDRH_REG = 0x001000b0, - INSN_LDRSH_IMM = 0x005000f0, - INSN_LDRSH_REG = 0x001000f0, - INSN_STRH_IMM = 0x004000b0, - INSN_STRH_REG = 0x000000b0, - - INSN_LDRB_IMM = 0x04500000, - INSN_LDRB_REG = 0x06500000, - INSN_LDRSB_IMM = 0x005000d0, - INSN_LDRSB_REG = 0x001000d0, - INSN_STRB_IMM = 0x04400000, - INSN_STRB_REG = 0x06400000, - - INSN_LDRD_IMM = 0x004000d0, - INSN_LDRD_REG = 0x000000d0, - INSN_STRD_IMM = 0x004000f0, - INSN_STRD_REG = 0x000000f0, -} ARMInsn; - -#define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00) -#define SHIFT_IMM_LSR(im) (((im) << 7) | 0x20) -#define SHIFT_IMM_ASR(im) (((im) << 7) | 0x40) -#define SHIFT_IMM_ROR(im) (((im) << 7) | 0x60) -#define SHIFT_REG_LSL(rs) (((rs) << 8) | 0x10) -#define SHIFT_REG_LSR(rs) (((rs) << 8) | 0x30) -#define SHIFT_REG_ASR(rs) (((rs) << 8) | 0x50) -#define SHIFT_REG_ROR(rs) (((rs) << 8) | 0x70) - -enum arm_cond_code_e { - COND_EQ = 0x0, - COND_NE = 0x1, - COND_CS = 0x2, /* Unsigned greater or equal */ - COND_CC = 0x3, /* Unsigned less than */ - COND_MI = 0x4, /* Negative */ - COND_PL = 0x5, /* Zero or greater */ - COND_VS = 0x6, /* Overflow */ - COND_VC = 0x7, /* No overflow */ - COND_HI = 0x8, /* Unsigned greater than */ - COND_LS = 0x9, /* Unsigned less or equal */ - COND_GE = 0xa, - COND_LT = 0xb, - COND_GT = 0xc, - COND_LE = 0xd, - COND_AL = 0xe, -}; - -static const uint8_t tcg_cond_to_arm_cond[] = { - [TCG_COND_EQ] = COND_EQ, - [TCG_COND_NE] = COND_NE, - [TCG_COND_LT] = COND_LT, - [TCG_COND_GE] = COND_GE, - [TCG_COND_LE] = COND_LE, - [TCG_COND_GT] = COND_GT, - /* unsigned */ - [TCG_COND_LTU] = COND_CC, - [TCG_COND_GEU] = COND_CS, - [TCG_COND_LEU] = COND_LS, - [TCG_COND_GTU] = COND_HI, -}; - -static inline void tcg_out_bx(TCGContext *s, int cond, int rn) -{ - tcg_out32(s, (cond << 28) | 0x012fff10 | rn); -} - -static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset) -{ - tcg_out32(s, (cond << 28) | 0x0a000000 | - (((offset - 8) >> 2) & 0x00ffffff)); -} - -static inline void tcg_out_b_noaddr(TCGContext *s, int cond) -{ - /* We pay attention here to not modify the branch target by masking - the corresponding bytes. This ensure that caches and memory are - kept coherent during retranslation. 
*/ - tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a)); -} - -static inline void tcg_out_bl_noaddr(TCGContext *s, int cond) -{ - /* We pay attention here to not modify the branch target by masking - the corresponding bytes. This ensure that caches and memory are - kept coherent during retranslation. */ - tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b)); -} - -static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset) -{ - tcg_out32(s, (cond << 28) | 0x0b000000 | - (((offset - 8) >> 2) & 0x00ffffff)); -} - -static inline void tcg_out_blx(TCGContext *s, int cond, int rn) -{ - tcg_out32(s, (cond << 28) | 0x012fff30 | rn); -} - -static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset) -{ - tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) | - (((offset - 8) >> 2) & 0x00ffffff)); -} - -static inline void tcg_out_dat_reg(TCGContext *s, - int cond, int opc, int rd, int rn, int rm, int shift) -{ - tcg_out32(s, (cond << 28) | (0 << 25) | opc | - (rn << 16) | (rd << 12) | shift | rm); -} - -static inline void tcg_out_nop(TCGContext *s) -{ - if (use_armv7_instructions) { - /* Architected nop introduced in v6k. */ - /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this - also Just So Happened to do nothing on pre-v6k so that we - don't need to conditionalize it? */ - tcg_out32(s, 0xe320f000); - } else { - /* Prior to that the assembler uses mov r0, r0. */ - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0)); - } -} - -static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm) -{ - /* Simple reg-reg move, optimising out the 'do nothing' case */ - if (rd != rm) { - tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0)); - } -} - -static inline void tcg_out_dat_imm(TCGContext *s, - int cond, int opc, int rd, int rn, int im) -{ - tcg_out32(s, (cond << 28) | (1 << 25) | opc | - (rn << 16) | (rd << 12) | im); -} - -static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg) -{ - int rot, opc, rn; - - /* For armv7, make sure not to use movw+movt when mov/mvn would do. - Speed things up by only checking when movt would be required. - Prior to armv7, have one go at fully rotated immediates before - doing the decomposition thing below. */ - if (!use_armv7_instructions || (arg & 0xffff0000)) { - rot = encode_imm(arg); - if (rot >= 0) { - tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, - rotl(arg, rot) | (rot << 7)); - return; - } - rot = encode_imm(~arg); - if (rot >= 0) { - tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, - rotl(~arg, rot) | (rot << 7)); - return; - } - } - - /* Use movw + movt. */ - if (use_armv7_instructions) { - /* movw */ - tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12) - | ((arg << 4) & 0x000f0000) | (arg & 0xfff)); - if (arg & 0xffff0000) { - /* movt */ - tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12) - | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff)); - } - return; - } - - /* TODO: This is very suboptimal, we can easily have a constant - pool somewhere after all the instructions. */ - opc = ARITH_MOV; - rn = 0; - /* If we have lots of leading 1's, we can shorten the sequence by - beginning with mvn and then clearing higher bits with eor. 
*/ - if (clz32(~arg) > clz32(arg)) { - opc = ARITH_MVN, arg = ~arg; - } - do { - int i = ctz32(arg) & ~1; - rot = ((32 - i) << 7) & 0xf00; - tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot); - arg &= ~(0xff << i); - - opc = ARITH_EOR; - rn = rd; - } while (arg); -} - -static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst, - TCGArg lhs, TCGArg rhs, int rhs_is_const) -{ - /* Emit either the reg,imm or reg,reg form of a data-processing insn. - * rhs must satisfy the "rI" constraint. - */ - if (rhs_is_const) { - int rot = encode_imm(rhs); - assert(rot >= 0); - tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); - } else { - tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); - } -} - -static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv, - TCGReg dst, TCGReg lhs, TCGArg rhs, - bool rhs_is_const) -{ - /* Emit either the reg,imm or reg,reg form of a data-processing insn. - * rhs must satisfy the "rIK" constraint. - */ - if (rhs_is_const) { - int rot = encode_imm(rhs); - if (rot < 0) { - rhs = ~rhs; - rot = encode_imm(rhs); - assert(rot >= 0); - opc = opinv; - } - tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); - } else { - tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); - } -} - -static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg, - TCGArg dst, TCGArg lhs, TCGArg rhs, - bool rhs_is_const) -{ - /* Emit either the reg,imm or reg,reg form of a data-processing insn. - * rhs must satisfy the "rIN" constraint. - */ - if (rhs_is_const) { - int rot = encode_imm(rhs); - if (rot < 0) { - rhs = -rhs; - rot = encode_imm(rhs); - assert(rot >= 0); - opc = opneg; - } - tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); - } else { - tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); - } -} - -static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd, - TCGReg rn, TCGReg rm) -{ - /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */ - if (!use_armv6_instructions && rd == rn) { - if (rd == rm) { - /* rd == rn == rm; copy an input to tmp first. 
*/ - tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); - rm = rn = TCG_REG_TMP; - } else { - rn = rm; - rm = rd; - } - } - /* mul */ - tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn); -} - -static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0, - TCGReg rd1, TCGReg rn, TCGReg rm) -{ - /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */ - if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { - if (rd0 == rm || rd1 == rm) { - tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); - rn = TCG_REG_TMP; - } else { - TCGReg t = rn; - rn = rm; - rm = t; - } - } - /* umull */ - tcg_out32(s, (cond << 28) | 0x00800090 | - (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); -} - -static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0, - TCGReg rd1, TCGReg rn, TCGReg rm) -{ - /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */ - if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { - if (rd0 == rm || rd1 == rm) { - tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); - rn = TCG_REG_TMP; - } else { - TCGReg t = rn; - rn = rm; - rm = t; - } - } - /* smull */ - tcg_out32(s, (cond << 28) | 0x00c00090 | - (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); -} - -static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm) -{ - tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); -} - -static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm) -{ - tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); -} - -static inline void tcg_out_ext8s(TCGContext *s, int cond, - int rd, int rn) -{ - if (use_armv6_instructions) { - /* sxtb */ - tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rn, SHIFT_IMM_LSL(24)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rd, SHIFT_IMM_ASR(24)); - } -} - -static inline void tcg_out_ext8u(TCGContext *s, int cond, - int rd, int rn) -{ - tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff); -} - -static inline void tcg_out_ext16s(TCGContext *s, int cond, - int rd, int rn) -{ - if (use_armv6_instructions) { - /* sxth */ - tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rn, SHIFT_IMM_LSL(16)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rd, SHIFT_IMM_ASR(16)); - } -} - -static inline void tcg_out_ext16u(TCGContext *s, int cond, - int rd, int rn) -{ - if (use_armv6_instructions) { - /* uxth */ - tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rn, SHIFT_IMM_LSL(16)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rd, SHIFT_IMM_LSR(16)); - } -} - -static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn) -{ - if (use_armv6_instructions) { - /* revsh */ - tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16)); - tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8)); - } -} - -static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn) -{ - if (use_armv6_instructions) { - /* rev16 */ - tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16)); 
- tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8)); - } -} - -/* swap the two low bytes assuming that the two high input bytes and the - two high output bit can hold any value. */ -static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn) -{ - if (use_armv6_instructions) { - /* rev16 */ - tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8)); - tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff); - tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8)); - } -} - -static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn) -{ - if (use_armv6_instructions) { - /* rev */ - tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_EOR, - TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16)); - tcg_out_dat_imm(s, cond, ARITH_BIC, - TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rn, SHIFT_IMM_ROR(8)); - tcg_out_dat_reg(s, cond, ARITH_EOR, - rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8)); - } -} - -bool tcg_target_deposit_valid(int ofs, int len) -{ - /* ??? Without bfi, we could improve over generic code by combining - the right-shift from a non-zero ofs with the orr. We do run into - problems when rd == rs, and the mask generated from ofs+len doesn't - fit into an immediate. We would have to be careful not to pessimize - wrt the optimizations performed on the expanded code. */ - return use_armv7_instructions; -} - -static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd, - TCGArg a1, int ofs, int len, bool const_a1) -{ - if (const_a1) { - /* bfi becomes bfc with rn == 15. */ - a1 = 15; - } - /* bfi/bfc */ - tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1 - | (ofs << 7) | ((ofs + len - 1) << 16)); -} - -/* Note that this routine is used for both LDR and LDRH formats, so we do - not wish to include an immediate shift at this point. 
*/ -static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, - TCGReg rn, TCGReg rm, bool u, bool p, bool w) -{ - tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) - | (w << 21) | (rn << 16) | (rt << 12) | rm); -} - -static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, - TCGReg rn, int imm8, bool p, bool w) -{ - bool u = 1; - if (imm8 < 0) { - imm8 = -imm8; - u = 0; - } - tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) | - (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf)); -} - -static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, - TCGReg rn, int imm12, bool p, bool w) -{ - bool u = 1; - if (imm12 < 0) { - imm12 = -imm12; - u = 0; - } - tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) | - (rn << 16) | (rt << 12) | imm12); -} - -static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm12) -{ - tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0); -} - -static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm12) -{ - tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0); -} - -static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm8) -{ - tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0); -} - -static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm8) -{ - tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0); -} - -static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0); -} - -/* Register pre-increment with base writeback. 
*/ -static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1); -} - -static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1); -} - -static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm8) -{ - tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0); -} - -static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm8) -{ - tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0); -} - -static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm8) -{ - tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0); -} - -static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm12) -{ - tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0); -} - -static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm12) -{ - tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0); -} - -static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm8) -{ - tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0); -} - -static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_ld32u(TCGContext *s, int cond, - int rd, int rn, int32_t offset) -{ - if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld32_12(s, cond, rd, rn, offset); -} - -static inline void tcg_out_st32(TCGContext *s, int cond, - int rd, int rn, int32_t offset) -{ - if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_st32_12(s, cond, rd, rn, offset); -} - -static inline void tcg_out_ld16u(TCGContext *s, int cond, - int rd, int rn, int32_t offset) -{ - if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld16u_8(s, cond, rd, rn, offset); -} - -static inline void tcg_out_ld16s(TCGContext *s, int cond, - int rd, int rn, int32_t offset) -{ - if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld16s_8(s, cond, rd, rn, offset); -} - -static inline void 
tcg_out_st16(TCGContext *s, int cond, - int rd, int rn, int32_t offset) -{ - if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_st16_8(s, cond, rd, rn, offset); -} - -static inline void tcg_out_ld8u(TCGContext *s, int cond, - int rd, int rn, int32_t offset) -{ - if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld8_12(s, cond, rd, rn, offset); -} - -static inline void tcg_out_ld8s(TCGContext *s, int cond, - int rd, int rn, int32_t offset) -{ - if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld8s_8(s, cond, rd, rn, offset); -} - -static inline void tcg_out_st8(TCGContext *s, int cond, - int rd, int rn, int32_t offset) -{ - if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_st8_12(s, cond, rd, rn, offset); -} - -/* The _goto case is normally between TBs within the same code buffer, and - * with the code buffer limited to 16MB we wouldn't need the long case. - * But we also use it for the tail-call to the qemu_ld/st helpers, which does. - */ -static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr) -{ - intptr_t addri = (intptr_t)addr; - ptrdiff_t disp = tcg_pcrel_diff(s, addr); - - if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) { - tcg_out_b(s, cond, disp); - return; - } - - tcg_out_movi32(s, cond, TCG_REG_TMP, addri); - if (use_armv5t_instructions) { - tcg_out_bx(s, cond, TCG_REG_TMP); - } else { - if (addri & 1) { - tcg_abort(); - } - tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP); - } -} - -/* The call case is mostly used for helpers - so it's not unreasonable - * for them to be beyond branch range */ -static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr) -{ - intptr_t addri = (intptr_t)addr; - ptrdiff_t disp = tcg_pcrel_diff(s, addr); - - if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) { - if (addri & 1) { - /* Use BLX if the target is in Thumb mode */ - if (!use_armv5t_instructions) { - tcg_abort(); - } - tcg_out_blx_imm(s, disp); - } else { - tcg_out_bl(s, COND_AL, disp); - } - } else if (use_armv7_instructions) { - tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri); - tcg_out_blx(s, COND_AL, TCG_REG_TMP); - } else { - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4); - tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); - tcg_out32(s, addri); - } -} - -static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l) -{ - if (l->has_value) { - tcg_out_goto(s, cond, l->u.value_ptr); - } else { - tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0); - tcg_out_b_noaddr(s, cond); - } -} - -#ifdef CONFIG_SOFTMMU -/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, - * int mmu_idx, uintptr_t ra) - */ -static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_SB] = helper_ret_ldsb_mmu, - - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_LESW] = helper_le_ldsw_mmu, - [MO_LESL] = helper_le_ldul_mmu, - - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, - [MO_BESW] = helper_be_ldsw_mmu, - [MO_BESL] = helper_be_ldul_mmu, -}; - -/* helper signature: 
helper_ret_st_mmu(CPUState *env, target_ulong addr, - * uintxx_t val, int mmu_idx, uintptr_t ra) - */ -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, -}; - -/* Helper routines for marshalling helper function arguments into - * the correct registers and stack. - * argreg is where we want to put this argument, arg is the argument itself. - * Return value is the updated argreg ready for the next call. - * Note that argreg 0..3 is real registers, 4+ on stack. - * - * We provide routines for arguments which are: immediate, 32 bit - * value in register, 16 and 8 bit values in register (which must be zero - * extended before use) and 64 bit value in a lo:hi register pair. - */ -#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \ -static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \ -{ \ - if (argreg < 4) { \ - MOV_ARG(s, COND_AL, argreg, arg); \ - } else { \ - int ofs = (argreg - 4) * 4; \ - EXT_ARG; \ - assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \ - tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \ - } \ - return argreg + 1; \ -} - -DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32, - (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u, - (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u, - (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, ) - -static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg, - TCGReg arglo, TCGReg arghi) -{ - /* 64 bit arguments must go in even/odd register pairs - * and in 8-aligned stack slots. - */ - if (argreg & 1) { - argreg++; - } - if (use_armv6_instructions && argreg >= 4 - && (arglo & 1) == 0 && arghi == arglo + 1) { - tcg_out_strd_8(s, COND_AL, arglo, - TCG_REG_CALL_STACK, (argreg - 4) * 4); - return argreg + 2; - } else { - argreg = tcg_out_arg_reg32(s, argreg, arglo); - argreg = tcg_out_arg_reg32(s, argreg, arghi); - return argreg; - } -} - -#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) - -/* We're expecting to use an 8-bit immediate and to mask. */ -QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8); - -/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset. - Using the offset of the second entry in the last tlb table ensures - that we can index all of the elements of the first entry. */ -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) - > 0xffff); - -/* Load and compare a TLB entry, leaving the flags set. Returns the register - containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */ - -static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, - TCGMemOp s_bits, int mem_index, bool is_load) -{ - TCGReg base = TCG_AREG0; - int cmp_off = - (is_load - ? 
offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) - : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); - int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); - - /* Should generate something like the following: - * shr tmp, addrlo, #TARGET_PAGE_BITS (1) - * add r2, env, #high - * and r0, tmp, #(CPU_TLB_SIZE - 1) (2) - * add r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS (3) - * ldr r0, [r2, #cmp] (4) - * tst addrlo, #s_mask - * ldr r2, [r2, #add] (5) - * cmpeq r0, tmp, lsl #TARGET_PAGE_BITS - */ - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, - 0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS)); - - /* We checked that the offset is contained within 16 bits above. */ - if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) { - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base, - (24 << 7) | (cmp_off >> 8)); - base = TCG_REG_R2; - add_off -= cmp_off & 0xff00; - cmp_off &= 0xff; - } - - tcg_out_dat_imm(s, COND_AL, ARITH_AND, - TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1); - tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base, - TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); - - /* Load the tlb comparator. Use ldrd if needed and available, - but due to how the pointer needs setting up, ldm isn't useful. - Base arm5 doesn't have ldrd, but armv5te does. */ - if (use_armv6_instructions && TARGET_LONG_BITS == 64) { - tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off); - } else { - tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off); - if (TARGET_LONG_BITS == 64) { - tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4); - } - } - - /* Check alignment. */ - if (s_bits) { - tcg_out_dat_imm(s, COND_AL, ARITH_TST, - 0, addrlo, (1 << s_bits) - 1); - } - - /* Load the tlb addend. */ - tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off); - - tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0, - TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); - - if (TARGET_LONG_BITS == 64) { - tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, - TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0)); - } - - return TCG_REG_R2; -} - -/* Record the context of a call to the out of line helper code for the slow - path for a load or store, so that we can later generate the correct - helper code. */ -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, - TCGReg datalo, TCGReg datahi, TCGReg addrlo, - TCGReg addrhi, tcg_insn_unit *raddr, - tcg_insn_unit *label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->datalo_reg = datalo; - label->datahi_reg = datahi; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - label->raddr = raddr; - label->label_ptr[0] = label_ptr; -} - -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGReg argreg, datalo, datahi; - TCGMemOpIdx oi = lb->oi; - TCGMemOp opc = get_memop(oi); - void *func; - - reloc_pc24(lb->label_ptr[0], s->code_ptr); - - argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); - } else { - argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); - } - argreg = tcg_out_arg_imm32(s, argreg, oi); - argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); - - /* For armv6 we can use the canonical unsigned helpers and minimize - icache usage. For pre-armv6, use the signed helpers since we do - not have a single insn sign-extend. 
*/ - if (use_armv6_instructions) { - func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]; - } else { - func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]; - if (opc & MO_SIGN) { - opc = MO_UL; - } - } - tcg_out_call(s, func); - - datalo = lb->datalo_reg; - datahi = lb->datahi_reg; - switch (opc & MO_SSIZE) { - case MO_SB: - tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0); - break; - case MO_SW: - tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0); - break; - default: - tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); - break; - case MO_Q: - if (datalo != TCG_REG_R1) { - tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); - tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); - } else if (datahi != TCG_REG_R0) { - tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); - tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); - } else { - tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0); - tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); - tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP); - } - break; - } - - tcg_out_goto(s, COND_AL, lb->raddr); -} - -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGReg argreg, datalo, datahi; - TCGMemOpIdx oi = lb->oi; - TCGMemOp opc = get_memop(oi); - - reloc_pc24(lb->label_ptr[0], s->code_ptr); - - argreg = TCG_REG_R0; - argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); - } else { - argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); - } - - datalo = lb->datalo_reg; - datahi = lb->datahi_reg; - switch (opc & MO_SIZE) { - case MO_8: - argreg = tcg_out_arg_reg8(s, argreg, datalo); - break; - case MO_16: - argreg = tcg_out_arg_reg16(s, argreg, datalo); - break; - case MO_32: - default: - argreg = tcg_out_arg_reg32(s, argreg, datalo); - break; - case MO_64: - argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi); - break; - } - - argreg = tcg_out_arg_imm32(s, argreg, oi); - argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); - - /* Tail-call to the helper, which will return to the fast path. */ - tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); -} -#endif /* SOFTMMU */ - -static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addend) -{ - TCGMemOp bswap = opc & MO_BSWAP; - - switch (opc & MO_SSIZE) { - case MO_UB: - tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend); - break; - case MO_SB: - tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend); - break; - case MO_UW: - tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend); - if (bswap) { - tcg_out_bswap16(s, COND_AL, datalo, datalo); - } - break; - case MO_SW: - if (bswap) { - tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend); - tcg_out_bswap16s(s, COND_AL, datalo, datalo); - } else { - tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend); - } - break; - case MO_UL: - default: - tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend); - if (bswap) { - tcg_out_bswap32(s, COND_AL, datalo, datalo); - } - break; - case MO_Q: - { - TCGReg dl = (bswap ? datahi : datalo); - TCGReg dh = (bswap ? datalo : datahi); - - /* Avoid ldrd for user-only emulation, to handle unaligned. 
*/ - if (USING_SOFTMMU && use_armv6_instructions - && (dl & 1) == 0 && dh == dl + 1) { - tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend); - } else if (dl != addend) { - tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo); - tcg_out_ld32_12(s, COND_AL, dh, addend, 4); - } else { - tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP, - addend, addrlo, SHIFT_IMM_LSL(0)); - tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0); - tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4); - } - if (bswap) { - tcg_out_bswap32(s, COND_AL, dl, dl); - tcg_out_bswap32(s, COND_AL, dh, dh); - } - } - break; - } -} - -static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo) -{ - TCGMemOp bswap = opc & MO_BSWAP; - - switch (opc & MO_SSIZE) { - case MO_UB: - tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0); - break; - case MO_SB: - tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0); - break; - case MO_UW: - tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0); - if (bswap) { - tcg_out_bswap16(s, COND_AL, datalo, datalo); - } - break; - case MO_SW: - if (bswap) { - tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0); - tcg_out_bswap16s(s, COND_AL, datalo, datalo); - } else { - tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0); - } - break; - case MO_UL: - default: - tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0); - if (bswap) { - tcg_out_bswap32(s, COND_AL, datalo, datalo); - } - break; - case MO_Q: - { - TCGReg dl = (bswap ? datahi : datalo); - TCGReg dh = (bswap ? datalo : datahi); - - /* Avoid ldrd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions - && (dl & 1) == 0 && dh == dl + 1) { - tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0); - } else if (dl == addrlo) { - tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4); - tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0); - } else { - tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0); - tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4); - } - if (bswap) { - tcg_out_bswap32(s, COND_AL, dl, dl); - tcg_out_bswap32(s, COND_AL, dh, dh); - } - } - break; - } -} - -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) -{ - TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); - TCGMemOpIdx oi; - TCGMemOp opc; -#ifdef CONFIG_SOFTMMU - int mem_index; - TCGReg addend; - tcg_insn_unit *label_ptr; -#endif - - datalo = *args++; - datahi = (is64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); - oi = *args++; - opc = get_memop(oi); - -#ifdef CONFIG_SOFTMMU - mem_index = get_mmuidx(oi); - addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1); - - /* This a conditional BL only to load a pointer within this opcode into LR - for the slow path. We will not be using the value for a tail call. 
*/ - label_ptr = s->code_ptr; - tcg_out_bl_noaddr(s, COND_NE); - - tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend); - - add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, - s->code_ptr, label_ptr); -#else /* !CONFIG_SOFTMMU */ - if (guest_base) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base); - tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP); - } else { - tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo); - } -#endif -} - -static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addend) -{ - TCGMemOp bswap = opc & MO_BSWAP; - - switch (opc & MO_SIZE) { - case MO_8: - tcg_out_st8_r(s, cond, datalo, addrlo, addend); - break; - case MO_16: - if (bswap) { - tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo); - tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend); - } else { - tcg_out_st16_r(s, cond, datalo, addrlo, addend); - } - break; - case MO_32: - default: - if (bswap) { - tcg_out_bswap32(s, cond, TCG_REG_R0, datalo); - tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend); - } else { - tcg_out_st32_r(s, cond, datalo, addrlo, addend); - } - break; - case MO_64: - /* Avoid strd for user-only emulation, to handle unaligned. */ - if (bswap) { - tcg_out_bswap32(s, cond, TCG_REG_R0, datahi); - tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo); - tcg_out_bswap32(s, cond, TCG_REG_R0, datalo); - tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4); - } else if (USING_SOFTMMU && use_armv6_instructions - && (datalo & 1) == 0 && datahi == datalo + 1) { - tcg_out_strd_r(s, cond, datalo, addrlo, addend); - } else { - tcg_out_st32_rwb(s, cond, datalo, addend, addrlo); - tcg_out_st32_12(s, cond, datahi, addend, 4); - } - break; - } -} - -static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo) -{ - TCGMemOp bswap = opc & MO_BSWAP; - - switch (opc & MO_SIZE) { - case MO_8: - tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0); - break; - case MO_16: - if (bswap) { - tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo); - tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0); - } else { - tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0); - } - break; - case MO_32: - default: - if (bswap) { - tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo); - tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0); - } else { - tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); - } - break; - case MO_64: - /* Avoid strd for user-only emulation, to handle unaligned. */ - if (bswap) { - tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi); - tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0); - tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo); - tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4); - } else if (USING_SOFTMMU && use_armv6_instructions - && (datalo & 1) == 0 && datahi == datalo + 1) { - tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0); - } else { - tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); - tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4); - } - break; - } -} - -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) -{ - TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); - TCGMemOpIdx oi; - TCGMemOp opc; -#ifdef CONFIG_SOFTMMU - int mem_index; - TCGReg addend; - tcg_insn_unit *label_ptr; -#endif - - datalo = *args++; - datahi = (is64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TARGET_LONG_BITS == 64 ? 
*args++ : 0); - oi = *args++; - opc = get_memop(oi); - -#ifdef CONFIG_SOFTMMU - mem_index = get_mmuidx(oi); - addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0); - - tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend); - - /* The conditional call must come last, as we're going to return here. */ - label_ptr = s->code_ptr; - tcg_out_bl_noaddr(s, COND_NE); - - add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, - s->code_ptr, label_ptr); -#else /* !CONFIG_SOFTMMU */ - if (guest_base) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base); - tcg_out_qemu_st_index(s, COND_AL, opc, datalo, - datahi, addrlo, TCG_REG_TMP); - } else { - tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo); - } -#endif -} - -static tcg_insn_unit *tb_ret_addr; - -static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, - const TCGArg *args, const int *const_args) -{ - TCGArg a0, a1, a2, a3, a4, a5; - int c; - - switch (opc) { - case INDEX_op_exit_tb: - tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]); - tcg_out_goto(s, COND_AL, tb_ret_addr); - break; - case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { - /* Direct jump method */ - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); - tcg_out_b_noaddr(s, COND_AL); - } else { - /* Indirect jump method */ - intptr_t ptr = (intptr_t)(s->tb_next + args[0]); - tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff); - tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff); - } - s->tb_next_offset[args[0]] = tcg_current_code_size(s); - break; - case INDEX_op_br: - tcg_out_goto_label(s, COND_AL, arg_label(args[0])); - break; - - case INDEX_op_ld8u_i32: - tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_ld8s_i32: - tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_ld16u_i32: - tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_ld16s_i32: - tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_ld_i32: - tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_st8_i32: - tcg_out_st8(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_st16_i32: - tcg_out_st16(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_st_i32: - tcg_out_st32(s, COND_AL, args[0], args[1], args[2]); - break; - - case INDEX_op_movcond_i32: - /* Constraints mean that v2 is always in the same register as dest, - * so we only need to do "if condition passed, move v1 to dest". 
- */ - tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, - args[1], args[2], const_args[2]); - tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV, - ARITH_MVN, args[0], 0, args[3], const_args[3]); - break; - case INDEX_op_add_i32: - tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB, - args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_sub_i32: - if (const_args[1]) { - if (const_args[2]) { - tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]); - } else { - tcg_out_dat_rI(s, COND_AL, ARITH_RSB, - args[0], args[2], args[1], 1); - } - } else { - tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD, - args[0], args[1], args[2], const_args[2]); - } - break; - case INDEX_op_and_i32: - tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC, - args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_andc_i32: - tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND, - args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_or_i32: - c = ARITH_ORR; - goto gen_arith; - case INDEX_op_xor_i32: - c = ARITH_EOR; - /* Fall through. */ - gen_arith: - tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_add2_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - a3 = args[3], a4 = args[4], a5 = args[5]; - if (a0 == a3 || (a0 == a5 && !const_args[5])) { - a0 = TCG_REG_TMP; - } - tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR, - a0, a2, a4, const_args[4]); - tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC, - a1, a3, a5, const_args[5]); - tcg_out_mov_reg(s, COND_AL, args[0], a0); - break; - case INDEX_op_sub2_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - a3 = args[3], a4 = args[4], a5 = args[5]; - if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) { - a0 = TCG_REG_TMP; - } - if (const_args[2]) { - if (const_args[4]) { - tcg_out_movi32(s, COND_AL, a0, a4); - a4 = a0; - } - tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1); - } else { - tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR, - ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]); - } - if (const_args[3]) { - if (const_args[5]) { - tcg_out_movi32(s, COND_AL, a1, a5); - a5 = a1; - } - tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1); - } else { - tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC, - a1, a3, a5, const_args[5]); - } - tcg_out_mov_reg(s, COND_AL, args[0], a0); - break; - case INDEX_op_neg_i32: - tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0); - break; - case INDEX_op_not_i32: - tcg_out_dat_reg(s, COND_AL, - ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0)); - break; - case INDEX_op_mul_i32: - tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_mulu2_i32: - tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]); - break; - case INDEX_op_muls2_i32: - tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]); - break; - /* XXX: Perhaps args[2] & 0x1f is wrong */ - case INDEX_op_shl_i32: - c = const_args[2] ? - SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]); - goto gen_shift32; - case INDEX_op_shr_i32: - c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) : - SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]); - goto gen_shift32; - case INDEX_op_sar_i32: - c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) : - SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]); - goto gen_shift32; - case INDEX_op_rotr_i32: - c = const_args[2] ? (args[2] & 0x1f) ? 
SHIFT_IMM_ROR(args[2] & 0x1f) : - SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]); - /* Fall through. */ - gen_shift32: - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c); - break; - - case INDEX_op_rotl_i32: - if (const_args[2]) { - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], - ((0x20 - args[2]) & 0x1f) ? - SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) : - SHIFT_IMM_LSL(0)); - } else { - tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20); - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], - SHIFT_REG_ROR(TCG_REG_TMP)); - } - break; - - case INDEX_op_brcond_i32: - tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, - args[0], args[1], const_args[1]); - tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], - arg_label(args[3])); - break; - case INDEX_op_brcond2_i32: - /* The resulting conditions are: - * TCG_COND_EQ --> a0 == a2 && a1 == a3, - * TCG_COND_NE --> (a0 != a2 && a1 == a3) || a1 != a3, - * TCG_COND_LT(U) --> (a0 < a2 && a1 == a3) || a1 < a3, - * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3), - * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3), - * TCG_COND_GT(U) --> (a0 > a2 && a1 == a3) || a1 > a3, - */ - tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, - args[1], args[3], const_args[3]); - tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0, - args[0], args[2], const_args[2]); - tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], - arg_label(args[5])); - break; - case INDEX_op_setcond_i32: - tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, - args[1], args[2], const_args[2]); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]], - ARITH_MOV, args[0], 0, 1); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])], - ARITH_MOV, args[0], 0, 0); - break; - case INDEX_op_setcond2_i32: - /* See brcond2_i32 comment */ - tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, - args[2], args[4], const_args[4]); - tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0, - args[1], args[3], const_args[3]); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]], - ARITH_MOV, args[0], 0, 1); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])], - ARITH_MOV, args[0], 0, 0); - break; - - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args, 0); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args, 1); - break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args, 0); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args, 1); - break; - - case INDEX_op_bswap16_i32: - tcg_out_bswap16(s, COND_AL, args[0], args[1]); - break; - case INDEX_op_bswap32_i32: - tcg_out_bswap32(s, COND_AL, args[0], args[1]); - break; - - case INDEX_op_ext8s_i32: - tcg_out_ext8s(s, COND_AL, args[0], args[1]); - break; - case INDEX_op_ext16s_i32: - tcg_out_ext16s(s, COND_AL, args[0], args[1]); - break; - case INDEX_op_ext16u_i32: - tcg_out_ext16u(s, COND_AL, args[0], args[1]); - break; - - case INDEX_op_deposit_i32: - tcg_out_deposit(s, COND_AL, args[0], args[2], - args[3], args[4], const_args[2]); - break; - - case INDEX_op_div_i32: - tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_divu_i32: - tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]); - break; - - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ - case INDEX_op_call: /* Always emitted via tcg_out_call. 
*/ - default: - tcg_abort(); - } -} - -static const TCGTargetOpDef arm_op_defs[] = { - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - { INDEX_op_br, { } }, - - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_st8_i32, { "r", "r" } }, - { INDEX_op_st16_i32, { "r", "r" } }, - { INDEX_op_st_i32, { "r", "r" } }, - - /* TODO: "r", "r", "ri" */ - { INDEX_op_add_i32, { "r", "r", "rIN" } }, - { INDEX_op_sub_i32, { "r", "rI", "rIN" } }, - { INDEX_op_mul_i32, { "r", "r", "r" } }, - { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } }, - { INDEX_op_muls2_i32, { "r", "r", "r", "r" } }, - { INDEX_op_and_i32, { "r", "r", "rIK" } }, - { INDEX_op_andc_i32, { "r", "r", "rIK" } }, - { INDEX_op_or_i32, { "r", "r", "rI" } }, - { INDEX_op_xor_i32, { "r", "r", "rI" } }, - { INDEX_op_neg_i32, { "r", "r" } }, - { INDEX_op_not_i32, { "r", "r" } }, - - { INDEX_op_shl_i32, { "r", "r", "ri" } }, - { INDEX_op_shr_i32, { "r", "r", "ri" } }, - { INDEX_op_sar_i32, { "r", "r", "ri" } }, - { INDEX_op_rotl_i32, { "r", "r", "ri" } }, - { INDEX_op_rotr_i32, { "r", "r", "ri" } }, - - { INDEX_op_brcond_i32, { "r", "rIN" } }, - { INDEX_op_setcond_i32, { "r", "r", "rIN" } }, - { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } }, - - { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } }, - { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } }, - { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } }, - { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } }, - -#if TARGET_LONG_BITS == 32 - { INDEX_op_qemu_ld_i32, { "r", "l" } }, - { INDEX_op_qemu_ld_i64, { "r", "r", "l" } }, - { INDEX_op_qemu_st_i32, { "s", "s" } }, - { INDEX_op_qemu_st_i64, { "s", "s", "s" } }, -#else - { INDEX_op_qemu_ld_i32, { "r", "l", "l" } }, - { INDEX_op_qemu_ld_i64, { "r", "r", "l", "l" } }, - { INDEX_op_qemu_st_i32, { "s", "s", "s" } }, - { INDEX_op_qemu_st_i64, { "s", "s", "s", "s" } }, -#endif - - { INDEX_op_bswap16_i32, { "r", "r" } }, - { INDEX_op_bswap32_i32, { "r", "r" } }, - - { INDEX_op_ext8s_i32, { "r", "r" } }, - { INDEX_op_ext16s_i32, { "r", "r" } }, - { INDEX_op_ext16u_i32, { "r", "r" } }, - - { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, - - { INDEX_op_div_i32, { "r", "r", "r" } }, - { INDEX_op_divu_i32, { "r", "r", "r" } }, - - { -1 }, -}; - -static void tcg_target_init(TCGContext *s) -{ - /* Only probe for the platform and capabilities if we havn't already - determined maximum values at compile time. 
*/ -#ifndef use_idiv_instructions - { - unsigned long hwcap = qemu_getauxval(AT_HWCAP); - use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0; - } -#endif - if (__ARM_ARCH < 7) { - const char *pl = (const char *)qemu_getauxval(AT_PLATFORM); - if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') { - arm_arch = pl[1] - '0'; - } - } - - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); - tcg_regset_set32(tcg_target_call_clobber_regs, 0, - (1 << TCG_REG_R0) | - (1 << TCG_REG_R1) | - (1 << TCG_REG_R2) | - (1 << TCG_REG_R3) | - (1 << TCG_REG_R12) | - (1 << TCG_REG_R14)); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC); - - tcg_add_target_add_op_defs(arm_op_defs); -} - -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_ld32u(s, COND_AL, arg, arg1, arg2); -} - -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_st32(s, COND_AL, arg, arg1, arg2); -} - -static inline void tcg_out_mov(TCGContext *s, TCGType type, - TCGReg ret, TCGReg arg) -{ - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0)); -} - -static inline void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg ret, tcg_target_long arg) -{ - tcg_out_movi32(s, COND_AL, ret, arg); -} - -/* Compute frame size via macros, to share between tcg_target_qemu_prologue - and tcg_register_jit. */ - -#define PUSH_SIZE ((11 - 4 + 1 + 1) * sizeof(tcg_target_long)) - -#define FRAME_SIZE \ - ((PUSH_SIZE \ - + TCG_STATIC_CALL_ARGS_SIZE \ - + CPU_TEMP_BUF_NLONGS * sizeof(long) \ - + TCG_TARGET_STACK_ALIGN - 1) \ - & -TCG_TARGET_STACK_ALIGN) - -static void tcg_target_qemu_prologue(TCGContext *s) -{ - int stack_addend; - - /* Calling convention requires us to save r4-r11 and lr. */ - /* stmdb sp!, { r4 - r11, lr } */ - tcg_out32(s, (COND_AL << 28) | 0x092d4ff0); - - /* Reserve callee argument and tcg temp space. */ - stack_addend = FRAME_SIZE - PUSH_SIZE; - - tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK, - TCG_REG_CALL_STACK, stack_addend, 1); - tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, - CPU_TEMP_BUF_NLONGS * sizeof(long)); - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - - tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]); - tb_ret_addr = s->code_ptr; - - /* Epilogue. We branch here via tb_ret_addr. */ - tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK, - TCG_REG_CALL_STACK, stack_addend, 1); - - /* ldmia sp!, { r4 - r11, pc } */ - tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0); -} - -typedef struct { - DebugFrameHeader h; - uint8_t fde_def_cfa[4]; - uint8_t fde_reg_ofs[18]; -} DebugFrame; - -#define ELF_HOST_MACHINE EM_ARM - -/* We're expecting a 2 byte uleb128 encoded value. */ -QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); - -static const DebugFrame debug_frame = { - .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .h.cie.id = -1, - .h.cie.version = 1, - .h.cie.code_align = 1, - .h.cie.data_align = 0x7c, /* sleb128 -4 */ - .h.cie.return_column = 14, - - /* Total FDE size does not include the "len" member. */ - .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), - - .fde_def_cfa = { - 12, 13, /* DW_CFA_def_cfa sp, ... */ - (FRAME_SIZE & 0x7f) | 0x80, /* ... 
uleb128 FRAME_SIZE */ - (FRAME_SIZE >> 7) - }, - .fde_reg_ofs = { - /* The following must match the stmdb in the prologue. */ - 0x8e, 1, /* DW_CFA_offset, lr, -4 */ - 0x8b, 2, /* DW_CFA_offset, r11, -8 */ - 0x8a, 3, /* DW_CFA_offset, r10, -12 */ - 0x89, 4, /* DW_CFA_offset, r9, -16 */ - 0x88, 5, /* DW_CFA_offset, r8, -20 */ - 0x87, 6, /* DW_CFA_offset, r7, -24 */ - 0x86, 7, /* DW_CFA_offset, r6, -28 */ - 0x85, 8, /* DW_CFA_offset, r5, -32 */ - 0x84, 9, /* DW_CFA_offset, r4, -36 */ - } -}; - -void tcg_register_jit(void *buf, size_t buf_size) -{ - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); -} diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c new file mode 100644 index 0000000000..146ac00f63 --- /dev/null +++ b/tcg/arm/tcg-target.inc.c @@ -0,0 +1,2129 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Andrzej Zaborowski + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "elf.h" +#include "tcg-be-ldst.h" + +/* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. */ +#ifndef __ARM_ARCH +# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7EM__) +# define __ARM_ARCH 7 +# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) +# define __ARM_ARCH 6 +# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \ + || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) +# define __ARM_ARCH 5 +# else +# define __ARM_ARCH 4 +# endif +#endif + +static int arm_arch = __ARM_ARCH; + +#if defined(__ARM_ARCH_5T__) \ + || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__) +# define use_armv5t_instructions 1 +#else +# define use_armv5t_instructions use_armv6_instructions +#endif + +#define use_armv6_instructions (__ARM_ARCH >= 6 || arm_arch >= 6) +#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7) + +#ifndef use_idiv_instructions +bool use_idiv_instructions; +#endif + +/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined. 
*/ +#ifdef CONFIG_SOFTMMU +# define USING_SOFTMMU 1 +#else +# define USING_SOFTMMU 0 +#endif + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "%r0", + "%r1", + "%r2", + "%r3", + "%r4", + "%r5", + "%r6", + "%r7", + "%r8", + "%r9", + "%r10", + "%r11", + "%r12", + "%r13", + "%r14", + "%pc", +}; +#endif + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_R4, + TCG_REG_R5, + TCG_REG_R6, + TCG_REG_R7, + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R13, + TCG_REG_R0, + TCG_REG_R1, + TCG_REG_R2, + TCG_REG_R3, + TCG_REG_R12, + TCG_REG_R14, +}; + +static const int tcg_target_call_iarg_regs[4] = { + TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3 +}; +static const int tcg_target_call_oarg_regs[2] = { + TCG_REG_R0, TCG_REG_R1 +}; + +#define TCG_REG_TMP TCG_REG_R12 + +static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ + ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2; + *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff); +} + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + assert(type == R_ARM_PC24); + assert(addend == 0); + reloc_pc24(code_ptr, (tcg_insn_unit *)value); +} + +#define TCG_CT_CONST_ARM 0x100 +#define TCG_CT_CONST_INV 0x200 +#define TCG_CT_CONST_NEG 0x400 +#define TCG_CT_CONST_ZERO 0x800 + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str; + + ct_str = *pct_str; + switch (ct_str[0]) { + case 'I': + ct->ct |= TCG_CT_CONST_ARM; + break; + case 'K': + ct->ct |= TCG_CT_CONST_INV; + break; + case 'N': /* The gcc constraint letter is L, already used here. */ + ct->ct |= TCG_CT_CONST_NEG; + break; + case 'Z': + ct->ct |= TCG_CT_CONST_ZERO; + break; + + case 'r': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); + break; + + /* qemu_ld address */ + case 'l': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); +#ifdef CONFIG_SOFTMMU + /* r0-r2,lr will be overwritten when reading the tlb entry, + so don't use these. */ + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14); +#endif + break; + + /* qemu_st address & data */ + case 's': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); + /* r0-r2 will be overwritten when reading the tlb entry (softmmu only) + and r0-r1 doing the byte swapping, so don't use these. */ + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); +#if defined(CONFIG_SOFTMMU) + /* Avoid clashes with registers being used for helper args */ + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); +#if TARGET_LONG_BITS == 64 + /* Avoid clashes with registers being used for helper args */ + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); +#endif + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14); +#endif + break; + + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + + return 0; +} + +static inline uint32_t rotl(uint32_t val, int n) +{ + return (val << n) | (val >> (32 - n)); +} + +/* ARM immediates for ALU instructions are made of an unsigned 8-bit + right-rotated by an even amount between 0 and 30. 
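+   As a quick worked sketch of that rule (hand-checked, illustrative
+   only):
+     0x000000ff -> imm8 0xff, rotation 0
+     0x00ff0000 -> imm8 0xff, rotated right by 16
+     0xf000000f -> imm8 0xff, rotated right by 4
+     0x00000101 -> not encodable; encode_imm() below returns -1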
*/ +static inline int encode_imm(uint32_t imm) +{ + int shift; + + /* simple case, only lower bits */ + if ((imm & ~0xff) == 0) + return 0; + /* then try a simple even shift */ + shift = ctz32(imm) & ~1; + if (((imm >> shift) & ~0xff) == 0) + return 32 - shift; + /* now try harder with rotations */ + if ((rotl(imm, 2) & ~0xff) == 0) + return 2; + if ((rotl(imm, 4) & ~0xff) == 0) + return 4; + if ((rotl(imm, 6) & ~0xff) == 0) + return 6; + /* imm can't be encoded */ + return -1; +} + +static inline int check_fit_imm(uint32_t imm) +{ + return encode_imm(imm) >= 0; +} + +/* Test if a constant matches the constraint. + * TODO: define constraints for: + * + * ldr/str offset: between -0xfff and 0xfff + * ldrh/strh offset: between -0xff and 0xff + * mov operand2: values represented with x << (2 * y), x < 0x100 + * add, sub, eor...: ditto + */ +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct; + ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { + return 1; + } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) { + return 1; + } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) { + return 1; + } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) { + return 1; + } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return 1; + } else { + return 0; + } +} + +#define TO_CPSR (1 << 20) + +typedef enum { + ARITH_AND = 0x0 << 21, + ARITH_EOR = 0x1 << 21, + ARITH_SUB = 0x2 << 21, + ARITH_RSB = 0x3 << 21, + ARITH_ADD = 0x4 << 21, + ARITH_ADC = 0x5 << 21, + ARITH_SBC = 0x6 << 21, + ARITH_RSC = 0x7 << 21, + ARITH_TST = 0x8 << 21 | TO_CPSR, + ARITH_CMP = 0xa << 21 | TO_CPSR, + ARITH_CMN = 0xb << 21 | TO_CPSR, + ARITH_ORR = 0xc << 21, + ARITH_MOV = 0xd << 21, + ARITH_BIC = 0xe << 21, + ARITH_MVN = 0xf << 21, + + INSN_LDR_IMM = 0x04100000, + INSN_LDR_REG = 0x06100000, + INSN_STR_IMM = 0x04000000, + INSN_STR_REG = 0x06000000, + + INSN_LDRH_IMM = 0x005000b0, + INSN_LDRH_REG = 0x001000b0, + INSN_LDRSH_IMM = 0x005000f0, + INSN_LDRSH_REG = 0x001000f0, + INSN_STRH_IMM = 0x004000b0, + INSN_STRH_REG = 0x000000b0, + + INSN_LDRB_IMM = 0x04500000, + INSN_LDRB_REG = 0x06500000, + INSN_LDRSB_IMM = 0x005000d0, + INSN_LDRSB_REG = 0x001000d0, + INSN_STRB_IMM = 0x04400000, + INSN_STRB_REG = 0x06400000, + + INSN_LDRD_IMM = 0x004000d0, + INSN_LDRD_REG = 0x000000d0, + INSN_STRD_IMM = 0x004000f0, + INSN_STRD_REG = 0x000000f0, +} ARMInsn; + +#define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00) +#define SHIFT_IMM_LSR(im) (((im) << 7) | 0x20) +#define SHIFT_IMM_ASR(im) (((im) << 7) | 0x40) +#define SHIFT_IMM_ROR(im) (((im) << 7) | 0x60) +#define SHIFT_REG_LSL(rs) (((rs) << 8) | 0x10) +#define SHIFT_REG_LSR(rs) (((rs) << 8) | 0x30) +#define SHIFT_REG_ASR(rs) (((rs) << 8) | 0x50) +#define SHIFT_REG_ROR(rs) (((rs) << 8) | 0x70) + +enum arm_cond_code_e { + COND_EQ = 0x0, + COND_NE = 0x1, + COND_CS = 0x2, /* Unsigned greater or equal */ + COND_CC = 0x3, /* Unsigned less than */ + COND_MI = 0x4, /* Negative */ + COND_PL = 0x5, /* Zero or greater */ + COND_VS = 0x6, /* Overflow */ + COND_VC = 0x7, /* No overflow */ + COND_HI = 0x8, /* Unsigned greater than */ + COND_LS = 0x9, /* Unsigned less or equal */ + COND_GE = 0xa, + COND_LT = 0xb, + COND_GT = 0xc, + COND_LE = 0xd, + COND_AL = 0xe, +}; + +static const uint8_t tcg_cond_to_arm_cond[] = { + [TCG_COND_EQ] = COND_EQ, + [TCG_COND_NE] = COND_NE, + [TCG_COND_LT] = COND_LT, + [TCG_COND_GE] = COND_GE, + [TCG_COND_LE] = COND_LE, + [TCG_COND_GT] = COND_GT, + /* unsigned */ + [TCG_COND_LTU] = COND_CC, + [TCG_COND_GEU] = 
COND_CS, + [TCG_COND_LEU] = COND_LS, + [TCG_COND_GTU] = COND_HI, +}; + +static inline void tcg_out_bx(TCGContext *s, int cond, int rn) +{ + tcg_out32(s, (cond << 28) | 0x012fff10 | rn); +} + +static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset) +{ + tcg_out32(s, (cond << 28) | 0x0a000000 | + (((offset - 8) >> 2) & 0x00ffffff)); +} + +static inline void tcg_out_b_noaddr(TCGContext *s, int cond) +{ + /* We pay attention here to not modify the branch target by masking + the corresponding bytes. This ensure that caches and memory are + kept coherent during retranslation. */ + tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a)); +} + +static inline void tcg_out_bl_noaddr(TCGContext *s, int cond) +{ + /* We pay attention here to not modify the branch target by masking + the corresponding bytes. This ensure that caches and memory are + kept coherent during retranslation. */ + tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b)); +} + +static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset) +{ + tcg_out32(s, (cond << 28) | 0x0b000000 | + (((offset - 8) >> 2) & 0x00ffffff)); +} + +static inline void tcg_out_blx(TCGContext *s, int cond, int rn) +{ + tcg_out32(s, (cond << 28) | 0x012fff30 | rn); +} + +static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset) +{ + tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) | + (((offset - 8) >> 2) & 0x00ffffff)); +} + +static inline void tcg_out_dat_reg(TCGContext *s, + int cond, int opc, int rd, int rn, int rm, int shift) +{ + tcg_out32(s, (cond << 28) | (0 << 25) | opc | + (rn << 16) | (rd << 12) | shift | rm); +} + +static inline void tcg_out_nop(TCGContext *s) +{ + if (use_armv7_instructions) { + /* Architected nop introduced in v6k. */ + /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this + also Just So Happened to do nothing on pre-v6k so that we + don't need to conditionalize it? */ + tcg_out32(s, 0xe320f000); + } else { + /* Prior to that the assembler uses mov r0, r0. */ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0)); + } +} + +static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm) +{ + /* Simple reg-reg move, optimising out the 'do nothing' case */ + if (rd != rm) { + tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0)); + } +} + +static inline void tcg_out_dat_imm(TCGContext *s, + int cond, int opc, int rd, int rn, int im) +{ + tcg_out32(s, (cond << 28) | (1 << 25) | opc | + (rn << 16) | (rd << 12) | im); +} + +static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg) +{ + int rot, opc, rn; + + /* For armv7, make sure not to use movw+movt when mov/mvn would do. + Speed things up by only checking when movt would be required. + Prior to armv7, have one go at fully rotated immediates before + doing the decomposition thing below. */ + if (!use_armv7_instructions || (arg & 0xffff0000)) { + rot = encode_imm(arg); + if (rot >= 0) { + tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, + rotl(arg, rot) | (rot << 7)); + return; + } + rot = encode_imm(~arg); + if (rot >= 0) { + tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, + rotl(~arg, rot) | (rot << 7)); + return; + } + } + + /* Use movw + movt. 
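+       For example, arg = 0x12345678 would come out as
+         movw rd, #0x5678
+         movt rd, #0x1234
+       and the movt is omitted when the high half of arg is zero.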
*/ + if (use_armv7_instructions) { + /* movw */ + tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12) + | ((arg << 4) & 0x000f0000) | (arg & 0xfff)); + if (arg & 0xffff0000) { + /* movt */ + tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12) + | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff)); + } + return; + } + + /* TODO: This is very suboptimal, we can easily have a constant + pool somewhere after all the instructions. */ + opc = ARITH_MOV; + rn = 0; + /* If we have lots of leading 1's, we can shorten the sequence by + beginning with mvn and then clearing higher bits with eor. */ + if (clz32(~arg) > clz32(arg)) { + opc = ARITH_MVN, arg = ~arg; + } + do { + int i = ctz32(arg) & ~1; + rot = ((32 - i) << 7) & 0xf00; + tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot); + arg &= ~(0xff << i); + + opc = ARITH_EOR; + rn = rd; + } while (arg); +} + +static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst, + TCGArg lhs, TCGArg rhs, int rhs_is_const) +{ + /* Emit either the reg,imm or reg,reg form of a data-processing insn. + * rhs must satisfy the "rI" constraint. + */ + if (rhs_is_const) { + int rot = encode_imm(rhs); + assert(rot >= 0); + tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); + } else { + tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); + } +} + +static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv, + TCGReg dst, TCGReg lhs, TCGArg rhs, + bool rhs_is_const) +{ + /* Emit either the reg,imm or reg,reg form of a data-processing insn. + * rhs must satisfy the "rIK" constraint. + */ + if (rhs_is_const) { + int rot = encode_imm(rhs); + if (rot < 0) { + rhs = ~rhs; + rot = encode_imm(rhs); + assert(rot >= 0); + opc = opinv; + } + tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); + } else { + tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); + } +} + +static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg, + TCGArg dst, TCGArg lhs, TCGArg rhs, + bool rhs_is_const) +{ + /* Emit either the reg,imm or reg,reg form of a data-processing insn. + * rhs must satisfy the "rIN" constraint. + */ + if (rhs_is_const) { + int rot = encode_imm(rhs); + if (rot < 0) { + rhs = -rhs; + rot = encode_imm(rhs); + assert(rot >= 0); + opc = opneg; + } + tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); + } else { + tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); + } +} + +static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd, + TCGReg rn, TCGReg rm) +{ + /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */ + if (!use_armv6_instructions && rd == rn) { + if (rd == rm) { + /* rd == rn == rm; copy an input to tmp first. 
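+               E.g. a pre-v6 "mul r0, r0, r0" is emitted as
+                 mov r12, r0
+                 mul r0, r12, r12
+               with r12 being TCG_REG_TMP.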
*/ + tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); + rm = rn = TCG_REG_TMP; + } else { + rn = rm; + rm = rd; + } + } + /* mul */ + tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn); +} + +static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0, + TCGReg rd1, TCGReg rn, TCGReg rm) +{ + /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */ + if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { + if (rd0 == rm || rd1 == rm) { + tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); + rn = TCG_REG_TMP; + } else { + TCGReg t = rn; + rn = rm; + rm = t; + } + } + /* umull */ + tcg_out32(s, (cond << 28) | 0x00800090 | + (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); +} + +static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0, + TCGReg rd1, TCGReg rn, TCGReg rm) +{ + /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */ + if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { + if (rd0 == rm || rd1 == rm) { + tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); + rn = TCG_REG_TMP; + } else { + TCGReg t = rn; + rn = rm; + rm = t; + } + } + /* smull */ + tcg_out32(s, (cond << 28) | 0x00c00090 | + (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); +} + +static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm) +{ + tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); +} + +static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm) +{ + tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); +} + +static inline void tcg_out_ext8s(TCGContext *s, int cond, + int rd, int rn) +{ + if (use_armv6_instructions) { + /* sxtb */ + tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rn, SHIFT_IMM_LSL(24)); + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rd, SHIFT_IMM_ASR(24)); + } +} + +static inline void tcg_out_ext8u(TCGContext *s, int cond, + int rd, int rn) +{ + tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff); +} + +static inline void tcg_out_ext16s(TCGContext *s, int cond, + int rd, int rn) +{ + if (use_armv6_instructions) { + /* sxth */ + tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rn, SHIFT_IMM_LSL(16)); + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rd, SHIFT_IMM_ASR(16)); + } +} + +static inline void tcg_out_ext16u(TCGContext *s, int cond, + int rd, int rn) +{ + if (use_armv6_instructions) { + /* uxth */ + tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rn, SHIFT_IMM_LSL(16)); + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rd, SHIFT_IMM_LSR(16)); + } +} + +static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn) +{ + if (use_armv6_instructions) { + /* revsh */ + tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24)); + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16)); + tcg_out_dat_reg(s, cond, ARITH_ORR, + rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8)); + } +} + +static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn) +{ + if (use_armv6_instructions) { + /* rev16 */ + tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24)); + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16)); 
+ tcg_out_dat_reg(s, cond, ARITH_ORR, + rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8)); + } +} + +/* swap the two low bytes assuming that the two high input bytes and the + two high output bit can hold any value. */ +static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn) +{ + if (use_armv6_instructions) { + /* rev16 */ + tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8)); + tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff); + tcg_out_dat_reg(s, cond, ARITH_ORR, + rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8)); + } +} + +static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn) +{ + if (use_armv6_instructions) { + /* rev */ + tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_EOR, + TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16)); + tcg_out_dat_imm(s, cond, ARITH_BIC, + TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800); + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rn, SHIFT_IMM_ROR(8)); + tcg_out_dat_reg(s, cond, ARITH_EOR, + rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8)); + } +} + +bool tcg_target_deposit_valid(int ofs, int len) +{ + /* ??? Without bfi, we could improve over generic code by combining + the right-shift from a non-zero ofs with the orr. We do run into + problems when rd == rs, and the mask generated from ofs+len doesn't + fit into an immediate. We would have to be careful not to pessimize + wrt the optimizations performed on the expanded code. */ + return use_armv7_instructions; +} + +static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd, + TCGArg a1, int ofs, int len, bool const_a1) +{ + if (const_a1) { + /* bfi becomes bfc with rn == 15. */ + a1 = 15; + } + /* bfi/bfc */ + tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1 + | (ofs << 7) | ((ofs + len - 1) << 16)); +} + +/* Note that this routine is used for both LDR and LDRH formats, so we do + not wish to include an immediate shift at this point. 
*/ +static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, + TCGReg rn, TCGReg rm, bool u, bool p, bool w) +{ + tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) + | (w << 21) | (rn << 16) | (rt << 12) | rm); +} + +static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, + TCGReg rn, int imm8, bool p, bool w) +{ + bool u = 1; + if (imm8 < 0) { + imm8 = -imm8; + u = 0; + } + tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) | + (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf)); +} + +static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, + TCGReg rn, int imm12, bool p, bool w) +{ + bool u = 1; + if (imm12 < 0) { + imm12 = -imm12; + u = 0; + } + tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) | + (rn << 16) | (rt << 12) | imm12); +} + +static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm12) +{ + tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0); +} + +static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm12) +{ + tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0); +} + +static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) +{ + tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) +{ + tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0); +} + +/* Register pre-increment with base writeback. 
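+   In other words, tcg_out_ld32_rwb(s, cond, rt, rn, rm) below produces
+   "ldr rt, [rn, rm]!": the address is rn + rm and that sum is written
+   back into rn, so a following tcg_out_ld32_12(s, cond, rt2, rn, 4)
+   can pick up the second word of a pair.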
*/ +static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1); +} + +static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1); +} + +static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) +{ + tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) +{ + tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) +{ + tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm12) +{ + tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0); +} + +static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm12) +{ + tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0); +} + +static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) +{ + tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_ld32u(TCGContext *s, int cond, + int rd, int rn, int32_t offset) +{ + if (offset > 0xfff || offset < -0xfff) { + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP); + } else + tcg_out_ld32_12(s, cond, rd, rn, offset); +} + +static inline void tcg_out_st32(TCGContext *s, int cond, + int rd, int rn, int32_t offset) +{ + if (offset > 0xfff || offset < -0xfff) { + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP); + } else + tcg_out_st32_12(s, cond, rd, rn, offset); +} + +static inline void tcg_out_ld16u(TCGContext *s, int cond, + int rd, int rn, int32_t offset) +{ + if (offset > 0xff || offset < -0xff) { + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP); + } else + tcg_out_ld16u_8(s, cond, rd, rn, offset); +} + +static inline void tcg_out_ld16s(TCGContext *s, int cond, + int rd, int rn, int32_t offset) +{ + if (offset > 0xff || offset < -0xff) { + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP); + } else + tcg_out_ld16s_8(s, cond, rd, rn, offset); +} + +static inline void 
tcg_out_st16(TCGContext *s, int cond, + int rd, int rn, int32_t offset) +{ + if (offset > 0xff || offset < -0xff) { + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP); + } else + tcg_out_st16_8(s, cond, rd, rn, offset); +} + +static inline void tcg_out_ld8u(TCGContext *s, int cond, + int rd, int rn, int32_t offset) +{ + if (offset > 0xfff || offset < -0xfff) { + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP); + } else + tcg_out_ld8_12(s, cond, rd, rn, offset); +} + +static inline void tcg_out_ld8s(TCGContext *s, int cond, + int rd, int rn, int32_t offset) +{ + if (offset > 0xff || offset < -0xff) { + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP); + } else + tcg_out_ld8s_8(s, cond, rd, rn, offset); +} + +static inline void tcg_out_st8(TCGContext *s, int cond, + int rd, int rn, int32_t offset) +{ + if (offset > 0xfff || offset < -0xfff) { + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP); + } else + tcg_out_st8_12(s, cond, rd, rn, offset); +} + +/* The _goto case is normally between TBs within the same code buffer, and + * with the code buffer limited to 16MB we wouldn't need the long case. + * But we also use it for the tail-call to the qemu_ld/st helpers, which does. + */ +static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr) +{ + intptr_t addri = (intptr_t)addr; + ptrdiff_t disp = tcg_pcrel_diff(s, addr); + + if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) { + tcg_out_b(s, cond, disp); + return; + } + + tcg_out_movi32(s, cond, TCG_REG_TMP, addri); + if (use_armv5t_instructions) { + tcg_out_bx(s, cond, TCG_REG_TMP); + } else { + if (addri & 1) { + tcg_abort(); + } + tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP); + } +} + +/* The call case is mostly used for helpers - so it's not unreasonable + * for them to be beyond branch range */ +static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr) +{ + intptr_t addri = (intptr_t)addr; + ptrdiff_t disp = tcg_pcrel_diff(s, addr); + + if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) { + if (addri & 1) { + /* Use BLX if the target is in Thumb mode */ + if (!use_armv5t_instructions) { + tcg_abort(); + } + tcg_out_blx_imm(s, disp); + } else { + tcg_out_bl(s, COND_AL, disp); + } + } else if (use_armv7_instructions) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri); + tcg_out_blx(s, COND_AL, TCG_REG_TMP); + } else { + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4); + tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); + tcg_out32(s, addri); + } +} + +static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l) +{ + if (l->has_value) { + tcg_out_goto(s, cond, l->u.value_ptr); + } else { + tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0); + tcg_out_b_noaddr(s, cond); + } +} + +#ifdef CONFIG_SOFTMMU +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, + * int mmu_idx, uintptr_t ra) + */ +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LESL] = helper_le_ldul_mmu, + + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BESL] = helper_be_ldul_mmu, +}; + +/* helper signature: 
helper_ret_st_mmu(CPUState *env, target_ulong addr, + * uintxx_t val, int mmu_idx, uintptr_t ra) + */ +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, +}; + +/* Helper routines for marshalling helper function arguments into + * the correct registers and stack. + * argreg is where we want to put this argument, arg is the argument itself. + * Return value is the updated argreg ready for the next call. + * Note that argreg 0..3 is real registers, 4+ on stack. + * + * We provide routines for arguments which are: immediate, 32 bit + * value in register, 16 and 8 bit values in register (which must be zero + * extended before use) and 64 bit value in a lo:hi register pair. + */ +#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \ +static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \ +{ \ + if (argreg < 4) { \ + MOV_ARG(s, COND_AL, argreg, arg); \ + } else { \ + int ofs = (argreg - 4) * 4; \ + EXT_ARG; \ + assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \ + tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \ + } \ + return argreg + 1; \ +} + +DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32, + (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u, + (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u, + (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, ) + +static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg, + TCGReg arglo, TCGReg arghi) +{ + /* 64 bit arguments must go in even/odd register pairs + * and in 8-aligned stack slots. + */ + if (argreg & 1) { + argreg++; + } + if (use_armv6_instructions && argreg >= 4 + && (arglo & 1) == 0 && arghi == arglo + 1) { + tcg_out_strd_8(s, COND_AL, arglo, + TCG_REG_CALL_STACK, (argreg - 4) * 4); + return argreg + 2; + } else { + argreg = tcg_out_arg_reg32(s, argreg, arglo); + argreg = tcg_out_arg_reg32(s, argreg, arghi); + return argreg; + } +} + +#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) + +/* We're expecting to use an 8-bit immediate and to mask. */ +QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8); + +/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset. + Using the offset of the second entry in the last tlb table ensures + that we can index all of the elements of the first entry. */ +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) + > 0xffff); + +/* Load and compare a TLB entry, leaving the flags set. Returns the register + containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */ + +static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, + TCGMemOp s_bits, int mem_index, bool is_load) +{ + TCGReg base = TCG_AREG0; + int cmp_off = + (is_load + ? 
offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + + /* Should generate something like the following: + * shr tmp, addrlo, #TARGET_PAGE_BITS (1) + * add r2, env, #high + * and r0, tmp, #(CPU_TLB_SIZE - 1) (2) + * add r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS (3) + * ldr r0, [r2, #cmp] (4) + * tst addrlo, #s_mask + * ldr r2, [r2, #add] (5) + * cmpeq r0, tmp, lsl #TARGET_PAGE_BITS + */ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, + 0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS)); + + /* We checked that the offset is contained within 16 bits above. */ + if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) { + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base, + (24 << 7) | (cmp_off >> 8)); + base = TCG_REG_R2; + add_off -= cmp_off & 0xff00; + cmp_off &= 0xff; + } + + tcg_out_dat_imm(s, COND_AL, ARITH_AND, + TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1); + tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base, + TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); + + /* Load the tlb comparator. Use ldrd if needed and available, + but due to how the pointer needs setting up, ldm isn't useful. + Base arm5 doesn't have ldrd, but armv5te does. */ + if (use_armv6_instructions && TARGET_LONG_BITS == 64) { + tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off); + } else { + tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off); + if (TARGET_LONG_BITS == 64) { + tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4); + } + } + + /* Check alignment. */ + if (s_bits) { + tcg_out_dat_imm(s, COND_AL, ARITH_TST, + 0, addrlo, (1 << s_bits) - 1); + } + + /* Load the tlb addend. */ + tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off); + + tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0, + TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); + + if (TARGET_LONG_BITS == 64) { + tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, + TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0)); + } + + return TCG_REG_R2; +} + +/* Record the context of a call to the out of line helper code for the slow + path for a load or store, so that we can later generate the correct + helper code. */ +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, + TCGReg datalo, TCGReg datahi, TCGReg addrlo, + TCGReg addrhi, tcg_insn_unit *raddr, + tcg_insn_unit *label_ptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->oi = oi; + label->datalo_reg = datalo; + label->datahi_reg = datahi; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + label->raddr = raddr; + label->label_ptr[0] = label_ptr; +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGReg argreg, datalo, datahi; + TCGMemOpIdx oi = lb->oi; + TCGMemOp opc = get_memop(oi); + void *func; + + reloc_pc24(lb->label_ptr[0], s->code_ptr); + + argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); + } else { + argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); + } + argreg = tcg_out_arg_imm32(s, argreg, oi); + argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); + + /* For armv6 we can use the canonical unsigned helpers and minimize + icache usage. For pre-armv6, use the signed helpers since we do + not have a single insn sign-extend. 
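+       E.g. for a little-endian signed 16-bit load: on armv6+ this picks
+       helper_le_lduw_mmu and relies on the sxth in the MO_SW case below,
+       while pre-armv6 it picks helper_le_ldsw_mmu and clears MO_SIGN so
+       no extra extension is emitted.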
*/ + if (use_armv6_instructions) { + func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]; + } else { + func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]; + if (opc & MO_SIGN) { + opc = MO_UL; + } + } + tcg_out_call(s, func); + + datalo = lb->datalo_reg; + datahi = lb->datahi_reg; + switch (opc & MO_SSIZE) { + case MO_SB: + tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0); + break; + case MO_SW: + tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0); + break; + default: + tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); + break; + case MO_Q: + if (datalo != TCG_REG_R1) { + tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); + tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); + } else if (datahi != TCG_REG_R0) { + tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); + tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); + } else { + tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0); + tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); + tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP); + } + break; + } + + tcg_out_goto(s, COND_AL, lb->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGReg argreg, datalo, datahi; + TCGMemOpIdx oi = lb->oi; + TCGMemOp opc = get_memop(oi); + + reloc_pc24(lb->label_ptr[0], s->code_ptr); + + argreg = TCG_REG_R0; + argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); + } else { + argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); + } + + datalo = lb->datalo_reg; + datahi = lb->datahi_reg; + switch (opc & MO_SIZE) { + case MO_8: + argreg = tcg_out_arg_reg8(s, argreg, datalo); + break; + case MO_16: + argreg = tcg_out_arg_reg16(s, argreg, datalo); + break; + case MO_32: + default: + argreg = tcg_out_arg_reg32(s, argreg, datalo); + break; + case MO_64: + argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi); + break; + } + + argreg = tcg_out_arg_imm32(s, argreg, oi); + argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); + + /* Tail-call to the helper, which will return to the fast path. */ + tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); +} +#endif /* SOFTMMU */ + +static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo, TCGReg addend) +{ + TCGMemOp bswap = opc & MO_BSWAP; + + switch (opc & MO_SSIZE) { + case MO_UB: + tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend); + break; + case MO_SB: + tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend); + break; + case MO_UW: + tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend); + if (bswap) { + tcg_out_bswap16(s, COND_AL, datalo, datalo); + } + break; + case MO_SW: + if (bswap) { + tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend); + tcg_out_bswap16s(s, COND_AL, datalo, datalo); + } else { + tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend); + } + break; + case MO_UL: + default: + tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend); + if (bswap) { + tcg_out_bswap32(s, COND_AL, datalo, datalo); + } + break; + case MO_Q: + { + TCGReg dl = (bswap ? datahi : datalo); + TCGReg dh = (bswap ? datalo : datahi); + + /* Avoid ldrd for user-only emulation, to handle unaligned. 
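+               (ldrd also needs an even/odd register pair, hence the
+               (dl & 1) == 0 && dh == dl + 1 test below.)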
*/ + if (USING_SOFTMMU && use_armv6_instructions + && (dl & 1) == 0 && dh == dl + 1) { + tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend); + } else if (dl != addend) { + tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo); + tcg_out_ld32_12(s, COND_AL, dh, addend, 4); + } else { + tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP, + addend, addrlo, SHIFT_IMM_LSL(0)); + tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0); + tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4); + } + if (bswap) { + tcg_out_bswap32(s, COND_AL, dl, dl); + tcg_out_bswap32(s, COND_AL, dh, dh); + } + } + break; + } +} + +static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo) +{ + TCGMemOp bswap = opc & MO_BSWAP; + + switch (opc & MO_SSIZE) { + case MO_UB: + tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0); + break; + case MO_SB: + tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0); + break; + case MO_UW: + tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0); + if (bswap) { + tcg_out_bswap16(s, COND_AL, datalo, datalo); + } + break; + case MO_SW: + if (bswap) { + tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0); + tcg_out_bswap16s(s, COND_AL, datalo, datalo); + } else { + tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0); + } + break; + case MO_UL: + default: + tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0); + if (bswap) { + tcg_out_bswap32(s, COND_AL, datalo, datalo); + } + break; + case MO_Q: + { + TCGReg dl = (bswap ? datahi : datalo); + TCGReg dh = (bswap ? datalo : datahi); + + /* Avoid ldrd for user-only emulation, to handle unaligned. */ + if (USING_SOFTMMU && use_armv6_instructions + && (dl & 1) == 0 && dh == dl + 1) { + tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0); + } else if (dl == addrlo) { + tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4); + tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0); + } else { + tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0); + tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4); + } + if (bswap) { + tcg_out_bswap32(s, COND_AL, dl, dl); + tcg_out_bswap32(s, COND_AL, dh, dh); + } + } + break; + } +} + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) +{ + TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); + TCGMemOpIdx oi; + TCGMemOp opc; +#ifdef CONFIG_SOFTMMU + int mem_index; + TCGReg addend; + tcg_insn_unit *label_ptr; +#endif + + datalo = *args++; + datahi = (is64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); + oi = *args++; + opc = get_memop(oi); + +#ifdef CONFIG_SOFTMMU + mem_index = get_mmuidx(oi); + addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1); + + /* This a conditional BL only to load a pointer within this opcode into LR + for the slow path. We will not be using the value for a tail call. 
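+       The branch target is left blank here and patched in later by
+       reloc_pc24() from tcg_out_qemu_ld_slow_path(), via the label_ptr
+       recorded just below.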
*/ + label_ptr = s->code_ptr; + tcg_out_bl_noaddr(s, COND_NE); + + tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend); + + add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, + s->code_ptr, label_ptr); +#else /* !CONFIG_SOFTMMU */ + if (guest_base) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base); + tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP); + } else { + tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo); + } +#endif +} + +static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo, TCGReg addend) +{ + TCGMemOp bswap = opc & MO_BSWAP; + + switch (opc & MO_SIZE) { + case MO_8: + tcg_out_st8_r(s, cond, datalo, addrlo, addend); + break; + case MO_16: + if (bswap) { + tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo); + tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend); + } else { + tcg_out_st16_r(s, cond, datalo, addrlo, addend); + } + break; + case MO_32: + default: + if (bswap) { + tcg_out_bswap32(s, cond, TCG_REG_R0, datalo); + tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend); + } else { + tcg_out_st32_r(s, cond, datalo, addrlo, addend); + } + break; + case MO_64: + /* Avoid strd for user-only emulation, to handle unaligned. */ + if (bswap) { + tcg_out_bswap32(s, cond, TCG_REG_R0, datahi); + tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo); + tcg_out_bswap32(s, cond, TCG_REG_R0, datalo); + tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4); + } else if (USING_SOFTMMU && use_armv6_instructions + && (datalo & 1) == 0 && datahi == datalo + 1) { + tcg_out_strd_r(s, cond, datalo, addrlo, addend); + } else { + tcg_out_st32_rwb(s, cond, datalo, addend, addrlo); + tcg_out_st32_12(s, cond, datahi, addend, 4); + } + break; + } +} + +static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo) +{ + TCGMemOp bswap = opc & MO_BSWAP; + + switch (opc & MO_SIZE) { + case MO_8: + tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0); + break; + case MO_16: + if (bswap) { + tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo); + tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0); + } else { + tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0); + } + break; + case MO_32: + default: + if (bswap) { + tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo); + tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0); + } else { + tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); + } + break; + case MO_64: + /* Avoid strd for user-only emulation, to handle unaligned. */ + if (bswap) { + tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi); + tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0); + tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo); + tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4); + } else if (USING_SOFTMMU && use_armv6_instructions + && (datalo & 1) == 0 && datahi == datalo + 1) { + tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0); + } else { + tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); + tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4); + } + break; + } +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) +{ + TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); + TCGMemOpIdx oi; + TCGMemOp opc; +#ifdef CONFIG_SOFTMMU + int mem_index; + TCGReg addend; + tcg_insn_unit *label_ptr; +#endif + + datalo = *args++; + datahi = (is64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TARGET_LONG_BITS == 64 ? 
*args++ : 0); + oi = *args++; + opc = get_memop(oi); + +#ifdef CONFIG_SOFTMMU + mem_index = get_mmuidx(oi); + addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0); + + tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend); + + /* The conditional call must come last, as we're going to return here. */ + label_ptr = s->code_ptr; + tcg_out_bl_noaddr(s, COND_NE); + + add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, + s->code_ptr, label_ptr); +#else /* !CONFIG_SOFTMMU */ + if (guest_base) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base); + tcg_out_qemu_st_index(s, COND_AL, opc, datalo, + datahi, addrlo, TCG_REG_TMP); + } else { + tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo); + } +#endif +} + +static tcg_insn_unit *tb_ret_addr; + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg *args, const int *const_args) +{ + TCGArg a0, a1, a2, a3, a4, a5; + int c; + + switch (opc) { + case INDEX_op_exit_tb: + tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]); + tcg_out_goto(s, COND_AL, tb_ret_addr); + break; + case INDEX_op_goto_tb: + if (s->tb_jmp_offset) { + /* Direct jump method */ + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + tcg_out_b_noaddr(s, COND_AL); + } else { + /* Indirect jump method */ + intptr_t ptr = (intptr_t)(s->tb_next + args[0]); + tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff); + tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff); + } + s->tb_next_offset[args[0]] = tcg_current_code_size(s); + break; + case INDEX_op_br: + tcg_out_goto_label(s, COND_AL, arg_label(args[0])); + break; + + case INDEX_op_ld8u_i32: + tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_ld8s_i32: + tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_ld16u_i32: + tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_ld16s_i32: + tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_ld_i32: + tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_st8_i32: + tcg_out_st8(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_st16_i32: + tcg_out_st16(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_st_i32: + tcg_out_st32(s, COND_AL, args[0], args[1], args[2]); + break; + + case INDEX_op_movcond_i32: + /* Constraints mean that v2 is always in the same register as dest, + * so we only need to do "if condition passed, move v1 to dest". 
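+         * Roughly, movcond_i32 d, c1, c2, v1, v2 therefore lowers to
+         *   cmp   c1, c2        (or cmn for a negated immediate)
+         *   mov<cond> d, v1     (or mvn<cond> for an inverted immediate)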
+ */ + tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, + args[1], args[2], const_args[2]); + tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV, + ARITH_MVN, args[0], 0, args[3], const_args[3]); + break; + case INDEX_op_add_i32: + tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB, + args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_sub_i32: + if (const_args[1]) { + if (const_args[2]) { + tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]); + } else { + tcg_out_dat_rI(s, COND_AL, ARITH_RSB, + args[0], args[2], args[1], 1); + } + } else { + tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD, + args[0], args[1], args[2], const_args[2]); + } + break; + case INDEX_op_and_i32: + tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC, + args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_andc_i32: + tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND, + args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_or_i32: + c = ARITH_ORR; + goto gen_arith; + case INDEX_op_xor_i32: + c = ARITH_EOR; + /* Fall through. */ + gen_arith: + tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_add2_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + a3 = args[3], a4 = args[4], a5 = args[5]; + if (a0 == a3 || (a0 == a5 && !const_args[5])) { + a0 = TCG_REG_TMP; + } + tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR, + a0, a2, a4, const_args[4]); + tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC, + a1, a3, a5, const_args[5]); + tcg_out_mov_reg(s, COND_AL, args[0], a0); + break; + case INDEX_op_sub2_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + a3 = args[3], a4 = args[4], a5 = args[5]; + if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) { + a0 = TCG_REG_TMP; + } + if (const_args[2]) { + if (const_args[4]) { + tcg_out_movi32(s, COND_AL, a0, a4); + a4 = a0; + } + tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1); + } else { + tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR, + ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]); + } + if (const_args[3]) { + if (const_args[5]) { + tcg_out_movi32(s, COND_AL, a1, a5); + a5 = a1; + } + tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1); + } else { + tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC, + a1, a3, a5, const_args[5]); + } + tcg_out_mov_reg(s, COND_AL, args[0], a0); + break; + case INDEX_op_neg_i32: + tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0); + break; + case INDEX_op_not_i32: + tcg_out_dat_reg(s, COND_AL, + ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0)); + break; + case INDEX_op_mul_i32: + tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_mulu2_i32: + tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]); + break; + case INDEX_op_muls2_i32: + tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]); + break; + /* XXX: Perhaps args[2] & 0x1f is wrong */ + case INDEX_op_shl_i32: + c = const_args[2] ? + SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]); + goto gen_shift32; + case INDEX_op_shr_i32: + c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) : + SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]); + goto gen_shift32; + case INDEX_op_sar_i32: + c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) : + SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]); + goto gen_shift32; + case INDEX_op_rotr_i32: + c = const_args[2] ? (args[2] & 0x1f) ? 
SHIFT_IMM_ROR(args[2] & 0x1f) : + SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]); + /* Fall through. */ + gen_shift32: + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c); + break; + + case INDEX_op_rotl_i32: + if (const_args[2]) { + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], + ((0x20 - args[2]) & 0x1f) ? + SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) : + SHIFT_IMM_LSL(0)); + } else { + tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20); + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], + SHIFT_REG_ROR(TCG_REG_TMP)); + } + break; + + case INDEX_op_brcond_i32: + tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, + args[0], args[1], const_args[1]); + tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], + arg_label(args[3])); + break; + case INDEX_op_brcond2_i32: + /* The resulting conditions are: + * TCG_COND_EQ --> a0 == a2 && a1 == a3, + * TCG_COND_NE --> (a0 != a2 && a1 == a3) || a1 != a3, + * TCG_COND_LT(U) --> (a0 < a2 && a1 == a3) || a1 < a3, + * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3), + * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3), + * TCG_COND_GT(U) --> (a0 > a2 && a1 == a3) || a1 > a3, + */ + tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, + args[1], args[3], const_args[3]); + tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0, + args[0], args[2], const_args[2]); + tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], + arg_label(args[5])); + break; + case INDEX_op_setcond_i32: + tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, + args[1], args[2], const_args[2]); + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]], + ARITH_MOV, args[0], 0, 1); + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])], + ARITH_MOV, args[0], 0, 0); + break; + case INDEX_op_setcond2_i32: + /* See brcond2_i32 comment */ + tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, + args[2], args[4], const_args[4]); + tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0, + args[1], args[3], const_args[3]); + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]], + ARITH_MOV, args[0], 0, 1); + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])], + ARITH_MOV, args[0], 0, 0); + break; + + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, args, 0); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args, 1); + break; + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, args, 0); + break; + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args, 1); + break; + + case INDEX_op_bswap16_i32: + tcg_out_bswap16(s, COND_AL, args[0], args[1]); + break; + case INDEX_op_bswap32_i32: + tcg_out_bswap32(s, COND_AL, args[0], args[1]); + break; + + case INDEX_op_ext8s_i32: + tcg_out_ext8s(s, COND_AL, args[0], args[1]); + break; + case INDEX_op_ext16s_i32: + tcg_out_ext16s(s, COND_AL, args[0], args[1]); + break; + case INDEX_op_ext16u_i32: + tcg_out_ext16u(s, COND_AL, args[0], args[1]); + break; + + case INDEX_op_deposit_i32: + tcg_out_deposit(s, COND_AL, args[0], args[2], + args[3], args[4], const_args[2]); + break; + + case INDEX_op_div_i32: + tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_divu_i32: + tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]); + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_call: /* Always emitted via tcg_out_call. 
*/ + default: + tcg_abort(); + } +} + +static const TCGTargetOpDef arm_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_br, { } }, + + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "r", "r" } }, + { INDEX_op_st16_i32, { "r", "r" } }, + { INDEX_op_st_i32, { "r", "r" } }, + + /* TODO: "r", "r", "ri" */ + { INDEX_op_add_i32, { "r", "r", "rIN" } }, + { INDEX_op_sub_i32, { "r", "rI", "rIN" } }, + { INDEX_op_mul_i32, { "r", "r", "r" } }, + { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } }, + { INDEX_op_muls2_i32, { "r", "r", "r", "r" } }, + { INDEX_op_and_i32, { "r", "r", "rIK" } }, + { INDEX_op_andc_i32, { "r", "r", "rIK" } }, + { INDEX_op_or_i32, { "r", "r", "rI" } }, + { INDEX_op_xor_i32, { "r", "r", "rI" } }, + { INDEX_op_neg_i32, { "r", "r" } }, + { INDEX_op_not_i32, { "r", "r" } }, + + { INDEX_op_shl_i32, { "r", "r", "ri" } }, + { INDEX_op_shr_i32, { "r", "r", "ri" } }, + { INDEX_op_sar_i32, { "r", "r", "ri" } }, + { INDEX_op_rotl_i32, { "r", "r", "ri" } }, + { INDEX_op_rotr_i32, { "r", "r", "ri" } }, + + { INDEX_op_brcond_i32, { "r", "rIN" } }, + { INDEX_op_setcond_i32, { "r", "r", "rIN" } }, + { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } }, + + { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } }, + { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } }, + { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } }, + { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } }, + +#if TARGET_LONG_BITS == 32 + { INDEX_op_qemu_ld_i32, { "r", "l" } }, + { INDEX_op_qemu_ld_i64, { "r", "r", "l" } }, + { INDEX_op_qemu_st_i32, { "s", "s" } }, + { INDEX_op_qemu_st_i64, { "s", "s", "s" } }, +#else + { INDEX_op_qemu_ld_i32, { "r", "l", "l" } }, + { INDEX_op_qemu_ld_i64, { "r", "r", "l", "l" } }, + { INDEX_op_qemu_st_i32, { "s", "s", "s" } }, + { INDEX_op_qemu_st_i64, { "s", "s", "s", "s" } }, +#endif + + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, + + { INDEX_op_ext8s_i32, { "r", "r" } }, + { INDEX_op_ext16s_i32, { "r", "r" } }, + { INDEX_op_ext16u_i32, { "r", "r" } }, + + { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, + + { INDEX_op_div_i32, { "r", "r", "r" } }, + { INDEX_op_divu_i32, { "r", "r", "r" } }, + + { -1 }, +}; + +static void tcg_target_init(TCGContext *s) +{ + /* Only probe for the platform and capabilities if we havn't already + determined maximum values at compile time. 
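
(Editor's illustration, not part of this patch.) The tcg_target_init() hunk just below reads AT_HWCAP to decide whether the host really has hardware integer divide (sdiv/udiv). A standalone sketch of that probe, assuming a Linux host; HWCAP_ARM_IDIVA comes from the ARM Linux system headers, and the placeholder below only exists so the sketch also builds elsewhere:

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef HWCAP_ARM_IDIVA       /* defined by the ARM Linux headers */
    #define HWCAP_ARM_IDIVA 0     /* harmless placeholder on other hosts */
    #endif

    int main(void)
    {
        unsigned long hwcap = getauxval(AT_HWCAP);

        printf("AT_HWCAP = %#lx, sdiv/udiv usable: %s\n",
               hwcap, (hwcap & HWCAP_ARM_IDIVA) ? "yes" : "no");
        return 0;
    }
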
*/ +#ifndef use_idiv_instructions + { + unsigned long hwcap = qemu_getauxval(AT_HWCAP); + use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0; + } +#endif + if (__ARM_ARCH < 7) { + const char *pl = (const char *)qemu_getauxval(AT_PLATFORM); + if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') { + arm_arch = pl[1] - '0'; + } + } + + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); + tcg_regset_set32(tcg_target_call_clobber_regs, 0, + (1 << TCG_REG_R0) | + (1 << TCG_REG_R1) | + (1 << TCG_REG_R2) | + (1 << TCG_REG_R3) | + (1 << TCG_REG_R12) | + (1 << TCG_REG_R14)); + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC); + + tcg_add_target_add_op_defs(arm_op_defs); +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_ld32u(s, COND_AL, arg, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_st32(s, COND_AL, arg, arg1, arg2); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, + TCGReg ret, TCGReg arg) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0)); +} + +static inline void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long arg) +{ + tcg_out_movi32(s, COND_AL, ret, arg); +} + +/* Compute frame size via macros, to share between tcg_target_qemu_prologue + and tcg_register_jit. */ + +#define PUSH_SIZE ((11 - 4 + 1 + 1) * sizeof(tcg_target_long)) + +#define FRAME_SIZE \ + ((PUSH_SIZE \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_NLONGS * sizeof(long) \ + + TCG_TARGET_STACK_ALIGN - 1) \ + & -TCG_TARGET_STACK_ALIGN) + +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int stack_addend; + + /* Calling convention requires us to save r4-r11 and lr. */ + /* stmdb sp!, { r4 - r11, lr } */ + tcg_out32(s, (COND_AL << 28) | 0x092d4ff0); + + /* Reserve callee argument and tcg temp space. */ + stack_addend = FRAME_SIZE - PUSH_SIZE; + + tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK, + TCG_REG_CALL_STACK, stack_addend, 1); + tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, + CPU_TEMP_BUF_NLONGS * sizeof(long)); + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + + tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]); + tb_ret_addr = s->code_ptr; + + /* Epilogue. We branch here via tb_ret_addr. */ + tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK, + TCG_REG_CALL_STACK, stack_addend, 1); + + /* ldmia sp!, { r4 - r11, pc } */ + tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0); +} + +typedef struct { + DebugFrameHeader h; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[18]; +} DebugFrame; + +#define ELF_HOST_MACHINE EM_ARM + +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = 0x7c, /* sleb128 -4 */ + .h.cie.return_column = 14, + + /* Total FDE size does not include the "len" member. */ + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + + .fde_def_cfa = { + 12, 13, /* DW_CFA_def_cfa sp, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... 
uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + /* The following must match the stmdb in the prologue. */ + 0x8e, 1, /* DW_CFA_offset, lr, -4 */ + 0x8b, 2, /* DW_CFA_offset, r11, -8 */ + 0x8a, 3, /* DW_CFA_offset, r10, -12 */ + 0x89, 4, /* DW_CFA_offset, r9, -16 */ + 0x88, 5, /* DW_CFA_offset, r8, -20 */ + 0x87, 6, /* DW_CFA_offset, r7, -24 */ + 0x86, 7, /* DW_CFA_offset, r6, -28 */ + 0x85, 8, /* DW_CFA_offset, r5, -32 */ + 0x84, 9, /* DW_CFA_offset, r4, -36 */ + } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c deleted file mode 100644 index d90636cb4e..0000000000 --- a/tcg/i386/tcg-target.c +++ /dev/null @@ -1,2464 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "tcg-be-ldst.h" - -#ifndef NDEBUG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { -#if TCG_TARGET_REG_BITS == 64 - "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", - "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", -#else - "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", -#endif -}; -#endif - -static const int tcg_target_reg_alloc_order[] = { -#if TCG_TARGET_REG_BITS == 64 - TCG_REG_RBP, - TCG_REG_RBX, - TCG_REG_R12, - TCG_REG_R13, - TCG_REG_R14, - TCG_REG_R15, - TCG_REG_R10, - TCG_REG_R11, - TCG_REG_R9, - TCG_REG_R8, - TCG_REG_RCX, - TCG_REG_RDX, - TCG_REG_RSI, - TCG_REG_RDI, - TCG_REG_RAX, -#else - TCG_REG_EBX, - TCG_REG_ESI, - TCG_REG_EDI, - TCG_REG_EBP, - TCG_REG_ECX, - TCG_REG_EDX, - TCG_REG_EAX, -#endif -}; - -static const int tcg_target_call_iarg_regs[] = { -#if TCG_TARGET_REG_BITS == 64 -#if defined(_WIN64) - TCG_REG_RCX, - TCG_REG_RDX, -#else - TCG_REG_RDI, - TCG_REG_RSI, - TCG_REG_RDX, - TCG_REG_RCX, -#endif - TCG_REG_R8, - TCG_REG_R9, -#else - /* 32 bit mode uses stack based calling convention (GCC default). */ -#endif -}; - -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_EAX, -#if TCG_TARGET_REG_BITS == 32 - TCG_REG_EDX -#endif -}; - -/* Constants we accept. */ -#define TCG_CT_CONST_S32 0x100 -#define TCG_CT_CONST_U32 0x200 -#define TCG_CT_CONST_I32 0x400 - -/* Registers used with L constraint, which are the first argument - registers on x86_64, and two random call clobbered registers on - i386. 
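
(Editor's illustration, not part of this patch.) The .fde_def_cfa initializer above hand-encodes DW_CFA_def_cfa with FRAME_SIZE as a two-byte unsigned LEB128, which is why the build asserts FRAME_SIZE < (1 << 14). For comparison, a generic encoder of the same format:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Encode VALUE as unsigned LEB128 into BUF, returning the byte count.
       Each byte carries 7 payload bits; the top bit means "more follows". */
    static size_t uleb128_encode(uint8_t *buf, uint32_t value)
    {
        size_t n = 0;

        do {
            uint8_t byte = value & 0x7f;
            value >>= 7;
            buf[n++] = byte | (value ? 0x80 : 0);
        } while (value);
        return n;
    }

    int main(void)
    {
        uint8_t buf[8];

        /* Any frame size of at least 128 and below (1 << 14) takes exactly
           two bytes, matching the (x & 0x7f) | 0x80, (x >> 7) pair above. */
        assert(uleb128_encode(buf, 0x1a0) == 2);
        assert(buf[0] == ((0x1a0 & 0x7f) | 0x80) && buf[1] == (0x1a0 >> 7));
        return 0;
    }
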
*/ -#if TCG_TARGET_REG_BITS == 64 -# define TCG_REG_L0 tcg_target_call_iarg_regs[0] -# define TCG_REG_L1 tcg_target_call_iarg_regs[1] -#else -# define TCG_REG_L0 TCG_REG_EAX -# define TCG_REG_L1 TCG_REG_EDX -#endif - -/* The host compiler should supply to enable runtime features - detection, as we're not going to go so far as our own inline assembly. - If not available, default values will be assumed. */ -#if defined(CONFIG_CPUID_H) -#include -#endif - -/* For 32-bit, we are going to attempt to determine at runtime whether cmov - is available. */ -#if TCG_TARGET_REG_BITS == 64 -# define have_cmov 1 -#elif defined(CONFIG_CPUID_H) && defined(bit_CMOV) -static bool have_cmov; -#else -# define have_cmov 0 -#endif - -/* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are - going to attempt to determine at runtime whether movbe is available. */ -#if defined(CONFIG_CPUID_H) && defined(bit_MOVBE) -static bool have_movbe; -#else -# define have_movbe 0 -#endif - -/* We need this symbol in tcg-target.h, and we can't properly conditionalize - it there. Therefore we always define the variable. */ -bool have_bmi1; - -#if defined(CONFIG_CPUID_H) && defined(bit_BMI2) -static bool have_bmi2; -#else -# define have_bmi2 0 -#endif - -static tcg_insn_unit *tb_ret_addr; - -static void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - value += addend; - switch(type) { - case R_386_PC32: - value -= (uintptr_t)code_ptr; - if (value != (int32_t)value) { - tcg_abort(); - } - tcg_patch32(code_ptr, value); - break; - case R_386_PC8: - value -= (uintptr_t)code_ptr; - if (value != (int8_t)value) { - tcg_abort(); - } - tcg_patch8(code_ptr, value); - break; - default: - tcg_abort(); - } -} - -/* parse target specific constraints */ -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str; - - ct_str = *pct_str; - switch(ct_str[0]) { - case 'a': - ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX); - break; - case 'b': - ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX); - break; - case 'c': - case_c: - ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX); - break; - case 'd': - ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX); - break; - case 'S': - ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI); - break; - case 'D': - ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI); - break; - case 'q': - ct->ct |= TCG_CT_REG; - if (TCG_TARGET_REG_BITS == 64) { - tcg_regset_set32(ct->u.regs, 0, 0xffff); - } else { - tcg_regset_set32(ct->u.regs, 0, 0xf); - } - break; - case 'Q': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xf); - break; - case 'r': - case_r: - ct->ct |= TCG_CT_REG; - if (TCG_TARGET_REG_BITS == 64) { - tcg_regset_set32(ct->u.regs, 0, 0xffff); - } else { - tcg_regset_set32(ct->u.regs, 0, 0xff); - } - break; - case 'C': - /* With SHRX et al, we need not use ECX as shift count register. 
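
(Editor's illustration, not part of this patch.) The have_cmov / have_movbe / have_bmi2 flags above are filled in from the compiler-provided <cpuid.h> header, the one the CONFIG_CPUID_H guard tests for. A self-contained sketch of such a probe, guarded so it still builds on non-x86 hosts:

    #include <stdio.h>
    #if defined(__i386__) || defined(__x86_64__)
    #include <cpuid.h>
    #endif

    int main(void)
    {
    #if defined(__i386__) || defined(__x86_64__)
        unsigned a, b, c, d;

        /* Leaf 1: basic feature flags in ECX/EDX. */
        if (__get_cpuid(1, &a, &b, &c, &d)) {
            printf("cmov:  %s\n", (d & bit_CMOV) ? "yes" : "no");
    #ifdef bit_MOVBE                     /* only in cpuid.h from GCC 4.6 on */
            printf("movbe: %s\n", (c & bit_MOVBE) ? "yes" : "no");
    #endif
        }
    #else
        puts("not an x86 host");
    #endif
        return 0;
    }
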
*/ - if (have_bmi2) { - goto case_r; - } else { - goto case_c; - } - - /* qemu_ld/st address constraint */ - case 'L': - ct->ct |= TCG_CT_REG; - if (TCG_TARGET_REG_BITS == 64) { - tcg_regset_set32(ct->u.regs, 0, 0xffff); - } else { - tcg_regset_set32(ct->u.regs, 0, 0xff); - } - tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1); - break; - - case 'e': - ct->ct |= TCG_CT_CONST_S32; - break; - case 'Z': - ct->ct |= TCG_CT_CONST_U32; - break; - case 'I': - ct->ct |= TCG_CT_CONST_I32; - break; - - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - return 0; -} - -/* test if a constant matches the constraint */ -static inline int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - int ct = arg_ct->ct; - if (ct & TCG_CT_CONST) { - return 1; - } - if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { - return 1; - } - if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { - return 1; - } - if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) { - return 1; - } - return 0; -} - -#if TCG_TARGET_REG_BITS == 64 -# define LOWREGMASK(x) ((x) & 7) -#else -# define LOWREGMASK(x) (x) -#endif - -#define P_EXT 0x100 /* 0x0f opcode prefix */ -#define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */ -#define P_DATA16 0x400 /* 0x66 opcode prefix */ -#if TCG_TARGET_REG_BITS == 64 -# define P_ADDR32 0x800 /* 0x67 opcode prefix */ -# define P_REXW 0x1000 /* Set REX.W = 1 */ -# define P_REXB_R 0x2000 /* REG field as byte register */ -# define P_REXB_RM 0x4000 /* R/M field as byte register */ -# define P_GS 0x8000 /* gs segment override */ -#else -# define P_ADDR32 0 -# define P_REXW 0 -# define P_REXB_R 0 -# define P_REXB_RM 0 -# define P_GS 0 -#endif -#define P_SIMDF3 0x10000 /* 0xf3 opcode prefix */ -#define P_SIMDF2 0x20000 /* 0xf2 opcode prefix */ - -#define OPC_ARITH_EvIz (0x81) -#define OPC_ARITH_EvIb (0x83) -#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */ -#define OPC_ANDN (0xf2 | P_EXT38) -#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) -#define OPC_BSWAP (0xc8 | P_EXT) -#define OPC_CALL_Jz (0xe8) -#define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */ -#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3)) -#define OPC_DEC_r32 (0x48) -#define OPC_IMUL_GvEv (0xaf | P_EXT) -#define OPC_IMUL_GvEvIb (0x6b) -#define OPC_IMUL_GvEvIz (0x69) -#define OPC_INC_r32 (0x40) -#define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */ -#define OPC_JCC_short (0x70) /* ... plus condition code */ -#define OPC_JMP_long (0xe9) -#define OPC_JMP_short (0xeb) -#define OPC_LEA (0x8d) -#define OPC_MOVB_EvGv (0x88) /* stores, more or less */ -#define OPC_MOVL_EvGv (0x89) /* stores, more or less */ -#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ -#define OPC_MOVB_EvIz (0xc6) -#define OPC_MOVL_EvIz (0xc7) -#define OPC_MOVL_Iv (0xb8) -#define OPC_MOVBE_GyMy (0xf0 | P_EXT38) -#define OPC_MOVBE_MyGy (0xf1 | P_EXT38) -#define OPC_MOVSBL (0xbe | P_EXT) -#define OPC_MOVSWL (0xbf | P_EXT) -#define OPC_MOVSLQ (0x63 | P_REXW) -#define OPC_MOVZBL (0xb6 | P_EXT) -#define OPC_MOVZWL (0xb7 | P_EXT) -#define OPC_POP_r32 (0x58) -#define OPC_PUSH_r32 (0x50) -#define OPC_PUSH_Iv (0x68) -#define OPC_PUSH_Ib (0x6a) -#define OPC_RET (0xc3) -#define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... 
plus cc */ -#define OPC_SHIFT_1 (0xd1) -#define OPC_SHIFT_Ib (0xc1) -#define OPC_SHIFT_cl (0xd3) -#define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3) -#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16) -#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) -#define OPC_TESTL (0x85) -#define OPC_XCHG_ax_r32 (0x90) - -#define OPC_GRP3_Ev (0xf7) -#define OPC_GRP5 (0xff) - -/* Group 1 opcode extensions for 0x80-0x83. - These are also used as modifiers for OPC_ARITH. */ -#define ARITH_ADD 0 -#define ARITH_OR 1 -#define ARITH_ADC 2 -#define ARITH_SBB 3 -#define ARITH_AND 4 -#define ARITH_SUB 5 -#define ARITH_XOR 6 -#define ARITH_CMP 7 - -/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */ -#define SHIFT_ROL 0 -#define SHIFT_ROR 1 -#define SHIFT_SHL 4 -#define SHIFT_SHR 5 -#define SHIFT_SAR 7 - -/* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */ -#define EXT3_NOT 2 -#define EXT3_NEG 3 -#define EXT3_MUL 4 -#define EXT3_IMUL 5 -#define EXT3_DIV 6 -#define EXT3_IDIV 7 - -/* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */ -#define EXT5_INC_Ev 0 -#define EXT5_DEC_Ev 1 -#define EXT5_CALLN_Ev 2 -#define EXT5_JMPN_Ev 4 - -/* Condition codes to be added to OPC_JCC_{long,short}. */ -#define JCC_JMP (-1) -#define JCC_JO 0x0 -#define JCC_JNO 0x1 -#define JCC_JB 0x2 -#define JCC_JAE 0x3 -#define JCC_JE 0x4 -#define JCC_JNE 0x5 -#define JCC_JBE 0x6 -#define JCC_JA 0x7 -#define JCC_JS 0x8 -#define JCC_JNS 0x9 -#define JCC_JP 0xa -#define JCC_JNP 0xb -#define JCC_JL 0xc -#define JCC_JGE 0xd -#define JCC_JLE 0xe -#define JCC_JG 0xf - -static const uint8_t tcg_cond_to_jcc[] = { - [TCG_COND_EQ] = JCC_JE, - [TCG_COND_NE] = JCC_JNE, - [TCG_COND_LT] = JCC_JL, - [TCG_COND_GE] = JCC_JGE, - [TCG_COND_LE] = JCC_JLE, - [TCG_COND_GT] = JCC_JG, - [TCG_COND_LTU] = JCC_JB, - [TCG_COND_GEU] = JCC_JAE, - [TCG_COND_LEU] = JCC_JBE, - [TCG_COND_GTU] = JCC_JA, -}; - -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) -{ - int rex; - - if (opc & P_GS) { - tcg_out8(s, 0x65); - } - if (opc & P_DATA16) { - /* We should never be asking for both 16 and 64-bit operation. */ - assert((opc & P_REXW) == 0); - tcg_out8(s, 0x66); - } - if (opc & P_ADDR32) { - tcg_out8(s, 0x67); - } - - rex = 0; - rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */ - rex |= (r & 8) >> 1; /* REX.R */ - rex |= (x & 8) >> 2; /* REX.X */ - rex |= (rm & 8) >> 3; /* REX.B */ - - /* P_REXB_{R,RM} indicates that the given register is the low byte. - For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do, - as otherwise the encoding indicates %[abcd]h. Note that the values - that are ORed in merely indicate that the REX byte must be present; - those bits get discarded in output. */ - rex |= opc & (r >= 4 ? P_REXB_R : 0); - rex |= opc & (rm >= 4 ? P_REXB_RM : 0); - - if (rex) { - tcg_out8(s, (uint8_t)(rex | 0x40)); - } - - if (opc & (P_EXT | P_EXT38)) { - tcg_out8(s, 0x0f); - if (opc & P_EXT38) { - tcg_out8(s, 0x38); - } - } - - tcg_out8(s, opc); -} -#else -static void tcg_out_opc(TCGContext *s, int opc) -{ - if (opc & P_DATA16) { - tcg_out8(s, 0x66); - } - if (opc & (P_EXT | P_EXT38)) { - tcg_out8(s, 0x0f); - if (opc & P_EXT38) { - tcg_out8(s, 0x38); - } - } - tcg_out8(s, opc); -} -/* Discard the register arguments to tcg_out_opc early, so as not to penalize - the 32-bit compilation paths. This method works with all versions of gcc, - whereas relying on optimization may not be able to exclude them. 
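
(Editor's illustration, not part of this patch.) The 64-bit tcg_out_opc() above assembles the REX prefix as 0100WRXB, where W selects 64-bit operand size and R, X, B supply the high bit of the reg, index and r/m register numbers. A tiny cross-check of that bit layout:

    #include <assert.h>
    #include <stdint.h>

    /* Mirror of the rex computation above: 0x40 | W<<3 | R<<2 | X<<1 | B. */
    static uint8_t rex_byte(int w, int r, int x, int b)
    {
        return 0x40 | (w ? 8 : 0)
                    | ((r & 8) >> 1)
                    | ((x & 8) >> 2)
                    | ((b & 8) >> 3);
    }

    int main(void)
    {
        assert(rex_byte(1, 0, 0, 0) == 0x48);   /* plain REX.W, e.g. movq   */
        assert(rex_byte(0, 8, 0, 8) == 0x45);   /* r8..r15 in reg and r/m   */
        return 0;
    }
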
*/ -#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc) -#endif - -static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm) -{ - tcg_out_opc(s, opc, r, rm, 0); - tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); -} - -static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm) -{ - int tmp; - - if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) { - /* Three byte VEX prefix. */ - tcg_out8(s, 0xc4); - - /* VEX.m-mmmm */ - if (opc & P_EXT38) { - tmp = 2; - } else if (opc & P_EXT) { - tmp = 1; - } else { - tcg_abort(); - } - tmp |= 0x40; /* VEX.X */ - tmp |= (r & 8 ? 0 : 0x80); /* VEX.R */ - tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */ - tcg_out8(s, tmp); - - tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W */ - } else { - /* Two byte VEX prefix. */ - tcg_out8(s, 0xc5); - - tmp = (r & 8 ? 0 : 0x80); /* VEX.R */ - } - /* VEX.pp */ - if (opc & P_DATA16) { - tmp |= 1; /* 0x66 */ - } else if (opc & P_SIMDF3) { - tmp |= 2; /* 0xf3 */ - } else if (opc & P_SIMDF2) { - tmp |= 3; /* 0xf2 */ - } - tmp |= (~v & 15) << 3; /* VEX.vvvv */ - tcg_out8(s, tmp); - tcg_out8(s, opc); - tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); -} - -/* Output an opcode with a full "rm + (index<code_ptr + 5 + ~rm; - intptr_t disp = offset - pc; - if (disp == (int32_t)disp) { - tcg_out_opc(s, opc, r, 0, 0); - tcg_out8(s, (LOWREGMASK(r) << 3) | 5); - tcg_out32(s, disp); - return; - } - - /* Try for an absolute address encoding. This requires the - use of the MODRM+SIB encoding and is therefore larger than - rip-relative addressing. */ - if (offset == (int32_t)offset) { - tcg_out_opc(s, opc, r, 0, 0); - tcg_out8(s, (LOWREGMASK(r) << 3) | 4); - tcg_out8(s, (4 << 3) | 5); - tcg_out32(s, offset); - return; - } - - /* ??? The memory isn't directly addressable. */ - tcg_abort(); - } else { - /* Absolute address. */ - tcg_out_opc(s, opc, r, 0, 0); - tcg_out8(s, (r << 3) | 5); - tcg_out32(s, offset); - return; - } - } - - /* Find the length of the immediate addend. Note that the encoding - that would be used for (%ebp) indicates absolute addressing. */ - if (rm < 0) { - mod = 0, len = 4, rm = 5; - } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) { - mod = 0, len = 0; - } else if (offset == (int8_t)offset) { - mod = 0x40, len = 1; - } else { - mod = 0x80, len = 4; - } - - /* Use a single byte MODRM format if possible. Note that the encoding - that would be used for %esp is the escape to the two byte form. */ - if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) { - /* Single byte MODRM format. */ - tcg_out_opc(s, opc, r, rm, 0); - tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); - } else { - /* Two byte MODRM+SIB format. */ - - /* Note that the encoding that would place %esp into the index - field indicates no index register. In 64-bit mode, the REX.X - bit counts, so %r12 can be used as the index. */ - if (index < 0) { - index = 4; - } else { - assert(index != TCG_REG_ESP); - } - - tcg_out_opc(s, opc, r, rm, index); - tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4); - tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm)); - } - - if (len == 1) { - tcg_out8(s, offset); - } else if (len == 4) { - tcg_out32(s, offset); - } -} - -/* A simplification of the above with no index or shift. */ -static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, - int rm, intptr_t offset) -{ - tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset); -} - -/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. 
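
(Editor's illustration, not part of this patch.) The addressing-mode code above picks the shortest displacement form: none when the offset is zero (except for %ebp/%r13, whose no-displacement encoding means something else), one byte when the offset fits in int8_t, otherwise four bytes. A sketch of just that decision, with a made-up helper name:

    #include <assert.h>
    #include <stdint.h>

    /* Returns the MODRM "mod" field and, via *disp_len, how many
       displacement bytes follow. */
    static int modrm_mod(intptr_t offset, int rm_is_ebp, int *disp_len)
    {
        if (offset == 0 && !rm_is_ebp) {
            *disp_len = 0;
            return 0x00;
        } else if (offset == (int8_t)offset) {
            *disp_len = 1;
            return 0x40;
        } else {
            *disp_len = 4;
            return 0x80;
        }
    }

    int main(void)
    {
        int len;

        assert(modrm_mod(0, 0, &len) == 0x00 && len == 0);
        assert(modrm_mod(-8, 0, &len) == 0x40 && len == 1);
        assert(modrm_mod(0, 1, &len) == 0x40 && len == 1); /* (%ebp) needs disp8 0 */
        assert(modrm_mod(4096, 0, &len) == 0x80 && len == 4);
        return 0;
    }
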
*/ -static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src) -{ - /* Propagate an opcode prefix, such as P_REXW. */ - int ext = subop & ~0x7; - subop &= 0x7; - - tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src); -} - -static inline void tcg_out_mov(TCGContext *s, TCGType type, - TCGReg ret, TCGReg arg) -{ - if (arg != ret) { - int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0); - tcg_out_modrm(s, opc, ret, arg); - } -} - -static void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg ret, tcg_target_long arg) -{ - tcg_target_long diff; - - if (arg == 0) { - tgen_arithr(s, ARITH_XOR, ret, ret); - return; - } - if (arg == (uint32_t)arg || type == TCG_TYPE_I32) { - tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0); - tcg_out32(s, arg); - return; - } - if (arg == (int32_t)arg) { - tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret); - tcg_out32(s, arg); - return; - } - - /* Try a 7 byte pc-relative lea before the 10 byte movq. */ - diff = arg - ((uintptr_t)s->code_ptr + 7); - if (diff == (int32_t)diff) { - tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0); - tcg_out8(s, (LOWREGMASK(ret) << 3) | 5); - tcg_out32(s, diff); - return; - } - - tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0); - tcg_out64(s, arg); -} - -static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) -{ - if (val == (int8_t)val) { - tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0); - tcg_out8(s, val); - } else if (val == (int32_t)val) { - tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0); - tcg_out32(s, val); - } else { - tcg_abort(); - } -} - -static inline void tcg_out_push(TCGContext *s, int reg) -{ - tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0); -} - -static inline void tcg_out_pop(TCGContext *s, int reg) -{ - tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0); -} - -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, - TCGReg arg1, intptr_t arg2) -{ - int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0); - tcg_out_modrm_offset(s, opc, ret, arg1, arg2); -} - -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0); - tcg_out_modrm_offset(s, opc, arg, arg1, arg2); -} - -static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base, - tcg_target_long ofs, tcg_target_long val) -{ - int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0); - tcg_out_modrm_offset(s, opc, 0, base, ofs); - tcg_out32(s, val); -} - -static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count) -{ - /* Propagate an opcode prefix, such as P_DATA16. 
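
(Editor's illustration, not part of this patch.) tcg_out_movi() above picks among several encodings for a 64-bit constant; apart from the opportunistic pc-relative LEA, the choice depends only on how the value fits. A quick classifier mirroring those tests:

    #include <inttypes.h>
    #include <stdio.h>

    static const char *movi_strategy(int64_t v)
    {
        if (v == 0) {
            return "xor reg,reg";
        } else if (v == (uint32_t)v) {
            return "movl $imm32, reg   (zero-extends)";
        } else if (v == (int32_t)v) {
            return "movq $simm32, reg  (sign-extends)";
        }
        return "movabsq $imm64, reg";
    }

    int main(void)
    {
        int64_t tests[] = { 0, 1, 0x80000000LL, -1, 0x123456789LL };

        for (unsigned i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
            printf("%#18" PRIx64 " -> %s\n",
                   (uint64_t)tests[i], movi_strategy(tests[i]));
        }
        return 0;
    }
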
*/ - int ext = subopc & ~0x7; - subopc &= 0x7; - - if (count == 1) { - tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg); - } else { - tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg); - tcg_out8(s, count); - } -} - -static inline void tcg_out_bswap32(TCGContext *s, int reg) -{ - tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0); -} - -static inline void tcg_out_rolw_8(TCGContext *s, int reg) -{ - tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8); -} - -static inline void tcg_out_ext8u(TCGContext *s, int dest, int src) -{ - /* movzbl */ - assert(src < 4 || TCG_TARGET_REG_BITS == 64); - tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src); -} - -static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw) -{ - /* movsbl */ - assert(src < 4 || TCG_TARGET_REG_BITS == 64); - tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src); -} - -static inline void tcg_out_ext16u(TCGContext *s, int dest, int src) -{ - /* movzwl */ - tcg_out_modrm(s, OPC_MOVZWL, dest, src); -} - -static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw) -{ - /* movsw[lq] */ - tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src); -} - -static inline void tcg_out_ext32u(TCGContext *s, int dest, int src) -{ - /* 32-bit mov zero extends. */ - tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src); -} - -static inline void tcg_out_ext32s(TCGContext *s, int dest, int src) -{ - tcg_out_modrm(s, OPC_MOVSLQ, dest, src); -} - -static inline void tcg_out_bswap64(TCGContext *s, int reg) -{ - tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0); -} - -static void tgen_arithi(TCGContext *s, int c, int r0, - tcg_target_long val, int cf) -{ - int rexw = 0; - - if (TCG_TARGET_REG_BITS == 64) { - rexw = c & -8; - c &= 7; - } - - /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce - partial flags update stalls on Pentium4 and are not recommended - by current Intel optimization manuals. */ - if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) { - int is_inc = (c == ARITH_ADD) ^ (val < 0); - if (TCG_TARGET_REG_BITS == 64) { - /* The single-byte increment encodings are re-tasked as the - REX prefixes. Use the MODRM encoding. */ - tcg_out_modrm(s, OPC_GRP5 + rexw, - (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0); - } else { - tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0); - } - return; - } - - if (c == ARITH_AND) { - if (TCG_TARGET_REG_BITS == 64) { - if (val == 0xffffffffu) { - tcg_out_ext32u(s, r0, r0); - return; - } - if (val == (uint32_t)val) { - /* AND with no high bits set can use a 32-bit operation. */ - rexw = 0; - } - } - if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) { - tcg_out_ext8u(s, r0, r0); - return; - } - if (val == 0xffffu) { - tcg_out_ext16u(s, r0, r0); - return; - } - } - - if (val == (int8_t)val) { - tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0); - tcg_out8(s, val); - return; - } - if (rexw == 0 || val == (int32_t)val) { - tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0); - tcg_out32(s, val); - return; - } - - tcg_abort(); -} - -static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) -{ - if (val != 0) { - tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0); - } -} - -/* Use SMALL != 0 to force a short forward branch. 
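
(Editor's illustration, not part of this patch.) The ARITH_AND special cases in tgen_arithi() above rely on masking being the same operation as zero-extension, so the shorter movz/movl forms can stand in for an and-with-immediate:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t r = 0xdeadbeef;
        uint64_t q = 0x123456789abcdef0ULL;

        assert((r & 0xffu)        == (uint8_t)r);    /* and $0xff   -> movzbl */
        assert((r & 0xffffu)      == (uint16_t)r);   /* and $0xffff -> movzwl */
        assert((q & 0xffffffffu)  == (uint32_t)q);   /* 32-bit mov zero-extends */
        return 0;
    }
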
*/ -static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small) -{ - int32_t val, val1; - - if (l->has_value) { - val = tcg_pcrel_diff(s, l->u.value_ptr); - val1 = val - 2; - if ((int8_t)val1 == val1) { - if (opc == -1) { - tcg_out8(s, OPC_JMP_short); - } else { - tcg_out8(s, OPC_JCC_short + opc); - } - tcg_out8(s, val1); - } else { - if (small) { - tcg_abort(); - } - if (opc == -1) { - tcg_out8(s, OPC_JMP_long); - tcg_out32(s, val - 5); - } else { - tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); - tcg_out32(s, val - 6); - } - } - } else if (small) { - if (opc == -1) { - tcg_out8(s, OPC_JMP_short); - } else { - tcg_out8(s, OPC_JCC_short + opc); - } - tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1); - s->code_ptr += 1; - } else { - if (opc == -1) { - tcg_out8(s, OPC_JMP_long); - } else { - tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); - } - tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4); - s->code_ptr += 4; - } -} - -static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2, - int const_arg2, int rexw) -{ - if (const_arg2) { - if (arg2 == 0) { - /* test r, r */ - tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1); - } else { - tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0); - } - } else { - tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2); - } -} - -static void tcg_out_brcond32(TCGContext *s, TCGCond cond, - TCGArg arg1, TCGArg arg2, int const_arg2, - TCGLabel *label, int small) -{ - tcg_out_cmp(s, arg1, arg2, const_arg2, 0); - tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small); -} - -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_brcond64(TCGContext *s, TCGCond cond, - TCGArg arg1, TCGArg arg2, int const_arg2, - TCGLabel *label, int small) -{ - tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); - tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small); -} -#else -/* XXX: we implement it at the target level to avoid having to - handle cross basic blocks temporaries */ -static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, - const int *const_args, int small) -{ - TCGLabel *label_next = gen_new_label(); - TCGLabel *label_this = arg_label(args[5]); - - switch(args[4]) { - case TCG_COND_EQ: - tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], - label_next, 1); - tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3], - label_this, small); - break; - case TCG_COND_NE: - tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], - label_this, small); - tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3], - label_this, small); - break; - case TCG_COND_LT: - tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], - label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], - label_this, small); - break; - case TCG_COND_LE: - tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], - label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], - label_this, small); - break; - case TCG_COND_GT: - tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], - label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], - label_this, small); - break; - case TCG_COND_GE: - tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], - label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], - label_this, small); - break; - case 
TCG_COND_LTU: - tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], - label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], - label_this, small); - break; - case TCG_COND_LEU: - tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], - label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], - label_this, small); - break; - case TCG_COND_GTU: - tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], - label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], - label_this, small); - break; - case TCG_COND_GEU: - tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], - label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], - label_this, small); - break; - default: - tcg_abort(); - } - tcg_out_label(s, label_next, s->code_ptr); -} -#endif - -static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest, - TCGArg arg1, TCGArg arg2, int const_arg2) -{ - tcg_out_cmp(s, arg1, arg2, const_arg2, 0); - tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); - tcg_out_ext8u(s, dest, dest); -} - -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest, - TCGArg arg1, TCGArg arg2, int const_arg2) -{ - tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); - tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); - tcg_out_ext8u(s, dest, dest); -} -#else -static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, - const int *const_args) -{ - TCGArg new_args[6]; - TCGLabel *label_true, *label_over; - - memcpy(new_args, args+1, 5*sizeof(TCGArg)); - - if (args[0] == args[1] || args[0] == args[2] - || (!const_args[3] && args[0] == args[3]) - || (!const_args[4] && args[0] == args[4])) { - /* When the destination overlaps with one of the argument - registers, don't do anything tricky. */ - label_true = gen_new_label(); - label_over = gen_new_label(); - - new_args[5] = label_arg(label_true); - tcg_out_brcond2(s, new_args, const_args+1, 1); - - tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); - tcg_out_jxx(s, JCC_JMP, label_over, 1); - tcg_out_label(s, label_true, s->code_ptr); - - tcg_out_movi(s, TCG_TYPE_I32, args[0], 1); - tcg_out_label(s, label_over, s->code_ptr); - } else { - /* When the destination does not overlap one of the arguments, - clear the destination first, jump if cond false, and emit an - increment in the true case. This results in smaller code. 
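
(Editor's illustration, not part of this patch.) Both this brcond2 lowering and the setcond2 one below reduce a 64-bit comparison on a 32-bit host to: compare the high words with the requested signedness, and only when they are equal fall back to an unsigned compare of the low words. A brute-force check of that identity for signed "<":

    #include <assert.h>
    #include <stdint.h>

    static int lt64_by_halves(int32_t ah, uint32_t al, int32_t bh, uint32_t bl)
    {
        if (ah != bh) {
            return ah < bh;      /* high words decide, signed compare */
        }
        return al < bl;          /* tie-break on low words, unsigned compare */
    }

    int main(void)
    {
        int64_t samples[] = { 0, 1, -1, 0x7fffffffLL, 0x80000000LL,
                              -0x80000000LL, INT64_MIN, INT64_MAX, 0x100000000LL };
        int n = sizeof(samples) / sizeof(samples[0]);

        for (int i = 0; i < n; i++) {
            for (int j = 0; j < n; j++) {
                int64_t a = samples[i], b = samples[j];
                int split = lt64_by_halves((int32_t)((uint64_t)a >> 32), (uint32_t)a,
                                           (int32_t)((uint64_t)b >> 32), (uint32_t)b);
                assert(split == (a < b));
            }
        }
        return 0;
    }
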
*/ - - tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); - - label_over = gen_new_label(); - new_args[4] = tcg_invert_cond(new_args[4]); - new_args[5] = label_arg(label_over); - tcg_out_brcond2(s, new_args, const_args+1, 1); - - tgen_arithi(s, ARITH_ADD, args[0], 1, 0); - tcg_out_label(s, label_over, s->code_ptr); - } -} -#endif - -static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest, - TCGArg c1, TCGArg c2, int const_c2, - TCGArg v1) -{ - tcg_out_cmp(s, c1, c2, const_c2, 0); - if (have_cmov) { - tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1); - } else { - TCGLabel *over = gen_new_label(); - tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1); - tcg_out_mov(s, TCG_TYPE_I32, dest, v1); - tcg_out_label(s, over, s->code_ptr); - } -} - -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest, - TCGArg c1, TCGArg c2, int const_c2, - TCGArg v1) -{ - tcg_out_cmp(s, c1, c2, const_c2, P_REXW); - tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1); -} -#endif - -static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest) -{ - intptr_t disp = tcg_pcrel_diff(s, dest) - 5; - - if (disp == (int32_t)disp) { - tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0); - tcg_out32(s, disp); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest); - tcg_out_modrm(s, OPC_GRP5, - call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10); - } -} - -static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) -{ - tcg_out_branch(s, 1, dest); -} - -static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest) -{ - tcg_out_branch(s, 0, dest); -} - -#if defined(CONFIG_SOFTMMU) -/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, - * int mmu_idx, uintptr_t ra) - */ -static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, -}; - -/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, - * uintxx_t val, int mmu_idx, uintptr_t ra) - */ -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, -}; - -/* Perform the TLB load and compare. - - Inputs: - ADDRLO and ADDRHI contain the low and high part of the address. - - MEM_INDEX and S_BITS are the memory context and log2 size of the load. - - WHICH is the offset into the CPUTLBEntry structure of the slot to read. - This should be offsetof addr_read or addr_write. - - Outputs: - LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses) - positions of the displacements of forward jumps to the TLB miss case. - - Second argument register is loaded with the low part of the address. - In the TLB hit case, it has been adjusted as indicated by the TLB - and so is a host address. In the TLB miss case, it continues to - hold a guest address. - - First argument register is clobbered. 
*/ - -static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, - int mem_index, TCGMemOp opc, - tcg_insn_unit **label_ptr, int which) -{ - const TCGReg r0 = TCG_REG_L0; - const TCGReg r1 = TCG_REG_L1; - TCGType ttype = TCG_TYPE_I32; - TCGType tlbtype = TCG_TYPE_I32; - int trexw = 0, hrexw = 0, tlbrexw = 0; - int s_mask = (1 << (opc & MO_SIZE)) - 1; - bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0; - - if (TCG_TARGET_REG_BITS == 64) { - if (TARGET_LONG_BITS == 64) { - ttype = TCG_TYPE_I64; - trexw = P_REXW; - } - if (TCG_TYPE_PTR == TCG_TYPE_I64) { - hrexw = P_REXW; - if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) { - tlbtype = TCG_TYPE_I64; - tlbrexw = P_REXW; - } - } - } - - tcg_out_mov(s, tlbtype, r0, addrlo); - if (aligned) { - tcg_out_mov(s, ttype, r1, addrlo); - } else { - /* For unaligned access check that we don't cross pages using - the page address of the last byte. */ - tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask); - } - - tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - - tgen_arithi(s, ARITH_AND + trexw, r1, - TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0); - tgen_arithi(s, ARITH_AND + tlbrexw, r0, - (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); - - tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0, - offsetof(CPUArchState, tlb_table[mem_index][0]) - + which); - - /* cmp 0(r0), r1 */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0); - - /* Prepare for both the fast path add of the tlb addend, and the slow - path function argument setup. There are two cases worth note: - For 32-bit guest and x86_64 host, MOVL zero-extends the guest address - before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ - copies the entire guest address for the slow path, while truncation - for the 32-bit host happens with the fastpath ADDL below. */ - tcg_out_mov(s, ttype, r1, addrlo); - - /* jne slow_path */ - tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); - label_ptr[0] = s->code_ptr; - s->code_ptr += 4; - - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - /* cmp 4(r0), addrhi */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4); - - /* jne slow_path */ - tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); - label_ptr[1] = s->code_ptr; - s->code_ptr += 4; - } - - /* TLB Hit. 
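
(Editor's illustration, not part of this patch.) The code emitted above performs, inline, roughly this lookup: hash the guest address into a TLB index, reduce the address to a page tag (taken from the last byte of the access so page-crossing accesses miss), compare it against the cached tag, and on a hit add the per-entry host/guest delta. A schematic C model with made-up sizes and structure names; alignment enforcement and the split 32-bit address case are omitted:

    #include <assert.h>
    #include <stdint.h>

    #define PAGE_BITS 12
    #define PAGE_MASK (~(uint64_t)((1u << PAGE_BITS) - 1))
    #define TLB_BITS  8
    #define TLB_SIZE  (1u << TLB_BITS)

    typedef struct {
        uint64_t  addr_read;   /* page tag of the cached translation */
        uintptr_t addend;      /* guest address -> host address delta */
    } TLBEntryModel;

    /* Returns a host pointer on a hit, NULL when the slow-path helper
       would have to be called instead. */
    static void *tlb_probe(TLBEntryModel *tlb, uint64_t addr, unsigned size)
    {
        unsigned idx = (addr >> PAGE_BITS) & (TLB_SIZE - 1);
        uint64_t tag = (addr + size - 1) & PAGE_MASK;   /* last byte's page */

        if (tlb[idx].addr_read != tag) {
            return NULL;
        }
        return (void *)(uintptr_t)(addr + tlb[idx].addend);
    }

    int main(void)
    {
        static uint8_t backing[2 * (1u << PAGE_BITS)];
        TLBEntryModel tlb[TLB_SIZE] = { 0 };
        uint64_t guest = 0x40001000;
        unsigned idx = (guest >> PAGE_BITS) & (TLB_SIZE - 1);

        tlb[idx].addr_read = guest & PAGE_MASK;
        tlb[idx].addend = (uintptr_t)backing - (uintptr_t)(guest & PAGE_MASK);

        assert(tlb_probe(tlb, guest + 8, 4) == (void *)(backing + 8));
        assert(tlb_probe(tlb, guest + 0xffe, 4) == NULL);  /* crosses the page */
        return 0;
    }
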
*/ - - /* add addend(r0), r1 */ - tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0, - offsetof(CPUTLBEntry, addend) - which); -} - -/* - * Record the context of a call to the out of line helper code for the slow path - * for a load or store, so that we can later generate the correct helper code - */ -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addrhi, - tcg_insn_unit *raddr, - tcg_insn_unit **label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->datalo_reg = datalo; - label->datahi_reg = datahi; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - label->raddr = raddr; - label->label_ptr[0] = label_ptr[0]; - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - label->label_ptr[1] = label_ptr[1]; - } -} - -/* - * Generate code for the slow path for a load at the end of block - */ -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) -{ - TCGMemOpIdx oi = l->oi; - TCGMemOp opc = get_memop(oi); - TCGReg data_reg; - tcg_insn_unit **label_ptr = &l->label_ptr[0]; - - /* resolve label address */ - tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); - } - - if (TCG_TARGET_REG_BITS == 32) { - int ofs = 0; - - tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs); - ofs += 4; - - tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs); - ofs += 4; - - if (TARGET_LONG_BITS == 64) { - tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs); - ofs += 4; - } - - tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi); - ofs += 4; - - tcg_out_sti(s, TCG_TYPE_PTR, TCG_REG_ESP, ofs, (uintptr_t)l->raddr); - } else { - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); - /* The second argument is already loaded with addrlo. */ - tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi); - tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3], - (uintptr_t)l->raddr); - } - - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); - - data_reg = l->datalo_reg; - switch (opc & MO_SSIZE) { - case MO_SB: - tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW); - break; - case MO_SW: - tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW); - break; -#if TCG_TARGET_REG_BITS == 64 - case MO_SL: - tcg_out_ext32s(s, data_reg, TCG_REG_EAX); - break; -#endif - case MO_UB: - case MO_UW: - /* Note that the helpers have zero-extended to tcg_target_long. 
*/ - case MO_UL: - tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); - break; - case MO_Q: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX); - } else if (data_reg == TCG_REG_EDX) { - /* xchg %edx, %eax */ - tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0); - tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX); - } else { - tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); - tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX); - } - break; - default: - tcg_abort(); - } - - /* Jump to the code corresponding to next IR of qemu_st */ - tcg_out_jmp(s, l->raddr); -} - -/* - * Generate code for the slow path for a store at the end of block - */ -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) -{ - TCGMemOpIdx oi = l->oi; - TCGMemOp opc = get_memop(oi); - TCGMemOp s_bits = opc & MO_SIZE; - tcg_insn_unit **label_ptr = &l->label_ptr[0]; - TCGReg retaddr; - - /* resolve label address */ - tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); - } - - if (TCG_TARGET_REG_BITS == 32) { - int ofs = 0; - - tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs); - ofs += 4; - - tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs); - ofs += 4; - - if (TARGET_LONG_BITS == 64) { - tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs); - ofs += 4; - } - - tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs); - ofs += 4; - - if (s_bits == MO_64) { - tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs); - ofs += 4; - } - - tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi); - ofs += 4; - - retaddr = TCG_REG_EAX; - tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); - tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs); - } else { - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); - /* The second argument is already loaded with addrlo. */ - tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32), - tcg_target_call_iarg_regs[2], l->datalo_reg); - tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi); - - if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) { - retaddr = tcg_target_call_iarg_regs[4]; - tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); - } else { - retaddr = TCG_REG_RAX; - tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); - tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, - TCG_TARGET_CALL_STACK_OFFSET); - } - } - - /* "Tail call" to the helper, with the return address back inline. 
*/ - tcg_out_push(s, retaddr); - tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); -} -#elif defined(__x86_64__) && defined(__linux__) -# include -# include - -int arch_prctl(int code, unsigned long addr); - -static int guest_base_flags; -static inline void setup_guest_base_seg(void) -{ - if (arch_prctl(ARCH_SET_GS, guest_base) == 0) { - guest_base_flags = P_GS; - } -} -#else -# define guest_base_flags 0 -static inline void setup_guest_base_seg(void) { } -#endif /* SOFTMMU */ - -static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg base, int index, intptr_t ofs, - int seg, TCGMemOp memop) -{ - const TCGMemOp real_bswap = memop & MO_BSWAP; - TCGMemOp bswap = real_bswap; - int movop = OPC_MOVL_GvEv; - - if (have_movbe && real_bswap) { - bswap = 0; - movop = OPC_MOVBE_GyMy; - } - - switch (memop & MO_SSIZE) { - case MO_UB: - tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo, - base, index, 0, ofs); - break; - case MO_SB: - tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, - base, index, 0, ofs); - break; - case MO_UW: - tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo, - base, index, 0, ofs); - if (real_bswap) { - tcg_out_rolw_8(s, datalo); - } - break; - case MO_SW: - if (real_bswap) { - if (have_movbe) { - tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg, - datalo, base, index, 0, ofs); - } else { - tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo, - base, index, 0, ofs); - tcg_out_rolw_8(s, datalo); - } - tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo); - } else { - tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg, - datalo, base, index, 0, ofs); - } - break; - case MO_UL: - tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs); - if (bswap) { - tcg_out_bswap32(s, datalo); - } - break; -#if TCG_TARGET_REG_BITS == 64 - case MO_SL: - if (real_bswap) { - tcg_out_modrm_sib_offset(s, movop + seg, datalo, - base, index, 0, ofs); - if (bswap) { - tcg_out_bswap32(s, datalo); - } - tcg_out_ext32s(s, datalo, datalo); - } else { - tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo, - base, index, 0, ofs); - } - break; -#endif - case MO_Q: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo, - base, index, 0, ofs); - if (bswap) { - tcg_out_bswap64(s, datalo); - } - } else { - if (real_bswap) { - int t = datalo; - datalo = datahi; - datahi = t; - } - if (base != datalo) { - tcg_out_modrm_sib_offset(s, movop + seg, datalo, - base, index, 0, ofs); - tcg_out_modrm_sib_offset(s, movop + seg, datahi, - base, index, 0, ofs + 4); - } else { - tcg_out_modrm_sib_offset(s, movop + seg, datahi, - base, index, 0, ofs + 4); - tcg_out_modrm_sib_offset(s, movop + seg, datalo, - base, index, 0, ofs); - } - if (bswap) { - tcg_out_bswap32(s, datalo); - tcg_out_bswap32(s, datahi); - } - } - break; - default: - tcg_abort(); - } -} - -/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and - EAX. It will be useful once fixed registers globals are less - common. */ -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) -{ - TCGReg datalo, datahi, addrlo; - TCGReg addrhi __attribute__((unused)); - TCGMemOpIdx oi; - TCGMemOp opc; -#if defined(CONFIG_SOFTMMU) - int mem_index; - tcg_insn_unit *label_ptr[2]; -#endif - - datalo = *args++; - datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? 
*args++ : 0); - oi = *args++; - opc = get_memop(oi); - -#if defined(CONFIG_SOFTMMU) - mem_index = get_mmuidx(oi); - - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc, - label_ptr, offsetof(CPUTLBEntry, addr_read)); - - /* TLB Hit. */ - tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc); - - /* Record the current context of a load into ldst label */ - add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, - s->code_ptr, label_ptr); -#else - { - int32_t offset = guest_base; - TCGReg base = addrlo; - int index = -1; - int seg = 0; - - /* For a 32-bit guest, the high 32 bits may contain garbage. - We can do this with the ADDR32 prefix if we're not using - a guest base, or when using segmentation. Otherwise we - need to zero-extend manually. */ - if (guest_base == 0 || guest_base_flags) { - seg = guest_base_flags; - offset = 0; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - seg |= P_ADDR32; - } - } else if (TCG_TARGET_REG_BITS == 64) { - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_L0, base); - base = TCG_REG_L0; - } - if (offset != guest_base) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); - index = TCG_REG_L1; - offset = 0; - } - } - - tcg_out_qemu_ld_direct(s, datalo, datahi, - base, index, offset, seg, opc); - } -#endif -} - -static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg base, intptr_t ofs, int seg, - TCGMemOp memop) -{ - /* ??? Ideally we wouldn't need a scratch register. For user-only, - we could perform the bswap twice to restore the original value - instead of moving to the scratch. But as it is, the L constraint - means that TCG_REG_L0 is definitely free here. */ - const TCGReg scratch = TCG_REG_L0; - const TCGMemOp real_bswap = memop & MO_BSWAP; - TCGMemOp bswap = real_bswap; - int movop = OPC_MOVL_EvGv; - - if (have_movbe && real_bswap) { - bswap = 0; - movop = OPC_MOVBE_MyGy; - } - - switch (memop & MO_SIZE) { - case MO_8: - /* In 32-bit mode, 8-bit stores can only happen from [abcd]x. - Use the scratch register if necessary. 
*/ - if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) { - tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); - datalo = scratch; - } - tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg, - datalo, base, ofs); - break; - case MO_16: - if (bswap) { - tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); - tcg_out_rolw_8(s, scratch); - datalo = scratch; - } - tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs); - break; - case MO_32: - if (bswap) { - tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); - tcg_out_bswap32(s, scratch); - datalo = scratch; - } - tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); - break; - case MO_64: - if (TCG_TARGET_REG_BITS == 64) { - if (bswap) { - tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo); - tcg_out_bswap64(s, scratch); - datalo = scratch; - } - tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs); - } else if (bswap) { - tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi); - tcg_out_bswap32(s, scratch); - tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs); - tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); - tcg_out_bswap32(s, scratch); - tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4); - } else { - if (real_bswap) { - int t = datalo; - datalo = datahi; - datahi = t; - } - tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); - tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4); - } - break; - default: - tcg_abort(); - } -} - -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) -{ - TCGReg datalo, datahi, addrlo; - TCGReg addrhi __attribute__((unused)); - TCGMemOpIdx oi; - TCGMemOp opc; -#if defined(CONFIG_SOFTMMU) - int mem_index; - tcg_insn_unit *label_ptr[2]; -#endif - - datalo = *args++; - datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0); - oi = *args++; - opc = get_memop(oi); - -#if defined(CONFIG_SOFTMMU) - mem_index = get_mmuidx(oi); - - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc, - label_ptr, offsetof(CPUTLBEntry, addr_write)); - - /* TLB Hit. */ - tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); - - /* Record the current context of a store into ldst label */ - add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, - s->code_ptr, label_ptr); -#else - { - int32_t offset = guest_base; - TCGReg base = addrlo; - int seg = 0; - - /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. */ - if (guest_base == 0 || guest_base_flags) { - seg = guest_base_flags; - offset = 0; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - seg |= P_ADDR32; - } - } else if (TCG_TARGET_REG_BITS == 64) { - /* ??? Note that we can't use the same SIB addressing scheme - as for loads, since we require L0 free for bswap. 
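
(Editor's illustration, not part of this patch.) For a byte-swapped MO_64 store on a 32-bit host, the code above swaps which half is written to which word and byte-swaps each half through the scratch register; the two steps together amount to one full 64-bit byte swap. A quick check of that claim:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t bswap32(uint32_t x)
    {
        return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
    }

    int main(void)
    {
        uint64_t v = 0x0102030405060708ULL;
        uint32_t lo = (uint32_t)v, hi = (uint32_t)(v >> 32);

        /* Store bswap32(hi) at offset 0 and bswap32(lo) at offset 4:
           read back as a little-endian 64-bit value, that is bswap64(v). */
        uint64_t stored = ((uint64_t)bswap32(lo) << 32) | bswap32(hi);

        assert(stored == 0x0807060504030201ULL);
        return 0;
    }
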
*/ - if (offset != guest_base) { - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_L0, base); - base = TCG_REG_L0; - } - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); - tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base); - base = TCG_REG_L1; - offset = 0; - } else if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_L1, base); - base = TCG_REG_L1; - } - } - - tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc); - } -#endif -} - -static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, - const TCGArg *args, const int *const_args) -{ - int c, vexop, rexw = 0; - -#if TCG_TARGET_REG_BITS == 64 -# define OP_32_64(x) \ - case glue(glue(INDEX_op_, x), _i64): \ - rexw = P_REXW; /* FALLTHRU */ \ - case glue(glue(INDEX_op_, x), _i32) -#else -# define OP_32_64(x) \ - case glue(glue(INDEX_op_, x), _i32) -#endif - - switch(opc) { - case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]); - tcg_out_jmp(s, tb_ret_addr); - break; - case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { - /* direct jump method */ - tcg_out8(s, OPC_JMP_long); /* jmp im */ - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); - tcg_out32(s, 0); - } else { - /* indirect jump method */ - tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1, - (intptr_t)(s->tb_next + args[0])); - } - s->tb_next_offset[args[0]] = tcg_current_code_size(s); - break; - case INDEX_op_br: - tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0); - break; - OP_32_64(ld8u): - /* Note that we can ignore REXW for the zero-extend to 64-bit. */ - tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]); - break; - OP_32_64(ld8s): - tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]); - break; - OP_32_64(ld16u): - /* Note that we can ignore REXW for the zero-extend to 64-bit. */ - tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]); - break; - OP_32_64(ld16s): - tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]); - break; -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_ld32u_i64: -#endif - case INDEX_op_ld_i32: - tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]); - break; - - OP_32_64(st8): - if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVB_EvIz, - 0, args[1], args[2]); - tcg_out8(s, args[0]); - } else { - tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, - args[0], args[1], args[2]); - } - break; - OP_32_64(st16): - if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, - 0, args[1], args[2]); - tcg_out16(s, args[0]); - } else { - tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, - args[0], args[1], args[2]); - } - break; -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_st32_i64: -#endif - case INDEX_op_st_i32: - if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]); - tcg_out32(s, args[0]); - } else { - tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); - } - break; - - OP_32_64(add): - /* For 3-operand addition, use LEA. */ - if (args[0] != args[1]) { - TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0; - - if (const_args[2]) { - c3 = a2, a2 = -1; - } else if (a0 == a2) { - /* Watch out for dest = src + dest, since we've removed - the matching constraint on the add. 
*/ - tgen_arithr(s, ARITH_ADD + rexw, a0, a1); - break; - } - - tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3); - break; - } - c = ARITH_ADD; - goto gen_arith; - OP_32_64(sub): - c = ARITH_SUB; - goto gen_arith; - OP_32_64(and): - c = ARITH_AND; - goto gen_arith; - OP_32_64(or): - c = ARITH_OR; - goto gen_arith; - OP_32_64(xor): - c = ARITH_XOR; - goto gen_arith; - gen_arith: - if (const_args[2]) { - tgen_arithi(s, c + rexw, args[0], args[2], 0); - } else { - tgen_arithr(s, c + rexw, args[0], args[2]); - } - break; - - OP_32_64(andc): - if (const_args[2]) { - tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, - args[0], args[1]); - tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0); - } else { - tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]); - } - break; - - OP_32_64(mul): - if (const_args[2]) { - int32_t val; - val = args[2]; - if (val == (int8_t)val) { - tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]); - tcg_out8(s, val); - } else { - tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]); - tcg_out32(s, val); - } - } else { - tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]); - } - break; - - OP_32_64(div2): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]); - break; - OP_32_64(divu2): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]); - break; - - OP_32_64(shl): - c = SHIFT_SHL; - vexop = OPC_SHLX; - goto gen_shift_maybe_vex; - OP_32_64(shr): - c = SHIFT_SHR; - vexop = OPC_SHRX; - goto gen_shift_maybe_vex; - OP_32_64(sar): - c = SHIFT_SAR; - vexop = OPC_SARX; - goto gen_shift_maybe_vex; - OP_32_64(rotl): - c = SHIFT_ROL; - goto gen_shift; - OP_32_64(rotr): - c = SHIFT_ROR; - goto gen_shift; - gen_shift_maybe_vex: - if (have_bmi2 && !const_args[2]) { - tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]); - break; - } - /* FALLTHRU */ - gen_shift: - if (const_args[2]) { - tcg_out_shifti(s, c + rexw, args[0], args[2]); - } else { - tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]); - } - break; - - case INDEX_op_brcond_i32: - tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1], - arg_label(args[3]), 0); - break; - case INDEX_op_setcond_i32: - tcg_out_setcond32(s, args[3], args[0], args[1], - args[2], const_args[2]); - break; - case INDEX_op_movcond_i32: - tcg_out_movcond32(s, args[5], args[0], args[1], - args[2], const_args[2], args[3]); - break; - - OP_32_64(bswap16): - tcg_out_rolw_8(s, args[0]); - break; - OP_32_64(bswap32): - tcg_out_bswap32(s, args[0]); - break; - - OP_32_64(neg): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]); - break; - OP_32_64(not): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]); - break; - - OP_32_64(ext8s): - tcg_out_ext8s(s, args[0], args[1], rexw); - break; - OP_32_64(ext16s): - tcg_out_ext16s(s, args[0], args[1], rexw); - break; - OP_32_64(ext8u): - tcg_out_ext8u(s, args[0], args[1]); - break; - OP_32_64(ext16u): - tcg_out_ext16u(s, args[0], args[1]); - break; - - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args, 0); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args, 1); - break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args, 0); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args, 1); - break; - - OP_32_64(mulu2): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]); - break; - OP_32_64(muls2): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]); - break; - OP_32_64(add2): - if (const_args[4]) { - tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1); - } else { - tgen_arithr(s, ARITH_ADD + 
rexw, args[0], args[4]); - } - if (const_args[5]) { - tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1); - } else { - tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]); - } - break; - OP_32_64(sub2): - if (const_args[4]) { - tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1); - } else { - tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]); - } - if (const_args[5]) { - tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1); - } else { - tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]); - } - break; - -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args, const_args, 0); - break; - case INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args, const_args); - break; -#else /* TCG_TARGET_REG_BITS == 64 */ - case INDEX_op_ld32s_i64: - tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]); - break; - case INDEX_op_ld_i64: - tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); - break; - case INDEX_op_st_i64: - if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, - 0, args[1], args[2]); - tcg_out32(s, args[0]); - } else { - tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); - } - break; - - case INDEX_op_brcond_i64: - tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1], - arg_label(args[3]), 0); - break; - case INDEX_op_setcond_i64: - tcg_out_setcond64(s, args[3], args[0], args[1], - args[2], const_args[2]); - break; - case INDEX_op_movcond_i64: - tcg_out_movcond64(s, args[5], args[0], args[1], - args[2], const_args[2], args[3]); - break; - - case INDEX_op_bswap64_i64: - tcg_out_bswap64(s, args[0]); - break; - case INDEX_op_extu_i32_i64: - case INDEX_op_ext32u_i64: - tcg_out_ext32u(s, args[0], args[1]); - break; - case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: - tcg_out_ext32s(s, args[0], args[1]); - break; -#endif - - OP_32_64(deposit): - if (args[3] == 0 && args[4] == 8) { - /* load bits 0..7 */ - tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, - args[2], args[0]); - } else if (args[3] == 8 && args[4] == 8) { - /* load bits 8..15 */ - tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4); - } else if (args[3] == 0 && args[4] == 16) { - /* load bits 0..15 */ - tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]); - } else { - tcg_abort(); - } - break; - - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ - case INDEX_op_movi_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. 
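
The add2/sub2 cases above lower a double-word operation to an ADD/ADC (or SUB/SBB) pair. A portable sketch of what that pair computes, assuming 32-bit halves:

    #include <assert.h>
    #include <stdint.h>

    /* Double-word addition from 32-bit halves: the low ADD produces a carry
       that the high ADC consumes, which is what the emitted pair does. */
    static void add2_32(uint32_t *rl, uint32_t *rh,
                        uint32_t al, uint32_t ah, uint32_t bl, uint32_t bh)
    {
        uint32_t lo = al + bl;
        uint32_t carry = lo < al;        /* unsigned overflow of the low half */
        *rl = lo;
        *rh = ah + bh + carry;
    }

    int main(void)
    {
        uint32_t lo, hi;
        add2_32(&lo, &hi, 0xffffffffu, 0x1u, 0x1u, 0x0u);
        assert(lo == 0 && hi == 2);
        return 0;
    }
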
*/ - default: - tcg_abort(); - } - -#undef OP_32_64 -} - -static const TCGTargetOpDef x86_op_defs[] = { - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - { INDEX_op_br, { } }, - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_st8_i32, { "qi", "r" } }, - { INDEX_op_st16_i32, { "ri", "r" } }, - { INDEX_op_st_i32, { "ri", "r" } }, - - { INDEX_op_add_i32, { "r", "r", "ri" } }, - { INDEX_op_sub_i32, { "r", "0", "ri" } }, - { INDEX_op_mul_i32, { "r", "0", "ri" } }, - { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } }, - { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } }, - { INDEX_op_and_i32, { "r", "0", "ri" } }, - { INDEX_op_or_i32, { "r", "0", "ri" } }, - { INDEX_op_xor_i32, { "r", "0", "ri" } }, - { INDEX_op_andc_i32, { "r", "r", "ri" } }, - - { INDEX_op_shl_i32, { "r", "0", "Ci" } }, - { INDEX_op_shr_i32, { "r", "0", "Ci" } }, - { INDEX_op_sar_i32, { "r", "0", "Ci" } }, - { INDEX_op_rotl_i32, { "r", "0", "ci" } }, - { INDEX_op_rotr_i32, { "r", "0", "ci" } }, - - { INDEX_op_brcond_i32, { "r", "ri" } }, - - { INDEX_op_bswap16_i32, { "r", "0" } }, - { INDEX_op_bswap32_i32, { "r", "0" } }, - - { INDEX_op_neg_i32, { "r", "0" } }, - - { INDEX_op_not_i32, { "r", "0" } }, - - { INDEX_op_ext8s_i32, { "r", "q" } }, - { INDEX_op_ext16s_i32, { "r", "r" } }, - { INDEX_op_ext8u_i32, { "r", "q" } }, - { INDEX_op_ext16u_i32, { "r", "r" } }, - - { INDEX_op_setcond_i32, { "q", "r", "ri" } }, - - { INDEX_op_deposit_i32, { "Q", "0", "Q" } }, - { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } }, - - { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } }, - { INDEX_op_muls2_i32, { "a", "d", "a", "r" } }, - { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } }, - { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } }, - -#if TCG_TARGET_REG_BITS == 32 - { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, - { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, -#else - { INDEX_op_ld8u_i64, { "r", "r" } }, - { INDEX_op_ld8s_i64, { "r", "r" } }, - { INDEX_op_ld16u_i64, { "r", "r" } }, - { INDEX_op_ld16s_i64, { "r", "r" } }, - { INDEX_op_ld32u_i64, { "r", "r" } }, - { INDEX_op_ld32s_i64, { "r", "r" } }, - { INDEX_op_ld_i64, { "r", "r" } }, - { INDEX_op_st8_i64, { "ri", "r" } }, - { INDEX_op_st16_i64, { "ri", "r" } }, - { INDEX_op_st32_i64, { "ri", "r" } }, - { INDEX_op_st_i64, { "re", "r" } }, - - { INDEX_op_add_i64, { "r", "r", "re" } }, - { INDEX_op_mul_i64, { "r", "0", "re" } }, - { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } }, - { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } }, - { INDEX_op_sub_i64, { "r", "0", "re" } }, - { INDEX_op_and_i64, { "r", "0", "reZ" } }, - { INDEX_op_or_i64, { "r", "0", "re" } }, - { INDEX_op_xor_i64, { "r", "0", "re" } }, - { INDEX_op_andc_i64, { "r", "r", "rI" } }, - - { INDEX_op_shl_i64, { "r", "0", "Ci" } }, - { INDEX_op_shr_i64, { "r", "0", "Ci" } }, - { INDEX_op_sar_i64, { "r", "0", "Ci" } }, - { INDEX_op_rotl_i64, { "r", "0", "ci" } }, - { INDEX_op_rotr_i64, { "r", "0", "ci" } }, - - { INDEX_op_brcond_i64, { "r", "re" } }, - { INDEX_op_setcond_i64, { "r", "r", "re" } }, - - { INDEX_op_bswap16_i64, { "r", "0" } }, - { INDEX_op_bswap32_i64, { "r", "0" } }, - { INDEX_op_bswap64_i64, { "r", "0" } }, - { INDEX_op_neg_i64, { "r", "0" } }, - { INDEX_op_not_i64, { "r", "0" } }, - - { INDEX_op_ext8s_i64, { "r", "r" } }, - { INDEX_op_ext16s_i64, { "r", "r" } }, - { INDEX_op_ext32s_i64, { "r", "r" } }, - { 
INDEX_op_ext8u_i64, { "r", "r" } }, - { INDEX_op_ext16u_i64, { "r", "r" } }, - { INDEX_op_ext32u_i64, { "r", "r" } }, - - { INDEX_op_ext_i32_i64, { "r", "r" } }, - { INDEX_op_extu_i32_i64, { "r", "r" } }, - - { INDEX_op_deposit_i64, { "Q", "0", "Q" } }, - { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } }, - - { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } }, - { INDEX_op_muls2_i64, { "a", "d", "a", "r" } }, - { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } }, - { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } }, -#endif - -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_qemu_ld_i32, { "r", "L" } }, - { INDEX_op_qemu_st_i32, { "L", "L" } }, - { INDEX_op_qemu_ld_i64, { "r", "L" } }, - { INDEX_op_qemu_st_i64, { "L", "L" } }, -#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS - { INDEX_op_qemu_ld_i32, { "r", "L" } }, - { INDEX_op_qemu_st_i32, { "L", "L" } }, - { INDEX_op_qemu_ld_i64, { "r", "r", "L" } }, - { INDEX_op_qemu_st_i64, { "L", "L", "L" } }, -#else - { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, - { INDEX_op_qemu_st_i32, { "L", "L", "L" } }, - { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } }, - { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } }, -#endif - { -1 }, -}; - -static int tcg_target_callee_save_regs[] = { -#if TCG_TARGET_REG_BITS == 64 - TCG_REG_RBP, - TCG_REG_RBX, -#if defined(_WIN64) - TCG_REG_RDI, - TCG_REG_RSI, -#endif - TCG_REG_R12, - TCG_REG_R13, - TCG_REG_R14, /* Currently used for the global env. */ - TCG_REG_R15, -#else - TCG_REG_EBP, /* Currently used for the global env. */ - TCG_REG_EBX, - TCG_REG_ESI, - TCG_REG_EDI, -#endif -}; - -/* Compute frame size via macros, to share between tcg_target_qemu_prologue - and tcg_register_jit. */ - -#define PUSH_SIZE \ - ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \ - * (TCG_TARGET_REG_BITS / 8)) - -#define FRAME_SIZE \ - ((PUSH_SIZE \ - + TCG_STATIC_CALL_ARGS_SIZE \ - + CPU_TEMP_BUF_NLONGS * sizeof(long) \ - + TCG_TARGET_STACK_ALIGN - 1) \ - & ~(TCG_TARGET_STACK_ALIGN - 1)) - -/* Generate global QEMU prologue and epilogue code */ -static void tcg_target_qemu_prologue(TCGContext *s) -{ - int i, stack_addend; - - /* TB prologue */ - - /* Reserve some stack space, also for TCG temps. */ - stack_addend = FRAME_SIZE - PUSH_SIZE; - tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, - CPU_TEMP_BUF_NLONGS * sizeof(long)); - - /* Save all callee saved registers. */ - for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { - tcg_out_push(s, tcg_target_callee_save_regs[i]); - } - -#if TCG_TARGET_REG_BITS == 32 - tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, - (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4); - tcg_out_addi(s, TCG_REG_ESP, -stack_addend); - /* jmp *tb. */ - tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP, - (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 - + stack_addend); -#else - tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - tcg_out_addi(s, TCG_REG_ESP, -stack_addend); - /* jmp *tb. */ - tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); -#endif - - /* TB epilogue */ - tb_ret_addr = s->code_ptr; - - tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend); - - for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) { - tcg_out_pop(s, tcg_target_callee_save_regs[i]); - } - tcg_out_opc(s, OPC_RET, 0, 0, 0); - -#if !defined(CONFIG_SOFTMMU) - /* Try to set up a segment register to point to guest_base. 
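
The prologue above reserves FRAME_SIZE minus the space already consumed by the pushes, where FRAME_SIZE rounds the pushed registers plus the static call-argument area and the TCG temporary buffer up to the stack alignment. A small sketch of that align-up arithmetic, using illustrative stand-in values rather than the real configuration constants:

    #include <stdio.h>

    /* Align-up idiom used by the FRAME_SIZE macro: (x + a - 1) & ~(a - 1),
       valid for power-of-two alignments. */
    #define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
        int push_size = (1 + 6) * 8;   /* e.g. return address + 6 callee-saved regs */
        int call_args = 128;           /* stand-in for TCG_STATIC_CALL_ARGS_SIZE */
        int temps     = 128;           /* stand-in for CPU_TEMP_BUF_NLONGS * sizeof(long) */
        int frame     = ALIGN_UP(push_size + call_args + temps, 16);

        printf("raw %d -> aligned %d\n", push_size + call_args + temps, frame);
        return 0;
    }
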
*/ - if (guest_base) { - setup_guest_base_seg(); - } -#endif -} - -static void tcg_target_init(TCGContext *s) -{ -#ifdef CONFIG_CPUID_H - unsigned a, b, c, d; - int max = __get_cpuid_max(0, 0); - - if (max >= 1) { - __cpuid(1, a, b, c, d); -#ifndef have_cmov - /* For 32-bit, 99% certainty that we're running on hardware that - supports cmov, but we still need to check. In case cmov is not - available, we'll use a small forward branch. */ - have_cmov = (d & bit_CMOV) != 0; -#endif -#ifndef have_movbe - /* MOVBE is only available on Intel Atom and Haswell CPUs, so we - need to probe for it. */ - have_movbe = (c & bit_MOVBE) != 0; -#endif - } - - if (max >= 7) { - /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */ - __cpuid_count(7, 0, a, b, c, d); -#ifdef bit_BMI - have_bmi1 = (b & bit_BMI) != 0; -#endif -#ifndef have_bmi2 - have_bmi2 = (b & bit_BMI2) != 0; -#endif - } -#endif - - if (TCG_TARGET_REG_BITS == 64) { - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff); - } else { - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff); - } - - tcg_regset_clear(tcg_target_call_clobber_regs); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX); - if (TCG_TARGET_REG_BITS == 64) { -#if !defined(_WIN64) - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI); -#endif - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); - } - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); - - tcg_add_target_add_op_defs(x86_op_defs); -} - -typedef struct { - DebugFrameHeader h; - uint8_t fde_def_cfa[4]; - uint8_t fde_reg_ofs[14]; -} DebugFrame; - -/* We're expecting a 2 byte uleb128 encoded value. */ -QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); - -#if !defined(__ELF__) - /* Host machine without ELF. */ -#elif TCG_TARGET_REG_BITS == 64 -#define ELF_HOST_MACHINE EM_X86_64 -static const DebugFrame debug_frame = { - .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .h.cie.id = -1, - .h.cie.version = 1, - .h.cie.code_align = 1, - .h.cie.data_align = 0x78, /* sleb128 -8 */ - .h.cie.return_column = 16, - - /* Total FDE size does not include the "len" member. */ - .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), - - .fde_def_cfa = { - 12, 7, /* DW_CFA_def_cfa %rsp, ... */ - (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ - (FRAME_SIZE >> 7) - }, - .fde_reg_ofs = { - 0x90, 1, /* DW_CFA_offset, %rip, -8 */ - /* The following ordering must match tcg_target_callee_save_regs. 
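
The fde_def_cfa bytes above hand-encode FRAME_SIZE as a two-byte uleb128 operand for DW_CFA_def_cfa, which is why the build asserts FRAME_SIZE < (1 << 14). A minimal sketch of that encoding (the example value is illustrative):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Two-byte ULEB128 as hand-rolled above: low 7 bits with the continuation
       bit set, then the next 7 bits.  Values must fit in 14 bits. */
    static void uleb128_2(unsigned value, uint8_t out[2])
    {
        assert(value < (1u << 14));
        out[0] = (value & 0x7f) | 0x80;
        out[1] = value >> 7;
    }

    int main(void)
    {
        uint8_t b[2];
        uleb128_2(392, b);                 /* an illustrative frame size */
        printf("%#x %#x\n", b[0], b[1]);   /* prints 0x88 0x3 */
        return 0;
    }
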
*/ - 0x86, 2, /* DW_CFA_offset, %rbp, -16 */ - 0x83, 3, /* DW_CFA_offset, %rbx, -24 */ - 0x8c, 4, /* DW_CFA_offset, %r12, -32 */ - 0x8d, 5, /* DW_CFA_offset, %r13, -40 */ - 0x8e, 6, /* DW_CFA_offset, %r14, -48 */ - 0x8f, 7, /* DW_CFA_offset, %r15, -56 */ - } -}; -#else -#define ELF_HOST_MACHINE EM_386 -static const DebugFrame debug_frame = { - .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .h.cie.id = -1, - .h.cie.version = 1, - .h.cie.code_align = 1, - .h.cie.data_align = 0x7c, /* sleb128 -4 */ - .h.cie.return_column = 8, - - /* Total FDE size does not include the "len" member. */ - .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), - - .fde_def_cfa = { - 12, 4, /* DW_CFA_def_cfa %esp, ... */ - (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ - (FRAME_SIZE >> 7) - }, - .fde_reg_ofs = { - 0x88, 1, /* DW_CFA_offset, %eip, -4 */ - /* The following ordering must match tcg_target_callee_save_regs. */ - 0x85, 2, /* DW_CFA_offset, %ebp, -8 */ - 0x83, 3, /* DW_CFA_offset, %ebx, -12 */ - 0x86, 4, /* DW_CFA_offset, %esi, -16 */ - 0x87, 5, /* DW_CFA_offset, %edi, -20 */ - } -}; -#endif - -#if defined(ELF_HOST_MACHINE) -void tcg_register_jit(void *buf, size_t buf_size) -{ - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); -} -#endif diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c new file mode 100644 index 0000000000..d90636cb4e --- /dev/null +++ b/tcg/i386/tcg-target.inc.c @@ -0,0 +1,2464 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#include "qemu/osdep.h"
+#include "tcg-be-ldst.h"
+
+#ifndef NDEBUG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+#if TCG_TARGET_REG_BITS == 64
+    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
+    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+#else
+    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
+#endif
+};
+#endif
+
+static const int tcg_target_reg_alloc_order[] = {
+#if TCG_TARGET_REG_BITS == 64
+    TCG_REG_RBP,
+    TCG_REG_RBX,
+    TCG_REG_R12,
+    TCG_REG_R13,
+    TCG_REG_R14,
+    TCG_REG_R15,
+    TCG_REG_R10,
+    TCG_REG_R11,
+    TCG_REG_R9,
+    TCG_REG_R8,
+    TCG_REG_RCX,
+    TCG_REG_RDX,
+    TCG_REG_RSI,
+    TCG_REG_RDI,
+    TCG_REG_RAX,
+#else
+    TCG_REG_EBX,
+    TCG_REG_ESI,
+    TCG_REG_EDI,
+    TCG_REG_EBP,
+    TCG_REG_ECX,
+    TCG_REG_EDX,
+    TCG_REG_EAX,
+#endif
+};
+
+static const int tcg_target_call_iarg_regs[] = {
+#if TCG_TARGET_REG_BITS == 64
+#if defined(_WIN64)
+    TCG_REG_RCX,
+    TCG_REG_RDX,
+#else
+    TCG_REG_RDI,
+    TCG_REG_RSI,
+    TCG_REG_RDX,
+    TCG_REG_RCX,
+#endif
+    TCG_REG_R8,
+    TCG_REG_R9,
+#else
+    /* 32 bit mode uses stack based calling convention (GCC default). */
+#endif
+};
+
+static const int tcg_target_call_oarg_regs[] = {
+    TCG_REG_EAX,
+#if TCG_TARGET_REG_BITS == 32
+    TCG_REG_EDX
+#endif
+};
+
+/* Constants we accept. */
+#define TCG_CT_CONST_S32 0x100
+#define TCG_CT_CONST_U32 0x200
+#define TCG_CT_CONST_I32 0x400
+
+/* Registers used with L constraint, which are the first argument
+   registers on x86_64, and two random call clobbered registers on
+   i386. */
+#if TCG_TARGET_REG_BITS == 64
+# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
+# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
+#else
+# define TCG_REG_L0 TCG_REG_EAX
+# define TCG_REG_L1 TCG_REG_EDX
+#endif
+
+/* The host compiler should supply <cpuid.h> to enable runtime features
+   detection, as we're not going to go so far as our own inline assembly.
+   If not available, default values will be assumed. */
+#if defined(CONFIG_CPUID_H)
+#include <cpuid.h>
+#endif
+
+/* For 32-bit, we are going to attempt to determine at runtime whether cmov
+   is available. */
+#if TCG_TARGET_REG_BITS == 64
+# define have_cmov 1
+#elif defined(CONFIG_CPUID_H) && defined(bit_CMOV)
+static bool have_cmov;
+#else
+# define have_cmov 0
+#endif
+
+/* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
+   going to attempt to determine at runtime whether movbe is available. */
+#if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
+static bool have_movbe;
+#else
+# define have_movbe 0
+#endif
+
+/* We need this symbol in tcg-target.h, and we can't properly conditionalize
+   it there.  Therefore we always define the variable.
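
The have_cmov/have_movbe flags above (and have_bmi1/have_bmi2 below) are filled in at startup from CPUID in tcg_target_init. A standalone sketch of the same probing, assuming the compiler provides GCC-style <cpuid.h>:

    #include <stdio.h>
    #include <cpuid.h>     /* GCC/clang helper header, as assumed by CONFIG_CPUID_H */

    int main(void)
    {
        unsigned a, b, c, d;
        int cmov = 0, movbe = 0, bmi2 = 0;

        if (__get_cpuid_max(0, 0) >= 1) {
            __cpuid(1, a, b, c, d);
    #ifdef bit_CMOV
            cmov = (d & bit_CMOV) != 0;
    #endif
    #ifdef bit_MOVBE
            movbe = (c & bit_MOVBE) != 0;
    #endif
        }
        if (__get_cpuid_max(0, 0) >= 7) {
            __cpuid_count(7, 0, a, b, c, d);
    #ifdef bit_BMI2
            bmi2 = (b & bit_BMI2) != 0;
    #endif
        }
        printf("cmov=%d movbe=%d bmi2=%d\n", cmov, movbe, bmi2);
        return 0;
    }
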
*/ +bool have_bmi1; + +#if defined(CONFIG_CPUID_H) && defined(bit_BMI2) +static bool have_bmi2; +#else +# define have_bmi2 0 +#endif + +static tcg_insn_unit *tb_ret_addr; + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + value += addend; + switch(type) { + case R_386_PC32: + value -= (uintptr_t)code_ptr; + if (value != (int32_t)value) { + tcg_abort(); + } + tcg_patch32(code_ptr, value); + break; + case R_386_PC8: + value -= (uintptr_t)code_ptr; + if (value != (int8_t)value) { + tcg_abort(); + } + tcg_patch8(code_ptr, value); + break; + default: + tcg_abort(); + } +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str; + + ct_str = *pct_str; + switch(ct_str[0]) { + case 'a': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX); + break; + case 'b': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX); + break; + case 'c': + case_c: + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX); + break; + case 'd': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX); + break; + case 'S': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI); + break; + case 'D': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI); + break; + case 'q': + ct->ct |= TCG_CT_REG; + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(ct->u.regs, 0, 0xffff); + } else { + tcg_regset_set32(ct->u.regs, 0, 0xf); + } + break; + case 'Q': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xf); + break; + case 'r': + case_r: + ct->ct |= TCG_CT_REG; + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(ct->u.regs, 0, 0xffff); + } else { + tcg_regset_set32(ct->u.regs, 0, 0xff); + } + break; + case 'C': + /* With SHRX et al, we need not use ECX as shift count register. 
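
patch_reloc above resolves R_386_PC32/R_386_PC8 by storing target + addend - code_ptr into the field and aborting if the displacement does not fit. A small sketch of the 32-bit case; the buffer and addresses are purely illustrative:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* R_386_PC32: the 32-bit field at "field" receives target + addend - field.
       The addend (e.g. -4) accounts for the CPU adding the displacement to the
       address *after* the field. */
    static void patch_pc32(uint8_t *field, uintptr_t target, intptr_t addend)
    {
        intptr_t value = (intptr_t)target + addend - (intptr_t)field;
        int32_t disp = (int32_t)value;
        assert(value == disp);
        memcpy(field, &disp, 4);      /* x86 is little-endian, a plain store is fine */
    }

    int main(void)
    {
        uint8_t buf[16] = { 0 };
        /* Pretend a JMP rel32 starts at buf[0]: its rel32 field is at buf+1
           and the branch target is buf+11, so the displacement becomes 6. */
        patch_pc32(&buf[1], (uintptr_t)&buf[11], -4);
        return 0;
    }
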
*/ + if (have_bmi2) { + goto case_r; + } else { + goto case_c; + } + + /* qemu_ld/st address constraint */ + case 'L': + ct->ct |= TCG_CT_REG; + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(ct->u.regs, 0, 0xffff); + } else { + tcg_regset_set32(ct->u.regs, 0, 0xff); + } + tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1); + break; + + case 'e': + ct->ct |= TCG_CT_CONST_S32; + break; + case 'Z': + ct->ct |= TCG_CT_CONST_U32; + break; + case 'I': + ct->ct |= TCG_CT_CONST_I32; + break; + + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + return 0; +} + +/* test if a constant matches the constraint */ +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { + return 1; + } + if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { + return 1; + } + if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { + return 1; + } + if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) { + return 1; + } + return 0; +} + +#if TCG_TARGET_REG_BITS == 64 +# define LOWREGMASK(x) ((x) & 7) +#else +# define LOWREGMASK(x) (x) +#endif + +#define P_EXT 0x100 /* 0x0f opcode prefix */ +#define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */ +#define P_DATA16 0x400 /* 0x66 opcode prefix */ +#if TCG_TARGET_REG_BITS == 64 +# define P_ADDR32 0x800 /* 0x67 opcode prefix */ +# define P_REXW 0x1000 /* Set REX.W = 1 */ +# define P_REXB_R 0x2000 /* REG field as byte register */ +# define P_REXB_RM 0x4000 /* R/M field as byte register */ +# define P_GS 0x8000 /* gs segment override */ +#else +# define P_ADDR32 0 +# define P_REXW 0 +# define P_REXB_R 0 +# define P_REXB_RM 0 +# define P_GS 0 +#endif +#define P_SIMDF3 0x10000 /* 0xf3 opcode prefix */ +#define P_SIMDF2 0x20000 /* 0xf2 opcode prefix */ + +#define OPC_ARITH_EvIz (0x81) +#define OPC_ARITH_EvIb (0x83) +#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */ +#define OPC_ANDN (0xf2 | P_EXT38) +#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) +#define OPC_BSWAP (0xc8 | P_EXT) +#define OPC_CALL_Jz (0xe8) +#define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */ +#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3)) +#define OPC_DEC_r32 (0x48) +#define OPC_IMUL_GvEv (0xaf | P_EXT) +#define OPC_IMUL_GvEvIb (0x6b) +#define OPC_IMUL_GvEvIz (0x69) +#define OPC_INC_r32 (0x40) +#define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */ +#define OPC_JCC_short (0x70) /* ... plus condition code */ +#define OPC_JMP_long (0xe9) +#define OPC_JMP_short (0xeb) +#define OPC_LEA (0x8d) +#define OPC_MOVB_EvGv (0x88) /* stores, more or less */ +#define OPC_MOVL_EvGv (0x89) /* stores, more or less */ +#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ +#define OPC_MOVB_EvIz (0xc6) +#define OPC_MOVL_EvIz (0xc7) +#define OPC_MOVL_Iv (0xb8) +#define OPC_MOVBE_GyMy (0xf0 | P_EXT38) +#define OPC_MOVBE_MyGy (0xf1 | P_EXT38) +#define OPC_MOVSBL (0xbe | P_EXT) +#define OPC_MOVSWL (0xbf | P_EXT) +#define OPC_MOVSLQ (0x63 | P_REXW) +#define OPC_MOVZBL (0xb6 | P_EXT) +#define OPC_MOVZWL (0xb7 | P_EXT) +#define OPC_POP_r32 (0x58) +#define OPC_PUSH_r32 (0x50) +#define OPC_PUSH_Iv (0x68) +#define OPC_PUSH_Ib (0x6a) +#define OPC_RET (0xc3) +#define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... 
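
tcg_target_const_match above accepts a 64-bit immediate for 'e' when it sign-extends from 32 bits, for 'Z' when it zero-extends, and for 'I' when its bitwise complement sign-extends (useful for andc). The same predicates as a standalone sketch:

    #include <stdint.h>
    #include <stdio.h>

    /* The three i386 constant constraints, as predicates on a 64-bit value. */
    static int is_s32(int64_t v) { return v == (int32_t)v; }    /* 'e' */
    static int is_u32(int64_t v) { return v == (uint32_t)v; }   /* 'Z' */
    static int is_i32(int64_t v) { return ~v == (int32_t)~v; }  /* 'I' */

    int main(void)
    {
        int64_t samples[] = { 0x7fffffff, 0xffffffff, -1, 0x100000000, ~0xffull };
        for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
            printf("%#llx: e=%d Z=%d I=%d\n", (unsigned long long)samples[i],
                   is_s32(samples[i]), is_u32(samples[i]), is_i32(samples[i]));
        }
        return 0;
    }
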
plus cc */ +#define OPC_SHIFT_1 (0xd1) +#define OPC_SHIFT_Ib (0xc1) +#define OPC_SHIFT_cl (0xd3) +#define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3) +#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16) +#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) +#define OPC_TESTL (0x85) +#define OPC_XCHG_ax_r32 (0x90) + +#define OPC_GRP3_Ev (0xf7) +#define OPC_GRP5 (0xff) + +/* Group 1 opcode extensions for 0x80-0x83. + These are also used as modifiers for OPC_ARITH. */ +#define ARITH_ADD 0 +#define ARITH_OR 1 +#define ARITH_ADC 2 +#define ARITH_SBB 3 +#define ARITH_AND 4 +#define ARITH_SUB 5 +#define ARITH_XOR 6 +#define ARITH_CMP 7 + +/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */ +#define SHIFT_ROL 0 +#define SHIFT_ROR 1 +#define SHIFT_SHL 4 +#define SHIFT_SHR 5 +#define SHIFT_SAR 7 + +/* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */ +#define EXT3_NOT 2 +#define EXT3_NEG 3 +#define EXT3_MUL 4 +#define EXT3_IMUL 5 +#define EXT3_DIV 6 +#define EXT3_IDIV 7 + +/* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */ +#define EXT5_INC_Ev 0 +#define EXT5_DEC_Ev 1 +#define EXT5_CALLN_Ev 2 +#define EXT5_JMPN_Ev 4 + +/* Condition codes to be added to OPC_JCC_{long,short}. */ +#define JCC_JMP (-1) +#define JCC_JO 0x0 +#define JCC_JNO 0x1 +#define JCC_JB 0x2 +#define JCC_JAE 0x3 +#define JCC_JE 0x4 +#define JCC_JNE 0x5 +#define JCC_JBE 0x6 +#define JCC_JA 0x7 +#define JCC_JS 0x8 +#define JCC_JNS 0x9 +#define JCC_JP 0xa +#define JCC_JNP 0xb +#define JCC_JL 0xc +#define JCC_JGE 0xd +#define JCC_JLE 0xe +#define JCC_JG 0xf + +static const uint8_t tcg_cond_to_jcc[] = { + [TCG_COND_EQ] = JCC_JE, + [TCG_COND_NE] = JCC_JNE, + [TCG_COND_LT] = JCC_JL, + [TCG_COND_GE] = JCC_JGE, + [TCG_COND_LE] = JCC_JLE, + [TCG_COND_GT] = JCC_JG, + [TCG_COND_LTU] = JCC_JB, + [TCG_COND_GEU] = JCC_JAE, + [TCG_COND_LEU] = JCC_JBE, + [TCG_COND_GTU] = JCC_JA, +}; + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) +{ + int rex; + + if (opc & P_GS) { + tcg_out8(s, 0x65); + } + if (opc & P_DATA16) { + /* We should never be asking for both 16 and 64-bit operation. */ + assert((opc & P_REXW) == 0); + tcg_out8(s, 0x66); + } + if (opc & P_ADDR32) { + tcg_out8(s, 0x67); + } + + rex = 0; + rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */ + rex |= (r & 8) >> 1; /* REX.R */ + rex |= (x & 8) >> 2; /* REX.X */ + rex |= (rm & 8) >> 3; /* REX.B */ + + /* P_REXB_{R,RM} indicates that the given register is the low byte. + For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do, + as otherwise the encoding indicates %[abcd]h. Note that the values + that are ORed in merely indicate that the REX byte must be present; + those bits get discarded in output. */ + rex |= opc & (r >= 4 ? P_REXB_R : 0); + rex |= opc & (rm >= 4 ? P_REXB_RM : 0); + + if (rex) { + tcg_out8(s, (uint8_t)(rex | 0x40)); + } + + if (opc & (P_EXT | P_EXT38)) { + tcg_out8(s, 0x0f); + if (opc & P_EXT38) { + tcg_out8(s, 0x38); + } + } + + tcg_out8(s, opc); +} +#else +static void tcg_out_opc(TCGContext *s, int opc) +{ + if (opc & P_DATA16) { + tcg_out8(s, 0x66); + } + if (opc & (P_EXT | P_EXT38)) { + tcg_out8(s, 0x0f); + if (opc & P_EXT38) { + tcg_out8(s, 0x38); + } + } + tcg_out8(s, opc); +} +/* Discard the register arguments to tcg_out_opc early, so as not to penalize + the 32-bit compilation paths. This method works with all versions of gcc, + whereas relying on optimization may not be able to exclude them. 
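
tcg_out_opc above derives the optional REX prefix from the operand registers. A sketch of just that byte; the example instruction is illustrative:

    #include <stdint.h>
    #include <stdio.h>

    /* REX = 0100WRXB: W selects 64-bit operand size, while R/X/B extend the
       ModRM.reg, SIB.index and ModRM.rm (or opcode) fields to 4 bits. */
    static uint8_t rex_byte(int wide, int reg, int index, int rm)
    {
        uint8_t rex = 0x40;
        rex |= wide ? 0x08 : 0;        /* REX.W */
        rex |= (reg   & 8) >> 1;       /* REX.R */
        rex |= (index & 8) >> 2;       /* REX.X */
        rex |= (rm    & 8) >> 3;       /* REX.B */
        return rex;
    }

    int main(void)
    {
        /* e.g. "mov %r8, %rax": 64-bit, reg field = r8, rm field = rax */
        printf("%#x\n", rex_byte(1, 8, 0, 0));   /* 0x4c */
        return 0;
    }
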
*/ +#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc) +#endif + +static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm) +{ + tcg_out_opc(s, opc, r, rm, 0); + tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); +} + +static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm) +{ + int tmp; + + if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) { + /* Three byte VEX prefix. */ + tcg_out8(s, 0xc4); + + /* VEX.m-mmmm */ + if (opc & P_EXT38) { + tmp = 2; + } else if (opc & P_EXT) { + tmp = 1; + } else { + tcg_abort(); + } + tmp |= 0x40; /* VEX.X */ + tmp |= (r & 8 ? 0 : 0x80); /* VEX.R */ + tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */ + tcg_out8(s, tmp); + + tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W */ + } else { + /* Two byte VEX prefix. */ + tcg_out8(s, 0xc5); + + tmp = (r & 8 ? 0 : 0x80); /* VEX.R */ + } + /* VEX.pp */ + if (opc & P_DATA16) { + tmp |= 1; /* 0x66 */ + } else if (opc & P_SIMDF3) { + tmp |= 2; /* 0xf3 */ + } else if (opc & P_SIMDF2) { + tmp |= 3; /* 0xf2 */ + } + tmp |= (~v & 15) << 3; /* VEX.vvvv */ + tcg_out8(s, tmp); + tcg_out8(s, opc); + tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); +} + +/* Output an opcode with a full "rm + (index<code_ptr + 5 + ~rm; + intptr_t disp = offset - pc; + if (disp == (int32_t)disp) { + tcg_out_opc(s, opc, r, 0, 0); + tcg_out8(s, (LOWREGMASK(r) << 3) | 5); + tcg_out32(s, disp); + return; + } + + /* Try for an absolute address encoding. This requires the + use of the MODRM+SIB encoding and is therefore larger than + rip-relative addressing. */ + if (offset == (int32_t)offset) { + tcg_out_opc(s, opc, r, 0, 0); + tcg_out8(s, (LOWREGMASK(r) << 3) | 4); + tcg_out8(s, (4 << 3) | 5); + tcg_out32(s, offset); + return; + } + + /* ??? The memory isn't directly addressable. */ + tcg_abort(); + } else { + /* Absolute address. */ + tcg_out_opc(s, opc, r, 0, 0); + tcg_out8(s, (r << 3) | 5); + tcg_out32(s, offset); + return; + } + } + + /* Find the length of the immediate addend. Note that the encoding + that would be used for (%ebp) indicates absolute addressing. */ + if (rm < 0) { + mod = 0, len = 4, rm = 5; + } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) { + mod = 0, len = 0; + } else if (offset == (int8_t)offset) { + mod = 0x40, len = 1; + } else { + mod = 0x80, len = 4; + } + + /* Use a single byte MODRM format if possible. Note that the encoding + that would be used for %esp is the escape to the two byte form. */ + if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) { + /* Single byte MODRM format. */ + tcg_out_opc(s, opc, r, rm, 0); + tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); + } else { + /* Two byte MODRM+SIB format. */ + + /* Note that the encoding that would place %esp into the index + field indicates no index register. In 64-bit mode, the REX.X + bit counts, so %r12 can be used as the index. */ + if (index < 0) { + index = 4; + } else { + assert(index != TCG_REG_ESP); + } + + tcg_out_opc(s, opc, r, rm, index); + tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4); + tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm)); + } + + if (len == 1) { + tcg_out8(s, offset); + } else if (len == 4) { + tcg_out32(s, offset); + } +} + +/* A simplification of the above with no index or shift. */ +static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, + int rm, intptr_t offset) +{ + tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset); +} + +/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. 
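
tcg_out_movi above picks the shortest way to materialize a constant: XOR for zero, a 32-bit move when the value zero-extends, the sign-extended imm32 store form, a rip-relative LEA when the value happens to be near the code, and finally the 10-byte movabs. A sketch of that decision order, with emission reduced to a string and an illustrative code address:

    #include <stdint.h>
    #include <stdio.h>

    /* Decision order for loading a 64-bit constant; "pc" stands in for the
       address the 7-byte LEA would be emitted at. */
    static const char *movi_strategy(int64_t val, int64_t pc)
    {
        if (val == 0)                 return "xor reg, reg";
        if (val == (uint32_t)val)     return "mov reg32, imm32 (zero-extends)";
        if (val == (int32_t)val)      return "mov reg64, simm32 (sign-extends)";
        if (val - (pc + 7) == (int32_t)(val - (pc + 7)))
                                      return "lea reg, [rip + disp32]";
        return "movabs reg, imm64";
    }

    int main(void)
    {
        int64_t pc = 0x7f0000001000;             /* illustrative code address */
        printf("%s\n", movi_strategy(0, pc));
        printf("%s\n", movi_strategy(0x89abcdef, pc));
        printf("%s\n", movi_strategy(-2, pc));
        printf("%s\n", movi_strategy(0x7f0000002000, pc));
        printf("%s\n", movi_strategy(0x123456789abcdef0, pc));
        return 0;
    }
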
*/ +static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src) +{ + /* Propagate an opcode prefix, such as P_REXW. */ + int ext = subop & ~0x7; + subop &= 0x7; + + tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, + TCGReg ret, TCGReg arg) +{ + if (arg != ret) { + int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0); + tcg_out_modrm(s, opc, ret, arg); + } +} + +static void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long arg) +{ + tcg_target_long diff; + + if (arg == 0) { + tgen_arithr(s, ARITH_XOR, ret, ret); + return; + } + if (arg == (uint32_t)arg || type == TCG_TYPE_I32) { + tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0); + tcg_out32(s, arg); + return; + } + if (arg == (int32_t)arg) { + tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret); + tcg_out32(s, arg); + return; + } + + /* Try a 7 byte pc-relative lea before the 10 byte movq. */ + diff = arg - ((uintptr_t)s->code_ptr + 7); + if (diff == (int32_t)diff) { + tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0); + tcg_out8(s, (LOWREGMASK(ret) << 3) | 5); + tcg_out32(s, diff); + return; + } + + tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0); + tcg_out64(s, arg); +} + +static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) +{ + if (val == (int8_t)val) { + tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0); + tcg_out8(s, val); + } else if (val == (int32_t)val) { + tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0); + tcg_out32(s, val); + } else { + tcg_abort(); + } +} + +static inline void tcg_out_push(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0); +} + +static inline void tcg_out_pop(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0); +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, + TCGReg arg1, intptr_t arg2) +{ + int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0); + tcg_out_modrm_offset(s, opc, ret, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0); + tcg_out_modrm_offset(s, opc, arg, arg1, arg2); +} + +static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base, + tcg_target_long ofs, tcg_target_long val) +{ + int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0); + tcg_out_modrm_offset(s, opc, 0, base, ofs); + tcg_out32(s, val); +} + +static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count) +{ + /* Propagate an opcode prefix, such as P_DATA16. 
*/ + int ext = subopc & ~0x7; + subopc &= 0x7; + + if (count == 1) { + tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg); + } else { + tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg); + tcg_out8(s, count); + } +} + +static inline void tcg_out_bswap32(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0); +} + +static inline void tcg_out_rolw_8(TCGContext *s, int reg) +{ + tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8); +} + +static inline void tcg_out_ext8u(TCGContext *s, int dest, int src) +{ + /* movzbl */ + assert(src < 4 || TCG_TARGET_REG_BITS == 64); + tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src); +} + +static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw) +{ + /* movsbl */ + assert(src < 4 || TCG_TARGET_REG_BITS == 64); + tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src); +} + +static inline void tcg_out_ext16u(TCGContext *s, int dest, int src) +{ + /* movzwl */ + tcg_out_modrm(s, OPC_MOVZWL, dest, src); +} + +static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw) +{ + /* movsw[lq] */ + tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src); +} + +static inline void tcg_out_ext32u(TCGContext *s, int dest, int src) +{ + /* 32-bit mov zero extends. */ + tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src); +} + +static inline void tcg_out_ext32s(TCGContext *s, int dest, int src) +{ + tcg_out_modrm(s, OPC_MOVSLQ, dest, src); +} + +static inline void tcg_out_bswap64(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0); +} + +static void tgen_arithi(TCGContext *s, int c, int r0, + tcg_target_long val, int cf) +{ + int rexw = 0; + + if (TCG_TARGET_REG_BITS == 64) { + rexw = c & -8; + c &= 7; + } + + /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce + partial flags update stalls on Pentium4 and are not recommended + by current Intel optimization manuals. */ + if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) { + int is_inc = (c == ARITH_ADD) ^ (val < 0); + if (TCG_TARGET_REG_BITS == 64) { + /* The single-byte increment encodings are re-tasked as the + REX prefixes. Use the MODRM encoding. */ + tcg_out_modrm(s, OPC_GRP5 + rexw, + (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0); + } else { + tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0); + } + return; + } + + if (c == ARITH_AND) { + if (TCG_TARGET_REG_BITS == 64) { + if (val == 0xffffffffu) { + tcg_out_ext32u(s, r0, r0); + return; + } + if (val == (uint32_t)val) { + /* AND with no high bits set can use a 32-bit operation. */ + rexw = 0; + } + } + if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) { + tcg_out_ext8u(s, r0, r0); + return; + } + if (val == 0xffffu) { + tcg_out_ext16u(s, r0, r0); + return; + } + } + + if (val == (int8_t)val) { + tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0); + tcg_out8(s, val); + return; + } + if (rexw == 0 || val == (int32_t)val) { + tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0); + tcg_out32(s, val); + return; + } + + tcg_abort(); +} + +static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) +{ + if (val != 0) { + tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0); + } +} + +/* Use SMALL != 0 to force a short forward branch. 
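
tgen_arithi above strength-reduces AND with the masks 0xff, 0xffff and 0xffffffff into zero-extending moves, and otherwise falls back to the imm8 or imm32 ALU forms. A sketch of the classification for the x86-64 case:

    #include <stdint.h>
    #include <stdio.h>

    /* How "and reg, imm" is chosen: the three natural zero-extension masks
       become movz/mov instructions instead of ALU ops with long immediates. */
    static const char *and_form(uint64_t mask)
    {
        if (mask == 0xffffffffu)  return "mov reg32, reg32 (32-bit mov zero-extends)";
        if (mask == 0xffu)        return "movzbl reg, reg8";
        if (mask == 0xffffu)      return "movzwl reg, reg16";
        if ((int8_t)mask == (int64_t)mask)   return "and reg, imm8";
        if ((int32_t)mask == (int64_t)mask)  return "and reg, imm32";
        return "mask does not fit an immediate; needs a register";
    }

    int main(void)
    {
        printf("%s\n", and_form(0xffffffffu));
        printf("%s\n", and_form(0xff));
        printf("%s\n", and_form(0x7f));
        printf("%s\n", and_form(0xfff00000fffull));
        return 0;
    }
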
*/ +static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small) +{ + int32_t val, val1; + + if (l->has_value) { + val = tcg_pcrel_diff(s, l->u.value_ptr); + val1 = val - 2; + if ((int8_t)val1 == val1) { + if (opc == -1) { + tcg_out8(s, OPC_JMP_short); + } else { + tcg_out8(s, OPC_JCC_short + opc); + } + tcg_out8(s, val1); + } else { + if (small) { + tcg_abort(); + } + if (opc == -1) { + tcg_out8(s, OPC_JMP_long); + tcg_out32(s, val - 5); + } else { + tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); + tcg_out32(s, val - 6); + } + } + } else if (small) { + if (opc == -1) { + tcg_out8(s, OPC_JMP_short); + } else { + tcg_out8(s, OPC_JCC_short + opc); + } + tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1); + s->code_ptr += 1; + } else { + if (opc == -1) { + tcg_out8(s, OPC_JMP_long); + } else { + tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); + } + tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4); + s->code_ptr += 4; + } +} + +static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2, + int const_arg2, int rexw) +{ + if (const_arg2) { + if (arg2 == 0) { + /* test r, r */ + tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1); + } else { + tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0); + } + } else { + tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2); + } +} + +static void tcg_out_brcond32(TCGContext *s, TCGCond cond, + TCGArg arg1, TCGArg arg2, int const_arg2, + TCGLabel *label, int small) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, 0); + tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small); +} + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_brcond64(TCGContext *s, TCGCond cond, + TCGArg arg1, TCGArg arg2, int const_arg2, + TCGLabel *label, int small) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); + tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small); +} +#else +/* XXX: we implement it at the target level to avoid having to + handle cross basic blocks temporaries */ +static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, + const int *const_args, int small) +{ + TCGLabel *label_next = gen_new_label(); + TCGLabel *label_this = arg_label(args[5]); + + switch(args[4]) { + case TCG_COND_EQ: + tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], + label_next, 1); + tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3], + label_this, small); + break; + case TCG_COND_NE: + tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], + label_this, small); + tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3], + label_this, small); + break; + case TCG_COND_LT: + tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], + label_this, small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], + label_this, small); + break; + case TCG_COND_LE: + tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], + label_this, small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], + label_this, small); + break; + case TCG_COND_GT: + tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], + label_this, small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], + label_this, small); + break; + case TCG_COND_GE: + tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], + label_this, small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], + label_this, small); + break; + case 
TCG_COND_LTU: + tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], + label_this, small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], + label_this, small); + break; + case TCG_COND_LEU: + tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], + label_this, small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], + label_this, small); + break; + case TCG_COND_GTU: + tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], + label_this, small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], + label_this, small); + break; + case TCG_COND_GEU: + tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], + label_this, small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], + label_this, small); + break; + default: + tcg_abort(); + } + tcg_out_label(s, label_next, s->code_ptr); +} +#endif + +static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest, + TCGArg arg1, TCGArg arg2, int const_arg2) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, 0); + tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); + tcg_out_ext8u(s, dest, dest); +} + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest, + TCGArg arg1, TCGArg arg2, int const_arg2) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); + tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); + tcg_out_ext8u(s, dest, dest); +} +#else +static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, + const int *const_args) +{ + TCGArg new_args[6]; + TCGLabel *label_true, *label_over; + + memcpy(new_args, args+1, 5*sizeof(TCGArg)); + + if (args[0] == args[1] || args[0] == args[2] + || (!const_args[3] && args[0] == args[3]) + || (!const_args[4] && args[0] == args[4])) { + /* When the destination overlaps with one of the argument + registers, don't do anything tricky. */ + label_true = gen_new_label(); + label_over = gen_new_label(); + + new_args[5] = label_arg(label_true); + tcg_out_brcond2(s, new_args, const_args+1, 1); + + tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); + tcg_out_jxx(s, JCC_JMP, label_over, 1); + tcg_out_label(s, label_true, s->code_ptr); + + tcg_out_movi(s, TCG_TYPE_I32, args[0], 1); + tcg_out_label(s, label_over, s->code_ptr); + } else { + /* When the destination does not overlap one of the arguments, + clear the destination first, jump if cond false, and emit an + increment in the true case. This results in smaller code. 
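
tcg_out_brcond2 above splits a 64-bit comparison on a 32-bit host into a compare of the high words followed by an unsigned compare of the low words. A portable sketch of the decomposition for signed less-than:

    #include <assert.h>
    #include <stdint.h>

    /* Signed 64-bit "a < b" from 32-bit halves, mirroring the brcond2 sequence:
       branch on high-word LT, give up on high-word inequality, otherwise decide
       with an *unsigned* low-word compare. */
    static int lt64(int32_t ah, uint32_t al, int32_t bh, uint32_t bl)
    {
        if (ah < bh) {
            return 1;                 /* high words decide */
        }
        if (ah != bh) {
            return 0;
        }
        return al < bl;               /* equal high words: unsigned low compare */
    }

    int main(void)
    {
        assert(lt64(-1, 0xffffffffu, 0, 0) == 1);     /* -1 < 0 */
        assert(lt64(0, 5, 0, 7) == 1);
        assert(lt64(0, 7, 0, 5) == 0);
        assert(lt64(1, 0, -5, 0xffffffffu) == 0);
        return 0;
    }
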
*/ + + tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); + + label_over = gen_new_label(); + new_args[4] = tcg_invert_cond(new_args[4]); + new_args[5] = label_arg(label_over); + tcg_out_brcond2(s, new_args, const_args+1, 1); + + tgen_arithi(s, ARITH_ADD, args[0], 1, 0); + tcg_out_label(s, label_over, s->code_ptr); + } +} +#endif + +static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest, + TCGArg c1, TCGArg c2, int const_c2, + TCGArg v1) +{ + tcg_out_cmp(s, c1, c2, const_c2, 0); + if (have_cmov) { + tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1); + } else { + TCGLabel *over = gen_new_label(); + tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1); + tcg_out_mov(s, TCG_TYPE_I32, dest, v1); + tcg_out_label(s, over, s->code_ptr); + } +} + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest, + TCGArg c1, TCGArg c2, int const_c2, + TCGArg v1) +{ + tcg_out_cmp(s, c1, c2, const_c2, P_REXW); + tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1); +} +#endif + +static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest) +{ + intptr_t disp = tcg_pcrel_diff(s, dest) - 5; + + if (disp == (int32_t)disp) { + tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0); + tcg_out32(s, disp); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest); + tcg_out_modrm(s, OPC_GRP5, + call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10); + } +} + +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) +{ + tcg_out_branch(s, 1, dest); +} + +static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest) +{ + tcg_out_branch(s, 0, dest); +} + +#if defined(CONFIG_SOFTMMU) +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, + * int mmu_idx, uintptr_t ra) + */ +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, +}; + +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, + * uintxx_t val, int mmu_idx, uintptr_t ra) + */ +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, +}; + +/* Perform the TLB load and compare. + + Inputs: + ADDRLO and ADDRHI contain the low and high part of the address. + + MEM_INDEX and S_BITS are the memory context and log2 size of the load. + + WHICH is the offset into the CPUTLBEntry structure of the slot to read. + This should be offsetof addr_read or addr_write. + + Outputs: + LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses) + positions of the displacements of forward jumps to the TLB miss case. + + Second argument register is loaded with the low part of the address. + In the TLB hit case, it has been adjusted as indicated by the TLB + and so is a host address. In the TLB miss case, it continues to + hold a guest address. + + First argument register is clobbered. 
*/ + +static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, + int mem_index, TCGMemOp opc, + tcg_insn_unit **label_ptr, int which) +{ + const TCGReg r0 = TCG_REG_L0; + const TCGReg r1 = TCG_REG_L1; + TCGType ttype = TCG_TYPE_I32; + TCGType tlbtype = TCG_TYPE_I32; + int trexw = 0, hrexw = 0, tlbrexw = 0; + int s_mask = (1 << (opc & MO_SIZE)) - 1; + bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0; + + if (TCG_TARGET_REG_BITS == 64) { + if (TARGET_LONG_BITS == 64) { + ttype = TCG_TYPE_I64; + trexw = P_REXW; + } + if (TCG_TYPE_PTR == TCG_TYPE_I64) { + hrexw = P_REXW; + if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) { + tlbtype = TCG_TYPE_I64; + tlbrexw = P_REXW; + } + } + } + + tcg_out_mov(s, tlbtype, r0, addrlo); + if (aligned) { + tcg_out_mov(s, ttype, r1, addrlo); + } else { + /* For unaligned access check that we don't cross pages using + the page address of the last byte. */ + tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask); + } + + tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + + tgen_arithi(s, ARITH_AND + trexw, r1, + TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0); + tgen_arithi(s, ARITH_AND + tlbrexw, r0, + (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); + + tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0, + offsetof(CPUArchState, tlb_table[mem_index][0]) + + which); + + /* cmp 0(r0), r1 */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0); + + /* Prepare for both the fast path add of the tlb addend, and the slow + path function argument setup. There are two cases worth note: + For 32-bit guest and x86_64 host, MOVL zero-extends the guest address + before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ + copies the entire guest address for the slow path, while truncation + for the 32-bit host happens with the fastpath ADDL below. */ + tcg_out_mov(s, ttype, r1, addrlo); + + /* jne slow_path */ + tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); + label_ptr[0] = s->code_ptr; + s->code_ptr += 4; + + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + /* cmp 4(r0), addrhi */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4); + + /* jne slow_path */ + tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); + label_ptr[1] = s->code_ptr; + s->code_ptr += 4; + } + + /* TLB Hit. 
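
tcg_out_tlb_load above emits the inline fast path: hash the address into a TLB index, compare the page-masked address against the entry, and on a hit add the addend to form the host address. A portable sketch with illustrative sizes (the real values come from the target configuration, and the alignment handling is omitted):

    #include <stddef.h>
    #include <stdint.h>

    /* Illustrative constants, not the real target parameters. */
    #define TLB_BITS   8
    #define TLB_SIZE   (1 << TLB_BITS)
    #define PAGE_BITS  12
    #define PAGE_MASK  (~(uintptr_t)((1 << PAGE_BITS) - 1))

    typedef struct {
        uintptr_t addr_read;    /* guest page address if the entry is valid for reads */
        uintptr_t addend;       /* host_addr = guest_addr + addend */
    } TLBEntry;

    /* Fast-path lookup: host address on a hit, NULL on a miss (the generated
       code branches to a slow-path helper instead of returning NULL). */
    static void *tlb_lookup_read(TLBEntry *tlb, uintptr_t guest_addr)
    {
        size_t index = (guest_addr >> PAGE_BITS) & (TLB_SIZE - 1);
        if ((guest_addr & PAGE_MASK) == tlb[index].addr_read) {
            return (void *)(guest_addr + tlb[index].addend);
        }
        return NULL;
    }
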
*/ + + /* add addend(r0), r1 */ + tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0, + offsetof(CPUTLBEntry, addend) - which); +} + +/* + * Record the context of a call to the out of line helper code for the slow path + * for a load or store, so that we can later generate the correct helper code + */ +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo, TCGReg addrhi, + tcg_insn_unit *raddr, + tcg_insn_unit **label_ptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->oi = oi; + label->datalo_reg = datalo; + label->datahi_reg = datahi; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + label->raddr = raddr; + label->label_ptr[0] = label_ptr[0]; + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + label->label_ptr[1] = label_ptr[1]; + } +} + +/* + * Generate code for the slow path for a load at the end of block + */ +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOpIdx oi = l->oi; + TCGMemOp opc = get_memop(oi); + TCGReg data_reg; + tcg_insn_unit **label_ptr = &l->label_ptr[0]; + + /* resolve label address */ + tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); + } + + if (TCG_TARGET_REG_BITS == 32) { + int ofs = 0; + + tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs); + ofs += 4; + + tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs); + ofs += 4; + + if (TARGET_LONG_BITS == 64) { + tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs); + ofs += 4; + } + + tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi); + ofs += 4; + + tcg_out_sti(s, TCG_TYPE_PTR, TCG_REG_ESP, ofs, (uintptr_t)l->raddr); + } else { + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + /* The second argument is already loaded with addrlo. */ + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi); + tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3], + (uintptr_t)l->raddr); + } + + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); + + data_reg = l->datalo_reg; + switch (opc & MO_SSIZE) { + case MO_SB: + tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW); + break; + case MO_SW: + tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW); + break; +#if TCG_TARGET_REG_BITS == 64 + case MO_SL: + tcg_out_ext32s(s, data_reg, TCG_REG_EAX); + break; +#endif + case MO_UB: + case MO_UW: + /* Note that the helpers have zero-extended to tcg_target_long. 
*/ + case MO_UL: + tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); + break; + case MO_Q: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX); + } else if (data_reg == TCG_REG_EDX) { + /* xchg %edx, %eax */ + tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0); + tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX); + } else { + tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); + tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX); + } + break; + default: + tcg_abort(); + } + + /* Jump to the code corresponding to next IR of qemu_st */ + tcg_out_jmp(s, l->raddr); +} + +/* + * Generate code for the slow path for a store at the end of block + */ +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOpIdx oi = l->oi; + TCGMemOp opc = get_memop(oi); + TCGMemOp s_bits = opc & MO_SIZE; + tcg_insn_unit **label_ptr = &l->label_ptr[0]; + TCGReg retaddr; + + /* resolve label address */ + tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); + } + + if (TCG_TARGET_REG_BITS == 32) { + int ofs = 0; + + tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs); + ofs += 4; + + tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs); + ofs += 4; + + if (TARGET_LONG_BITS == 64) { + tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs); + ofs += 4; + } + + tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs); + ofs += 4; + + if (s_bits == MO_64) { + tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs); + ofs += 4; + } + + tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi); + ofs += 4; + + retaddr = TCG_REG_EAX; + tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); + tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs); + } else { + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + /* The second argument is already loaded with addrlo. */ + tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32), + tcg_target_call_iarg_regs[2], l->datalo_reg); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi); + + if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) { + retaddr = tcg_target_call_iarg_regs[4]; + tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); + } else { + retaddr = TCG_REG_RAX; + tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); + tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, + TCG_TARGET_CALL_STACK_OFFSET); + } + } + + /* "Tail call" to the helper, with the return address back inline. 
*/ + tcg_out_push(s, retaddr); + tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); +} +#elif defined(__x86_64__) && defined(__linux__) +# include +# include + +int arch_prctl(int code, unsigned long addr); + +static int guest_base_flags; +static inline void setup_guest_base_seg(void) +{ + if (arch_prctl(ARCH_SET_GS, guest_base) == 0) { + guest_base_flags = P_GS; + } +} +#else +# define guest_base_flags 0 +static inline void setup_guest_base_seg(void) { } +#endif /* SOFTMMU */ + +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, + TCGReg base, int index, intptr_t ofs, + int seg, TCGMemOp memop) +{ + const TCGMemOp real_bswap = memop & MO_BSWAP; + TCGMemOp bswap = real_bswap; + int movop = OPC_MOVL_GvEv; + + if (have_movbe && real_bswap) { + bswap = 0; + movop = OPC_MOVBE_GyMy; + } + + switch (memop & MO_SSIZE) { + case MO_UB: + tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo, + base, index, 0, ofs); + break; + case MO_SB: + tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, + base, index, 0, ofs); + break; + case MO_UW: + tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo, + base, index, 0, ofs); + if (real_bswap) { + tcg_out_rolw_8(s, datalo); + } + break; + case MO_SW: + if (real_bswap) { + if (have_movbe) { + tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg, + datalo, base, index, 0, ofs); + } else { + tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo, + base, index, 0, ofs); + tcg_out_rolw_8(s, datalo); + } + tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo); + } else { + tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg, + datalo, base, index, 0, ofs); + } + break; + case MO_UL: + tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs); + if (bswap) { + tcg_out_bswap32(s, datalo); + } + break; +#if TCG_TARGET_REG_BITS == 64 + case MO_SL: + if (real_bswap) { + tcg_out_modrm_sib_offset(s, movop + seg, datalo, + base, index, 0, ofs); + if (bswap) { + tcg_out_bswap32(s, datalo); + } + tcg_out_ext32s(s, datalo, datalo); + } else { + tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo, + base, index, 0, ofs); + } + break; +#endif + case MO_Q: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo, + base, index, 0, ofs); + if (bswap) { + tcg_out_bswap64(s, datalo); + } + } else { + if (real_bswap) { + int t = datalo; + datalo = datahi; + datahi = t; + } + if (base != datalo) { + tcg_out_modrm_sib_offset(s, movop + seg, datalo, + base, index, 0, ofs); + tcg_out_modrm_sib_offset(s, movop + seg, datahi, + base, index, 0, ofs + 4); + } else { + tcg_out_modrm_sib_offset(s, movop + seg, datahi, + base, index, 0, ofs + 4); + tcg_out_modrm_sib_offset(s, movop + seg, datalo, + base, index, 0, ofs); + } + if (bswap) { + tcg_out_bswap32(s, datalo); + tcg_out_bswap32(s, datahi); + } + } + break; + default: + tcg_abort(); + } +} + +/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and + EAX. It will be useful once fixed registers globals are less + common. */ +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) +{ + TCGReg datalo, datahi, addrlo; + TCGReg addrhi __attribute__((unused)); + TCGMemOpIdx oi; + TCGMemOp opc; +#if defined(CONFIG_SOFTMMU) + int mem_index; + tcg_insn_unit *label_ptr[2]; +#endif + + datalo = *args++; + datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? 
*args++ : 0); + oi = *args++; + opc = get_memop(oi); + +#if defined(CONFIG_SOFTMMU) + mem_index = get_mmuidx(oi); + + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc, + label_ptr, offsetof(CPUTLBEntry, addr_read)); + + /* TLB Hit. */ + tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc); + + /* Record the current context of a load into ldst label */ + add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, + s->code_ptr, label_ptr); +#else + { + int32_t offset = guest_base; + TCGReg base = addrlo; + int index = -1; + int seg = 0; + + /* For a 32-bit guest, the high 32 bits may contain garbage. + We can do this with the ADDR32 prefix if we're not using + a guest base, or when using segmentation. Otherwise we + need to zero-extend manually. */ + if (guest_base == 0 || guest_base_flags) { + seg = guest_base_flags; + offset = 0; + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + seg |= P_ADDR32; + } + } else if (TCG_TARGET_REG_BITS == 64) { + if (TARGET_LONG_BITS == 32) { + tcg_out_ext32u(s, TCG_REG_L0, base); + base = TCG_REG_L0; + } + if (offset != guest_base) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); + index = TCG_REG_L1; + offset = 0; + } + } + + tcg_out_qemu_ld_direct(s, datalo, datahi, + base, index, offset, seg, opc); + } +#endif +} + +static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, + TCGReg base, intptr_t ofs, int seg, + TCGMemOp memop) +{ + /* ??? Ideally we wouldn't need a scratch register. For user-only, + we could perform the bswap twice to restore the original value + instead of moving to the scratch. But as it is, the L constraint + means that TCG_REG_L0 is definitely free here. */ + const TCGReg scratch = TCG_REG_L0; + const TCGMemOp real_bswap = memop & MO_BSWAP; + TCGMemOp bswap = real_bswap; + int movop = OPC_MOVL_EvGv; + + if (have_movbe && real_bswap) { + bswap = 0; + movop = OPC_MOVBE_MyGy; + } + + switch (memop & MO_SIZE) { + case MO_8: + /* In 32-bit mode, 8-bit stores can only happen from [abcd]x. + Use the scratch register if necessary. 
*/ + if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) { + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + datalo = scratch; + } + tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg, + datalo, base, ofs); + break; + case MO_16: + if (bswap) { + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + tcg_out_rolw_8(s, scratch); + datalo = scratch; + } + tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs); + break; + case MO_32: + if (bswap) { + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + tcg_out_bswap32(s, scratch); + datalo = scratch; + } + tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); + break; + case MO_64: + if (TCG_TARGET_REG_BITS == 64) { + if (bswap) { + tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo); + tcg_out_bswap64(s, scratch); + datalo = scratch; + } + tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs); + } else if (bswap) { + tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi); + tcg_out_bswap32(s, scratch); + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs); + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + tcg_out_bswap32(s, scratch); + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4); + } else { + if (real_bswap) { + int t = datalo; + datalo = datahi; + datahi = t; + } + tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); + tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4); + } + break; + default: + tcg_abort(); + } +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) +{ + TCGReg datalo, datahi, addrlo; + TCGReg addrhi __attribute__((unused)); + TCGMemOpIdx oi; + TCGMemOp opc; +#if defined(CONFIG_SOFTMMU) + int mem_index; + tcg_insn_unit *label_ptr[2]; +#endif + + datalo = *args++; + datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0); + oi = *args++; + opc = get_memop(oi); + +#if defined(CONFIG_SOFTMMU) + mem_index = get_mmuidx(oi); + + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc, + label_ptr, offsetof(CPUTLBEntry, addr_write)); + + /* TLB Hit. */ + tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); + + /* Record the current context of a store into ldst label */ + add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, + s->code_ptr, label_ptr); +#else + { + int32_t offset = guest_base; + TCGReg base = addrlo; + int seg = 0; + + /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. */ + if (guest_base == 0 || guest_base_flags) { + seg = guest_base_flags; + offset = 0; + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + seg |= P_ADDR32; + } + } else if (TCG_TARGET_REG_BITS == 64) { + /* ??? Note that we can't use the same SIB addressing scheme + as for loads, since we require L0 free for bswap. 
*/ + if (offset != guest_base) { + if (TARGET_LONG_BITS == 32) { + tcg_out_ext32u(s, TCG_REG_L0, base); + base = TCG_REG_L0; + } + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base); + base = TCG_REG_L1; + offset = 0; + } else if (TARGET_LONG_BITS == 32) { + tcg_out_ext32u(s, TCG_REG_L1, base); + base = TCG_REG_L1; + } + } + + tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc); + } +#endif +} + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg *args, const int *const_args) +{ + int c, vexop, rexw = 0; + +#if TCG_TARGET_REG_BITS == 64 +# define OP_32_64(x) \ + case glue(glue(INDEX_op_, x), _i64): \ + rexw = P_REXW; /* FALLTHRU */ \ + case glue(glue(INDEX_op_, x), _i32) +#else +# define OP_32_64(x) \ + case glue(glue(INDEX_op_, x), _i32) +#endif + + switch(opc) { + case INDEX_op_exit_tb: + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]); + tcg_out_jmp(s, tb_ret_addr); + break; + case INDEX_op_goto_tb: + if (s->tb_jmp_offset) { + /* direct jump method */ + tcg_out8(s, OPC_JMP_long); /* jmp im */ + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + tcg_out32(s, 0); + } else { + /* indirect jump method */ + tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1, + (intptr_t)(s->tb_next + args[0])); + } + s->tb_next_offset[args[0]] = tcg_current_code_size(s); + break; + case INDEX_op_br: + tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0); + break; + OP_32_64(ld8u): + /* Note that we can ignore REXW for the zero-extend to 64-bit. */ + tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]); + break; + OP_32_64(ld8s): + tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]); + break; + OP_32_64(ld16u): + /* Note that we can ignore REXW for the zero-extend to 64-bit. */ + tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]); + break; + OP_32_64(ld16s): + tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]); + break; +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_ld32u_i64: +#endif + case INDEX_op_ld_i32: + tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]); + break; + + OP_32_64(st8): + if (const_args[0]) { + tcg_out_modrm_offset(s, OPC_MOVB_EvIz, + 0, args[1], args[2]); + tcg_out8(s, args[0]); + } else { + tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, + args[0], args[1], args[2]); + } + break; + OP_32_64(st16): + if (const_args[0]) { + tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, + 0, args[1], args[2]); + tcg_out16(s, args[0]); + } else { + tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, + args[0], args[1], args[2]); + } + break; +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_st32_i64: +#endif + case INDEX_op_st_i32: + if (const_args[0]) { + tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]); + tcg_out32(s, args[0]); + } else { + tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); + } + break; + + OP_32_64(add): + /* For 3-operand addition, use LEA. */ + if (args[0] != args[1]) { + TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0; + + if (const_args[2]) { + c3 = a2, a2 = -1; + } else if (a0 == a2) { + /* Watch out for dest = src + dest, since we've removed + the matching constraint on the add. 
*/ + tgen_arithr(s, ARITH_ADD + rexw, a0, a1); + break; + } + + tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3); + break; + } + c = ARITH_ADD; + goto gen_arith; + OP_32_64(sub): + c = ARITH_SUB; + goto gen_arith; + OP_32_64(and): + c = ARITH_AND; + goto gen_arith; + OP_32_64(or): + c = ARITH_OR; + goto gen_arith; + OP_32_64(xor): + c = ARITH_XOR; + goto gen_arith; + gen_arith: + if (const_args[2]) { + tgen_arithi(s, c + rexw, args[0], args[2], 0); + } else { + tgen_arithr(s, c + rexw, args[0], args[2]); + } + break; + + OP_32_64(andc): + if (const_args[2]) { + tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, + args[0], args[1]); + tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0); + } else { + tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]); + } + break; + + OP_32_64(mul): + if (const_args[2]) { + int32_t val; + val = args[2]; + if (val == (int8_t)val) { + tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]); + tcg_out8(s, val); + } else { + tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]); + tcg_out32(s, val); + } + } else { + tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]); + } + break; + + OP_32_64(div2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]); + break; + OP_32_64(divu2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]); + break; + + OP_32_64(shl): + c = SHIFT_SHL; + vexop = OPC_SHLX; + goto gen_shift_maybe_vex; + OP_32_64(shr): + c = SHIFT_SHR; + vexop = OPC_SHRX; + goto gen_shift_maybe_vex; + OP_32_64(sar): + c = SHIFT_SAR; + vexop = OPC_SARX; + goto gen_shift_maybe_vex; + OP_32_64(rotl): + c = SHIFT_ROL; + goto gen_shift; + OP_32_64(rotr): + c = SHIFT_ROR; + goto gen_shift; + gen_shift_maybe_vex: + if (have_bmi2 && !const_args[2]) { + tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]); + break; + } + /* FALLTHRU */ + gen_shift: + if (const_args[2]) { + tcg_out_shifti(s, c + rexw, args[0], args[2]); + } else { + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]); + } + break; + + case INDEX_op_brcond_i32: + tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1], + arg_label(args[3]), 0); + break; + case INDEX_op_setcond_i32: + tcg_out_setcond32(s, args[3], args[0], args[1], + args[2], const_args[2]); + break; + case INDEX_op_movcond_i32: + tcg_out_movcond32(s, args[5], args[0], args[1], + args[2], const_args[2], args[3]); + break; + + OP_32_64(bswap16): + tcg_out_rolw_8(s, args[0]); + break; + OP_32_64(bswap32): + tcg_out_bswap32(s, args[0]); + break; + + OP_32_64(neg): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]); + break; + OP_32_64(not): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]); + break; + + OP_32_64(ext8s): + tcg_out_ext8s(s, args[0], args[1], rexw); + break; + OP_32_64(ext16s): + tcg_out_ext16s(s, args[0], args[1], rexw); + break; + OP_32_64(ext8u): + tcg_out_ext8u(s, args[0], args[1]); + break; + OP_32_64(ext16u): + tcg_out_ext16u(s, args[0], args[1]); + break; + + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, args, 0); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args, 1); + break; + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, args, 0); + break; + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args, 1); + break; + + OP_32_64(mulu2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]); + break; + OP_32_64(muls2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]); + break; + OP_32_64(add2): + if (const_args[4]) { + tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1); + } else { + tgen_arithr(s, ARITH_ADD + 
rexw, args[0], args[4]); + } + if (const_args[5]) { + tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1); + } else { + tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]); + } + break; + OP_32_64(sub2): + if (const_args[4]) { + tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1); + } else { + tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]); + } + if (const_args[5]) { + tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1); + } else { + tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]); + } + break; + +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_brcond2_i32: + tcg_out_brcond2(s, args, const_args, 0); + break; + case INDEX_op_setcond2_i32: + tcg_out_setcond2(s, args, const_args); + break; +#else /* TCG_TARGET_REG_BITS == 64 */ + case INDEX_op_ld32s_i64: + tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]); + break; + case INDEX_op_ld_i64: + tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); + break; + case INDEX_op_st_i64: + if (const_args[0]) { + tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, + 0, args[1], args[2]); + tcg_out32(s, args[0]); + } else { + tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); + } + break; + + case INDEX_op_brcond_i64: + tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1], + arg_label(args[3]), 0); + break; + case INDEX_op_setcond_i64: + tcg_out_setcond64(s, args[3], args[0], args[1], + args[2], const_args[2]); + break; + case INDEX_op_movcond_i64: + tcg_out_movcond64(s, args[5], args[0], args[1], + args[2], const_args[2], args[3]); + break; + + case INDEX_op_bswap64_i64: + tcg_out_bswap64(s, args[0]); + break; + case INDEX_op_extu_i32_i64: + case INDEX_op_ext32u_i64: + tcg_out_ext32u(s, args[0], args[1]); + break; + case INDEX_op_ext_i32_i64: + case INDEX_op_ext32s_i64: + tcg_out_ext32s(s, args[0], args[1]); + break; +#endif + + OP_32_64(deposit): + if (args[3] == 0 && args[4] == 8) { + /* load bits 0..7 */ + tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, + args[2], args[0]); + } else if (args[3] == 8 && args[4] == 8) { + /* load bits 8..15 */ + tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4); + } else if (args[3] == 0 && args[4] == 16) { + /* load bits 0..15 */ + tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]); + } else { + tcg_abort(); + } + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. 
*/ + default: + tcg_abort(); + } + +#undef OP_32_64 +} + +static const TCGTargetOpDef x86_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_br, { } }, + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "qi", "r" } }, + { INDEX_op_st16_i32, { "ri", "r" } }, + { INDEX_op_st_i32, { "ri", "r" } }, + + { INDEX_op_add_i32, { "r", "r", "ri" } }, + { INDEX_op_sub_i32, { "r", "0", "ri" } }, + { INDEX_op_mul_i32, { "r", "0", "ri" } }, + { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } }, + { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } }, + { INDEX_op_and_i32, { "r", "0", "ri" } }, + { INDEX_op_or_i32, { "r", "0", "ri" } }, + { INDEX_op_xor_i32, { "r", "0", "ri" } }, + { INDEX_op_andc_i32, { "r", "r", "ri" } }, + + { INDEX_op_shl_i32, { "r", "0", "Ci" } }, + { INDEX_op_shr_i32, { "r", "0", "Ci" } }, + { INDEX_op_sar_i32, { "r", "0", "Ci" } }, + { INDEX_op_rotl_i32, { "r", "0", "ci" } }, + { INDEX_op_rotr_i32, { "r", "0", "ci" } }, + + { INDEX_op_brcond_i32, { "r", "ri" } }, + + { INDEX_op_bswap16_i32, { "r", "0" } }, + { INDEX_op_bswap32_i32, { "r", "0" } }, + + { INDEX_op_neg_i32, { "r", "0" } }, + + { INDEX_op_not_i32, { "r", "0" } }, + + { INDEX_op_ext8s_i32, { "r", "q" } }, + { INDEX_op_ext16s_i32, { "r", "r" } }, + { INDEX_op_ext8u_i32, { "r", "q" } }, + { INDEX_op_ext16u_i32, { "r", "r" } }, + + { INDEX_op_setcond_i32, { "q", "r", "ri" } }, + + { INDEX_op_deposit_i32, { "Q", "0", "Q" } }, + { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } }, + + { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } }, + { INDEX_op_muls2_i32, { "a", "d", "a", "r" } }, + { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } }, + { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } }, + +#if TCG_TARGET_REG_BITS == 32 + { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, + { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, +#else + { INDEX_op_ld8u_i64, { "r", "r" } }, + { INDEX_op_ld8s_i64, { "r", "r" } }, + { INDEX_op_ld16u_i64, { "r", "r" } }, + { INDEX_op_ld16s_i64, { "r", "r" } }, + { INDEX_op_ld32u_i64, { "r", "r" } }, + { INDEX_op_ld32s_i64, { "r", "r" } }, + { INDEX_op_ld_i64, { "r", "r" } }, + { INDEX_op_st8_i64, { "ri", "r" } }, + { INDEX_op_st16_i64, { "ri", "r" } }, + { INDEX_op_st32_i64, { "ri", "r" } }, + { INDEX_op_st_i64, { "re", "r" } }, + + { INDEX_op_add_i64, { "r", "r", "re" } }, + { INDEX_op_mul_i64, { "r", "0", "re" } }, + { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } }, + { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } }, + { INDEX_op_sub_i64, { "r", "0", "re" } }, + { INDEX_op_and_i64, { "r", "0", "reZ" } }, + { INDEX_op_or_i64, { "r", "0", "re" } }, + { INDEX_op_xor_i64, { "r", "0", "re" } }, + { INDEX_op_andc_i64, { "r", "r", "rI" } }, + + { INDEX_op_shl_i64, { "r", "0", "Ci" } }, + { INDEX_op_shr_i64, { "r", "0", "Ci" } }, + { INDEX_op_sar_i64, { "r", "0", "Ci" } }, + { INDEX_op_rotl_i64, { "r", "0", "ci" } }, + { INDEX_op_rotr_i64, { "r", "0", "ci" } }, + + { INDEX_op_brcond_i64, { "r", "re" } }, + { INDEX_op_setcond_i64, { "r", "r", "re" } }, + + { INDEX_op_bswap16_i64, { "r", "0" } }, + { INDEX_op_bswap32_i64, { "r", "0" } }, + { INDEX_op_bswap64_i64, { "r", "0" } }, + { INDEX_op_neg_i64, { "r", "0" } }, + { INDEX_op_not_i64, { "r", "0" } }, + + { INDEX_op_ext8s_i64, { "r", "r" } }, + { INDEX_op_ext16s_i64, { "r", "r" } }, + { INDEX_op_ext32s_i64, { "r", "r" } }, + { 
INDEX_op_ext8u_i64, { "r", "r" } }, + { INDEX_op_ext16u_i64, { "r", "r" } }, + { INDEX_op_ext32u_i64, { "r", "r" } }, + + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, + + { INDEX_op_deposit_i64, { "Q", "0", "Q" } }, + { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } }, + + { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } }, + { INDEX_op_muls2_i64, { "a", "d", "a", "r" } }, + { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } }, + { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } }, +#endif + +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_st_i32, { "L", "L" } }, + { INDEX_op_qemu_ld_i64, { "r", "L" } }, + { INDEX_op_qemu_st_i64, { "L", "L" } }, +#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS + { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_st_i32, { "L", "L" } }, + { INDEX_op_qemu_ld_i64, { "r", "r", "L" } }, + { INDEX_op_qemu_st_i64, { "L", "L", "L" } }, +#else + { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, + { INDEX_op_qemu_st_i32, { "L", "L", "L" } }, + { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } }, + { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } }, +#endif + { -1 }, +}; + +static int tcg_target_callee_save_regs[] = { +#if TCG_TARGET_REG_BITS == 64 + TCG_REG_RBP, + TCG_REG_RBX, +#if defined(_WIN64) + TCG_REG_RDI, + TCG_REG_RSI, +#endif + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, /* Currently used for the global env. */ + TCG_REG_R15, +#else + TCG_REG_EBP, /* Currently used for the global env. */ + TCG_REG_EBX, + TCG_REG_ESI, + TCG_REG_EDI, +#endif +}; + +/* Compute frame size via macros, to share between tcg_target_qemu_prologue + and tcg_register_jit. */ + +#define PUSH_SIZE \ + ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \ + * (TCG_TARGET_REG_BITS / 8)) + +#define FRAME_SIZE \ + ((PUSH_SIZE \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_NLONGS * sizeof(long) \ + + TCG_TARGET_STACK_ALIGN - 1) \ + & ~(TCG_TARGET_STACK_ALIGN - 1)) + +/* Generate global QEMU prologue and epilogue code */ +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int i, stack_addend; + + /* TB prologue */ + + /* Reserve some stack space, also for TCG temps. */ + stack_addend = FRAME_SIZE - PUSH_SIZE; + tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, + CPU_TEMP_BUF_NLONGS * sizeof(long)); + + /* Save all callee saved registers. */ + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { + tcg_out_push(s, tcg_target_callee_save_regs[i]); + } + +#if TCG_TARGET_REG_BITS == 32 + tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, + (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4); + tcg_out_addi(s, TCG_REG_ESP, -stack_addend); + /* jmp *tb. */ + tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP, + (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 + + stack_addend); +#else + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + tcg_out_addi(s, TCG_REG_ESP, -stack_addend); + /* jmp *tb. */ + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); +#endif + + /* TB epilogue */ + tb_ret_addr = s->code_ptr; + + tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend); + + for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) { + tcg_out_pop(s, tcg_target_callee_save_regs[i]); + } + tcg_out_opc(s, OPC_RET, 0, 0, 0); + +#if !defined(CONFIG_SOFTMMU) + /* Try to set up a segment register to point to guest_base. 
*/ + if (guest_base) { + setup_guest_base_seg(); + } +#endif +} + +static void tcg_target_init(TCGContext *s) +{ +#ifdef CONFIG_CPUID_H + unsigned a, b, c, d; + int max = __get_cpuid_max(0, 0); + + if (max >= 1) { + __cpuid(1, a, b, c, d); +#ifndef have_cmov + /* For 32-bit, 99% certainty that we're running on hardware that + supports cmov, but we still need to check. In case cmov is not + available, we'll use a small forward branch. */ + have_cmov = (d & bit_CMOV) != 0; +#endif +#ifndef have_movbe + /* MOVBE is only available on Intel Atom and Haswell CPUs, so we + need to probe for it. */ + have_movbe = (c & bit_MOVBE) != 0; +#endif + } + + if (max >= 7) { + /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */ + __cpuid_count(7, 0, a, b, c, d); +#ifdef bit_BMI + have_bmi1 = (b & bit_BMI) != 0; +#endif +#ifndef have_bmi2 + have_bmi2 = (b & bit_BMI2) != 0; +#endif + } +#endif + + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff); + } else { + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff); + } + + tcg_regset_clear(tcg_target_call_clobber_regs); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX); + if (TCG_TARGET_REG_BITS == 64) { +#if !defined(_WIN64) + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI); +#endif + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); + } + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); + + tcg_add_target_add_op_defs(x86_op_defs); +} + +typedef struct { + DebugFrameHeader h; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[14]; +} DebugFrame; + +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +#if !defined(__ELF__) + /* Host machine without ELF. */ +#elif TCG_TARGET_REG_BITS == 64 +#define ELF_HOST_MACHINE EM_X86_64 +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = 0x78, /* sleb128 -8 */ + .h.cie.return_column = 16, + + /* Total FDE size does not include the "len" member. */ + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + + .fde_def_cfa = { + 12, 7, /* DW_CFA_def_cfa %rsp, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + 0x90, 1, /* DW_CFA_offset, %rip, -8 */ + /* The following ordering must match tcg_target_callee_save_regs. 
*/ + 0x86, 2, /* DW_CFA_offset, %rbp, -16 */ + 0x83, 3, /* DW_CFA_offset, %rbx, -24 */ + 0x8c, 4, /* DW_CFA_offset, %r12, -32 */ + 0x8d, 5, /* DW_CFA_offset, %r13, -40 */ + 0x8e, 6, /* DW_CFA_offset, %r14, -48 */ + 0x8f, 7, /* DW_CFA_offset, %r15, -56 */ + } +}; +#else +#define ELF_HOST_MACHINE EM_386 +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = 0x7c, /* sleb128 -4 */ + .h.cie.return_column = 8, + + /* Total FDE size does not include the "len" member. */ + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + + .fde_def_cfa = { + 12, 4, /* DW_CFA_def_cfa %esp, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + 0x88, 1, /* DW_CFA_offset, %eip, -4 */ + /* The following ordering must match tcg_target_callee_save_regs. */ + 0x85, 2, /* DW_CFA_offset, %ebp, -8 */ + 0x83, 3, /* DW_CFA_offset, %ebx, -12 */ + 0x86, 4, /* DW_CFA_offset, %esi, -16 */ + 0x87, 5, /* DW_CFA_offset, %edi, -20 */ + } +}; +#endif + +#if defined(ELF_HOST_MACHINE) +void tcg_register_jit(void *buf, size_t buf_size) +{ + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} +#endif diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c deleted file mode 100644 index 62d654943c..0000000000 --- a/tcg/ia64/tcg-target.c +++ /dev/null @@ -1,2453 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2009-2010 Aurelien Jarno - * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -/* - * Register definitions - */ - -#ifndef NDEBUG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", - "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", - "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", - "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", - "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", - "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", - "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", - "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", -}; -#endif - -#ifndef CONFIG_SOFTMMU -#define TCG_GUEST_BASE_REG TCG_REG_R55 -#endif - -/* Branch registers */ -enum { - TCG_REG_B0 = 0, - TCG_REG_B1, - TCG_REG_B2, - TCG_REG_B3, - TCG_REG_B4, - TCG_REG_B5, - TCG_REG_B6, - TCG_REG_B7, -}; - -/* Floating point registers */ -enum { - TCG_REG_F0 = 0, - TCG_REG_F1, - TCG_REG_F2, - TCG_REG_F3, - TCG_REG_F4, - TCG_REG_F5, - TCG_REG_F6, - TCG_REG_F7, - TCG_REG_F8, - TCG_REG_F9, - TCG_REG_F10, - TCG_REG_F11, - TCG_REG_F12, - TCG_REG_F13, - TCG_REG_F14, - TCG_REG_F15, -}; - -/* Predicate registers */ -enum { - TCG_REG_P0 = 0, - TCG_REG_P1, - TCG_REG_P2, - TCG_REG_P3, - TCG_REG_P4, - TCG_REG_P5, - TCG_REG_P6, - TCG_REG_P7, - TCG_REG_P8, - TCG_REG_P9, - TCG_REG_P10, - TCG_REG_P11, - TCG_REG_P12, - TCG_REG_P13, - TCG_REG_P14, - TCG_REG_P15, -}; - -/* Application registers */ -enum { - TCG_REG_PFS = 64, -}; - -static const int tcg_target_reg_alloc_order[] = { - TCG_REG_R35, - TCG_REG_R36, - TCG_REG_R37, - TCG_REG_R38, - TCG_REG_R39, - TCG_REG_R40, - TCG_REG_R41, - TCG_REG_R42, - TCG_REG_R43, - TCG_REG_R44, - TCG_REG_R45, - TCG_REG_R46, - TCG_REG_R47, - TCG_REG_R48, - TCG_REG_R49, - TCG_REG_R50, - TCG_REG_R51, - TCG_REG_R52, - TCG_REG_R53, - TCG_REG_R54, - TCG_REG_R55, - TCG_REG_R14, - TCG_REG_R15, - TCG_REG_R16, - TCG_REG_R17, - TCG_REG_R18, - TCG_REG_R19, - TCG_REG_R20, - TCG_REG_R21, - TCG_REG_R22, - TCG_REG_R23, - TCG_REG_R24, - TCG_REG_R25, - TCG_REG_R26, - TCG_REG_R27, - TCG_REG_R28, - TCG_REG_R29, - TCG_REG_R30, - TCG_REG_R31, - TCG_REG_R56, - TCG_REG_R57, - TCG_REG_R58, - TCG_REG_R59, - TCG_REG_R60, - TCG_REG_R61, - TCG_REG_R62, - TCG_REG_R63, - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10, - TCG_REG_R11 -}; - -static const int tcg_target_call_iarg_regs[8] = { - TCG_REG_R56, - TCG_REG_R57, - TCG_REG_R58, - TCG_REG_R59, - TCG_REG_R60, - TCG_REG_R61, - TCG_REG_R62, - TCG_REG_R63, -}; - -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_R8 -}; - -/* - * opcode formation - */ - -/* bundle templates: stops (double bar in the IA64 manual) are marked with - an uppercase letter. 
*/ -enum { - mii = 0x00, - miI = 0x01, - mIi = 0x02, - mII = 0x03, - mlx = 0x04, - mLX = 0x05, - mmi = 0x08, - mmI = 0x09, - Mmi = 0x0a, - MmI = 0x0b, - mfi = 0x0c, - mfI = 0x0d, - mmf = 0x0e, - mmF = 0x0f, - mib = 0x10, - miB = 0x11, - mbb = 0x12, - mbB = 0x13, - bbb = 0x16, - bbB = 0x17, - mmb = 0x18, - mmB = 0x19, - mfb = 0x1c, - mfB = 0x1d, -}; - -enum { - OPC_ADD_A1 = 0x10000000000ull, - OPC_AND_A1 = 0x10060000000ull, - OPC_AND_A3 = 0x10160000000ull, - OPC_ANDCM_A1 = 0x10068000000ull, - OPC_ANDCM_A3 = 0x10168000000ull, - OPC_ADDS_A4 = 0x10800000000ull, - OPC_ADDL_A5 = 0x12000000000ull, - OPC_ALLOC_M34 = 0x02c00000000ull, - OPC_BR_DPTK_FEW_B1 = 0x08400000000ull, - OPC_BR_SPTK_MANY_B1 = 0x08000001000ull, - OPC_BR_CALL_SPNT_FEW_B3 = 0x0a200000000ull, - OPC_BR_SPTK_MANY_B4 = 0x00100001000ull, - OPC_BR_CALL_SPTK_MANY_B5 = 0x02100001000ull, - OPC_BR_RET_SPTK_MANY_B4 = 0x00108001100ull, - OPC_BRL_SPTK_MANY_X3 = 0x18000001000ull, - OPC_BRL_CALL_SPNT_MANY_X4 = 0x1a200001000ull, - OPC_BRL_CALL_SPTK_MANY_X4 = 0x1a000001000ull, - OPC_CMP_LT_A6 = 0x18000000000ull, - OPC_CMP_LTU_A6 = 0x1a000000000ull, - OPC_CMP_EQ_A6 = 0x1c000000000ull, - OPC_CMP4_LT_A6 = 0x18400000000ull, - OPC_CMP4_LTU_A6 = 0x1a400000000ull, - OPC_CMP4_EQ_A6 = 0x1c400000000ull, - OPC_DEP_I14 = 0x0ae00000000ull, - OPC_DEP_I15 = 0x08000000000ull, - OPC_DEP_Z_I12 = 0x0a600000000ull, - OPC_EXTR_I11 = 0x0a400002000ull, - OPC_EXTR_U_I11 = 0x0a400000000ull, - OPC_FCVT_FX_TRUNC_S1_F10 = 0x004d0000000ull, - OPC_FCVT_FXU_TRUNC_S1_F10 = 0x004d8000000ull, - OPC_FCVT_XF_F11 = 0x000e0000000ull, - OPC_FMA_S1_F1 = 0x10400000000ull, - OPC_FNMA_S1_F1 = 0x18400000000ull, - OPC_FRCPA_S1_F6 = 0x00600000000ull, - OPC_GETF_SIG_M19 = 0x08708000000ull, - OPC_LD1_M1 = 0x08000000000ull, - OPC_LD1_M3 = 0x0a000000000ull, - OPC_LD2_M1 = 0x08040000000ull, - OPC_LD2_M3 = 0x0a040000000ull, - OPC_LD4_M1 = 0x08080000000ull, - OPC_LD4_M3 = 0x0a080000000ull, - OPC_LD8_M1 = 0x080c0000000ull, - OPC_LD8_M3 = 0x0a0c0000000ull, - OPC_MUX1_I3 = 0x0eca0000000ull, - OPC_NOP_B9 = 0x04008000000ull, - OPC_NOP_F16 = 0x00008000000ull, - OPC_NOP_I18 = 0x00008000000ull, - OPC_NOP_M48 = 0x00008000000ull, - OPC_MOV_I21 = 0x00e00100000ull, - OPC_MOV_RET_I21 = 0x00e00500000ull, - OPC_MOV_I22 = 0x00188000000ull, - OPC_MOV_I_I26 = 0x00150000000ull, - OPC_MOVL_X2 = 0x0c000000000ull, - OPC_OR_A1 = 0x10070000000ull, - OPC_OR_A3 = 0x10170000000ull, - OPC_SETF_EXP_M18 = 0x0c748000000ull, - OPC_SETF_SIG_M18 = 0x0c708000000ull, - OPC_SHL_I7 = 0x0f240000000ull, - OPC_SHR_I5 = 0x0f220000000ull, - OPC_SHR_U_I5 = 0x0f200000000ull, - OPC_SHRP_I10 = 0x0ac00000000ull, - OPC_SXT1_I29 = 0x000a0000000ull, - OPC_SXT2_I29 = 0x000a8000000ull, - OPC_SXT4_I29 = 0x000b0000000ull, - OPC_ST1_M4 = 0x08c00000000ull, - OPC_ST2_M4 = 0x08c40000000ull, - OPC_ST4_M4 = 0x08c80000000ull, - OPC_ST8_M4 = 0x08cc0000000ull, - OPC_SUB_A1 = 0x10028000000ull, - OPC_SUB_A3 = 0x10128000000ull, - OPC_UNPACK4_L_I2 = 0x0f860000000ull, - OPC_XMA_L_F2 = 0x1d000000000ull, - OPC_XOR_A1 = 0x10078000000ull, - OPC_XOR_A3 = 0x10178000000ull, - OPC_ZXT1_I29 = 0x00080000000ull, - OPC_ZXT2_I29 = 0x00088000000ull, - OPC_ZXT4_I29 = 0x00090000000ull, - - INSN_NOP_M = OPC_NOP_M48, /* nop.m 0 */ - INSN_NOP_I = OPC_NOP_I18, /* nop.i 0 */ -}; - -static inline uint64_t tcg_opc_a1(int qp, uint64_t opc, int r1, - int r2, int r3) -{ - return opc - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_a3(int qp, uint64_t opc, int r1, - uint64_t imm, int r3) -{ - return opc - | ((imm & 
0x80) << 29) /* s */ - | ((imm & 0x7f) << 13) /* imm7b */ - | ((r3 & 0x7f) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_a4(int qp, uint64_t opc, int r1, - uint64_t imm, int r3) -{ - return opc - | ((imm & 0x2000) << 23) /* s */ - | ((imm & 0x1f80) << 20) /* imm6d */ - | ((imm & 0x007f) << 13) /* imm7b */ - | ((r3 & 0x7f) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_a5(int qp, uint64_t opc, int r1, - uint64_t imm, int r3) -{ - return opc - | ((imm & 0x200000) << 15) /* s */ - | ((imm & 0x1f0000) << 6) /* imm5c */ - | ((imm & 0x00ff80) << 20) /* imm9d */ - | ((imm & 0x00007f) << 13) /* imm7b */ - | ((r3 & 0x03) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_a6(int qp, uint64_t opc, int p1, - int p2, int r2, int r3) -{ - return opc - | ((p2 & 0x3f) << 27) - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((p1 & 0x3f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_b1(int qp, uint64_t opc, uint64_t imm) -{ - return opc - | ((imm & 0x100000) << 16) /* s */ - | ((imm & 0x0fffff) << 13) /* imm20b */ - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_b3(int qp, uint64_t opc, int b1, uint64_t imm) -{ - return opc - | ((imm & 0x100000) << 16) /* s */ - | ((imm & 0x0fffff) << 13) /* imm20b */ - | ((b1 & 0x7) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_b4(int qp, uint64_t opc, int b2) -{ - return opc - | ((b2 & 0x7) << 13) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_b5(int qp, uint64_t opc, int b1, int b2) -{ - return opc - | ((b2 & 0x7) << 13) - | ((b1 & 0x7) << 6) - | (qp & 0x3f); -} - - -static inline uint64_t tcg_opc_b9(int qp, uint64_t opc, uint64_t imm) -{ - return opc - | ((imm & 0x100000) << 16) /* i */ - | ((imm & 0x0fffff) << 6) /* imm20a */ - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_f1(int qp, uint64_t opc, int f1, - int f3, int f4, int f2) -{ - return opc - | ((f4 & 0x7f) << 27) - | ((f3 & 0x7f) << 20) - | ((f2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_f2(int qp, uint64_t opc, int f1, - int f3, int f4, int f2) -{ - return opc - | ((f4 & 0x7f) << 27) - | ((f3 & 0x7f) << 20) - | ((f2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_f6(int qp, uint64_t opc, int f1, - int p2, int f2, int f3) -{ - return opc - | ((p2 & 0x3f) << 27) - | ((f3 & 0x7f) << 20) - | ((f2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_f10(int qp, uint64_t opc, int f1, int f2) -{ - return opc - | ((f2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_f11(int qp, uint64_t opc, int f1, int f2) -{ - return opc - | ((f2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_f16(int qp, uint64_t opc, uint64_t imm) -{ - return opc - | ((imm & 0x100000) << 16) /* i */ - | ((imm & 0x0fffff) << 6) /* imm20a */ - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i2(int qp, uint64_t opc, int r1, - int r2, int r3) -{ - return opc - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i3(int qp, uint64_t opc, int r1, - int r2, int mbtype) -{ - return opc - | ((mbtype & 0x0f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i5(int qp, uint64_t opc, int r1, - int r3, int r2) -{ - return opc - | ((r3 & 0x7f) << 20) - | ((r2 & 
0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i7(int qp, uint64_t opc, int r1, - int r2, int r3) -{ - return opc - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i10(int qp, uint64_t opc, int r1, - int r2, int r3, uint64_t count) -{ - return opc - | ((count & 0x3f) << 27) - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i11(int qp, uint64_t opc, int r1, - int r3, uint64_t pos, uint64_t len) -{ - return opc - | ((len & 0x3f) << 27) - | ((r3 & 0x7f) << 20) - | ((pos & 0x3f) << 14) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i12(int qp, uint64_t opc, int r1, - int r2, uint64_t pos, uint64_t len) -{ - return opc - | ((len & 0x3f) << 27) - | ((pos & 0x3f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i14(int qp, uint64_t opc, int r1, uint64_t imm, - int r3, uint64_t pos, uint64_t len) -{ - return opc - | ((imm & 0x01) << 36) - | ((len & 0x3f) << 27) - | ((r3 & 0x7f) << 20) - | ((pos & 0x3f) << 14) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i15(int qp, uint64_t opc, int r1, int r2, - int r3, uint64_t pos, uint64_t len) -{ - return opc - | ((pos & 0x3f) << 31) - | ((len & 0x0f) << 27) - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i18(int qp, uint64_t opc, uint64_t imm) -{ - return opc - | ((imm & 0x100000) << 16) /* i */ - | ((imm & 0x0fffff) << 6) /* imm20a */ - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i21(int qp, uint64_t opc, int b1, - int r2, uint64_t imm) -{ - return opc - | ((imm & 0x1ff) << 24) - | ((r2 & 0x7f) << 13) - | ((b1 & 0x7) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i22(int qp, uint64_t opc, int r1, int b2) -{ - return opc - | ((b2 & 0x7) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i26(int qp, uint64_t opc, int ar3, int r2) -{ - return opc - | ((ar3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i29(int qp, uint64_t opc, int r1, int r3) -{ - return opc - | ((r3 & 0x7f) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_l2(uint64_t imm) -{ - return (imm & 0x7fffffffffc00000ull) >> 22; -} - -static inline uint64_t tcg_opc_l3(uint64_t imm) -{ - return (imm & 0x07fffffffff00000ull) >> 18; -} - -#define tcg_opc_l4 tcg_opc_l3 - -static inline uint64_t tcg_opc_m1(int qp, uint64_t opc, int r1, int r3) -{ - return opc - | ((r3 & 0x7f) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_m3(int qp, uint64_t opc, int r1, - int r3, uint64_t imm) -{ - return opc - | ((imm & 0x100) << 28) /* s */ - | ((imm & 0x080) << 20) /* i */ - | ((imm & 0x07f) << 13) /* imm7b */ - | ((r3 & 0x7f) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_m4(int qp, uint64_t opc, int r2, int r3) -{ - return opc - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_m18(int qp, uint64_t opc, int f1, int r2) -{ - return opc - | ((r2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_m19(int qp, uint64_t opc, int r1, int f2) -{ - return opc - | ((f2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline 
uint64_t tcg_opc_m34(int qp, uint64_t opc, int r1, - int sof, int sol, int sor) -{ - return opc - | ((sor & 0x0f) << 27) - | ((sol & 0x7f) << 20) - | ((sof & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_m48(int qp, uint64_t opc, uint64_t imm) -{ - return opc - | ((imm & 0x100000) << 16) /* i */ - | ((imm & 0x0fffff) << 6) /* imm20a */ - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_x2(int qp, uint64_t opc, - int r1, uint64_t imm) -{ - return opc - | ((imm & 0x8000000000000000ull) >> 27) /* i */ - | (imm & 0x0000000000200000ull) /* ic */ - | ((imm & 0x00000000001f0000ull) << 6) /* imm5c */ - | ((imm & 0x000000000000ff80ull) << 20) /* imm9d */ - | ((imm & 0x000000000000007full) << 13) /* imm7b */ - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_x3(int qp, uint64_t opc, uint64_t imm) -{ - return opc - | ((imm & 0x0800000000000000ull) >> 23) /* i */ - | ((imm & 0x00000000000fffffull) << 13) /* imm20b */ - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_x4(int qp, uint64_t opc, int b1, uint64_t imm) -{ - return opc - | ((imm & 0x0800000000000000ull) >> 23) /* i */ - | ((imm & 0x00000000000fffffull) << 13) /* imm20b */ - | ((b1 & 0x7) << 6) - | (qp & 0x3f); -} - - -/* - * Relocations - Note that we never encode branches elsewhere than slot 2. - */ - -static void reloc_pcrel21b_slot2(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - uint64_t imm = target - pc; - - pc->hi = (pc->hi & 0xf700000fffffffffull) - | ((imm & 0x100000) << 39) /* s */ - | ((imm & 0x0fffff) << 36); /* imm20b */ -} - -static uint64_t get_reloc_pcrel21b_slot2(tcg_insn_unit *pc) -{ - int64_t high = pc->hi; - - return ((high >> 39) & 0x100000) + /* s */ - ((high >> 36) & 0x0fffff); /* imm20b */ -} - -static void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - assert(addend == 0); - assert(type == R_IA64_PCREL21B); - reloc_pcrel21b_slot2(code_ptr, (tcg_insn_unit *)value); -} - -/* - * Constraints - */ - -/* parse target specific constraints */ -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str; - - ct_str = *pct_str; - switch(ct_str[0]) { - case 'r': - ct->ct |= TCG_CT_REG; - tcg_regset_set(ct->u.regs, 0xffffffffffffffffull); - break; - case 'I': - ct->ct |= TCG_CT_CONST_S22; - break; - case 'S': - ct->ct |= TCG_CT_REG; - tcg_regset_set(ct->u.regs, 0xffffffffffffffffull); -#if defined(CONFIG_SOFTMMU) - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R56); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R57); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R58); -#endif - break; - case 'Z': - /* We are cheating a bit here, using the fact that the register - r0 is also the register number 0. Hence there is no need - to check for const_args in each instruction. 
*/ - ct->ct |= TCG_CT_CONST_ZERO; - break; - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - return 0; -} - -/* test if a constant matches the constraint */ -static inline int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - int ct; - ct = arg_ct->ct; - if (ct & TCG_CT_CONST) - return 1; - else if ((ct & TCG_CT_CONST_ZERO) && val == 0) - return 1; - else if ((ct & TCG_CT_CONST_S22) && val == ((int32_t)val << 10) >> 10) - return 1; - else - return 0; -} - -/* - * Code generation - */ - -static tcg_insn_unit *tb_ret_addr; - -static inline void tcg_out_bundle(TCGContext *s, int template, - uint64_t slot0, uint64_t slot1, - uint64_t slot2) -{ - template &= 0x1f; /* 5 bits */ - slot0 &= 0x1ffffffffffull; /* 41 bits */ - slot1 &= 0x1ffffffffffull; /* 41 bits */ - slot2 &= 0x1ffffffffffull; /* 41 bits */ - - *s->code_ptr++ = (tcg_insn_unit){ - (slot1 << 46) | (slot0 << 5) | template, - (slot2 << 23) | (slot1 >> 18) - }; -} - -static inline uint64_t tcg_opc_mov_a(int qp, TCGReg dst, TCGReg src) -{ - return tcg_opc_a4(qp, OPC_ADDS_A4, dst, 0, src); -} - -static inline void tcg_out_mov(TCGContext *s, TCGType type, - TCGReg ret, TCGReg arg) -{ - tcg_out_bundle(s, mmI, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_mov_a(TCG_REG_P0, ret, arg)); -} - -static inline uint64_t tcg_opc_movi_a(int qp, TCGReg dst, int64_t src) -{ - assert(src == sextract64(src, 0, 22)); - return tcg_opc_a5(qp, OPC_ADDL_A5, dst, src, TCG_REG_R0); -} - -static inline void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg reg, tcg_target_long arg) -{ - tcg_out_bundle(s, mLX, - INSN_NOP_M, - tcg_opc_l2 (arg), - tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, reg, arg)); -} - -static void tcg_out_br(TCGContext *s, TCGLabel *l) -{ - uint64_t imm; - - /* We pay attention here to not modify the branch target by reading - the existing value and using it again. This ensure that caches and - memory are kept coherent during retranslation. */ - if (l->has_value) { - imm = l->u.value_ptr - s->code_ptr; - } else { - imm = get_reloc_pcrel21b_slot2(s->code_ptr); - tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, l, 0); - } - - tcg_out_bundle(s, mmB, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_b1(TCG_REG_P0, OPC_BR_SPTK_MANY_B1, imm)); -} - -static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *desc) -{ - uintptr_t func = desc->lo, gp = desc->hi, disp; - - /* Look through the function descriptor. */ - tcg_out_bundle(s, mlx, - INSN_NOP_M, - tcg_opc_l2 (gp), - tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, gp)); - disp = (tcg_insn_unit *)func - s->code_ptr; - tcg_out_bundle(s, mLX, - INSN_NOP_M, - tcg_opc_l4 (disp), - tcg_opc_x4 (TCG_REG_P0, OPC_BRL_CALL_SPTK_MANY_X4, - TCG_REG_B0, disp)); -} - -static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg) -{ - uint64_t imm, opc1; - - /* At least arg == 0 is a common operation. 
*/ - if (arg == sextract64(arg, 0, 22)) { - opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R8, arg); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R8, arg); - opc1 = INSN_NOP_M; - } - - imm = tb_ret_addr - s->code_ptr; - - tcg_out_bundle(s, mLX, - opc1, - tcg_opc_l3 (imm), - tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, imm)); -} - -static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg) -{ - if (s->tb_jmp_offset) { - /* direct jump method */ - tcg_abort(); - } else { - /* indirect jump method */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, - (tcg_target_long)(s->tb_next + arg)); - tcg_out_bundle(s, MmI, - tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, - TCG_REG_R2, TCG_REG_R2), - INSN_NOP_M, - tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, - TCG_REG_R2, 0)); - tcg_out_bundle(s, mmB, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, - TCG_REG_B6)); - } - s->tb_next_offset[arg] = tcg_current_code_size(s); -} - -static inline void tcg_out_jmp(TCGContext *s, TCGArg addr) -{ - tcg_out_bundle(s, mmI, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, addr, 0)); - tcg_out_bundle(s, mmB, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_b4(TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); -} - -static inline void tcg_out_ld_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg, - TCGArg arg1, tcg_target_long arg2) -{ - if (arg2 == ((int16_t)arg2 >> 2) << 2) { - tcg_out_bundle(s, MmI, - tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, - TCG_REG_R2, arg2, arg1), - tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), - INSN_NOP_I); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2); - tcg_out_bundle(s, MmI, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, - TCG_REG_R2, TCG_REG_R2, arg1), - tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), - INSN_NOP_I); - } -} - -static inline void tcg_out_st_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg, - TCGArg arg1, tcg_target_long arg2) -{ - if (arg2 == ((int16_t)arg2 >> 2) << 2) { - tcg_out_bundle(s, MmI, - tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, - TCG_REG_R2, arg2, arg1), - tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), - INSN_NOP_I); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2); - tcg_out_bundle(s, MmI, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, - TCG_REG_R2, TCG_REG_R2, arg1), - tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), - INSN_NOP_I); - } -} - -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - if (type == TCG_TYPE_I32) { - tcg_out_ld_rel(s, OPC_LD4_M1, arg, arg1, arg2); - } else { - tcg_out_ld_rel(s, OPC_LD8_M1, arg, arg1, arg2); - } -} - -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - if (type == TCG_TYPE_I32) { - tcg_out_st_rel(s, OPC_ST4_M4, arg, arg1, arg2); - } else { - tcg_out_st_rel(s, OPC_ST8_M4, arg, arg1, arg2); - } -} - -static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, uint64_t opc_a3, - TCGReg ret, TCGArg arg1, int const_arg1, - TCGArg arg2, int const_arg2) -{ - uint64_t opc1 = 0, opc2 = 0, opc3 = 0; - - if (const_arg2 && arg2 != 0) { - opc2 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R3, arg2); - arg2 = TCG_REG_R3; - } - if (const_arg1 && arg1 != 0) { - if (opc_a3 && arg1 == (int8_t)arg1) { - opc3 = tcg_opc_a3(TCG_REG_P0, opc_a3, ret, arg1, arg2); - } else { - opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, arg1); - arg1 = TCG_REG_R2; - } - } - if (opc3 == 0) { - opc3 = tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2); - } - - tcg_out_bundle(s, (opc1 || opc2 ? mII : miI), - opc1 ? 
opc1 : INSN_NOP_M, - opc2 ? opc2 : INSN_NOP_I, - opc3); -} - -static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2 && arg2 == sextract64(arg2, 0, 14)) { - tcg_out_bundle(s, mmI, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, arg2, arg1)); - } else { - tcg_out_alu(s, OPC_ADD_A1, 0, ret, arg1, 0, arg2, const_arg2); - } -} - -static inline void tcg_out_sub(TCGContext *s, TCGReg ret, TCGArg arg1, - int const_arg1, TCGArg arg2, int const_arg2) -{ - if (!const_arg1 && const_arg2 && -arg2 == sextract64(-arg2, 0, 14)) { - tcg_out_bundle(s, mmI, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, -arg2, arg1)); - } else { - tcg_out_alu(s, OPC_SUB_A1, OPC_SUB_A3, ret, - arg1, const_arg1, arg2, const_arg2); - } -} - -static inline void tcg_out_eqv(TCGContext *s, TCGArg ret, - TCGArg arg1, int const_arg1, - TCGArg arg2, int const_arg2) -{ - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_a1 (TCG_REG_P0, OPC_XOR_A1, ret, arg1, arg2), - tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); -} - -static inline void tcg_out_nand(TCGContext *s, TCGArg ret, - TCGArg arg1, int const_arg1, - TCGArg arg2, int const_arg2) -{ - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, ret, arg1, arg2), - tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); -} - -static inline void tcg_out_nor(TCGContext *s, TCGArg ret, - TCGArg arg1, int const_arg1, - TCGArg arg2, int const_arg2) -{ - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, arg2), - tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); -} - -static inline void tcg_out_orc(TCGContext *s, TCGArg ret, - TCGArg arg1, int const_arg1, - TCGArg arg2, int const_arg2) -{ - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, TCG_REG_R2, -1, arg2), - tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, TCG_REG_R2)); -} - -static inline void tcg_out_mul(TCGContext *s, TCGArg ret, - TCGArg arg1, TCGArg arg2) -{ - tcg_out_bundle(s, mmI, - tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F6, arg1), - tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F7, arg2), - INSN_NOP_I); - tcg_out_bundle(s, mmF, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F6, - TCG_REG_F7, TCG_REG_F0)); - tcg_out_bundle(s, miI, - tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, ret, TCG_REG_F6), - INSN_NOP_I, - INSN_NOP_I); -} - -static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11, - ret, arg1, arg2, 31 - arg2)); - } else { - tcg_out_bundle(s, mII, - tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, - TCG_REG_R3, 0x1f, arg2), - tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R2, arg1), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret, - TCG_REG_R2, TCG_REG_R3)); - } -} - -static inline void tcg_out_sar_i64(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11, - ret, arg1, arg2, 63 - arg2)); - } else { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret, arg1, arg2)); - } -} - -static inline void tcg_out_shl_i32(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - 
tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, - arg1, 63 - arg2, 31 - arg2)); - } else { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R2, - 0x1f, arg2), - tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret, - arg1, TCG_REG_R2)); - } -} - -static inline void tcg_out_shl_i64(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, - arg1, 63 - arg2, 63 - arg2)); - } else { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret, - arg1, arg2)); - } -} - -static inline void tcg_out_shr_i32(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, - arg1, arg2, 31 - arg2)); - } else { - tcg_out_bundle(s, mII, - tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, - 0x1f, arg2), - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, arg1), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, - TCG_REG_R2, TCG_REG_R3)); - } -} - -static inline void tcg_out_shr_i64(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, - arg1, arg2, 63 - arg2)); - } else { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, - arg1, arg2)); - } -} - -static inline void tcg_out_rotl_i32(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, - TCG_REG_R2, arg1, arg1), - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, - TCG_REG_R2, 32 - arg2, 31)); - } else { - tcg_out_bundle(s, miI, - INSN_NOP_M, - tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, - TCG_REG_R2, arg1, arg1), - tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, - 0x1f, arg2)); - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R3, - 0x20, TCG_REG_R3), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, - TCG_REG_R2, TCG_REG_R3)); - } -} - -static inline void tcg_out_rotl_i64(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1, - arg1, 0x40 - arg2)); - } else { - tcg_out_bundle(s, mII, - tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R2, - 0x40, arg2), - tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R3, - arg1, arg2), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R2, - arg1, TCG_REG_R2)); - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, - TCG_REG_R2, TCG_REG_R3)); - } -} - -static inline void tcg_out_rotr_i32(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, - TCG_REG_R2, arg1, arg1), - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, - TCG_REG_R2, arg2, 31)); - } else { - tcg_out_bundle(s, mII, - tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, - 0x1f, arg2), - tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, - TCG_REG_R2, arg1, arg1), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, - TCG_REG_R2, TCG_REG_R3)); - } -} - -static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ 
- if (const_arg2) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1, - arg1, arg2)); - } else { - tcg_out_bundle(s, mII, - tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R2, - 0x40, arg2), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R3, - arg1, arg2), - tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R2, - arg1, TCG_REG_R2)); - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, - TCG_REG_R2, TCG_REG_R3)); - } -} - -static const uint64_t opc_ext_i29[8] = { - OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0, - OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0 -}; - -static inline uint64_t tcg_opc_ext_i(int qp, TCGMemOp opc, TCGReg d, TCGReg s) -{ - if ((opc & MO_SIZE) == MO_64) { - return tcg_opc_mov_a(qp, d, s); - } else { - return tcg_opc_i29(qp, opc_ext_i29[opc & MO_SSIZE], d, s); - } -} - -static inline void tcg_out_ext(TCGContext *s, uint64_t opc_i29, - TCGArg ret, TCGArg arg) -{ - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i29(TCG_REG_P0, opc_i29, ret, arg)); -} - -static inline uint64_t tcg_opc_bswap64_i(int qp, TCGReg d, TCGReg s) -{ - return tcg_opc_i3(qp, OPC_MUX1_I3, d, s, 0xb); -} - -static inline void tcg_out_bswap16(TCGContext *s, TCGArg ret, TCGArg arg) -{ - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 15, 15), - tcg_opc_bswap64_i(TCG_REG_P0, ret, ret)); -} - -static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg) -{ - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 31, 31), - tcg_opc_bswap64_i(TCG_REG_P0, ret, ret)); -} - -static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg) -{ - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P0, ret, arg)); -} - -static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1, - TCGArg a2, int const_a2, int pos, int len) -{ - uint64_t i1 = 0, i2 = 0; - int cpos = 63 - pos, lm1 = len - 1; - - if (const_a2) { - /* Truncate the value of a constant a2 to the width of the field. */ - int mask = (1u << len) - 1; - a2 &= mask; - - if (a2 == 0 || a2 == mask) { - /* 1-bit signed constant inserted into register. */ - i2 = tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, ret, a2, a1, cpos, lm1); - } else { - /* Otherwise, load any constant into a temporary. Do this into - the first I slot to help out with cross-unit delays. */ - i1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, a2); - a2 = TCG_REG_R2; - } - } - if (i2 == 0) { - i2 = tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, ret, a2, a1, cpos, lm1); - } - tcg_out_bundle(s, (i1 ? mII : miI), - INSN_NOP_M, - i1 ? 
i1 : INSN_NOP_I, - i2); -} - -static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1, - TCGArg arg2, int cmp4) -{ - uint64_t opc_eq_a6, opc_lt_a6, opc_ltu_a6; - - if (cmp4) { - opc_eq_a6 = OPC_CMP4_EQ_A6; - opc_lt_a6 = OPC_CMP4_LT_A6; - opc_ltu_a6 = OPC_CMP4_LTU_A6; - } else { - opc_eq_a6 = OPC_CMP_EQ_A6; - opc_lt_a6 = OPC_CMP_LT_A6; - opc_ltu_a6 = OPC_CMP_LTU_A6; - } - - switch (cond) { - case TCG_COND_EQ: - return tcg_opc_a6 (qp, opc_eq_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2); - case TCG_COND_NE: - return tcg_opc_a6 (qp, opc_eq_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2); - case TCG_COND_LT: - return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2); - case TCG_COND_LTU: - return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2); - case TCG_COND_GE: - return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2); - case TCG_COND_GEU: - return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2); - case TCG_COND_LE: - return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P7, TCG_REG_P6, arg2, arg1); - case TCG_COND_LEU: - return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P7, TCG_REG_P6, arg2, arg1); - case TCG_COND_GT: - return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P6, TCG_REG_P7, arg2, arg1); - case TCG_COND_GTU: - return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P6, TCG_REG_P7, arg2, arg1); - default: - tcg_abort(); - break; - } -} - -static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, - TCGReg arg2, TCGLabel *l, int cmp4) -{ - uint64_t imm; - - /* We pay attention here to not modify the branch target by reading - the existing value and using it again. This ensure that caches and - memory are kept coherent during retranslation. */ - if (l->has_value) { - imm = l->u.value_ptr - s->code_ptr; - } else { - imm = get_reloc_pcrel21b_slot2(s->code_ptr); - tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, l, 0); - } - - tcg_out_bundle(s, miB, - INSN_NOP_M, - tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4), - tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1, imm)); -} - -static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret, - TCGArg arg1, TCGArg arg2, int cmp4) -{ - tcg_out_bundle(s, MmI, - tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4), - tcg_opc_movi_a(TCG_REG_P6, ret, 1), - tcg_opc_movi_a(TCG_REG_P7, ret, 0)); -} - -static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret, - TCGArg c1, TCGArg c2, - TCGArg v1, int const_v1, - TCGArg v2, int const_v2, int cmp4) -{ - uint64_t opc1, opc2; - - if (const_v1) { - opc1 = tcg_opc_movi_a(TCG_REG_P6, ret, v1); - } else if (ret == v1) { - opc1 = INSN_NOP_M; - } else { - opc1 = tcg_opc_mov_a(TCG_REG_P6, ret, v1); - } - if (const_v2) { - opc2 = tcg_opc_movi_a(TCG_REG_P7, ret, v2); - } else if (ret == v2) { - opc2 = INSN_NOP_I; - } else { - opc2 = tcg_opc_mov_a(TCG_REG_P7, ret, v2); - } - - tcg_out_bundle(s, MmI, - tcg_opc_cmp_a(TCG_REG_P0, cond, c1, c2, cmp4), - opc1, - opc2); -} - -#if defined(CONFIG_SOFTMMU) -/* We're expecting to use an signed 22-bit immediate add. */ -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) - > 0x1fffff) - -/* Load and compare a TLB entry, and return the result in (p6, p7). - R2 is loaded with the addend TLB entry. - R57 is loaded with the address, zero extented on 32-bit targets. - R1, R3 are clobbered, leaving R56 free for... - BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store. 
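   As a C-level sketch of what the bundles below compute (illustration only,
   assuming the softmmu TLB layout of this period: env->tlb_table[],
   CPUTLBEntry with addr_read/addr_write/addend, and CPU_TLB_SIZE):

       index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
       entry = &env->tlb_table[mem_index][index];
       cmp   = addr & (TARGET_PAGE_MASK | ((1 << s_bits) - 1));
       hit   = (entry->addr_read == cmp);   /* addr_write for stores */
       host  = addr + entry->addend;        /* fast-path host address */

   (p6, p7) receive hit / !hit, and R2 receives the addend, as stated above.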
*/ -static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg, - TCGMemOp s_bits, int off_rw, int off_add, - uint64_t bswap1, uint64_t bswap2) -{ - /* - .mii - mov r2 = off_rw - extr.u r3 = addr_reg, ... # extract tlb page - zxt4 r57 = addr_reg # or mov for 64-bit guest - ;; - .mii - addl r2 = r2, areg0 - shl r3 = r3, cteb # via dep.z - dep r1 = 0, r57, ... # zero page ofs, keep align - ;; - .mmi - add r2 = r2, r3 - ;; - ld4 r3 = [r2], off_add-off_rw # or ld8 for 64-bit guest - nop - ;; - .mmi - nop - cmp.eq p6, p7 = r3, r58 - nop - ;; - */ - tcg_out_bundle(s, miI, - tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, off_rw), - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R3, - addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1), - tcg_opc_ext_i(TCG_REG_P0, - TARGET_LONG_BITS == 32 ? MO_UL : MO_Q, - TCG_REG_R57, addr_reg)); - tcg_out_bundle(s, miI, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_REG_R2, TCG_AREG0), - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R3, - TCG_REG_R3, 63 - CPU_TLB_ENTRY_BITS, - 63 - CPU_TLB_ENTRY_BITS), - tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R1, 0, - TCG_REG_R57, 63 - s_bits, - TARGET_PAGE_BITS - s_bits - 1)); - tcg_out_bundle(s, MmI, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, - TCG_REG_R2, TCG_REG_R2, TCG_REG_R3), - tcg_opc_m3 (TCG_REG_P0, - (TARGET_LONG_BITS == 32 - ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R3, - TCG_REG_R2, off_add - off_rw), - bswap1); - tcg_out_bundle(s, mmI, - tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, TCG_REG_R2), - tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6, - TCG_REG_P7, TCG_REG_R1, TCG_REG_R3), - bswap2); -} - -typedef struct TCGLabelQemuLdst { - bool is_ld; - TCGMemOp size; - tcg_insn_unit *label_ptr; /* label pointers to be updated */ - struct TCGLabelQemuLdst *next; -} TCGLabelQemuLdst; - -typedef struct TCGBackendData { - TCGLabelQemuLdst *labels; -} TCGBackendData; - -static inline void tcg_out_tb_init(TCGContext *s) -{ - s->be->labels = NULL; -} - -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, - tcg_insn_unit *label_ptr) -{ - TCGBackendData *be = s->be; - TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l)); - - l->is_ld = is_ld; - l->size = opc & MO_SIZE; - l->label_ptr = label_ptr; - l->next = be->labels; - be->labels = l; -} - -static bool tcg_out_tb_finalize(TCGContext *s) -{ - static const void * const helpers[8] = { - helper_ret_stb_mmu, - helper_le_stw_mmu, - helper_le_stl_mmu, - helper_le_stq_mmu, - helper_ret_ldub_mmu, - helper_le_lduw_mmu, - helper_le_ldul_mmu, - helper_le_ldq_mmu, - }; - tcg_insn_unit *thunks[8] = { }; - TCGLabelQemuLdst *l; - - for (l = s->be->labels; l != NULL; l = l->next) { - long x = l->is_ld * 4 + l->size; - tcg_insn_unit *dest = thunks[x]; - - /* The out-of-line thunks are all the same; load the return address - from B0, load the GP, and branch to the code. Note that we are - always post-call, so the register window has rolled, so we're - using incoming parameter register numbers, not outgoing. */ - if (dest == NULL) { - uintptr_t *desc = (uintptr_t *)helpers[x]; - uintptr_t func = desc[0], gp = desc[1], disp; - - thunks[x] = dest = s->code_ptr; - - tcg_out_bundle(s, mlx, - INSN_NOP_M, - tcg_opc_l2 (gp), - tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, - TCG_REG_R1, gp)); - tcg_out_bundle(s, mii, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22, - l->is_ld ? 
TCG_REG_R35 : TCG_REG_R36, - TCG_REG_B0)); - disp = (tcg_insn_unit *)func - s->code_ptr; - tcg_out_bundle(s, mLX, - INSN_NOP_M, - tcg_opc_l3 (disp), - tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, disp)); - } - - reloc_pcrel21b_slot2(l->label_ptr, dest); - - /* Test for (pending) buffer overflow. The assumption is that any - one operation beginning below the high water mark cannot overrun - the buffer completely. Thus we can test for overflow after - generating code without having to check during generation. */ - if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { - return false; - } - } - return true; -} - -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) -{ - static const uint64_t opc_ld_m1[4] = { - OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 - }; - int addr_reg, data_reg, mem_index; - TCGMemOpIdx oi; - TCGMemOp opc, s_bits; - uint64_t fin1, fin2; - tcg_insn_unit *label_ptr; - - data_reg = args[0]; - addr_reg = args[1]; - oi = args[2]; - opc = get_memop(oi); - mem_index = get_mmuidx(oi); - s_bits = opc & MO_SIZE; - - /* Read the TLB entry */ - tcg_out_qemu_tlb(s, addr_reg, s_bits, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_read), - offsetof(CPUArchState, tlb_table[mem_index][0].addend), - INSN_NOP_I, INSN_NOP_I); - - /* P6 is the fast path, and P7 the slow path */ - - fin2 = 0; - if (opc & MO_BSWAP) { - fin1 = tcg_opc_bswap64_i(TCG_REG_P0, data_reg, TCG_REG_R8); - if (s_bits < MO_64) { - int shift = 64 - (8 << s_bits); - fin2 = (opc & MO_SIGN ? OPC_EXTR_I11 : OPC_EXTR_U_I11); - fin2 = tcg_opc_i11(TCG_REG_P0, fin2, - data_reg, data_reg, shift, 63 - shift); - } - } else { - fin1 = tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8); - } - - tcg_out_bundle(s, mmI, - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), - tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, - TCG_REG_R2, TCG_REG_R57), - tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, oi)); - label_ptr = s->code_ptr; - tcg_out_bundle(s, miB, - tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], - TCG_REG_R8, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, - get_reloc_pcrel21b_slot2(label_ptr))); - - add_qemu_ldst_label(s, 1, opc, label_ptr); - - /* Note that we always use LE helper functions, so the bswap insns - here for the fast path also apply to the slow path. */ - tcg_out_bundle(s, (fin2 ? mII : miI), - INSN_NOP_M, - fin1, - fin2 ? fin2 : INSN_NOP_I); -} - -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) -{ - static const uint64_t opc_st_m4[4] = { - OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 - }; - TCGReg addr_reg, data_reg; - int mem_index; - uint64_t pre1, pre2; - TCGMemOpIdx oi; - TCGMemOp opc, s_bits; - tcg_insn_unit *label_ptr; - - data_reg = args[0]; - addr_reg = args[1]; - oi = args[2]; - opc = get_memop(oi); - mem_index = get_mmuidx(oi); - s_bits = opc & MO_SIZE; - - /* Note that we always use LE helper functions, so the bswap insns - that are here for the fast path also apply to the slow path, - and move the data into the argument register. */ - pre2 = INSN_NOP_I; - if (opc & MO_BSWAP) { - pre1 = tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R58, data_reg); - if (s_bits < MO_64) { - int shift = 64 - (8 << s_bits); - pre2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, - TCG_REG_R58, TCG_REG_R58, shift, 63 - shift); - } - } else { - /* Just move the data into place for the slow path. 
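   In either branch the value ends up in R58, which is at once the source of
   the fast-path store and the data argument of the helper call on the slow
   path (R56 = env, R57 = address, R58 = data, R59 = oi, per
   tcg_target_call_iarg_regs).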
*/ - pre1 = tcg_opc_ext_i(TCG_REG_P0, opc, TCG_REG_R58, data_reg); - } - - tcg_out_qemu_tlb(s, addr_reg, s_bits, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_write), - offsetof(CPUArchState, tlb_table[mem_index][0].addend), - pre1, pre2); - - /* P6 is the fast path, and P7 the slow path */ - tcg_out_bundle(s, mmI, - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), - tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, - TCG_REG_R2, TCG_REG_R57), - tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, oi)); - label_ptr = s->code_ptr; - tcg_out_bundle(s, miB, - tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits], - TCG_REG_R58, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, - get_reloc_pcrel21b_slot2(label_ptr))); - - add_qemu_ldst_label(s, 0, opc, label_ptr); -} - -#else /* !CONFIG_SOFTMMU */ -# include "tcg-be-null.h" - -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) -{ - static uint64_t const opc_ld_m1[4] = { - OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 - }; - int addr_reg, data_reg; - TCGMemOp opc, s_bits, bswap; - - data_reg = args[0]; - addr_reg = args[1]; - opc = args[2]; - s_bits = opc & MO_SIZE; - bswap = opc & MO_BSWAP; - -#if TARGET_LONG_BITS == 32 - if (guest_base != 0) { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, - TCG_REG_R3, addr_reg), - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_GUEST_BASE_REG, TCG_REG_R3)); - } else { - tcg_out_bundle(s, miI, - INSN_NOP_M, - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, - TCG_REG_R2, addr_reg), - INSN_NOP_I); - } - - if (!bswap) { - if (!(opc & MO_SIGN)) { - tcg_out_bundle(s, miI, - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - INSN_NOP_I, - INSN_NOP_I); - } else { - tcg_out_bundle(s, mII, - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); - } - } else if (s_bits == MO_64) { - tcg_out_bundle(s, mII, - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); - } else { - if (s_bits == MO_16) { - tcg_out_bundle(s, mII, - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - data_reg, data_reg, 15, 15)); - } else { - tcg_out_bundle(s, mII, - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - data_reg, data_reg, 31, 31)); - } - if (!(opc & MO_SIGN)) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); - } else { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg), - tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); - } - } -#else - if (guest_base != 0) { - tcg_out_bundle(s, MmI, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_GUEST_BASE_REG, addr_reg), - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - INSN_NOP_I); - } else { - tcg_out_bundle(s, mmI, - INSN_NOP_M, - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, addr_reg), - INSN_NOP_I); - } - - if (bswap && s_bits == MO_16) { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - data_reg, data_reg, 15, 15), - tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); - } else if (bswap && s_bits == MO_32) { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - data_reg, data_reg, 31, 31), - 
tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); - } else if (bswap && s_bits == MO_64) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); - } - if (opc & MO_SIGN) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); - } -#endif -} - -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) -{ - static uint64_t const opc_st_m4[4] = { - OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 - }; - int addr_reg, data_reg; -#if TARGET_LONG_BITS == 64 - uint64_t add_guest_base; -#endif - TCGMemOp opc, s_bits, bswap; - - data_reg = args[0]; - addr_reg = args[1]; - opc = args[2]; - s_bits = opc & MO_SIZE; - bswap = opc & MO_BSWAP; - -#if TARGET_LONG_BITS == 32 - if (guest_base != 0) { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, - TCG_REG_R3, addr_reg), - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_GUEST_BASE_REG, TCG_REG_R3)); - } else { - tcg_out_bundle(s, miI, - INSN_NOP_M, - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, - TCG_REG_R2, addr_reg), - INSN_NOP_I); - } - - if (bswap) { - if (s_bits == MO_16) { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - TCG_REG_R3, data_reg, 15, 15), - tcg_opc_bswap64_i(TCG_REG_P0, - TCG_REG_R3, TCG_REG_R3)); - data_reg = TCG_REG_R3; - } else if (s_bits == MO_32) { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - TCG_REG_R3, data_reg, 31, 31), - tcg_opc_bswap64_i(TCG_REG_P0, - TCG_REG_R3, TCG_REG_R3)); - data_reg = TCG_REG_R3; - } else if (s_bits == MO_64) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg)); - data_reg = TCG_REG_R3; - } - } - tcg_out_bundle(s, mmI, - tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], - data_reg, TCG_REG_R2), - INSN_NOP_M, - INSN_NOP_I); -#else - if (guest_base != 0) { - add_guest_base = tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_GUEST_BASE_REG, addr_reg); - addr_reg = TCG_REG_R2; - } else { - add_guest_base = INSN_NOP_M; - } - - if (!bswap) { - tcg_out_bundle(s, (guest_base ? 
MmI : mmI), - add_guest_base, - tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], - data_reg, addr_reg), - INSN_NOP_I); - } else { - if (s_bits == MO_16) { - tcg_out_bundle(s, mII, - add_guest_base, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - TCG_REG_R3, data_reg, 15, 15), - tcg_opc_bswap64_i(TCG_REG_P0, - TCG_REG_R3, TCG_REG_R3)); - data_reg = TCG_REG_R3; - } else if (s_bits == MO_32) { - tcg_out_bundle(s, mII, - add_guest_base, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - TCG_REG_R3, data_reg, 31, 31), - tcg_opc_bswap64_i(TCG_REG_P0, - TCG_REG_R3, TCG_REG_R3)); - data_reg = TCG_REG_R3; - } else if (s_bits == MO_64) { - tcg_out_bundle(s, miI, - add_guest_base, - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg)); - data_reg = TCG_REG_R3; - } - tcg_out_bundle(s, miI, - tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], - data_reg, addr_reg), - INSN_NOP_I, - INSN_NOP_I); - } -#endif -} - -#endif - -static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, - const TCGArg *args, const int *const_args) -{ - switch(opc) { - case INDEX_op_exit_tb: - tcg_out_exit_tb(s, args[0]); - break; - case INDEX_op_br: - tcg_out_br(s, arg_label(args[0])); - break; - case INDEX_op_goto_tb: - tcg_out_goto_tb(s, args[0]); - break; - - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - tcg_out_ld_rel(s, OPC_LD1_M1, args[0], args[1], args[2]); - break; - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - tcg_out_ld_rel(s, OPC_LD1_M1, args[0], args[1], args[2]); - tcg_out_ext(s, OPC_SXT1_I29, args[0], args[0]); - break; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - tcg_out_ld_rel(s, OPC_LD2_M1, args[0], args[1], args[2]); - break; - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - tcg_out_ld_rel(s, OPC_LD2_M1, args[0], args[1], args[2]); - tcg_out_ext(s, OPC_SXT2_I29, args[0], args[0]); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - tcg_out_ld_rel(s, OPC_LD4_M1, args[0], args[1], args[2]); - break; - case INDEX_op_ld32s_i64: - tcg_out_ld_rel(s, OPC_LD4_M1, args[0], args[1], args[2]); - tcg_out_ext(s, OPC_SXT4_I29, args[0], args[0]); - break; - case INDEX_op_ld_i64: - tcg_out_ld_rel(s, OPC_LD8_M1, args[0], args[1], args[2]); - break; - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - tcg_out_st_rel(s, OPC_ST1_M4, args[0], args[1], args[2]); - break; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - tcg_out_st_rel(s, OPC_ST2_M4, args[0], args[1], args[2]); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_st_rel(s, OPC_ST4_M4, args[0], args[1], args[2]); - break; - case INDEX_op_st_i64: - tcg_out_st_rel(s, OPC_ST8_M4, args[0], args[1], args[2]); - break; - - case INDEX_op_add_i32: - case INDEX_op_add_i64: - tcg_out_add(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: - tcg_out_sub(s, args[0], args[1], const_args[1], args[2], const_args[2]); - break; - - case INDEX_op_and_i32: - case INDEX_op_and_i64: - /* TCG expects arg2 constant; A3 expects arg1 constant. Swap. 
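   This is only legal because these ops are commutative.  With hypothetical
   operands args = { ret, r1, 0x1f } and const_args[2] = 1, the call below
   becomes

       tcg_out_alu(s, OPC_AND_A1, OPC_AND_A3, ret, 0x1f, 1, r1, 0);

   so the constant sits in the arg1 position, where tcg_out_alu() can encode
   it as the 8-bit immediate of the A3 form instead of first loading it into
   a scratch register.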
*/ - tcg_out_alu(s, OPC_AND_A1, OPC_AND_A3, args[0], - args[2], const_args[2], args[1], const_args[1]); - break; - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: - tcg_out_alu(s, OPC_ANDCM_A1, OPC_ANDCM_A3, args[0], - args[1], const_args[1], args[2], const_args[2]); - break; - case INDEX_op_eqv_i32: - case INDEX_op_eqv_i64: - tcg_out_eqv(s, args[0], args[1], const_args[1], - args[2], const_args[2]); - break; - case INDEX_op_nand_i32: - case INDEX_op_nand_i64: - tcg_out_nand(s, args[0], args[1], const_args[1], - args[2], const_args[2]); - break; - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - tcg_out_nor(s, args[0], args[1], const_args[1], - args[2], const_args[2]); - break; - case INDEX_op_or_i32: - case INDEX_op_or_i64: - /* TCG expects arg2 constant; A3 expects arg1 constant. Swap. */ - tcg_out_alu(s, OPC_OR_A1, OPC_OR_A3, args[0], - args[2], const_args[2], args[1], const_args[1]); - break; - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: - tcg_out_orc(s, args[0], args[1], const_args[1], - args[2], const_args[2]); - break; - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: - /* TCG expects arg2 constant; A3 expects arg1 constant. Swap. */ - tcg_out_alu(s, OPC_XOR_A1, OPC_XOR_A3, args[0], - args[2], const_args[2], args[1], const_args[1]); - break; - - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: - tcg_out_mul(s, args[0], args[1], args[2]); - break; - - case INDEX_op_sar_i32: - tcg_out_sar_i32(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_sar_i64: - tcg_out_sar_i64(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_shl_i32: - tcg_out_shl_i32(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_shl_i64: - tcg_out_shl_i64(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_shr_i32: - tcg_out_shr_i32(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_shr_i64: - tcg_out_shr_i64(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_rotl_i32: - tcg_out_rotl_i32(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_rotl_i64: - tcg_out_rotl_i64(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_rotr_i32: - tcg_out_rotr_i32(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_rotr_i64: - tcg_out_rotr_i64(s, args[0], args[1], args[2], const_args[2]); - break; - - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - tcg_out_ext(s, OPC_SXT1_I29, args[0], args[1]); - break; - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - tcg_out_ext(s, OPC_ZXT1_I29, args[0], args[1]); - break; - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - tcg_out_ext(s, OPC_SXT2_I29, args[0], args[1]); - break; - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - tcg_out_ext(s, OPC_ZXT2_I29, args[0], args[1]); - break; - case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: - tcg_out_ext(s, OPC_SXT4_I29, args[0], args[1]); - break; - case INDEX_op_extu_i32_i64: - case INDEX_op_ext32u_i64: - tcg_out_ext(s, OPC_ZXT4_I29, args[0], args[1]); - break; - - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - tcg_out_bswap16(s, args[0], args[1]); - break; - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: - tcg_out_bswap32(s, args[0], args[1]); - break; - case INDEX_op_bswap64_i64: - tcg_out_bswap64(s, args[0], args[1]); - break; - - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: - tcg_out_deposit(s, args[0], args[1], args[2], const_args[2], - args[3], args[4]); - break; - - case INDEX_op_brcond_i32: - 
tcg_out_brcond(s, args[2], args[0], args[1], arg_label(args[3]), 1); - break; - case INDEX_op_brcond_i64: - tcg_out_brcond(s, args[2], args[0], args[1], arg_label(args[3]), 0); - break; - case INDEX_op_setcond_i32: - tcg_out_setcond(s, args[3], args[0], args[1], args[2], 1); - break; - case INDEX_op_setcond_i64: - tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0); - break; - case INDEX_op_movcond_i32: - tcg_out_movcond(s, args[5], args[0], args[1], args[2], - args[3], const_args[3], args[4], const_args[4], 1); - break; - case INDEX_op_movcond_i64: - tcg_out_movcond(s, args[5], args[0], args[1], args[2], - args[3], const_args[3], args[4], const_args[4], 0); - break; - - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args); - break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args); - break; - - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ - case INDEX_op_movi_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - default: - tcg_abort(); - } -} - -static const TCGTargetOpDef ia64_op_defs[] = { - { INDEX_op_br, { } }, - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_st8_i32, { "rZ", "r" } }, - { INDEX_op_st16_i32, { "rZ", "r" } }, - { INDEX_op_st_i32, { "rZ", "r" } }, - - { INDEX_op_add_i32, { "r", "rZ", "rI" } }, - { INDEX_op_sub_i32, { "r", "rI", "rI" } }, - - { INDEX_op_and_i32, { "r", "rI", "rI" } }, - { INDEX_op_andc_i32, { "r", "rI", "rI" } }, - { INDEX_op_eqv_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_nand_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_nor_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_or_i32, { "r", "rI", "rI" } }, - { INDEX_op_orc_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_xor_i32, { "r", "rI", "rI" } }, - - { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, - - { INDEX_op_sar_i32, { "r", "rZ", "ri" } }, - { INDEX_op_shl_i32, { "r", "rZ", "ri" } }, - { INDEX_op_shr_i32, { "r", "rZ", "ri" } }, - { INDEX_op_rotl_i32, { "r", "rZ", "ri" } }, - { INDEX_op_rotr_i32, { "r", "rZ", "ri" } }, - - { INDEX_op_ext8s_i32, { "r", "rZ"} }, - { INDEX_op_ext8u_i32, { "r", "rZ"} }, - { INDEX_op_ext16s_i32, { "r", "rZ"} }, - { INDEX_op_ext16u_i32, { "r", "rZ"} }, - - { INDEX_op_bswap16_i32, { "r", "rZ" } }, - { INDEX_op_bswap32_i32, { "r", "rZ" } }, - - { INDEX_op_brcond_i32, { "rZ", "rZ" } }, - { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } }, - - { INDEX_op_ld8u_i64, { "r", "r" } }, - { INDEX_op_ld8s_i64, { "r", "r" } }, - { INDEX_op_ld16u_i64, { "r", "r" } }, - { INDEX_op_ld16s_i64, { "r", "r" } }, - { INDEX_op_ld32u_i64, { "r", "r" } }, - { INDEX_op_ld32s_i64, { "r", "r" } }, - { INDEX_op_ld_i64, { "r", "r" } }, - { INDEX_op_st8_i64, { "rZ", "r" } }, - { INDEX_op_st16_i64, { "rZ", "r" } }, - { INDEX_op_st32_i64, { "rZ", "r" } }, - { INDEX_op_st_i64, { "rZ", "r" } }, - - { INDEX_op_add_i64, { "r", "rZ", "rI" } }, - { INDEX_op_sub_i64, { "r", "rI", "rI" } }, - - { INDEX_op_and_i64, { "r", "rI", "rI" } }, - { INDEX_op_andc_i64, { "r", "rI", "rI" } }, - { INDEX_op_eqv_i64, { "r", "rZ", "rZ" } }, - { INDEX_op_nand_i64, { "r", "rZ", "rZ" } }, - { INDEX_op_nor_i64, { "r", "rZ", 
"rZ" } }, - { INDEX_op_or_i64, { "r", "rI", "rI" } }, - { INDEX_op_orc_i64, { "r", "rZ", "rZ" } }, - { INDEX_op_xor_i64, { "r", "rI", "rI" } }, - - { INDEX_op_mul_i64, { "r", "rZ", "rZ" } }, - - { INDEX_op_sar_i64, { "r", "rZ", "ri" } }, - { INDEX_op_shl_i64, { "r", "rZ", "ri" } }, - { INDEX_op_shr_i64, { "r", "rZ", "ri" } }, - { INDEX_op_rotl_i64, { "r", "rZ", "ri" } }, - { INDEX_op_rotr_i64, { "r", "rZ", "ri" } }, - - { INDEX_op_ext8s_i64, { "r", "rZ"} }, - { INDEX_op_ext8u_i64, { "r", "rZ"} }, - { INDEX_op_ext16s_i64, { "r", "rZ"} }, - { INDEX_op_ext16u_i64, { "r", "rZ"} }, - { INDEX_op_ext32s_i64, { "r", "rZ"} }, - { INDEX_op_ext32u_i64, { "r", "rZ"} }, - { INDEX_op_ext_i32_i64, { "r", "rZ" } }, - { INDEX_op_extu_i32_i64, { "r", "rZ" } }, - - { INDEX_op_bswap16_i64, { "r", "rZ" } }, - { INDEX_op_bswap32_i64, { "r", "rZ" } }, - { INDEX_op_bswap64_i64, { "r", "rZ" } }, - - { INDEX_op_brcond_i64, { "rZ", "rZ" } }, - { INDEX_op_setcond_i64, { "r", "rZ", "rZ" } }, - { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rI", "rI" } }, - - { INDEX_op_deposit_i32, { "r", "rZ", "ri" } }, - { INDEX_op_deposit_i64, { "r", "rZ", "ri" } }, - - { INDEX_op_qemu_ld_i32, { "r", "r" } }, - { INDEX_op_qemu_ld_i64, { "r", "r" } }, - { INDEX_op_qemu_st_i32, { "SZ", "r" } }, - { INDEX_op_qemu_st_i64, { "SZ", "r" } }, - - { -1 }, -}; - -/* Generate global QEMU prologue and epilogue code */ -static void tcg_target_qemu_prologue(TCGContext *s) -{ - int frame_size; - - /* reserve some stack space */ - frame_size = TCG_STATIC_CALL_ARGS_SIZE + - CPU_TEMP_BUF_NLONGS * sizeof(long); - frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & - ~(TCG_TARGET_STACK_ALIGN - 1); - tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, - CPU_TEMP_BUF_NLONGS * sizeof(long)); - - /* First emit adhoc function descriptor */ - *s->code_ptr = (tcg_insn_unit){ - (uint64_t)(s->code_ptr + 1), /* entry point */ - 0 /* skip gp */ - }; - s->code_ptr++; - - /* prologue */ - tcg_out_bundle(s, miI, - tcg_opc_m34(TCG_REG_P0, OPC_ALLOC_M34, - TCG_REG_R34, 32, 24, 0), - INSN_NOP_I, - tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, - TCG_REG_B6, TCG_REG_R33, 0)); - - /* ??? If guest_base < 0x200000, we could load the register via - an ADDL in the M slot of the next bundle. 
*/ - if (guest_base != 0) { - tcg_out_bundle(s, mlx, - INSN_NOP_M, - tcg_opc_l2(guest_base), - tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, - TCG_GUEST_BASE_REG, guest_base)); - tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); - } - - tcg_out_bundle(s, miB, - tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, - TCG_REG_R12, -frame_size, TCG_REG_R12), - tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22, - TCG_REG_R33, TCG_REG_B0), - tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); - - /* epilogue */ - tb_ret_addr = s->code_ptr; - tcg_out_bundle(s, miI, - INSN_NOP_M, - tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, - TCG_REG_B0, TCG_REG_R33, 0), - tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, - TCG_REG_R12, frame_size, TCG_REG_R12)); - tcg_out_bundle(s, miB, - INSN_NOP_M, - tcg_opc_i26(TCG_REG_P0, OPC_MOV_I_I26, - TCG_REG_PFS, TCG_REG_R34), - tcg_opc_b4 (TCG_REG_P0, OPC_BR_RET_SPTK_MANY_B4, - TCG_REG_B0)); -} - -static void tcg_target_init(TCGContext *s) -{ - tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I32], - 0xffffffffffffffffull); - tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I64], - 0xffffffffffffffffull); - - tcg_regset_clear(tcg_target_call_clobber_regs); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R15); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R16); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R17); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R18); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R19); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R20); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R21); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R22); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R23); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R24); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R25); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R26); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R27); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R28); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R29); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R30); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R31); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R56); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R57); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R58); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R59); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R60); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R61); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R62); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R63); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* zero register */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* global pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* internal use */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R3); /* internal use */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R12); /* stack pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); 
/* thread pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R33); /* return address */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R34); /* PFS */ - - /* The following 4 are not in use, are call-saved, but *not* saved - by the prologue. Therefore we cannot use them without modifying - the prologue. There doesn't seem to be any good reason to use - these as opposed to the windowed registers. */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R4); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R5); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R7); - - tcg_add_target_add_op_defs(ia64_op_defs); -} diff --git a/tcg/ia64/tcg-target.inc.c b/tcg/ia64/tcg-target.inc.c new file mode 100644 index 0000000000..62d654943c --- /dev/null +++ b/tcg/ia64/tcg-target.inc.c @@ -0,0 +1,2453 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2009-2010 Aurelien Jarno + * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +/* + * Register definitions + */ + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", + "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", + "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", + "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", + "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", +}; +#endif + +#ifndef CONFIG_SOFTMMU +#define TCG_GUEST_BASE_REG TCG_REG_R55 +#endif + +/* Branch registers */ +enum { + TCG_REG_B0 = 0, + TCG_REG_B1, + TCG_REG_B2, + TCG_REG_B3, + TCG_REG_B4, + TCG_REG_B5, + TCG_REG_B6, + TCG_REG_B7, +}; + +/* Floating point registers */ +enum { + TCG_REG_F0 = 0, + TCG_REG_F1, + TCG_REG_F2, + TCG_REG_F3, + TCG_REG_F4, + TCG_REG_F5, + TCG_REG_F6, + TCG_REG_F7, + TCG_REG_F8, + TCG_REG_F9, + TCG_REG_F10, + TCG_REG_F11, + TCG_REG_F12, + TCG_REG_F13, + TCG_REG_F14, + TCG_REG_F15, +}; + +/* Predicate registers */ +enum { + TCG_REG_P0 = 0, + TCG_REG_P1, + TCG_REG_P2, + TCG_REG_P3, + TCG_REG_P4, + TCG_REG_P5, + TCG_REG_P6, + TCG_REG_P7, + TCG_REG_P8, + TCG_REG_P9, + TCG_REG_P10, + TCG_REG_P11, + TCG_REG_P12, + TCG_REG_P13, + TCG_REG_P14, + TCG_REG_P15, +}; + +/* Application registers */ +enum { + TCG_REG_PFS = 64, +}; + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_R35, + TCG_REG_R36, + TCG_REG_R37, + TCG_REG_R38, + TCG_REG_R39, + TCG_REG_R40, + TCG_REG_R41, + TCG_REG_R42, + TCG_REG_R43, + TCG_REG_R44, + TCG_REG_R45, + TCG_REG_R46, + TCG_REG_R47, + TCG_REG_R48, + TCG_REG_R49, + TCG_REG_R50, + TCG_REG_R51, + TCG_REG_R52, + TCG_REG_R53, + TCG_REG_R54, + TCG_REG_R55, + TCG_REG_R14, + TCG_REG_R15, + TCG_REG_R16, + TCG_REG_R17, + TCG_REG_R18, + TCG_REG_R19, + TCG_REG_R20, + TCG_REG_R21, + TCG_REG_R22, + TCG_REG_R23, + TCG_REG_R24, + TCG_REG_R25, + TCG_REG_R26, + TCG_REG_R27, + TCG_REG_R28, + TCG_REG_R29, + TCG_REG_R30, + TCG_REG_R31, + TCG_REG_R56, + TCG_REG_R57, + TCG_REG_R58, + TCG_REG_R59, + TCG_REG_R60, + TCG_REG_R61, + TCG_REG_R62, + TCG_REG_R63, + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11 +}; + +static const int tcg_target_call_iarg_regs[8] = { + TCG_REG_R56, + TCG_REG_R57, + TCG_REG_R58, + TCG_REG_R59, + TCG_REG_R60, + TCG_REG_R61, + TCG_REG_R62, + TCG_REG_R63, +}; + +static const int tcg_target_call_oarg_regs[] = { + TCG_REG_R8 +}; + +/* + * opcode formation + */ + +/* bundle templates: stops (double bar in the IA64 manual) are marked with + an uppercase letter. 
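   In these names a lower-case letter is a slot with no stop after it and an
   upper-case letter is a slot followed by a stop, so mII is M, I, I with
   stops after the second and third slots, while MmI has stops after the
   first and last slots.  tcg_out_mov() further down, for example, emits

       tcg_out_bundle(s, mmI,
                      INSN_NOP_M,
                      INSN_NOP_M,
                      tcg_opc_mov_a(TCG_REG_P0, ret, arg));

   i.e. two M-slot nops and one A-type move executed in the I slot, with a
   single stop at the end of the bundle.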
*/ +enum { + mii = 0x00, + miI = 0x01, + mIi = 0x02, + mII = 0x03, + mlx = 0x04, + mLX = 0x05, + mmi = 0x08, + mmI = 0x09, + Mmi = 0x0a, + MmI = 0x0b, + mfi = 0x0c, + mfI = 0x0d, + mmf = 0x0e, + mmF = 0x0f, + mib = 0x10, + miB = 0x11, + mbb = 0x12, + mbB = 0x13, + bbb = 0x16, + bbB = 0x17, + mmb = 0x18, + mmB = 0x19, + mfb = 0x1c, + mfB = 0x1d, +}; + +enum { + OPC_ADD_A1 = 0x10000000000ull, + OPC_AND_A1 = 0x10060000000ull, + OPC_AND_A3 = 0x10160000000ull, + OPC_ANDCM_A1 = 0x10068000000ull, + OPC_ANDCM_A3 = 0x10168000000ull, + OPC_ADDS_A4 = 0x10800000000ull, + OPC_ADDL_A5 = 0x12000000000ull, + OPC_ALLOC_M34 = 0x02c00000000ull, + OPC_BR_DPTK_FEW_B1 = 0x08400000000ull, + OPC_BR_SPTK_MANY_B1 = 0x08000001000ull, + OPC_BR_CALL_SPNT_FEW_B3 = 0x0a200000000ull, + OPC_BR_SPTK_MANY_B4 = 0x00100001000ull, + OPC_BR_CALL_SPTK_MANY_B5 = 0x02100001000ull, + OPC_BR_RET_SPTK_MANY_B4 = 0x00108001100ull, + OPC_BRL_SPTK_MANY_X3 = 0x18000001000ull, + OPC_BRL_CALL_SPNT_MANY_X4 = 0x1a200001000ull, + OPC_BRL_CALL_SPTK_MANY_X4 = 0x1a000001000ull, + OPC_CMP_LT_A6 = 0x18000000000ull, + OPC_CMP_LTU_A6 = 0x1a000000000ull, + OPC_CMP_EQ_A6 = 0x1c000000000ull, + OPC_CMP4_LT_A6 = 0x18400000000ull, + OPC_CMP4_LTU_A6 = 0x1a400000000ull, + OPC_CMP4_EQ_A6 = 0x1c400000000ull, + OPC_DEP_I14 = 0x0ae00000000ull, + OPC_DEP_I15 = 0x08000000000ull, + OPC_DEP_Z_I12 = 0x0a600000000ull, + OPC_EXTR_I11 = 0x0a400002000ull, + OPC_EXTR_U_I11 = 0x0a400000000ull, + OPC_FCVT_FX_TRUNC_S1_F10 = 0x004d0000000ull, + OPC_FCVT_FXU_TRUNC_S1_F10 = 0x004d8000000ull, + OPC_FCVT_XF_F11 = 0x000e0000000ull, + OPC_FMA_S1_F1 = 0x10400000000ull, + OPC_FNMA_S1_F1 = 0x18400000000ull, + OPC_FRCPA_S1_F6 = 0x00600000000ull, + OPC_GETF_SIG_M19 = 0x08708000000ull, + OPC_LD1_M1 = 0x08000000000ull, + OPC_LD1_M3 = 0x0a000000000ull, + OPC_LD2_M1 = 0x08040000000ull, + OPC_LD2_M3 = 0x0a040000000ull, + OPC_LD4_M1 = 0x08080000000ull, + OPC_LD4_M3 = 0x0a080000000ull, + OPC_LD8_M1 = 0x080c0000000ull, + OPC_LD8_M3 = 0x0a0c0000000ull, + OPC_MUX1_I3 = 0x0eca0000000ull, + OPC_NOP_B9 = 0x04008000000ull, + OPC_NOP_F16 = 0x00008000000ull, + OPC_NOP_I18 = 0x00008000000ull, + OPC_NOP_M48 = 0x00008000000ull, + OPC_MOV_I21 = 0x00e00100000ull, + OPC_MOV_RET_I21 = 0x00e00500000ull, + OPC_MOV_I22 = 0x00188000000ull, + OPC_MOV_I_I26 = 0x00150000000ull, + OPC_MOVL_X2 = 0x0c000000000ull, + OPC_OR_A1 = 0x10070000000ull, + OPC_OR_A3 = 0x10170000000ull, + OPC_SETF_EXP_M18 = 0x0c748000000ull, + OPC_SETF_SIG_M18 = 0x0c708000000ull, + OPC_SHL_I7 = 0x0f240000000ull, + OPC_SHR_I5 = 0x0f220000000ull, + OPC_SHR_U_I5 = 0x0f200000000ull, + OPC_SHRP_I10 = 0x0ac00000000ull, + OPC_SXT1_I29 = 0x000a0000000ull, + OPC_SXT2_I29 = 0x000a8000000ull, + OPC_SXT4_I29 = 0x000b0000000ull, + OPC_ST1_M4 = 0x08c00000000ull, + OPC_ST2_M4 = 0x08c40000000ull, + OPC_ST4_M4 = 0x08c80000000ull, + OPC_ST8_M4 = 0x08cc0000000ull, + OPC_SUB_A1 = 0x10028000000ull, + OPC_SUB_A3 = 0x10128000000ull, + OPC_UNPACK4_L_I2 = 0x0f860000000ull, + OPC_XMA_L_F2 = 0x1d000000000ull, + OPC_XOR_A1 = 0x10078000000ull, + OPC_XOR_A3 = 0x10178000000ull, + OPC_ZXT1_I29 = 0x00080000000ull, + OPC_ZXT2_I29 = 0x00088000000ull, + OPC_ZXT4_I29 = 0x00090000000ull, + + INSN_NOP_M = OPC_NOP_M48, /* nop.m 0 */ + INSN_NOP_I = OPC_NOP_I18, /* nop.i 0 */ +}; + +static inline uint64_t tcg_opc_a1(int qp, uint64_t opc, int r1, + int r2, int r3) +{ + return opc + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_a3(int qp, uint64_t opc, int r1, + uint64_t imm, int r3) +{ + return opc + | ((imm & 
0x80) << 29) /* s */ + | ((imm & 0x7f) << 13) /* imm7b */ + | ((r3 & 0x7f) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_a4(int qp, uint64_t opc, int r1, + uint64_t imm, int r3) +{ + return opc + | ((imm & 0x2000) << 23) /* s */ + | ((imm & 0x1f80) << 20) /* imm6d */ + | ((imm & 0x007f) << 13) /* imm7b */ + | ((r3 & 0x7f) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_a5(int qp, uint64_t opc, int r1, + uint64_t imm, int r3) +{ + return opc + | ((imm & 0x200000) << 15) /* s */ + | ((imm & 0x1f0000) << 6) /* imm5c */ + | ((imm & 0x00ff80) << 20) /* imm9d */ + | ((imm & 0x00007f) << 13) /* imm7b */ + | ((r3 & 0x03) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_a6(int qp, uint64_t opc, int p1, + int p2, int r2, int r3) +{ + return opc + | ((p2 & 0x3f) << 27) + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((p1 & 0x3f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_b1(int qp, uint64_t opc, uint64_t imm) +{ + return opc + | ((imm & 0x100000) << 16) /* s */ + | ((imm & 0x0fffff) << 13) /* imm20b */ + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_b3(int qp, uint64_t opc, int b1, uint64_t imm) +{ + return opc + | ((imm & 0x100000) << 16) /* s */ + | ((imm & 0x0fffff) << 13) /* imm20b */ + | ((b1 & 0x7) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_b4(int qp, uint64_t opc, int b2) +{ + return opc + | ((b2 & 0x7) << 13) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_b5(int qp, uint64_t opc, int b1, int b2) +{ + return opc + | ((b2 & 0x7) << 13) + | ((b1 & 0x7) << 6) + | (qp & 0x3f); +} + + +static inline uint64_t tcg_opc_b9(int qp, uint64_t opc, uint64_t imm) +{ + return opc + | ((imm & 0x100000) << 16) /* i */ + | ((imm & 0x0fffff) << 6) /* imm20a */ + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f1(int qp, uint64_t opc, int f1, + int f3, int f4, int f2) +{ + return opc + | ((f4 & 0x7f) << 27) + | ((f3 & 0x7f) << 20) + | ((f2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f2(int qp, uint64_t opc, int f1, + int f3, int f4, int f2) +{ + return opc + | ((f4 & 0x7f) << 27) + | ((f3 & 0x7f) << 20) + | ((f2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f6(int qp, uint64_t opc, int f1, + int p2, int f2, int f3) +{ + return opc + | ((p2 & 0x3f) << 27) + | ((f3 & 0x7f) << 20) + | ((f2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f10(int qp, uint64_t opc, int f1, int f2) +{ + return opc + | ((f2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f11(int qp, uint64_t opc, int f1, int f2) +{ + return opc + | ((f2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f16(int qp, uint64_t opc, uint64_t imm) +{ + return opc + | ((imm & 0x100000) << 16) /* i */ + | ((imm & 0x0fffff) << 6) /* imm20a */ + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i2(int qp, uint64_t opc, int r1, + int r2, int r3) +{ + return opc + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i3(int qp, uint64_t opc, int r1, + int r2, int mbtype) +{ + return opc + | ((mbtype & 0x0f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i5(int qp, uint64_t opc, int r1, + int r3, int r2) +{ + return opc + | ((r3 & 0x7f) << 20) + | ((r2 & 
0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i7(int qp, uint64_t opc, int r1, + int r2, int r3) +{ + return opc + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i10(int qp, uint64_t opc, int r1, + int r2, int r3, uint64_t count) +{ + return opc + | ((count & 0x3f) << 27) + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i11(int qp, uint64_t opc, int r1, + int r3, uint64_t pos, uint64_t len) +{ + return opc + | ((len & 0x3f) << 27) + | ((r3 & 0x7f) << 20) + | ((pos & 0x3f) << 14) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i12(int qp, uint64_t opc, int r1, + int r2, uint64_t pos, uint64_t len) +{ + return opc + | ((len & 0x3f) << 27) + | ((pos & 0x3f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i14(int qp, uint64_t opc, int r1, uint64_t imm, + int r3, uint64_t pos, uint64_t len) +{ + return opc + | ((imm & 0x01) << 36) + | ((len & 0x3f) << 27) + | ((r3 & 0x7f) << 20) + | ((pos & 0x3f) << 14) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i15(int qp, uint64_t opc, int r1, int r2, + int r3, uint64_t pos, uint64_t len) +{ + return opc + | ((pos & 0x3f) << 31) + | ((len & 0x0f) << 27) + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i18(int qp, uint64_t opc, uint64_t imm) +{ + return opc + | ((imm & 0x100000) << 16) /* i */ + | ((imm & 0x0fffff) << 6) /* imm20a */ + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i21(int qp, uint64_t opc, int b1, + int r2, uint64_t imm) +{ + return opc + | ((imm & 0x1ff) << 24) + | ((r2 & 0x7f) << 13) + | ((b1 & 0x7) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i22(int qp, uint64_t opc, int r1, int b2) +{ + return opc + | ((b2 & 0x7) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i26(int qp, uint64_t opc, int ar3, int r2) +{ + return opc + | ((ar3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i29(int qp, uint64_t opc, int r1, int r3) +{ + return opc + | ((r3 & 0x7f) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_l2(uint64_t imm) +{ + return (imm & 0x7fffffffffc00000ull) >> 22; +} + +static inline uint64_t tcg_opc_l3(uint64_t imm) +{ + return (imm & 0x07fffffffff00000ull) >> 18; +} + +#define tcg_opc_l4 tcg_opc_l3 + +static inline uint64_t tcg_opc_m1(int qp, uint64_t opc, int r1, int r3) +{ + return opc + | ((r3 & 0x7f) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_m3(int qp, uint64_t opc, int r1, + int r3, uint64_t imm) +{ + return opc + | ((imm & 0x100) << 28) /* s */ + | ((imm & 0x080) << 20) /* i */ + | ((imm & 0x07f) << 13) /* imm7b */ + | ((r3 & 0x7f) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_m4(int qp, uint64_t opc, int r2, int r3) +{ + return opc + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_m18(int qp, uint64_t opc, int f1, int r2) +{ + return opc + | ((r2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_m19(int qp, uint64_t opc, int r1, int f2) +{ + return opc + | ((f2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline 
uint64_t tcg_opc_m34(int qp, uint64_t opc, int r1, + int sof, int sol, int sor) +{ + return opc + | ((sor & 0x0f) << 27) + | ((sol & 0x7f) << 20) + | ((sof & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_m48(int qp, uint64_t opc, uint64_t imm) +{ + return opc + | ((imm & 0x100000) << 16) /* i */ + | ((imm & 0x0fffff) << 6) /* imm20a */ + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_x2(int qp, uint64_t opc, + int r1, uint64_t imm) +{ + return opc + | ((imm & 0x8000000000000000ull) >> 27) /* i */ + | (imm & 0x0000000000200000ull) /* ic */ + | ((imm & 0x00000000001f0000ull) << 6) /* imm5c */ + | ((imm & 0x000000000000ff80ull) << 20) /* imm9d */ + | ((imm & 0x000000000000007full) << 13) /* imm7b */ + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_x3(int qp, uint64_t opc, uint64_t imm) +{ + return opc + | ((imm & 0x0800000000000000ull) >> 23) /* i */ + | ((imm & 0x00000000000fffffull) << 13) /* imm20b */ + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_x4(int qp, uint64_t opc, int b1, uint64_t imm) +{ + return opc + | ((imm & 0x0800000000000000ull) >> 23) /* i */ + | ((imm & 0x00000000000fffffull) << 13) /* imm20b */ + | ((b1 & 0x7) << 6) + | (qp & 0x3f); +} + + +/* + * Relocations - Note that we never encode branches elsewhere than slot 2. + */ + +static void reloc_pcrel21b_slot2(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + uint64_t imm = target - pc; + + pc->hi = (pc->hi & 0xf700000fffffffffull) + | ((imm & 0x100000) << 39) /* s */ + | ((imm & 0x0fffff) << 36); /* imm20b */ +} + +static uint64_t get_reloc_pcrel21b_slot2(tcg_insn_unit *pc) +{ + int64_t high = pc->hi; + + return ((high >> 39) & 0x100000) + /* s */ + ((high >> 36) & 0x0fffff); /* imm20b */ +} + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + assert(addend == 0); + assert(type == R_IA64_PCREL21B); + reloc_pcrel21b_slot2(code_ptr, (tcg_insn_unit *)value); +} + +/* + * Constraints + */ + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str; + + ct_str = *pct_str; + switch(ct_str[0]) { + case 'r': + ct->ct |= TCG_CT_REG; + tcg_regset_set(ct->u.regs, 0xffffffffffffffffull); + break; + case 'I': + ct->ct |= TCG_CT_CONST_S22; + break; + case 'S': + ct->ct |= TCG_CT_REG; + tcg_regset_set(ct->u.regs, 0xffffffffffffffffull); +#if defined(CONFIG_SOFTMMU) + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R56); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R57); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R58); +#endif + break; + case 'Z': + /* We are cheating a bit here, using the fact that the register + r0 is also the register number 0. Hence there is no need + to check for const_args in each instruction. 
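   Put differently, the invariant being relied on is (a sketch, not code
   from this file):

       QEMU_BUILD_BUG_ON(TCG_REG_R0 != 0);  /* r0 always reads as zero on ia64 */

   so a constant 0 matched by 'Z' can be handed to every tcg_opc_*() encoder
   as if it were the register operand R0, with no per-opcode const_args test.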
*/ + ct->ct |= TCG_CT_CONST_ZERO; + break; + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + return 0; +} + +/* test if a constant matches the constraint */ +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct; + ct = arg_ct->ct; + if (ct & TCG_CT_CONST) + return 1; + else if ((ct & TCG_CT_CONST_ZERO) && val == 0) + return 1; + else if ((ct & TCG_CT_CONST_S22) && val == ((int32_t)val << 10) >> 10) + return 1; + else + return 0; +} + +/* + * Code generation + */ + +static tcg_insn_unit *tb_ret_addr; + +static inline void tcg_out_bundle(TCGContext *s, int template, + uint64_t slot0, uint64_t slot1, + uint64_t slot2) +{ + template &= 0x1f; /* 5 bits */ + slot0 &= 0x1ffffffffffull; /* 41 bits */ + slot1 &= 0x1ffffffffffull; /* 41 bits */ + slot2 &= 0x1ffffffffffull; /* 41 bits */ + + *s->code_ptr++ = (tcg_insn_unit){ + (slot1 << 46) | (slot0 << 5) | template, + (slot2 << 23) | (slot1 >> 18) + }; +} + +static inline uint64_t tcg_opc_mov_a(int qp, TCGReg dst, TCGReg src) +{ + return tcg_opc_a4(qp, OPC_ADDS_A4, dst, 0, src); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, + TCGReg ret, TCGReg arg) +{ + tcg_out_bundle(s, mmI, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_mov_a(TCG_REG_P0, ret, arg)); +} + +static inline uint64_t tcg_opc_movi_a(int qp, TCGReg dst, int64_t src) +{ + assert(src == sextract64(src, 0, 22)); + return tcg_opc_a5(qp, OPC_ADDL_A5, dst, src, TCG_REG_R0); +} + +static inline void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg reg, tcg_target_long arg) +{ + tcg_out_bundle(s, mLX, + INSN_NOP_M, + tcg_opc_l2 (arg), + tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, reg, arg)); +} + +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + uint64_t imm; + + /* We pay attention here to not modify the branch target by reading + the existing value and using it again. This ensure that caches and + memory are kept coherent during retranslation. */ + if (l->has_value) { + imm = l->u.value_ptr - s->code_ptr; + } else { + imm = get_reloc_pcrel21b_slot2(s->code_ptr); + tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, l, 0); + } + + tcg_out_bundle(s, mmB, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_b1(TCG_REG_P0, OPC_BR_SPTK_MANY_B1, imm)); +} + +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *desc) +{ + uintptr_t func = desc->lo, gp = desc->hi, disp; + + /* Look through the function descriptor. */ + tcg_out_bundle(s, mlx, + INSN_NOP_M, + tcg_opc_l2 (gp), + tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, gp)); + disp = (tcg_insn_unit *)func - s->code_ptr; + tcg_out_bundle(s, mLX, + INSN_NOP_M, + tcg_opc_l4 (disp), + tcg_opc_x4 (TCG_REG_P0, OPC_BRL_CALL_SPTK_MANY_X4, + TCG_REG_B0, disp)); +} + +static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg) +{ + uint64_t imm, opc1; + + /* At least arg == 0 is a common operation. 
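   The test below asks whether the return value fits the signed 22-bit ADDL
   immediate that tcg_opc_movi_a() can encode; in effect (a sketch under
   that assumption):

       static inline bool fits_imm22(int64_t v)
       {
           return v >= -0x200000 && v <= 0x1fffff;
       }

   so 0 (and any other small code) is folded into the M slot of the final
   mLX bundle, while larger values take a separate movl via tcg_out_movi()
   first.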
*/ + if (arg == sextract64(arg, 0, 22)) { + opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R8, arg); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R8, arg); + opc1 = INSN_NOP_M; + } + + imm = tb_ret_addr - s->code_ptr; + + tcg_out_bundle(s, mLX, + opc1, + tcg_opc_l3 (imm), + tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, imm)); +} + +static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg) +{ + if (s->tb_jmp_offset) { + /* direct jump method */ + tcg_abort(); + } else { + /* indirect jump method */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, + (tcg_target_long)(s->tb_next + arg)); + tcg_out_bundle(s, MmI, + tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, + TCG_REG_R2, TCG_REG_R2), + INSN_NOP_M, + tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, + TCG_REG_R2, 0)); + tcg_out_bundle(s, mmB, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, + TCG_REG_B6)); + } + s->tb_next_offset[arg] = tcg_current_code_size(s); +} + +static inline void tcg_out_jmp(TCGContext *s, TCGArg addr) +{ + tcg_out_bundle(s, mmI, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, addr, 0)); + tcg_out_bundle(s, mmB, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_b4(TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); +} + +static inline void tcg_out_ld_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg, + TCGArg arg1, tcg_target_long arg2) +{ + if (arg2 == ((int16_t)arg2 >> 2) << 2) { + tcg_out_bundle(s, MmI, + tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, + TCG_REG_R2, arg2, arg1), + tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), + INSN_NOP_I); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2); + tcg_out_bundle(s, MmI, + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, + TCG_REG_R2, TCG_REG_R2, arg1), + tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), + INSN_NOP_I); + } +} + +static inline void tcg_out_st_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg, + TCGArg arg1, tcg_target_long arg2) +{ + if (arg2 == ((int16_t)arg2 >> 2) << 2) { + tcg_out_bundle(s, MmI, + tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, + TCG_REG_R2, arg2, arg1), + tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), + INSN_NOP_I); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2); + tcg_out_bundle(s, MmI, + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, + TCG_REG_R2, TCG_REG_R2, arg1), + tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), + INSN_NOP_I); + } +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_ld_rel(s, OPC_LD4_M1, arg, arg1, arg2); + } else { + tcg_out_ld_rel(s, OPC_LD8_M1, arg, arg1, arg2); + } +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_st_rel(s, OPC_ST4_M4, arg, arg1, arg2); + } else { + tcg_out_st_rel(s, OPC_ST8_M4, arg, arg1, arg2); + } +} + +static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, uint64_t opc_a3, + TCGReg ret, TCGArg arg1, int const_arg1, + TCGArg arg2, int const_arg2) +{ + uint64_t opc1 = 0, opc2 = 0, opc3 = 0; + + if (const_arg2 && arg2 != 0) { + opc2 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R3, arg2); + arg2 = TCG_REG_R3; + } + if (const_arg1 && arg1 != 0) { + if (opc_a3 && arg1 == (int8_t)arg1) { + opc3 = tcg_opc_a3(TCG_REG_P0, opc_a3, ret, arg1, arg2); + } else { + opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, arg1); + arg1 = TCG_REG_R2; + } + } + if (opc3 == 0) { + opc3 = tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2); + } + + tcg_out_bundle(s, (opc1 || opc2 ? mII : miI), + opc1 ? 
opc1 : INSN_NOP_M, + opc2 ? opc2 : INSN_NOP_I, + opc3); +} + +static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2 && arg2 == sextract64(arg2, 0, 14)) { + tcg_out_bundle(s, mmI, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, arg2, arg1)); + } else { + tcg_out_alu(s, OPC_ADD_A1, 0, ret, arg1, 0, arg2, const_arg2); + } +} + +static inline void tcg_out_sub(TCGContext *s, TCGReg ret, TCGArg arg1, + int const_arg1, TCGArg arg2, int const_arg2) +{ + if (!const_arg1 && const_arg2 && -arg2 == sextract64(-arg2, 0, 14)) { + tcg_out_bundle(s, mmI, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, -arg2, arg1)); + } else { + tcg_out_alu(s, OPC_SUB_A1, OPC_SUB_A3, ret, + arg1, const_arg1, arg2, const_arg2); + } +} + +static inline void tcg_out_eqv(TCGContext *s, TCGArg ret, + TCGArg arg1, int const_arg1, + TCGArg arg2, int const_arg2) +{ + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_a1 (TCG_REG_P0, OPC_XOR_A1, ret, arg1, arg2), + tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); +} + +static inline void tcg_out_nand(TCGContext *s, TCGArg ret, + TCGArg arg1, int const_arg1, + TCGArg arg2, int const_arg2) +{ + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, ret, arg1, arg2), + tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); +} + +static inline void tcg_out_nor(TCGContext *s, TCGArg ret, + TCGArg arg1, int const_arg1, + TCGArg arg2, int const_arg2) +{ + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, arg2), + tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); +} + +static inline void tcg_out_orc(TCGContext *s, TCGArg ret, + TCGArg arg1, int const_arg1, + TCGArg arg2, int const_arg2) +{ + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, TCG_REG_R2, -1, arg2), + tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, TCG_REG_R2)); +} + +static inline void tcg_out_mul(TCGContext *s, TCGArg ret, + TCGArg arg1, TCGArg arg2) +{ + tcg_out_bundle(s, mmI, + tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F6, arg1), + tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F7, arg2), + INSN_NOP_I); + tcg_out_bundle(s, mmF, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F6, + TCG_REG_F7, TCG_REG_F0)); + tcg_out_bundle(s, miI, + tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, ret, TCG_REG_F6), + INSN_NOP_I, + INSN_NOP_I); +} + +static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11, + ret, arg1, arg2, 31 - arg2)); + } else { + tcg_out_bundle(s, mII, + tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, + TCG_REG_R3, 0x1f, arg2), + tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R2, arg1), + tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret, + TCG_REG_R2, TCG_REG_R3)); + } +} + +static inline void tcg_out_sar_i64(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11, + ret, arg1, arg2, 63 - arg2)); + } else { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret, arg1, arg2)); + } +} + +static inline void tcg_out_shl_i32(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + 
tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, + arg1, 63 - arg2, 31 - arg2)); + } else { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R2, + 0x1f, arg2), + tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret, + arg1, TCG_REG_R2)); + } +} + +static inline void tcg_out_shl_i64(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, + arg1, 63 - arg2, 63 - arg2)); + } else { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret, + arg1, arg2)); + } +} + +static inline void tcg_out_shr_i32(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, + arg1, arg2, 31 - arg2)); + } else { + tcg_out_bundle(s, mII, + tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, + 0x1f, arg2), + tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, arg1), + tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, + TCG_REG_R2, TCG_REG_R3)); + } +} + +static inline void tcg_out_shr_i64(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, + arg1, arg2, 63 - arg2)); + } else { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, + arg1, arg2)); + } +} + +static inline void tcg_out_rotl_i32(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, + TCG_REG_R2, arg1, arg1), + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, + TCG_REG_R2, 32 - arg2, 31)); + } else { + tcg_out_bundle(s, miI, + INSN_NOP_M, + tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, + TCG_REG_R2, arg1, arg1), + tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, + 0x1f, arg2)); + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R3, + 0x20, TCG_REG_R3), + tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, + TCG_REG_R2, TCG_REG_R3)); + } +} + +static inline void tcg_out_rotl_i64(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1, + arg1, 0x40 - arg2)); + } else { + tcg_out_bundle(s, mII, + tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R2, + 0x40, arg2), + tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R3, + arg1, arg2), + tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R2, + arg1, TCG_REG_R2)); + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, + TCG_REG_R2, TCG_REG_R3)); + } +} + +static inline void tcg_out_rotr_i32(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, + TCG_REG_R2, arg1, arg1), + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, + TCG_REG_R2, arg2, 31)); + } else { + tcg_out_bundle(s, mII, + tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, + 0x1f, arg2), + tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, + TCG_REG_R2, arg1, arg1), + tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, + TCG_REG_R2, TCG_REG_R3)); + } +} + +static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ 
+ if (const_arg2) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1, + arg1, arg2)); + } else { + tcg_out_bundle(s, mII, + tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R2, + 0x40, arg2), + tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R3, + arg1, arg2), + tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R2, + arg1, TCG_REG_R2)); + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, + TCG_REG_R2, TCG_REG_R3)); + } +} + +static const uint64_t opc_ext_i29[8] = { + OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0, + OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0 +}; + +static inline uint64_t tcg_opc_ext_i(int qp, TCGMemOp opc, TCGReg d, TCGReg s) +{ + if ((opc & MO_SIZE) == MO_64) { + return tcg_opc_mov_a(qp, d, s); + } else { + return tcg_opc_i29(qp, opc_ext_i29[opc & MO_SSIZE], d, s); + } +} + +static inline void tcg_out_ext(TCGContext *s, uint64_t opc_i29, + TCGArg ret, TCGArg arg) +{ + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i29(TCG_REG_P0, opc_i29, ret, arg)); +} + +static inline uint64_t tcg_opc_bswap64_i(int qp, TCGReg d, TCGReg s) +{ + return tcg_opc_i3(qp, OPC_MUX1_I3, d, s, 0xb); +} + +static inline void tcg_out_bswap16(TCGContext *s, TCGArg ret, TCGArg arg) +{ + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 15, 15), + tcg_opc_bswap64_i(TCG_REG_P0, ret, ret)); +} + +static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg) +{ + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 31, 31), + tcg_opc_bswap64_i(TCG_REG_P0, ret, ret)); +} + +static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg) +{ + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, ret, arg)); +} + +static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1, + TCGArg a2, int const_a2, int pos, int len) +{ + uint64_t i1 = 0, i2 = 0; + int cpos = 63 - pos, lm1 = len - 1; + + if (const_a2) { + /* Truncate the value of a constant a2 to the width of the field. */ + int mask = (1u << len) - 1; + a2 &= mask; + + if (a2 == 0 || a2 == mask) { + /* 1-bit signed constant inserted into register. */ + i2 = tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, ret, a2, a1, cpos, lm1); + } else { + /* Otherwise, load any constant into a temporary. Do this into + the first I slot to help out with cross-unit delays. */ + i1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, a2); + a2 = TCG_REG_R2; + } + } + if (i2 == 0) { + i2 = tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, ret, a2, a1, cpos, lm1); + } + tcg_out_bundle(s, (i1 ? mII : miI), + INSN_NOP_M, + i1 ? 
i1 : INSN_NOP_I,
+                   i2);
+}
+
+static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1,
+                                     TCGArg arg2, int cmp4)
+{
+    uint64_t opc_eq_a6, opc_lt_a6, opc_ltu_a6;
+
+    if (cmp4) {
+        opc_eq_a6 = OPC_CMP4_EQ_A6;
+        opc_lt_a6 = OPC_CMP4_LT_A6;
+        opc_ltu_a6 = OPC_CMP4_LTU_A6;
+    } else {
+        opc_eq_a6 = OPC_CMP_EQ_A6;
+        opc_lt_a6 = OPC_CMP_LT_A6;
+        opc_ltu_a6 = OPC_CMP_LTU_A6;
+    }
+
+    switch (cond) {
+    case TCG_COND_EQ:
+        return tcg_opc_a6 (qp, opc_eq_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2);
+    case TCG_COND_NE:
+        return tcg_opc_a6 (qp, opc_eq_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2);
+    case TCG_COND_LT:
+        return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2);
+    case TCG_COND_LTU:
+        return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2);
+    case TCG_COND_GE:
+        return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2);
+    case TCG_COND_GEU:
+        return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2);
+    case TCG_COND_LE:
+        return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P7, TCG_REG_P6, arg2, arg1);
+    case TCG_COND_LEU:
+        return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P7, TCG_REG_P6, arg2, arg1);
+    case TCG_COND_GT:
+        return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P6, TCG_REG_P7, arg2, arg1);
+    case TCG_COND_GTU:
+        return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P6, TCG_REG_P7, arg2, arg1);
+    default:
+        tcg_abort();
+        break;
+    }
+}
+
+static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
+                                  TCGReg arg2, TCGLabel *l, int cmp4)
+{
+    uint64_t imm;
+
+    /* We take care here not to modify the branch target by reading
+       the existing value and using it again. This ensures that caches and
+       memory are kept coherent during retranslation. */
+    if (l->has_value) {
+        imm = l->u.value_ptr - s->code_ptr;
+    } else {
+        imm = get_reloc_pcrel21b_slot2(s->code_ptr);
+        tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, l, 0);
+    }
+
+    tcg_out_bundle(s, miB,
+                   INSN_NOP_M,
+                   tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4),
+                   tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1, imm));
+}
+
+static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret,
+                                   TCGArg arg1, TCGArg arg2, int cmp4)
+{
+    tcg_out_bundle(s, MmI,
+                   tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4),
+                   tcg_opc_movi_a(TCG_REG_P6, ret, 1),
+                   tcg_opc_movi_a(TCG_REG_P7, ret, 0));
+}
+
+static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
+                                   TCGArg c1, TCGArg c2,
+                                   TCGArg v1, int const_v1,
+                                   TCGArg v2, int const_v2, int cmp4)
+{
+    uint64_t opc1, opc2;
+
+    if (const_v1) {
+        opc1 = tcg_opc_movi_a(TCG_REG_P6, ret, v1);
+    } else if (ret == v1) {
+        opc1 = INSN_NOP_M;
+    } else {
+        opc1 = tcg_opc_mov_a(TCG_REG_P6, ret, v1);
+    }
+    if (const_v2) {
+        opc2 = tcg_opc_movi_a(TCG_REG_P7, ret, v2);
+    } else if (ret == v2) {
+        opc2 = INSN_NOP_I;
+    } else {
+        opc2 = tcg_opc_mov_a(TCG_REG_P7, ret, v2);
+    }
+
+    tcg_out_bundle(s, MmI,
+                   tcg_opc_cmp_a(TCG_REG_P0, cond, c1, c2, cmp4),
+                   opc1,
+                   opc2);
+}
+
+#if defined(CONFIG_SOFTMMU)
+/* We're expecting to use a signed 22-bit immediate add. */
+QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
+                  > 0x1fffff)
+
+/* Load and compare a TLB entry, and return the result in (p6, p7).
+   R2 is loaded with the addend TLB entry.
+   R57 is loaded with the address, zero extended on 32-bit targets.
+   R1, R3 are clobbered, leaving R56 free for...
+   BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store.
*/ +static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg, + TCGMemOp s_bits, int off_rw, int off_add, + uint64_t bswap1, uint64_t bswap2) +{ + /* + .mii + mov r2 = off_rw + extr.u r3 = addr_reg, ... # extract tlb page + zxt4 r57 = addr_reg # or mov for 64-bit guest + ;; + .mii + addl r2 = r2, areg0 + shl r3 = r3, cteb # via dep.z + dep r1 = 0, r57, ... # zero page ofs, keep align + ;; + .mmi + add r2 = r2, r3 + ;; + ld4 r3 = [r2], off_add-off_rw # or ld8 for 64-bit guest + nop + ;; + .mmi + nop + cmp.eq p6, p7 = r3, r58 + nop + ;; + */ + tcg_out_bundle(s, miI, + tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, off_rw), + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R3, + addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1), + tcg_opc_ext_i(TCG_REG_P0, + TARGET_LONG_BITS == 32 ? MO_UL : MO_Q, + TCG_REG_R57, addr_reg)); + tcg_out_bundle(s, miI, + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, + TCG_REG_R2, TCG_AREG0), + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R3, + TCG_REG_R3, 63 - CPU_TLB_ENTRY_BITS, + 63 - CPU_TLB_ENTRY_BITS), + tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R1, 0, + TCG_REG_R57, 63 - s_bits, + TARGET_PAGE_BITS - s_bits - 1)); + tcg_out_bundle(s, MmI, + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, + TCG_REG_R2, TCG_REG_R2, TCG_REG_R3), + tcg_opc_m3 (TCG_REG_P0, + (TARGET_LONG_BITS == 32 + ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R3, + TCG_REG_R2, off_add - off_rw), + bswap1); + tcg_out_bundle(s, mmI, + tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, TCG_REG_R2), + tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6, + TCG_REG_P7, TCG_REG_R1, TCG_REG_R3), + bswap2); +} + +typedef struct TCGLabelQemuLdst { + bool is_ld; + TCGMemOp size; + tcg_insn_unit *label_ptr; /* label pointers to be updated */ + struct TCGLabelQemuLdst *next; +} TCGLabelQemuLdst; + +typedef struct TCGBackendData { + TCGLabelQemuLdst *labels; +} TCGBackendData; + +static inline void tcg_out_tb_init(TCGContext *s) +{ + s->be->labels = NULL; +} + +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, + tcg_insn_unit *label_ptr) +{ + TCGBackendData *be = s->be; + TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l)); + + l->is_ld = is_ld; + l->size = opc & MO_SIZE; + l->label_ptr = label_ptr; + l->next = be->labels; + be->labels = l; +} + +static bool tcg_out_tb_finalize(TCGContext *s) +{ + static const void * const helpers[8] = { + helper_ret_stb_mmu, + helper_le_stw_mmu, + helper_le_stl_mmu, + helper_le_stq_mmu, + helper_ret_ldub_mmu, + helper_le_lduw_mmu, + helper_le_ldul_mmu, + helper_le_ldq_mmu, + }; + tcg_insn_unit *thunks[8] = { }; + TCGLabelQemuLdst *l; + + for (l = s->be->labels; l != NULL; l = l->next) { + long x = l->is_ld * 4 + l->size; + tcg_insn_unit *dest = thunks[x]; + + /* The out-of-line thunks are all the same; load the return address + from B0, load the GP, and branch to the code. Note that we are + always post-call, so the register window has rolled, so we're + using incoming parameter register numbers, not outgoing. */ + if (dest == NULL) { + uintptr_t *desc = (uintptr_t *)helpers[x]; + uintptr_t func = desc[0], gp = desc[1], disp; + + thunks[x] = dest = s->code_ptr; + + tcg_out_bundle(s, mlx, + INSN_NOP_M, + tcg_opc_l2 (gp), + tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, + TCG_REG_R1, gp)); + tcg_out_bundle(s, mii, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22, + l->is_ld ? 
TCG_REG_R35 : TCG_REG_R36, + TCG_REG_B0)); + disp = (tcg_insn_unit *)func - s->code_ptr; + tcg_out_bundle(s, mLX, + INSN_NOP_M, + tcg_opc_l3 (disp), + tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, disp)); + } + + reloc_pcrel21b_slot2(l->label_ptr, dest); + + /* Test for (pending) buffer overflow. The assumption is that any + one operation beginning below the high water mark cannot overrun + the buffer completely. Thus we can test for overflow after + generating code without having to check during generation. */ + if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { + return false; + } + } + return true; +} + +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) +{ + static const uint64_t opc_ld_m1[4] = { + OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 + }; + int addr_reg, data_reg, mem_index; + TCGMemOpIdx oi; + TCGMemOp opc, s_bits; + uint64_t fin1, fin2; + tcg_insn_unit *label_ptr; + + data_reg = args[0]; + addr_reg = args[1]; + oi = args[2]; + opc = get_memop(oi); + mem_index = get_mmuidx(oi); + s_bits = opc & MO_SIZE; + + /* Read the TLB entry */ + tcg_out_qemu_tlb(s, addr_reg, s_bits, + offsetof(CPUArchState, tlb_table[mem_index][0].addr_read), + offsetof(CPUArchState, tlb_table[mem_index][0].addend), + INSN_NOP_I, INSN_NOP_I); + + /* P6 is the fast path, and P7 the slow path */ + + fin2 = 0; + if (opc & MO_BSWAP) { + fin1 = tcg_opc_bswap64_i(TCG_REG_P0, data_reg, TCG_REG_R8); + if (s_bits < MO_64) { + int shift = 64 - (8 << s_bits); + fin2 = (opc & MO_SIGN ? OPC_EXTR_I11 : OPC_EXTR_U_I11); + fin2 = tcg_opc_i11(TCG_REG_P0, fin2, + data_reg, data_reg, shift, 63 - shift); + } + } else { + fin1 = tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8); + } + + tcg_out_bundle(s, mmI, + tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), + tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, + TCG_REG_R2, TCG_REG_R57), + tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, oi)); + label_ptr = s->code_ptr; + tcg_out_bundle(s, miB, + tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], + TCG_REG_R8, TCG_REG_R2), + INSN_NOP_I, + tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, + get_reloc_pcrel21b_slot2(label_ptr))); + + add_qemu_ldst_label(s, 1, opc, label_ptr); + + /* Note that we always use LE helper functions, so the bswap insns + here for the fast path also apply to the slow path. */ + tcg_out_bundle(s, (fin2 ? mII : miI), + INSN_NOP_M, + fin1, + fin2 ? fin2 : INSN_NOP_I); +} + +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) +{ + static const uint64_t opc_st_m4[4] = { + OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 + }; + TCGReg addr_reg, data_reg; + int mem_index; + uint64_t pre1, pre2; + TCGMemOpIdx oi; + TCGMemOp opc, s_bits; + tcg_insn_unit *label_ptr; + + data_reg = args[0]; + addr_reg = args[1]; + oi = args[2]; + opc = get_memop(oi); + mem_index = get_mmuidx(oi); + s_bits = opc & MO_SIZE; + + /* Note that we always use LE helper functions, so the bswap insns + that are here for the fast path also apply to the slow path, + and move the data into the argument register. */ + pre2 = INSN_NOP_I; + if (opc & MO_BSWAP) { + pre1 = tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R58, data_reg); + if (s_bits < MO_64) { + int shift = 64 - (8 << s_bits); + pre2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, + TCG_REG_R58, TCG_REG_R58, shift, 63 - shift); + } + } else { + /* Just move the data into place for the slow path. 
*/ + pre1 = tcg_opc_ext_i(TCG_REG_P0, opc, TCG_REG_R58, data_reg); + } + + tcg_out_qemu_tlb(s, addr_reg, s_bits, + offsetof(CPUArchState, tlb_table[mem_index][0].addr_write), + offsetof(CPUArchState, tlb_table[mem_index][0].addend), + pre1, pre2); + + /* P6 is the fast path, and P7 the slow path */ + tcg_out_bundle(s, mmI, + tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), + tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, + TCG_REG_R2, TCG_REG_R57), + tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, oi)); + label_ptr = s->code_ptr; + tcg_out_bundle(s, miB, + tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits], + TCG_REG_R58, TCG_REG_R2), + INSN_NOP_I, + tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, + get_reloc_pcrel21b_slot2(label_ptr))); + + add_qemu_ldst_label(s, 0, opc, label_ptr); +} + +#else /* !CONFIG_SOFTMMU */ +# include "tcg-be-null.h" + +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) +{ + static uint64_t const opc_ld_m1[4] = { + OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 + }; + int addr_reg, data_reg; + TCGMemOp opc, s_bits, bswap; + + data_reg = args[0]; + addr_reg = args[1]; + opc = args[2]; + s_bits = opc & MO_SIZE; + bswap = opc & MO_BSWAP; + +#if TARGET_LONG_BITS == 32 + if (guest_base != 0) { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, + TCG_REG_R3, addr_reg), + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, + TCG_GUEST_BASE_REG, TCG_REG_R3)); + } else { + tcg_out_bundle(s, miI, + INSN_NOP_M, + tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, + TCG_REG_R2, addr_reg), + INSN_NOP_I); + } + + if (!bswap) { + if (!(opc & MO_SIGN)) { + tcg_out_bundle(s, miI, + tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], + data_reg, TCG_REG_R2), + INSN_NOP_I, + INSN_NOP_I); + } else { + tcg_out_bundle(s, mII, + tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], + data_reg, TCG_REG_R2), + INSN_NOP_I, + tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); + } + } else if (s_bits == MO_64) { + tcg_out_bundle(s, mII, + tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], + data_reg, TCG_REG_R2), + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); + } else { + if (s_bits == MO_16) { + tcg_out_bundle(s, mII, + tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], + data_reg, TCG_REG_R2), + INSN_NOP_I, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, + data_reg, data_reg, 15, 15)); + } else { + tcg_out_bundle(s, mII, + tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], + data_reg, TCG_REG_R2), + INSN_NOP_I, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, + data_reg, data_reg, 31, 31)); + } + if (!(opc & MO_SIGN)) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); + } else { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg), + tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); + } + } +#else + if (guest_base != 0) { + tcg_out_bundle(s, MmI, + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, + TCG_GUEST_BASE_REG, addr_reg), + tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], + data_reg, TCG_REG_R2), + INSN_NOP_I); + } else { + tcg_out_bundle(s, mmI, + INSN_NOP_M, + tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], + data_reg, addr_reg), + INSN_NOP_I); + } + + if (bswap && s_bits == MO_16) { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, + data_reg, data_reg, 15, 15), + tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); + } else if (bswap && s_bits == MO_32) { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, + data_reg, data_reg, 31, 31), + 
tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); + } else if (bswap && s_bits == MO_64) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); + } + if (opc & MO_SIGN) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); + } +#endif +} + +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) +{ + static uint64_t const opc_st_m4[4] = { + OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 + }; + int addr_reg, data_reg; +#if TARGET_LONG_BITS == 64 + uint64_t add_guest_base; +#endif + TCGMemOp opc, s_bits, bswap; + + data_reg = args[0]; + addr_reg = args[1]; + opc = args[2]; + s_bits = opc & MO_SIZE; + bswap = opc & MO_BSWAP; + +#if TARGET_LONG_BITS == 32 + if (guest_base != 0) { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, + TCG_REG_R3, addr_reg), + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, + TCG_GUEST_BASE_REG, TCG_REG_R3)); + } else { + tcg_out_bundle(s, miI, + INSN_NOP_M, + tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, + TCG_REG_R2, addr_reg), + INSN_NOP_I); + } + + if (bswap) { + if (s_bits == MO_16) { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, + TCG_REG_R3, data_reg, 15, 15), + tcg_opc_bswap64_i(TCG_REG_P0, + TCG_REG_R3, TCG_REG_R3)); + data_reg = TCG_REG_R3; + } else if (s_bits == MO_32) { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, + TCG_REG_R3, data_reg, 31, 31), + tcg_opc_bswap64_i(TCG_REG_P0, + TCG_REG_R3, TCG_REG_R3)); + data_reg = TCG_REG_R3; + } else if (s_bits == MO_64) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg)); + data_reg = TCG_REG_R3; + } + } + tcg_out_bundle(s, mmI, + tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], + data_reg, TCG_REG_R2), + INSN_NOP_M, + INSN_NOP_I); +#else + if (guest_base != 0) { + add_guest_base = tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, + TCG_GUEST_BASE_REG, addr_reg); + addr_reg = TCG_REG_R2; + } else { + add_guest_base = INSN_NOP_M; + } + + if (!bswap) { + tcg_out_bundle(s, (guest_base ? 
MmI : mmI), + add_guest_base, + tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], + data_reg, addr_reg), + INSN_NOP_I); + } else { + if (s_bits == MO_16) { + tcg_out_bundle(s, mII, + add_guest_base, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, + TCG_REG_R3, data_reg, 15, 15), + tcg_opc_bswap64_i(TCG_REG_P0, + TCG_REG_R3, TCG_REG_R3)); + data_reg = TCG_REG_R3; + } else if (s_bits == MO_32) { + tcg_out_bundle(s, mII, + add_guest_base, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, + TCG_REG_R3, data_reg, 31, 31), + tcg_opc_bswap64_i(TCG_REG_P0, + TCG_REG_R3, TCG_REG_R3)); + data_reg = TCG_REG_R3; + } else if (s_bits == MO_64) { + tcg_out_bundle(s, miI, + add_guest_base, + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg)); + data_reg = TCG_REG_R3; + } + tcg_out_bundle(s, miI, + tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], + data_reg, addr_reg), + INSN_NOP_I, + INSN_NOP_I); + } +#endif +} + +#endif + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg *args, const int *const_args) +{ + switch(opc) { + case INDEX_op_exit_tb: + tcg_out_exit_tb(s, args[0]); + break; + case INDEX_op_br: + tcg_out_br(s, arg_label(args[0])); + break; + case INDEX_op_goto_tb: + tcg_out_goto_tb(s, args[0]); + break; + + case INDEX_op_ld8u_i32: + case INDEX_op_ld8u_i64: + tcg_out_ld_rel(s, OPC_LD1_M1, args[0], args[1], args[2]); + break; + case INDEX_op_ld8s_i32: + case INDEX_op_ld8s_i64: + tcg_out_ld_rel(s, OPC_LD1_M1, args[0], args[1], args[2]); + tcg_out_ext(s, OPC_SXT1_I29, args[0], args[0]); + break; + case INDEX_op_ld16u_i32: + case INDEX_op_ld16u_i64: + tcg_out_ld_rel(s, OPC_LD2_M1, args[0], args[1], args[2]); + break; + case INDEX_op_ld16s_i32: + case INDEX_op_ld16s_i64: + tcg_out_ld_rel(s, OPC_LD2_M1, args[0], args[1], args[2]); + tcg_out_ext(s, OPC_SXT2_I29, args[0], args[0]); + break; + case INDEX_op_ld_i32: + case INDEX_op_ld32u_i64: + tcg_out_ld_rel(s, OPC_LD4_M1, args[0], args[1], args[2]); + break; + case INDEX_op_ld32s_i64: + tcg_out_ld_rel(s, OPC_LD4_M1, args[0], args[1], args[2]); + tcg_out_ext(s, OPC_SXT4_I29, args[0], args[0]); + break; + case INDEX_op_ld_i64: + tcg_out_ld_rel(s, OPC_LD8_M1, args[0], args[1], args[2]); + break; + case INDEX_op_st8_i32: + case INDEX_op_st8_i64: + tcg_out_st_rel(s, OPC_ST1_M4, args[0], args[1], args[2]); + break; + case INDEX_op_st16_i32: + case INDEX_op_st16_i64: + tcg_out_st_rel(s, OPC_ST2_M4, args[0], args[1], args[2]); + break; + case INDEX_op_st_i32: + case INDEX_op_st32_i64: + tcg_out_st_rel(s, OPC_ST4_M4, args[0], args[1], args[2]); + break; + case INDEX_op_st_i64: + tcg_out_st_rel(s, OPC_ST8_M4, args[0], args[1], args[2]); + break; + + case INDEX_op_add_i32: + case INDEX_op_add_i64: + tcg_out_add(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_sub_i32: + case INDEX_op_sub_i64: + tcg_out_sub(s, args[0], args[1], const_args[1], args[2], const_args[2]); + break; + + case INDEX_op_and_i32: + case INDEX_op_and_i64: + /* TCG expects arg2 constant; A3 expects arg1 constant. Swap. 
*/ + tcg_out_alu(s, OPC_AND_A1, OPC_AND_A3, args[0], + args[2], const_args[2], args[1], const_args[1]); + break; + case INDEX_op_andc_i32: + case INDEX_op_andc_i64: + tcg_out_alu(s, OPC_ANDCM_A1, OPC_ANDCM_A3, args[0], + args[1], const_args[1], args[2], const_args[2]); + break; + case INDEX_op_eqv_i32: + case INDEX_op_eqv_i64: + tcg_out_eqv(s, args[0], args[1], const_args[1], + args[2], const_args[2]); + break; + case INDEX_op_nand_i32: + case INDEX_op_nand_i64: + tcg_out_nand(s, args[0], args[1], const_args[1], + args[2], const_args[2]); + break; + case INDEX_op_nor_i32: + case INDEX_op_nor_i64: + tcg_out_nor(s, args[0], args[1], const_args[1], + args[2], const_args[2]); + break; + case INDEX_op_or_i32: + case INDEX_op_or_i64: + /* TCG expects arg2 constant; A3 expects arg1 constant. Swap. */ + tcg_out_alu(s, OPC_OR_A1, OPC_OR_A3, args[0], + args[2], const_args[2], args[1], const_args[1]); + break; + case INDEX_op_orc_i32: + case INDEX_op_orc_i64: + tcg_out_orc(s, args[0], args[1], const_args[1], + args[2], const_args[2]); + break; + case INDEX_op_xor_i32: + case INDEX_op_xor_i64: + /* TCG expects arg2 constant; A3 expects arg1 constant. Swap. */ + tcg_out_alu(s, OPC_XOR_A1, OPC_XOR_A3, args[0], + args[2], const_args[2], args[1], const_args[1]); + break; + + case INDEX_op_mul_i32: + case INDEX_op_mul_i64: + tcg_out_mul(s, args[0], args[1], args[2]); + break; + + case INDEX_op_sar_i32: + tcg_out_sar_i32(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_sar_i64: + tcg_out_sar_i64(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_shl_i32: + tcg_out_shl_i32(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_shl_i64: + tcg_out_shl_i64(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_shr_i32: + tcg_out_shr_i32(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_shr_i64: + tcg_out_shr_i64(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_rotl_i32: + tcg_out_rotl_i32(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_rotl_i64: + tcg_out_rotl_i64(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_rotr_i32: + tcg_out_rotr_i32(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_rotr_i64: + tcg_out_rotr_i64(s, args[0], args[1], args[2], const_args[2]); + break; + + case INDEX_op_ext8s_i32: + case INDEX_op_ext8s_i64: + tcg_out_ext(s, OPC_SXT1_I29, args[0], args[1]); + break; + case INDEX_op_ext8u_i32: + case INDEX_op_ext8u_i64: + tcg_out_ext(s, OPC_ZXT1_I29, args[0], args[1]); + break; + case INDEX_op_ext16s_i32: + case INDEX_op_ext16s_i64: + tcg_out_ext(s, OPC_SXT2_I29, args[0], args[1]); + break; + case INDEX_op_ext16u_i32: + case INDEX_op_ext16u_i64: + tcg_out_ext(s, OPC_ZXT2_I29, args[0], args[1]); + break; + case INDEX_op_ext_i32_i64: + case INDEX_op_ext32s_i64: + tcg_out_ext(s, OPC_SXT4_I29, args[0], args[1]); + break; + case INDEX_op_extu_i32_i64: + case INDEX_op_ext32u_i64: + tcg_out_ext(s, OPC_ZXT4_I29, args[0], args[1]); + break; + + case INDEX_op_bswap16_i32: + case INDEX_op_bswap16_i64: + tcg_out_bswap16(s, args[0], args[1]); + break; + case INDEX_op_bswap32_i32: + case INDEX_op_bswap32_i64: + tcg_out_bswap32(s, args[0], args[1]); + break; + case INDEX_op_bswap64_i64: + tcg_out_bswap64(s, args[0], args[1]); + break; + + case INDEX_op_deposit_i32: + case INDEX_op_deposit_i64: + tcg_out_deposit(s, args[0], args[1], args[2], const_args[2], + args[3], args[4]); + break; + + case INDEX_op_brcond_i32: + 
tcg_out_brcond(s, args[2], args[0], args[1], arg_label(args[3]), 1); + break; + case INDEX_op_brcond_i64: + tcg_out_brcond(s, args[2], args[0], args[1], arg_label(args[3]), 0); + break; + case INDEX_op_setcond_i32: + tcg_out_setcond(s, args[3], args[0], args[1], args[2], 1); + break; + case INDEX_op_setcond_i64: + tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0); + break; + case INDEX_op_movcond_i32: + tcg_out_movcond(s, args[5], args[0], args[1], args[2], + args[3], const_args[3], args[4], const_args[4], 1); + break; + case INDEX_op_movcond_i64: + tcg_out_movcond(s, args[5], args[0], args[1], args[2], + args[3], const_args[3], args[4], const_args[4], 0); + break; + + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, args); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args); + break; + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, args); + break; + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args); + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + default: + tcg_abort(); + } +} + +static const TCGTargetOpDef ia64_op_defs[] = { + { INDEX_op_br, { } }, + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "rZ", "r" } }, + { INDEX_op_st16_i32, { "rZ", "r" } }, + { INDEX_op_st_i32, { "rZ", "r" } }, + + { INDEX_op_add_i32, { "r", "rZ", "rI" } }, + { INDEX_op_sub_i32, { "r", "rI", "rI" } }, + + { INDEX_op_and_i32, { "r", "rI", "rI" } }, + { INDEX_op_andc_i32, { "r", "rI", "rI" } }, + { INDEX_op_eqv_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_nand_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_nor_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_or_i32, { "r", "rI", "rI" } }, + { INDEX_op_orc_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_xor_i32, { "r", "rI", "rI" } }, + + { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, + + { INDEX_op_sar_i32, { "r", "rZ", "ri" } }, + { INDEX_op_shl_i32, { "r", "rZ", "ri" } }, + { INDEX_op_shr_i32, { "r", "rZ", "ri" } }, + { INDEX_op_rotl_i32, { "r", "rZ", "ri" } }, + { INDEX_op_rotr_i32, { "r", "rZ", "ri" } }, + + { INDEX_op_ext8s_i32, { "r", "rZ"} }, + { INDEX_op_ext8u_i32, { "r", "rZ"} }, + { INDEX_op_ext16s_i32, { "r", "rZ"} }, + { INDEX_op_ext16u_i32, { "r", "rZ"} }, + + { INDEX_op_bswap16_i32, { "r", "rZ" } }, + { INDEX_op_bswap32_i32, { "r", "rZ" } }, + + { INDEX_op_brcond_i32, { "rZ", "rZ" } }, + { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } }, + + { INDEX_op_ld8u_i64, { "r", "r" } }, + { INDEX_op_ld8s_i64, { "r", "r" } }, + { INDEX_op_ld16u_i64, { "r", "r" } }, + { INDEX_op_ld16s_i64, { "r", "r" } }, + { INDEX_op_ld32u_i64, { "r", "r" } }, + { INDEX_op_ld32s_i64, { "r", "r" } }, + { INDEX_op_ld_i64, { "r", "r" } }, + { INDEX_op_st8_i64, { "rZ", "r" } }, + { INDEX_op_st16_i64, { "rZ", "r" } }, + { INDEX_op_st32_i64, { "rZ", "r" } }, + { INDEX_op_st_i64, { "rZ", "r" } }, + + { INDEX_op_add_i64, { "r", "rZ", "rI" } }, + { INDEX_op_sub_i64, { "r", "rI", "rI" } }, + + { INDEX_op_and_i64, { "r", "rI", "rI" } }, + { INDEX_op_andc_i64, { "r", "rI", "rI" } }, + { INDEX_op_eqv_i64, { "r", "rZ", "rZ" } }, + { INDEX_op_nand_i64, { "r", "rZ", "rZ" } }, + { INDEX_op_nor_i64, { "r", "rZ", 
"rZ" } }, + { INDEX_op_or_i64, { "r", "rI", "rI" } }, + { INDEX_op_orc_i64, { "r", "rZ", "rZ" } }, + { INDEX_op_xor_i64, { "r", "rI", "rI" } }, + + { INDEX_op_mul_i64, { "r", "rZ", "rZ" } }, + + { INDEX_op_sar_i64, { "r", "rZ", "ri" } }, + { INDEX_op_shl_i64, { "r", "rZ", "ri" } }, + { INDEX_op_shr_i64, { "r", "rZ", "ri" } }, + { INDEX_op_rotl_i64, { "r", "rZ", "ri" } }, + { INDEX_op_rotr_i64, { "r", "rZ", "ri" } }, + + { INDEX_op_ext8s_i64, { "r", "rZ"} }, + { INDEX_op_ext8u_i64, { "r", "rZ"} }, + { INDEX_op_ext16s_i64, { "r", "rZ"} }, + { INDEX_op_ext16u_i64, { "r", "rZ"} }, + { INDEX_op_ext32s_i64, { "r", "rZ"} }, + { INDEX_op_ext32u_i64, { "r", "rZ"} }, + { INDEX_op_ext_i32_i64, { "r", "rZ" } }, + { INDEX_op_extu_i32_i64, { "r", "rZ" } }, + + { INDEX_op_bswap16_i64, { "r", "rZ" } }, + { INDEX_op_bswap32_i64, { "r", "rZ" } }, + { INDEX_op_bswap64_i64, { "r", "rZ" } }, + + { INDEX_op_brcond_i64, { "rZ", "rZ" } }, + { INDEX_op_setcond_i64, { "r", "rZ", "rZ" } }, + { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rI", "rI" } }, + + { INDEX_op_deposit_i32, { "r", "rZ", "ri" } }, + { INDEX_op_deposit_i64, { "r", "rZ", "ri" } }, + + { INDEX_op_qemu_ld_i32, { "r", "r" } }, + { INDEX_op_qemu_ld_i64, { "r", "r" } }, + { INDEX_op_qemu_st_i32, { "SZ", "r" } }, + { INDEX_op_qemu_st_i64, { "SZ", "r" } }, + + { -1 }, +}; + +/* Generate global QEMU prologue and epilogue code */ +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int frame_size; + + /* reserve some stack space */ + frame_size = TCG_STATIC_CALL_ARGS_SIZE + + CPU_TEMP_BUF_NLONGS * sizeof(long); + frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & + ~(TCG_TARGET_STACK_ALIGN - 1); + tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, + CPU_TEMP_BUF_NLONGS * sizeof(long)); + + /* First emit adhoc function descriptor */ + *s->code_ptr = (tcg_insn_unit){ + (uint64_t)(s->code_ptr + 1), /* entry point */ + 0 /* skip gp */ + }; + s->code_ptr++; + + /* prologue */ + tcg_out_bundle(s, miI, + tcg_opc_m34(TCG_REG_P0, OPC_ALLOC_M34, + TCG_REG_R34, 32, 24, 0), + INSN_NOP_I, + tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, + TCG_REG_B6, TCG_REG_R33, 0)); + + /* ??? If guest_base < 0x200000, we could load the register via + an ADDL in the M slot of the next bundle. 
*/ + if (guest_base != 0) { + tcg_out_bundle(s, mlx, + INSN_NOP_M, + tcg_opc_l2(guest_base), + tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, + TCG_GUEST_BASE_REG, guest_base)); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); + } + + tcg_out_bundle(s, miB, + tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, + TCG_REG_R12, -frame_size, TCG_REG_R12), + tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22, + TCG_REG_R33, TCG_REG_B0), + tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); + + /* epilogue */ + tb_ret_addr = s->code_ptr; + tcg_out_bundle(s, miI, + INSN_NOP_M, + tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, + TCG_REG_B0, TCG_REG_R33, 0), + tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, + TCG_REG_R12, frame_size, TCG_REG_R12)); + tcg_out_bundle(s, miB, + INSN_NOP_M, + tcg_opc_i26(TCG_REG_P0, OPC_MOV_I_I26, + TCG_REG_PFS, TCG_REG_R34), + tcg_opc_b4 (TCG_REG_P0, OPC_BR_RET_SPTK_MANY_B4, + TCG_REG_B0)); +} + +static void tcg_target_init(TCGContext *s) +{ + tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I32], + 0xffffffffffffffffull); + tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I64], + 0xffffffffffffffffull); + + tcg_regset_clear(tcg_target_call_clobber_regs); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R15); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R16); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R17); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R18); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R19); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R20); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R21); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R22); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R23); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R24); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R25); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R26); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R27); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R28); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R29); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R30); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R31); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R56); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R57); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R58); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R59); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R60); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R61); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R62); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R63); + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* zero register */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* global pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R3); /* internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R12); /* stack pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); 
/* thread pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R33); /* return address */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R34); /* PFS */ + + /* The following 4 are not in use, are call-saved, but *not* saved + by the prologue. Therefore we cannot use them without modifying + the prologue. There doesn't seem to be any good reason to use + these as opposed to the windowed registers. */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R4); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R5); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R7); + + tcg_add_target_add_op_defs(ia64_op_defs); +} diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c deleted file mode 100644 index 2dc4998719..0000000000 --- a/tcg/mips/tcg-target.c +++ /dev/null @@ -1,1892 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2008-2009 Arnaud Patard - * Copyright (c) 2009 Aurelien Jarno - * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "tcg-be-ldst.h" - -#ifdef HOST_WORDS_BIGENDIAN -# define MIPS_BE 1 -#else -# define MIPS_BE 0 -#endif - -#define LO_OFF (MIPS_BE * 4) -#define HI_OFF (4 - LO_OFF) - -#ifndef NDEBUG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "zero", - "at", - "v0", - "v1", - "a0", - "a1", - "a2", - "a3", - "t0", - "t1", - "t2", - "t3", - "t4", - "t5", - "t6", - "t7", - "s0", - "s1", - "s2", - "s3", - "s4", - "s5", - "s6", - "s7", - "t8", - "t9", - "k0", - "k1", - "gp", - "sp", - "s8", - "ra", -}; -#endif - -#define TCG_TMP0 TCG_REG_AT -#define TCG_TMP1 TCG_REG_T9 - -/* check if we really need so many registers :P */ -static const TCGReg tcg_target_reg_alloc_order[] = { - /* Call saved registers. */ - TCG_REG_S0, - TCG_REG_S1, - TCG_REG_S2, - TCG_REG_S3, - TCG_REG_S4, - TCG_REG_S5, - TCG_REG_S6, - TCG_REG_S7, - TCG_REG_S8, - - /* Call clobbered registers. */ - TCG_REG_T0, - TCG_REG_T1, - TCG_REG_T2, - TCG_REG_T3, - TCG_REG_T4, - TCG_REG_T5, - TCG_REG_T6, - TCG_REG_T7, - TCG_REG_T8, - TCG_REG_T9, - TCG_REG_V1, - TCG_REG_V0, - - /* Argument registers, opposite order of allocation. 
*/ - TCG_REG_A3, - TCG_REG_A2, - TCG_REG_A1, - TCG_REG_A0, -}; - -static const TCGReg tcg_target_call_iarg_regs[4] = { - TCG_REG_A0, - TCG_REG_A1, - TCG_REG_A2, - TCG_REG_A3 -}; - -static const TCGReg tcg_target_call_oarg_regs[2] = { - TCG_REG_V0, - TCG_REG_V1 -}; - -static tcg_insn_unit *tb_ret_addr; - -static inline uint32_t reloc_pc16_val(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - /* Let the compiler perform the right-shift as part of the arithmetic. */ - ptrdiff_t disp = target - (pc + 1); - assert(disp == (int16_t)disp); - return disp & 0xffff; -} - -static inline void reloc_pc16(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - *pc = deposit32(*pc, 0, 16, reloc_pc16_val(pc, target)); -} - -static inline uint32_t reloc_26_val(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - assert((((uintptr_t)pc ^ (uintptr_t)target) & 0xf0000000) == 0); - return ((uintptr_t)target >> 2) & 0x3ffffff; -} - -static inline void reloc_26(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - *pc = deposit32(*pc, 0, 26, reloc_26_val(pc, target)); -} - -static void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - assert(type == R_MIPS_PC16); - assert(addend == 0); - reloc_pc16(code_ptr, (tcg_insn_unit *)value); -} - -#define TCG_CT_CONST_ZERO 0x100 -#define TCG_CT_CONST_U16 0x200 /* Unsigned 16-bit: 0 - 0xffff. */ -#define TCG_CT_CONST_S16 0x400 /* Signed 16-bit: -32768 - 32767 */ -#define TCG_CT_CONST_P2M1 0x800 /* Power of 2 minus 1. */ -#define TCG_CT_CONST_N16 0x1000 /* "Negatable" 16-bit: -32767 - 32767 */ - -static inline bool is_p2m1(tcg_target_long val) -{ - return val && ((val + 1) & val) == 0; -} - -/* parse target specific constraints */ -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str; - - ct_str = *pct_str; - switch(ct_str[0]) { - case 'r': - ct->ct |= TCG_CT_REG; - tcg_regset_set(ct->u.regs, 0xffffffff); - break; - case 'L': /* qemu_ld output arg constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set(ct->u.regs, 0xffffffff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_V0); - break; - case 'l': /* qemu_ld input arg constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set(ct->u.regs, 0xffffffff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); -#if defined(CONFIG_SOFTMMU) - if (TARGET_LONG_BITS == 64) { - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); - } -#endif - break; - case 'S': /* qemu_st constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set(ct->u.regs, 0xffffffff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); -#if defined(CONFIG_SOFTMMU) - if (TARGET_LONG_BITS == 32) { - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1); - } else { - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3); - } -#endif - break; - case 'I': - ct->ct |= TCG_CT_CONST_U16; - break; - case 'J': - ct->ct |= TCG_CT_CONST_S16; - break; - case 'K': - ct->ct |= TCG_CT_CONST_P2M1; - break; - case 'N': - ct->ct |= TCG_CT_CONST_N16; - break; - case 'Z': - /* We are cheating a bit here, using the fact that the register - ZERO is also the register number 0. Hence there is no need - to check for const_args in each instruction. 
*/ - ct->ct |= TCG_CT_CONST_ZERO; - break; - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - return 0; -} - -/* test if a constant matches the constraint */ -static inline int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - int ct; - ct = arg_ct->ct; - if (ct & TCG_CT_CONST) { - return 1; - } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { - return 1; - } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { - return 1; - } else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { - return 1; - } else if ((ct & TCG_CT_CONST_N16) && val >= -32767 && val <= 32767) { - return 1; - } else if ((ct & TCG_CT_CONST_P2M1) - && use_mips32r2_instructions && is_p2m1(val)) { - return 1; - } - return 0; -} - -/* instruction opcodes */ -typedef enum { - OPC_J = 0x02 << 26, - OPC_JAL = 0x03 << 26, - OPC_BEQ = 0x04 << 26, - OPC_BNE = 0x05 << 26, - OPC_BLEZ = 0x06 << 26, - OPC_BGTZ = 0x07 << 26, - OPC_ADDIU = 0x09 << 26, - OPC_SLTI = 0x0A << 26, - OPC_SLTIU = 0x0B << 26, - OPC_ANDI = 0x0C << 26, - OPC_ORI = 0x0D << 26, - OPC_XORI = 0x0E << 26, - OPC_LUI = 0x0F << 26, - OPC_LB = 0x20 << 26, - OPC_LH = 0x21 << 26, - OPC_LW = 0x23 << 26, - OPC_LBU = 0x24 << 26, - OPC_LHU = 0x25 << 26, - OPC_LWU = 0x27 << 26, - OPC_SB = 0x28 << 26, - OPC_SH = 0x29 << 26, - OPC_SW = 0x2B << 26, - - OPC_SPECIAL = 0x00 << 26, - OPC_SLL = OPC_SPECIAL | 0x00, - OPC_SRL = OPC_SPECIAL | 0x02, - OPC_ROTR = OPC_SPECIAL | (0x01 << 21) | 0x02, - OPC_SRA = OPC_SPECIAL | 0x03, - OPC_SLLV = OPC_SPECIAL | 0x04, - OPC_SRLV = OPC_SPECIAL | 0x06, - OPC_ROTRV = OPC_SPECIAL | (0x01 << 6) | 0x06, - OPC_SRAV = OPC_SPECIAL | 0x07, - OPC_JR_R5 = OPC_SPECIAL | 0x08, - OPC_JALR = OPC_SPECIAL | 0x09, - OPC_MOVZ = OPC_SPECIAL | 0x0A, - OPC_MOVN = OPC_SPECIAL | 0x0B, - OPC_MFHI = OPC_SPECIAL | 0x10, - OPC_MFLO = OPC_SPECIAL | 0x12, - OPC_MULT = OPC_SPECIAL | 0x18, - OPC_MUL_R6 = OPC_SPECIAL | (0x02 << 6) | 0x18, - OPC_MUH = OPC_SPECIAL | (0x03 << 6) | 0x18, - OPC_MULTU = OPC_SPECIAL | 0x19, - OPC_MULU = OPC_SPECIAL | (0x02 << 6) | 0x19, - OPC_MUHU = OPC_SPECIAL | (0x03 << 6) | 0x19, - OPC_DIV = OPC_SPECIAL | 0x1A, - OPC_DIV_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1A, - OPC_MOD = OPC_SPECIAL | (0x03 << 6) | 0x1A, - OPC_DIVU = OPC_SPECIAL | 0x1B, - OPC_DIVU_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1B, - OPC_MODU = OPC_SPECIAL | (0x03 << 6) | 0x1B, - OPC_ADDU = OPC_SPECIAL | 0x21, - OPC_SUBU = OPC_SPECIAL | 0x23, - OPC_AND = OPC_SPECIAL | 0x24, - OPC_OR = OPC_SPECIAL | 0x25, - OPC_XOR = OPC_SPECIAL | 0x26, - OPC_NOR = OPC_SPECIAL | 0x27, - OPC_SLT = OPC_SPECIAL | 0x2A, - OPC_SLTU = OPC_SPECIAL | 0x2B, - OPC_SELEQZ = OPC_SPECIAL | 0x35, - OPC_SELNEZ = OPC_SPECIAL | 0x37, - - OPC_REGIMM = 0x01 << 26, - OPC_BLTZ = OPC_REGIMM | (0x00 << 16), - OPC_BGEZ = OPC_REGIMM | (0x01 << 16), - - OPC_SPECIAL2 = 0x1c << 26, - OPC_MUL_R5 = OPC_SPECIAL2 | 0x002, - - OPC_SPECIAL3 = 0x1f << 26, - OPC_EXT = OPC_SPECIAL3 | 0x000, - OPC_INS = OPC_SPECIAL3 | 0x004, - OPC_WSBH = OPC_SPECIAL3 | 0x0a0, - OPC_SEB = OPC_SPECIAL3 | 0x420, - OPC_SEH = OPC_SPECIAL3 | 0x620, - - /* MIPS r6 doesn't have JR, JALR should be used instead */ - OPC_JR = use_mips32r6_instructions ? OPC_JALR : OPC_JR_R5, - - /* - * MIPS r6 replaces MUL with an alternative encoding which is - * backwards-compatible at the assembly level. - */ - OPC_MUL = use_mips32r6_instructions ? 
OPC_MUL_R6 : OPC_MUL_R5, -} MIPSInsn; - -/* - * Type reg - */ -static inline void tcg_out_opc_reg(TCGContext *s, MIPSInsn opc, - TCGReg rd, TCGReg rs, TCGReg rt) -{ - int32_t inst; - - inst = opc; - inst |= (rs & 0x1F) << 21; - inst |= (rt & 0x1F) << 16; - inst |= (rd & 0x1F) << 11; - tcg_out32(s, inst); -} - -/* - * Type immediate - */ -static inline void tcg_out_opc_imm(TCGContext *s, MIPSInsn opc, - TCGReg rt, TCGReg rs, TCGArg imm) -{ - int32_t inst; - - inst = opc; - inst |= (rs & 0x1F) << 21; - inst |= (rt & 0x1F) << 16; - inst |= (imm & 0xffff); - tcg_out32(s, inst); -} - -/* - * Type bitfield - */ -static inline void tcg_out_opc_bf(TCGContext *s, MIPSInsn opc, TCGReg rt, - TCGReg rs, int msb, int lsb) -{ - int32_t inst; - - inst = opc; - inst |= (rs & 0x1F) << 21; - inst |= (rt & 0x1F) << 16; - inst |= (msb & 0x1F) << 11; - inst |= (lsb & 0x1F) << 6; - tcg_out32(s, inst); -} - -/* - * Type branch - */ -static inline void tcg_out_opc_br(TCGContext *s, MIPSInsn opc, - TCGReg rt, TCGReg rs) -{ - /* We pay attention here to not modify the branch target by reading - the existing value and using it again. This ensure that caches and - memory are kept coherent during retranslation. */ - uint16_t offset = (uint16_t)*s->code_ptr; - - tcg_out_opc_imm(s, opc, rt, rs, offset); -} - -/* - * Type sa - */ -static inline void tcg_out_opc_sa(TCGContext *s, MIPSInsn opc, - TCGReg rd, TCGReg rt, TCGArg sa) -{ - int32_t inst; - - inst = opc; - inst |= (rt & 0x1F) << 16; - inst |= (rd & 0x1F) << 11; - inst |= (sa & 0x1F) << 6; - tcg_out32(s, inst); - -} - -/* - * Type jump. - * Returns true if the branch was in range and the insn was emitted. - */ -static bool tcg_out_opc_jmp(TCGContext *s, MIPSInsn opc, void *target) -{ - uintptr_t dest = (uintptr_t)target; - uintptr_t from = (uintptr_t)s->code_ptr + 4; - int32_t inst; - - /* The pc-region branch happens within the 256MB region of - the delay slot (thus the +4). 
*/ - if ((from ^ dest) & -(1 << 28)) { - return false; - } - assert((dest & 3) == 0); - - inst = opc; - inst |= (dest >> 2) & 0x3ffffff; - tcg_out32(s, inst); - return true; -} - -static inline void tcg_out_nop(TCGContext *s) -{ - tcg_out32(s, 0); -} - -static inline void tcg_out_mov(TCGContext *s, TCGType type, - TCGReg ret, TCGReg arg) -{ - /* Simple reg-reg move, optimising out the 'do nothing' case */ - if (ret != arg) { - tcg_out_opc_reg(s, OPC_ADDU, ret, arg, TCG_REG_ZERO); - } -} - -static inline void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg reg, tcg_target_long arg) -{ - if (arg == (int16_t)arg) { - tcg_out_opc_imm(s, OPC_ADDIU, reg, TCG_REG_ZERO, arg); - } else if (arg == (uint16_t)arg) { - tcg_out_opc_imm(s, OPC_ORI, reg, TCG_REG_ZERO, arg); - } else { - tcg_out_opc_imm(s, OPC_LUI, reg, TCG_REG_ZERO, arg >> 16); - if (arg & 0xffff) { - tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff); - } - } -} - -static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); - } else { - /* ret and arg can't be register at */ - if (ret == TCG_TMP0 || arg == TCG_TMP0) { - tcg_abort(); - } - - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); - tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - } -} - -static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); - tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); - } else { - /* ret and arg can't be register at */ - if (ret == TCG_TMP0 || arg == TCG_TMP0) { - tcg_abort(); - } - - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); - tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - } -} - -static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); - tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); - } else { - /* ret and arg must be different and can't be register at */ - if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0) { - tcg_abort(); - } - - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); - - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 24); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00); - tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0xff00); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - } -} - -static inline void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_SEB, ret, 0, arg); - } else { - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); - tcg_out_opc_sa(s, OPC_SRA, ret, ret, 24); - } -} - -static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_SEH, ret, 0, arg); - } else { - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 16); - tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); - } -} - -static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data, - TCGReg addr, intptr_t ofs) -{ - int16_t lo = ofs; - if (ofs != lo) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - lo); - if (addr != TCG_REG_ZERO) { - tcg_out_opc_reg(s, OPC_ADDU, TCG_TMP0, TCG_TMP0, 
addr); - } - addr = TCG_TMP0; - } - tcg_out_opc_imm(s, opc, data, addr, lo); -} - -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_ldst(s, OPC_LW, arg, arg1, arg2); -} - -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_ldst(s, OPC_SW, arg, arg1, arg2); -} - -static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val) -{ - if (val == (int16_t)val) { - tcg_out_opc_imm(s, OPC_ADDIU, reg, reg, val); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, val); - tcg_out_opc_reg(s, OPC_ADDU, reg, reg, TCG_TMP0); - } -} - -static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al, - TCGReg ah, TCGArg bl, TCGArg bh, bool cbl, - bool cbh, bool is_sub) -{ - TCGReg th = TCG_TMP1; - - /* If we have a negative constant such that negating it would - make the high part zero, we can (usually) eliminate one insn. */ - if (cbl && cbh && bh == -1 && bl != 0) { - bl = -bl; - bh = 0; - is_sub = !is_sub; - } - - /* By operating on the high part first, we get to use the final - carry operation to move back from the temporary. */ - if (!cbh) { - tcg_out_opc_reg(s, (is_sub ? OPC_SUBU : OPC_ADDU), th, ah, bh); - } else if (bh != 0 || ah == rl) { - tcg_out_opc_imm(s, OPC_ADDIU, th, ah, (is_sub ? -bh : bh)); - } else { - th = ah; - } - - /* Note that tcg optimization should eliminate the bl == 0 case. */ - if (is_sub) { - if (cbl) { - tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, al, bl); - tcg_out_opc_imm(s, OPC_ADDIU, rl, al, -bl); - } else { - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, al, bl); - tcg_out_opc_reg(s, OPC_SUBU, rl, al, bl); - } - tcg_out_opc_reg(s, OPC_SUBU, rh, th, TCG_TMP0); - } else { - if (cbl) { - tcg_out_opc_imm(s, OPC_ADDIU, rl, al, bl); - tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, rl, bl); - } else if (rl == al && rl == bl) { - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, al, 31); - tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); - } else { - tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, rl, (rl == bl ? al : bl)); - } - tcg_out_opc_reg(s, OPC_ADDU, rh, th, TCG_TMP0); - } -} - -/* Bit 0 set if inversion required; bit 1 set if swapping required. 
*/ -#define MIPS_CMP_INV 1 -#define MIPS_CMP_SWAP 2 - -static const uint8_t mips_cmp_map[16] = { - [TCG_COND_LT] = 0, - [TCG_COND_LTU] = 0, - [TCG_COND_GE] = MIPS_CMP_INV, - [TCG_COND_GEU] = MIPS_CMP_INV, - [TCG_COND_LE] = MIPS_CMP_INV | MIPS_CMP_SWAP, - [TCG_COND_LEU] = MIPS_CMP_INV | MIPS_CMP_SWAP, - [TCG_COND_GT] = MIPS_CMP_SWAP, - [TCG_COND_GTU] = MIPS_CMP_SWAP, -}; - -static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg arg1, TCGReg arg2) -{ - MIPSInsn s_opc = OPC_SLTU; - int cmp_map; - - switch (cond) { - case TCG_COND_EQ: - if (arg2 != 0) { - tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); - arg1 = ret; - } - tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, 1); - break; - - case TCG_COND_NE: - if (arg2 != 0) { - tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); - arg1 = ret; - } - tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg1); - break; - - case TCG_COND_LT: - case TCG_COND_GE: - case TCG_COND_LE: - case TCG_COND_GT: - s_opc = OPC_SLT; - /* FALLTHRU */ - - case TCG_COND_LTU: - case TCG_COND_GEU: - case TCG_COND_LEU: - case TCG_COND_GTU: - cmp_map = mips_cmp_map[cond]; - if (cmp_map & MIPS_CMP_SWAP) { - TCGReg t = arg1; - arg1 = arg2; - arg2 = t; - } - tcg_out_opc_reg(s, s_opc, ret, arg1, arg2); - if (cmp_map & MIPS_CMP_INV) { - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - } - break; - - default: - tcg_abort(); - break; - } -} - -static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, - TCGReg arg2, TCGLabel *l) -{ - static const MIPSInsn b_zero[16] = { - [TCG_COND_LT] = OPC_BLTZ, - [TCG_COND_GT] = OPC_BGTZ, - [TCG_COND_LE] = OPC_BLEZ, - [TCG_COND_GE] = OPC_BGEZ, - }; - - MIPSInsn s_opc = OPC_SLTU; - MIPSInsn b_opc; - int cmp_map; - - switch (cond) { - case TCG_COND_EQ: - b_opc = OPC_BEQ; - break; - case TCG_COND_NE: - b_opc = OPC_BNE; - break; - - case TCG_COND_LT: - case TCG_COND_GT: - case TCG_COND_LE: - case TCG_COND_GE: - if (arg2 == 0) { - b_opc = b_zero[cond]; - arg2 = arg1; - arg1 = 0; - break; - } - s_opc = OPC_SLT; - /* FALLTHRU */ - - case TCG_COND_LTU: - case TCG_COND_GTU: - case TCG_COND_LEU: - case TCG_COND_GEU: - cmp_map = mips_cmp_map[cond]; - if (cmp_map & MIPS_CMP_SWAP) { - TCGReg t = arg1; - arg1 = arg2; - arg2 = t; - } - tcg_out_opc_reg(s, s_opc, TCG_TMP0, arg1, arg2); - b_opc = (cmp_map & MIPS_CMP_INV ? OPC_BEQ : OPC_BNE); - arg1 = TCG_TMP0; - arg2 = TCG_REG_ZERO; - break; - - default: - tcg_abort(); - break; - } - - tcg_out_opc_br(s, b_opc, arg1, arg2); - if (l->has_value) { - reloc_pc16(s->code_ptr - 1, l->u.value_ptr); - } else { - tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, l, 0); - } - tcg_out_nop(s); -} - -static TCGReg tcg_out_reduce_eq2(TCGContext *s, TCGReg tmp0, TCGReg tmp1, - TCGReg al, TCGReg ah, - TCGReg bl, TCGReg bh) -{ - /* Merge highpart comparison into AH. */ - if (bh != 0) { - if (ah != 0) { - tcg_out_opc_reg(s, OPC_XOR, tmp0, ah, bh); - ah = tmp0; - } else { - ah = bh; - } - } - /* Merge lowpart comparison into AL. */ - if (bl != 0) { - if (al != 0) { - tcg_out_opc_reg(s, OPC_XOR, tmp1, al, bl); - al = tmp1; - } else { - al = bl; - } - } - /* Merge high and low part comparisons into AL. 
*/ - if (ah != 0) { - if (al != 0) { - tcg_out_opc_reg(s, OPC_OR, tmp0, ah, al); - al = tmp0; - } else { - al = ah; - } - } - return al; -} - -static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh) -{ - TCGReg tmp0 = TCG_TMP0; - TCGReg tmp1 = ret; - - assert(ret != TCG_TMP0); - if (ret == ah || ret == bh) { - assert(ret != TCG_TMP1); - tmp1 = TCG_TMP1; - } - - switch (cond) { - case TCG_COND_EQ: - case TCG_COND_NE: - tmp1 = tcg_out_reduce_eq2(s, tmp0, tmp1, al, ah, bl, bh); - tcg_out_setcond(s, cond, ret, tmp1, TCG_REG_ZERO); - break; - - default: - tcg_out_setcond(s, TCG_COND_EQ, tmp0, ah, bh); - tcg_out_setcond(s, tcg_unsigned_cond(cond), tmp1, al, bl); - tcg_out_opc_reg(s, OPC_AND, tmp1, tmp1, tmp0); - tcg_out_setcond(s, tcg_high_cond(cond), tmp0, ah, bh); - tcg_out_opc_reg(s, OPC_OR, ret, tmp1, tmp0); - break; - } -} - -static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, - TCGReg bl, TCGReg bh, TCGLabel *l) -{ - TCGCond b_cond = TCG_COND_NE; - TCGReg tmp = TCG_TMP1; - - /* With branches, we emit between 4 and 9 insns with 2 or 3 branches. - With setcond, we emit between 3 and 10 insns and only 1 branch, - which ought to get better branch prediction. */ - switch (cond) { - case TCG_COND_EQ: - case TCG_COND_NE: - b_cond = cond; - tmp = tcg_out_reduce_eq2(s, TCG_TMP0, TCG_TMP1, al, ah, bl, bh); - break; - - default: - /* Minimize code size by preferring a compare not requiring INV. */ - if (mips_cmp_map[cond] & MIPS_CMP_INV) { - cond = tcg_invert_cond(cond); - b_cond = TCG_COND_EQ; - } - tcg_out_setcond2(s, cond, tmp, al, ah, bl, bh); - break; - } - - tcg_out_brcond(s, b_cond, tmp, TCG_REG_ZERO, l); -} - -static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, TCGReg c2, TCGReg v1, TCGReg v2) -{ - bool eqz = false; - - /* If one of the values is zero, put it last to match SEL*Z instructions */ - if (use_mips32r6_instructions && v1 == 0) { - v1 = v2; - v2 = 0; - cond = tcg_invert_cond(cond); - } - - switch (cond) { - case TCG_COND_EQ: - eqz = true; - /* FALLTHRU */ - case TCG_COND_NE: - if (c2 != 0) { - tcg_out_opc_reg(s, OPC_XOR, TCG_TMP0, c1, c2); - c1 = TCG_TMP0; - } - break; - - default: - /* Minimize code size by preferring a compare not requiring INV. */ - if (mips_cmp_map[cond] & MIPS_CMP_INV) { - cond = tcg_invert_cond(cond); - eqz = true; - } - tcg_out_setcond(s, cond, TCG_TMP0, c1, c2); - c1 = TCG_TMP0; - break; - } - - if (use_mips32r6_instructions) { - MIPSInsn m_opc_t = eqz ? OPC_SELEQZ : OPC_SELNEZ; - MIPSInsn m_opc_f = eqz ? OPC_SELNEZ : OPC_SELEQZ; - - if (v2 != 0) { - tcg_out_opc_reg(s, m_opc_f, TCG_TMP1, v2, c1); - } - tcg_out_opc_reg(s, m_opc_t, ret, v1, c1); - if (v2 != 0) { - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1); - } - } else { - MIPSInsn m_opc = eqz ? OPC_MOVZ : OPC_MOVN; - - tcg_out_opc_reg(s, m_opc, ret, v1, c1); - - /* This should be guaranteed via constraints */ - tcg_debug_assert(v2 == ret); - } -} - -static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) -{ - /* Note that the ABI requires the called function's address to be - loaded into T9, even if a direct branch is in range. */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (uintptr_t)arg); - - /* But do try a direct branch, allowing the cpu better insn prefetch. 
*/ - if (tail) { - if (!tcg_out_opc_jmp(s, OPC_J, arg)) { - tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_T9, 0); - } - } else { - if (!tcg_out_opc_jmp(s, OPC_JAL, arg)) { - tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); - } - } -} - -static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) -{ - tcg_out_call_int(s, arg, false); - tcg_out_nop(s); -} - -#if defined(CONFIG_SOFTMMU) -static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_SB] = helper_ret_ldsb_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LESW] = helper_le_ldsw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BESW] = helper_be_ldsw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, -}; - -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, -}; - -/* Helper routines for marshalling helper function arguments into - * the correct registers and stack. - * I is where we want to put this argument, and is updated and returned - * for the next call. ARG is the argument itself. - * - * We provide routines for arguments which are: immediate, 32 bit - * value in register, 16 and 8 bit values in register (which must be zero - * extended before use) and 64 bit value in a lo:hi register pair. - */ - -static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg) -{ - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg); - } else { - tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i); - } - return i + 1; -} - -static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg) -{ - TCGReg tmp = TCG_TMP0; - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tmp = tcg_target_call_iarg_regs[i]; - } - tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xff); - return tcg_out_call_iarg_reg(s, i, tmp); -} - -static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg) -{ - TCGReg tmp = TCG_TMP0; - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tmp = tcg_target_call_iarg_regs[i]; - } - tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff); - return tcg_out_call_iarg_reg(s, i, tmp); -} - -static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg) -{ - TCGReg tmp = TCG_TMP0; - if (arg == 0) { - tmp = TCG_REG_ZERO; - } else { - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tmp = tcg_target_call_iarg_regs[i]; - } - tcg_out_movi(s, TCG_TYPE_REG, tmp, arg); - } - return tcg_out_call_iarg_reg(s, i, tmp); -} - -static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah) -{ - i = (i + 1) & ~1; - i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al)); - i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah)); - return i; -} - -/* Perform the tlb comparison operation. The complete host address is - placed in BASE. Clobbers AT, T0, A0. */ -static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, - TCGReg addrh, TCGMemOpIdx oi, - tcg_insn_unit *label_ptr[2], bool is_load) -{ - TCGMemOp s_bits = get_memop(oi) & MO_SIZE; - int mem_index = get_mmuidx(oi); - int cmp_off - = (is_load - ? 
offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) - : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); - int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); - - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addrl, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, - (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0); - - /* Compensate for very large offsets. */ - if (add_off >= 0x8000) { - /* Most target env are smaller than 32k; none are larger than 64k. - Simplify the logic here merely to offset by 0x7ff0, giving us a - range just shy of 64k. Check this assumption. */ - QEMU_BUILD_BUG_ON(offsetof(CPUArchState, - tlb_table[NB_MMU_MODES - 1][1]) - > 0x7ff0 + 0x7fff); - tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_A0, TCG_REG_A0, 0x7ff0); - cmp_off -= 0x7ff0; - add_off -= 0x7ff0; - } - - /* Load the (low half) tlb comparator. */ - tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, - cmp_off + (TARGET_LONG_BITS == 64 ? LO_OFF : 0)); - - /* Mask the page bits, keeping the alignment bits to compare against. - In between on 32-bit targets, load the tlb addend for the fast path. */ - tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, - TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - if (TARGET_LONG_BITS == 32) { - tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); - } - tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl); - - label_ptr[0] = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0); - - /* Load and test the high half tlb comparator. */ - if (TARGET_LONG_BITS == 64) { - /* delay slot */ - tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + HI_OFF); - - /* Load the tlb addend for the fast path. We can't do it earlier with - 64-bit targets or we'll clobber a0 before reading the high half tlb - comparator. */ - tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); - - label_ptr[1] = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0); - } - - /* delay slot */ - tcg_out_opc_reg(s, OPC_ADDU, base, TCG_REG_A0, addrl); -} - -static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addrhi, - void *raddr, tcg_insn_unit *label_ptr[2]) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->datalo_reg = datalo; - label->datahi_reg = datahi; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - label->raddr = raddr; - label->label_ptr[0] = label_ptr[0]; - if (TARGET_LONG_BITS == 64) { - label->label_ptr[1] = label_ptr[1]; - } -} - -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) -{ - TCGMemOpIdx oi = l->oi; - TCGMemOp opc = get_memop(oi); - TCGReg v0; - int i; - - /* resolve label address */ - reloc_pc16(l->label_ptr[0], s->code_ptr); - if (TARGET_LONG_BITS == 64) { - reloc_pc16(l->label_ptr[1], s->code_ptr); - } - - i = 1; - if (TARGET_LONG_BITS == 64) { - i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); - } else { - i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); - } - i = tcg_out_call_iarg_imm(s, i, oi); - i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr); - tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false); - /* delay slot */ - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); - - v0 = l->datalo_reg; - if ((opc & MO_SIZE) == MO_64) { - /* We eliminated V0 from the possible output registers, so it - cannot be clobbered here. 
So we must move V1 first. */ - if (MIPS_BE) { - tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1); - v0 = l->datahi_reg; - } else { - tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1); - } - } - - reloc_pc16(s->code_ptr, l->raddr); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); - /* delay slot */ - tcg_out_mov(s, TCG_TYPE_REG, v0, TCG_REG_V0); -} - -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) -{ - TCGMemOpIdx oi = l->oi; - TCGMemOp opc = get_memop(oi); - TCGMemOp s_bits = opc & MO_SIZE; - int i; - - /* resolve label address */ - reloc_pc16(l->label_ptr[0], s->code_ptr); - if (TARGET_LONG_BITS == 64) { - reloc_pc16(l->label_ptr[1], s->code_ptr); - } - - i = 1; - if (TARGET_LONG_BITS == 64) { - i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); - } else { - i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); - } - switch (s_bits) { - case MO_8: - i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg); - break; - case MO_16: - i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg); - break; - case MO_32: - i = tcg_out_call_iarg_reg(s, i, l->datalo_reg); - break; - case MO_64: - i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg); - break; - default: - tcg_abort(); - } - i = tcg_out_call_iarg_imm(s, i, oi); - - /* Tail call to the store helper. Thus force the return address - computation to take place in the return address register. */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr); - i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA); - tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true); - /* delay slot */ - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); -} -#endif - -static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg base, TCGMemOp opc) -{ - switch (opc & (MO_SSIZE | MO_BSWAP)) { - case MO_UB: - tcg_out_opc_imm(s, OPC_LBU, datalo, base, 0); - break; - case MO_SB: - tcg_out_opc_imm(s, OPC_LB, datalo, base, 0); - break; - case MO_UW | MO_BSWAP: - tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); - tcg_out_bswap16(s, datalo, TCG_TMP1); - break; - case MO_UW: - tcg_out_opc_imm(s, OPC_LHU, datalo, base, 0); - break; - case MO_SW | MO_BSWAP: - tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); - tcg_out_bswap16s(s, datalo, TCG_TMP1); - break; - case MO_SW: - tcg_out_opc_imm(s, OPC_LH, datalo, base, 0); - break; - case MO_UL | MO_BSWAP: - tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 0); - tcg_out_bswap32(s, datalo, TCG_TMP1); - break; - case MO_UL: - tcg_out_opc_imm(s, OPC_LW, datalo, base, 0); - break; - case MO_Q | MO_BSWAP: - tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, HI_OFF); - tcg_out_bswap32(s, datalo, TCG_TMP1); - tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, LO_OFF); - tcg_out_bswap32(s, datahi, TCG_TMP1); - break; - case MO_Q: - tcg_out_opc_imm(s, OPC_LW, datalo, base, LO_OFF); - tcg_out_opc_imm(s, OPC_LW, datahi, base, HI_OFF); - break; - default: - tcg_abort(); - } -} - -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) -{ - TCGReg addr_regl, addr_regh __attribute__((unused)); - TCGReg data_regl, data_regh; - TCGMemOpIdx oi; - TCGMemOp opc; -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label_ptr[2]; -#endif - /* Note that we've eliminated V0 from the output registers, - so we won't overwrite the base register during loading. */ - TCGReg base = TCG_REG_V0; - - data_regl = *args++; - data_regh = (is_64 ? *args++ : 0); - addr_regl = *args++; - addr_regh = (TARGET_LONG_BITS == 64 ? 
*args++ : 0); - oi = *args++; - opc = get_memop(oi); - -#if defined(CONFIG_SOFTMMU) - tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 1); - tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); - add_qemu_ldst_label(s, 1, oi, data_regl, data_regh, addr_regl, addr_regh, - s->code_ptr, label_ptr); -#else - if (guest_base == 0 && data_regl != addr_regl) { - base = addr_regl; - } else if (guest_base == (int16_t)guest_base) { - tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base); - tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); - } - tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); -#endif -} - -static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg base, TCGMemOp opc) -{ - switch (opc & (MO_SIZE | MO_BSWAP)) { - case MO_8: - tcg_out_opc_imm(s, OPC_SB, datalo, base, 0); - break; - - case MO_16 | MO_BSWAP: - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, datalo, 0xffff); - tcg_out_bswap16(s, TCG_TMP1, TCG_TMP1); - datalo = TCG_TMP1; - /* FALLTHRU */ - case MO_16: - tcg_out_opc_imm(s, OPC_SH, datalo, base, 0); - break; - - case MO_32 | MO_BSWAP: - tcg_out_bswap32(s, TCG_TMP1, datalo); - datalo = TCG_TMP1; - /* FALLTHRU */ - case MO_32: - tcg_out_opc_imm(s, OPC_SW, datalo, base, 0); - break; - - case MO_64 | MO_BSWAP: - tcg_out_bswap32(s, TCG_TMP1, datalo); - tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, HI_OFF); - tcg_out_bswap32(s, TCG_TMP1, datahi); - tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, LO_OFF); - break; - case MO_64: - tcg_out_opc_imm(s, OPC_SW, datalo, base, LO_OFF); - tcg_out_opc_imm(s, OPC_SW, datahi, base, HI_OFF); - break; - - default: - tcg_abort(); - } -} - -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) -{ - TCGReg addr_regl, addr_regh __attribute__((unused)); - TCGReg data_regl, data_regh, base; - TCGMemOpIdx oi; - TCGMemOp opc; -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label_ptr[2]; -#endif - - data_regl = *args++; - data_regh = (is_64 ? *args++ : 0); - addr_regl = *args++; - addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0); - oi = *args++; - opc = get_memop(oi); - -#if defined(CONFIG_SOFTMMU) - /* Note that we eliminated the helper's address argument, - so we can reuse that for the base. */ - base = (TARGET_LONG_BITS == 32 ? 
TCG_REG_A1 : TCG_REG_A2); - tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 0); - tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); - add_qemu_ldst_label(s, 0, oi, data_regl, data_regh, addr_regl, addr_regh, - s->code_ptr, label_ptr); -#else - if (guest_base == 0) { - base = addr_regl; - } else { - base = TCG_REG_A0; - if (guest_base == (int16_t)guest_base) { - tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base); - tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); - } - } - tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); -#endif -} - -static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, - const TCGArg *args, const int *const_args) -{ - MIPSInsn i1, i2; - TCGArg a0, a1, a2; - int c2; - - a0 = args[0]; - a1 = args[1]; - a2 = args[2]; - c2 = const_args[2]; - - switch (opc) { - case INDEX_op_exit_tb: - { - TCGReg b0 = TCG_REG_ZERO; - - if (a0 & ~0xffff) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff); - b0 = TCG_REG_V0; - } - if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, - (uintptr_t)tb_ret_addr); - tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); - } - tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff); - } - break; - case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { - /* direct jump method */ - s->tb_jmp_offset[a0] = tcg_current_code_size(s); - /* Avoid clobbering the address during retranslation. */ - tcg_out32(s, OPC_J | (*(uint32_t *)s->code_ptr & 0x3ffffff)); - } else { - /* indirect jump method */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, - (uintptr_t)(s->tb_next + a0)); - tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); - } - tcg_out_nop(s); - s->tb_next_offset[a0] = tcg_current_code_size(s); - break; - case INDEX_op_br: - tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, - arg_label(a0)); - break; - - case INDEX_op_ld8u_i32: - i1 = OPC_LBU; - goto do_ldst; - case INDEX_op_ld8s_i32: - i1 = OPC_LB; - goto do_ldst; - case INDEX_op_ld16u_i32: - i1 = OPC_LHU; - goto do_ldst; - case INDEX_op_ld16s_i32: - i1 = OPC_LH; - goto do_ldst; - case INDEX_op_ld_i32: - i1 = OPC_LW; - goto do_ldst; - case INDEX_op_st8_i32: - i1 = OPC_SB; - goto do_ldst; - case INDEX_op_st16_i32: - i1 = OPC_SH; - goto do_ldst; - case INDEX_op_st_i32: - i1 = OPC_SW; - do_ldst: - tcg_out_ldst(s, i1, a0, a1, a2); - break; - - case INDEX_op_add_i32: - i1 = OPC_ADDU, i2 = OPC_ADDIU; - goto do_binary; - case INDEX_op_or_i32: - i1 = OPC_OR, i2 = OPC_ORI; - goto do_binary; - case INDEX_op_xor_i32: - i1 = OPC_XOR, i2 = OPC_XORI; - do_binary: - if (c2) { - tcg_out_opc_imm(s, i2, a0, a1, a2); - break; - } - do_binaryv: - tcg_out_opc_reg(s, i1, a0, a1, a2); - break; - - case INDEX_op_sub_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_ADDIU, a0, a1, -a2); - break; - } - i1 = OPC_SUBU; - goto do_binary; - case INDEX_op_and_i32: - if (c2 && a2 != (uint16_t)a2) { - int msb = ctz32(~a2) - 1; - assert(use_mips32r2_instructions); - assert(is_p2m1(a2)); - tcg_out_opc_bf(s, OPC_EXT, a0, a1, msb, 0); - break; - } - i1 = OPC_AND, i2 = OPC_ANDI; - goto do_binary; - case INDEX_op_nor_i32: - i1 = OPC_NOR; - goto do_binaryv; - - case INDEX_op_mul_i32: - if (use_mips32_instructions) { - tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); - break; - } - i1 = OPC_MULT, i2 = OPC_MFLO; - goto do_hilo1; - case INDEX_op_mulsh_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_MUH, a0, a1, a2); - break; - } - i1 = OPC_MULT, i2 = OPC_MFHI; - goto do_hilo1; - case 
INDEX_op_muluh_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_MUHU, a0, a1, a2); - break; - } - i1 = OPC_MULTU, i2 = OPC_MFHI; - goto do_hilo1; - case INDEX_op_div_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_DIV_R6, a0, a1, a2); - break; - } - i1 = OPC_DIV, i2 = OPC_MFLO; - goto do_hilo1; - case INDEX_op_divu_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_DIVU_R6, a0, a1, a2); - break; - } - i1 = OPC_DIVU, i2 = OPC_MFLO; - goto do_hilo1; - case INDEX_op_rem_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_MOD, a0, a1, a2); - break; - } - i1 = OPC_DIV, i2 = OPC_MFHI; - goto do_hilo1; - case INDEX_op_remu_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_MODU, a0, a1, a2); - break; - } - i1 = OPC_DIVU, i2 = OPC_MFHI; - do_hilo1: - tcg_out_opc_reg(s, i1, 0, a1, a2); - tcg_out_opc_reg(s, i2, a0, 0, 0); - break; - - case INDEX_op_muls2_i32: - i1 = OPC_MULT; - goto do_hilo2; - case INDEX_op_mulu2_i32: - i1 = OPC_MULTU; - do_hilo2: - tcg_out_opc_reg(s, i1, 0, a2, args[3]); - tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); - tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); - break; - - case INDEX_op_not_i32: - i1 = OPC_NOR; - goto do_unary; - case INDEX_op_bswap16_i32: - i1 = OPC_WSBH; - goto do_unary; - case INDEX_op_ext8s_i32: - i1 = OPC_SEB; - goto do_unary; - case INDEX_op_ext16s_i32: - i1 = OPC_SEH; - do_unary: - tcg_out_opc_reg(s, i1, a0, TCG_REG_ZERO, a1); - break; - - case INDEX_op_sar_i32: - i1 = OPC_SRAV, i2 = OPC_SRA; - goto do_shift; - case INDEX_op_shl_i32: - i1 = OPC_SLLV, i2 = OPC_SLL; - goto do_shift; - case INDEX_op_shr_i32: - i1 = OPC_SRLV, i2 = OPC_SRL; - goto do_shift; - case INDEX_op_rotr_i32: - i1 = OPC_ROTRV, i2 = OPC_ROTR; - do_shift: - if (c2) { - tcg_out_opc_sa(s, i2, a0, a1, a2); - } else { - tcg_out_opc_reg(s, i1, a0, a2, a1); - } - break; - case INDEX_op_rotl_i32: - if (c2) { - tcg_out_opc_sa(s, OPC_ROTR, a0, a1, 32 - a2); - } else { - tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, TCG_REG_ZERO, a2); - tcg_out_opc_reg(s, OPC_ROTRV, a0, TCG_TMP0, a1); - } - break; - - case INDEX_op_bswap32_i32: - tcg_out_opc_reg(s, OPC_WSBH, a0, 0, a1); - tcg_out_opc_sa(s, OPC_ROTR, a0, a0, 16); - break; - - case INDEX_op_deposit_i32: - tcg_out_opc_bf(s, OPC_INS, a0, a2, args[3] + args[4] - 1, args[3]); - break; - - case INDEX_op_brcond_i32: - tcg_out_brcond(s, a2, a0, a1, arg_label(args[3])); - break; - case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], arg_label(args[5])); - break; - - case INDEX_op_movcond_i32: - tcg_out_movcond(s, args[5], a0, a1, a2, args[3], args[4]); - break; - - case INDEX_op_setcond_i32: - tcg_out_setcond(s, args[3], a0, a1, a2); - break; - case INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]); - break; - - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args, false); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args, true); - break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args, false); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args, true); - break; - - case INDEX_op_add2_i32: - tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], - const_args[4], const_args[5], false); - break; - case INDEX_op_sub2_i32: - tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], - const_args[4], const_args[5], true); - break; - - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. 
*/ - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - default: - tcg_abort(); - } -} - -static const TCGTargetOpDef mips_op_defs[] = { - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - { INDEX_op_br, { } }, - - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_st8_i32, { "rZ", "r" } }, - { INDEX_op_st16_i32, { "rZ", "r" } }, - { INDEX_op_st_i32, { "rZ", "r" } }, - - { INDEX_op_add_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, -#if !use_mips32r6_instructions - { INDEX_op_muls2_i32, { "r", "r", "rZ", "rZ" } }, - { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } }, -#endif - { INDEX_op_mulsh_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_muluh_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_div_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_divu_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_rem_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_remu_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_sub_i32, { "r", "rZ", "rN" } }, - - { INDEX_op_and_i32, { "r", "rZ", "rIK" } }, - { INDEX_op_nor_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_not_i32, { "r", "rZ" } }, - { INDEX_op_or_i32, { "r", "rZ", "rIZ" } }, - { INDEX_op_xor_i32, { "r", "rZ", "rIZ" } }, - - { INDEX_op_shl_i32, { "r", "rZ", "ri" } }, - { INDEX_op_shr_i32, { "r", "rZ", "ri" } }, - { INDEX_op_sar_i32, { "r", "rZ", "ri" } }, - { INDEX_op_rotr_i32, { "r", "rZ", "ri" } }, - { INDEX_op_rotl_i32, { "r", "rZ", "ri" } }, - - { INDEX_op_bswap16_i32, { "r", "r" } }, - { INDEX_op_bswap32_i32, { "r", "r" } }, - - { INDEX_op_ext8s_i32, { "r", "rZ" } }, - { INDEX_op_ext16s_i32, { "r", "rZ" } }, - - { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, - - { INDEX_op_brcond_i32, { "rZ", "rZ" } }, -#if use_mips32r6_instructions - { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, -#else - { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } }, -#endif - { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, - - { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } }, - { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } }, - { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } }, - -#if TARGET_LONG_BITS == 32 - { INDEX_op_qemu_ld_i32, { "L", "lZ" } }, - { INDEX_op_qemu_st_i32, { "SZ", "SZ" } }, - { INDEX_op_qemu_ld_i64, { "L", "L", "lZ" } }, - { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ" } }, -#else - { INDEX_op_qemu_ld_i32, { "L", "lZ", "lZ" } }, - { INDEX_op_qemu_st_i32, { "SZ", "SZ", "SZ" } }, - { INDEX_op_qemu_ld_i64, { "L", "L", "lZ", "lZ" } }, - { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ", "SZ" } }, -#endif - { -1 }, -}; - -static int tcg_target_callee_save_regs[] = { - TCG_REG_S0, /* used for the global env (TCG_AREG0) */ - TCG_REG_S1, - TCG_REG_S2, - TCG_REG_S3, - TCG_REG_S4, - TCG_REG_S5, - TCG_REG_S6, - TCG_REG_S7, - TCG_REG_S8, - TCG_REG_RA, /* should be last for ABI compliance */ -}; - -/* The Linux kernel doesn't provide any information about the available - instruction set. Probe it using a signal handler. 
*/ - - -#ifndef use_movnz_instructions -bool use_movnz_instructions = false; -#endif - -#ifndef use_mips32_instructions -bool use_mips32_instructions = false; -#endif - -#ifndef use_mips32r2_instructions -bool use_mips32r2_instructions = false; -#endif - -static volatile sig_atomic_t got_sigill; - -static void sigill_handler(int signo, siginfo_t *si, void *data) -{ - /* Skip the faulty instruction */ - ucontext_t *uc = (ucontext_t *)data; - uc->uc_mcontext.pc += 4; - - got_sigill = 1; -} - -static void tcg_target_detect_isa(void) -{ - struct sigaction sa_old, sa_new; - - memset(&sa_new, 0, sizeof(sa_new)); - sa_new.sa_flags = SA_SIGINFO; - sa_new.sa_sigaction = sigill_handler; - sigaction(SIGILL, &sa_new, &sa_old); - - /* Probe for movn/movz, necessary to implement movcond. */ -#ifndef use_movnz_instructions - got_sigill = 0; - asm volatile(".set push\n" - ".set mips32\n" - "movn $zero, $zero, $zero\n" - "movz $zero, $zero, $zero\n" - ".set pop\n" - : : : ); - use_movnz_instructions = !got_sigill; -#endif - - /* Probe for MIPS32 instructions. As no subsetting is allowed - by the specification, it is only necessary to probe for one - of the instructions. */ -#ifndef use_mips32_instructions - got_sigill = 0; - asm volatile(".set push\n" - ".set mips32\n" - "mul $zero, $zero\n" - ".set pop\n" - : : : ); - use_mips32_instructions = !got_sigill; -#endif - - /* Probe for MIPS32r2 instructions if MIPS32 instructions are - available. As no subsetting is allowed by the specification, - it is only necessary to probe for one of the instructions. */ -#ifndef use_mips32r2_instructions - if (use_mips32_instructions) { - got_sigill = 0; - asm volatile(".set push\n" - ".set mips32r2\n" - "seb $zero, $zero\n" - ".set pop\n" - : : : ); - use_mips32r2_instructions = !got_sigill; - } -#endif - - sigaction(SIGILL, &sa_old, NULL); -} - -/* Generate global QEMU prologue and epilogue code */ -static void tcg_target_qemu_prologue(TCGContext *s) -{ - int i, frame_size; - - /* reserve some stack space, also for TCG temps. 
*/ - frame_size = ARRAY_SIZE(tcg_target_callee_save_regs) * 4 - + TCG_STATIC_CALL_ARGS_SIZE - + CPU_TEMP_BUF_NLONGS * sizeof(long); - frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & - ~(TCG_TARGET_STACK_ALIGN - 1); - tcg_set_frame(s, TCG_REG_SP, ARRAY_SIZE(tcg_target_callee_save_regs) * 4 - + TCG_STATIC_CALL_ARGS_SIZE, - CPU_TEMP_BUF_NLONGS * sizeof(long)); - - /* TB prologue */ - tcg_out_addi(s, TCG_REG_SP, -frame_size); - for(i = 0 ; i < ARRAY_SIZE(tcg_target_callee_save_regs) ; i++) { - tcg_out_st(s, TCG_TYPE_I32, tcg_target_callee_save_regs[i], - TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE + i * 4); - } - - /* Call generated code */ - tcg_out_opc_reg(s, OPC_JR, 0, tcg_target_call_iarg_regs[1], 0); - tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - tb_ret_addr = s->code_ptr; - - /* TB epilogue */ - for(i = 0 ; i < ARRAY_SIZE(tcg_target_callee_save_regs) ; i++) { - tcg_out_ld(s, TCG_TYPE_I32, tcg_target_callee_save_regs[i], - TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE + i * 4); - } - - tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0); - tcg_out_addi(s, TCG_REG_SP, frame_size); -} - -static void tcg_target_init(TCGContext *s) -{ - tcg_target_detect_isa(); - tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I32], 0xffffffff); - tcg_regset_set(tcg_target_call_clobber_regs, - (1 << TCG_REG_V0) | - (1 << TCG_REG_V1) | - (1 << TCG_REG_A0) | - (1 << TCG_REG_A1) | - (1 << TCG_REG_A2) | - (1 << TCG_REG_A3) | - (1 << TCG_REG_T0) | - (1 << TCG_REG_T1) | - (1 << TCG_REG_T2) | - (1 << TCG_REG_T3) | - (1 << TCG_REG_T4) | - (1 << TCG_REG_T5) | - (1 << TCG_REG_T6) | - (1 << TCG_REG_T7) | - (1 << TCG_REG_T8) | - (1 << TCG_REG_T9)); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO); /* zero register */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_K0); /* kernel use only */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_K1); /* kernel use only */ - tcg_regset_set_reg(s->reserved_regs, TCG_TMP0); /* internal use */ - tcg_regset_set_reg(s->reserved_regs, TCG_TMP1); /* internal use */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return address */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); /* stack pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */ - - tcg_add_target_add_op_defs(mips_op_defs); -} - -void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) -{ - uint32_t *ptr = (uint32_t *)jmp_addr; - *ptr = deposit32(*ptr, 0, 26, addr >> 2); - flush_icache_range(jmp_addr, jmp_addr + 4); -} diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c new file mode 100644 index 0000000000..2dc4998719 --- /dev/null +++ b/tcg/mips/tcg-target.inc.c @@ -0,0 +1,1892 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008-2009 Arnaud Patard + * Copyright (c) 2009 Aurelien Jarno + * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "tcg-be-ldst.h" + +#ifdef HOST_WORDS_BIGENDIAN +# define MIPS_BE 1 +#else +# define MIPS_BE 0 +#endif + +#define LO_OFF (MIPS_BE * 4) +#define HI_OFF (4 - LO_OFF) + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "zero", + "at", + "v0", + "v1", + "a0", + "a1", + "a2", + "a3", + "t0", + "t1", + "t2", + "t3", + "t4", + "t5", + "t6", + "t7", + "s0", + "s1", + "s2", + "s3", + "s4", + "s5", + "s6", + "s7", + "t8", + "t9", + "k0", + "k1", + "gp", + "sp", + "s8", + "ra", +}; +#endif + +#define TCG_TMP0 TCG_REG_AT +#define TCG_TMP1 TCG_REG_T9 + +/* check if we really need so many registers :P */ +static const TCGReg tcg_target_reg_alloc_order[] = { + /* Call saved registers. */ + TCG_REG_S0, + TCG_REG_S1, + TCG_REG_S2, + TCG_REG_S3, + TCG_REG_S4, + TCG_REG_S5, + TCG_REG_S6, + TCG_REG_S7, + TCG_REG_S8, + + /* Call clobbered registers. */ + TCG_REG_T0, + TCG_REG_T1, + TCG_REG_T2, + TCG_REG_T3, + TCG_REG_T4, + TCG_REG_T5, + TCG_REG_T6, + TCG_REG_T7, + TCG_REG_T8, + TCG_REG_T9, + TCG_REG_V1, + TCG_REG_V0, + + /* Argument registers, opposite order of allocation. */ + TCG_REG_A3, + TCG_REG_A2, + TCG_REG_A1, + TCG_REG_A0, +}; + +static const TCGReg tcg_target_call_iarg_regs[4] = { + TCG_REG_A0, + TCG_REG_A1, + TCG_REG_A2, + TCG_REG_A3 +}; + +static const TCGReg tcg_target_call_oarg_regs[2] = { + TCG_REG_V0, + TCG_REG_V1 +}; + +static tcg_insn_unit *tb_ret_addr; + +static inline uint32_t reloc_pc16_val(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + /* Let the compiler perform the right-shift as part of the arithmetic. */ + ptrdiff_t disp = target - (pc + 1); + assert(disp == (int16_t)disp); + return disp & 0xffff; +} + +static inline void reloc_pc16(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + *pc = deposit32(*pc, 0, 16, reloc_pc16_val(pc, target)); +} + +static inline uint32_t reloc_26_val(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + assert((((uintptr_t)pc ^ (uintptr_t)target) & 0xf0000000) == 0); + return ((uintptr_t)target >> 2) & 0x3ffffff; +} + +static inline void reloc_26(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + *pc = deposit32(*pc, 0, 26, reloc_26_val(pc, target)); +} + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + assert(type == R_MIPS_PC16); + assert(addend == 0); + reloc_pc16(code_ptr, (tcg_insn_unit *)value); +} + +#define TCG_CT_CONST_ZERO 0x100 +#define TCG_CT_CONST_U16 0x200 /* Unsigned 16-bit: 0 - 0xffff. */ +#define TCG_CT_CONST_S16 0x400 /* Signed 16-bit: -32768 - 32767 */ +#define TCG_CT_CONST_P2M1 0x800 /* Power of 2 minus 1. 
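+   E.g. 0x3ffff: a mask of this shape that does not fit in ANDI is applied
+   with a single EXT insn (see the and_i32 case); the constraint is only
+   accepted when MIPS32R2 instructions are available.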
*/ +#define TCG_CT_CONST_N16 0x1000 /* "Negatable" 16-bit: -32767 - 32767 */ + +static inline bool is_p2m1(tcg_target_long val) +{ + return val && ((val + 1) & val) == 0; +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str; + + ct_str = *pct_str; + switch(ct_str[0]) { + case 'r': + ct->ct |= TCG_CT_REG; + tcg_regset_set(ct->u.regs, 0xffffffff); + break; + case 'L': /* qemu_ld output arg constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set(ct->u.regs, 0xffffffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_V0); + break; + case 'l': /* qemu_ld input arg constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set(ct->u.regs, 0xffffffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); +#if defined(CONFIG_SOFTMMU) + if (TARGET_LONG_BITS == 64) { + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); + } +#endif + break; + case 'S': /* qemu_st constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set(ct->u.regs, 0xffffffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); +#if defined(CONFIG_SOFTMMU) + if (TARGET_LONG_BITS == 32) { + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1); + } else { + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3); + } +#endif + break; + case 'I': + ct->ct |= TCG_CT_CONST_U16; + break; + case 'J': + ct->ct |= TCG_CT_CONST_S16; + break; + case 'K': + ct->ct |= TCG_CT_CONST_P2M1; + break; + case 'N': + ct->ct |= TCG_CT_CONST_N16; + break; + case 'Z': + /* We are cheating a bit here, using the fact that the register + ZERO is also the register number 0. Hence there is no need + to check for const_args in each instruction. */ + ct->ct |= TCG_CT_CONST_ZERO; + break; + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + return 0; +} + +/* test if a constant matches the constraint */ +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct; + ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { + return 1; + } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return 1; + } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_N16) && val >= -32767 && val <= 32767) { + return 1; + } else if ((ct & TCG_CT_CONST_P2M1) + && use_mips32r2_instructions && is_p2m1(val)) { + return 1; + } + return 0; +} + +/* instruction opcodes */ +typedef enum { + OPC_J = 0x02 << 26, + OPC_JAL = 0x03 << 26, + OPC_BEQ = 0x04 << 26, + OPC_BNE = 0x05 << 26, + OPC_BLEZ = 0x06 << 26, + OPC_BGTZ = 0x07 << 26, + OPC_ADDIU = 0x09 << 26, + OPC_SLTI = 0x0A << 26, + OPC_SLTIU = 0x0B << 26, + OPC_ANDI = 0x0C << 26, + OPC_ORI = 0x0D << 26, + OPC_XORI = 0x0E << 26, + OPC_LUI = 0x0F << 26, + OPC_LB = 0x20 << 26, + OPC_LH = 0x21 << 26, + OPC_LW = 0x23 << 26, + OPC_LBU = 0x24 << 26, + OPC_LHU = 0x25 << 26, + OPC_LWU = 0x27 << 26, + OPC_SB = 0x28 << 26, + OPC_SH = 0x29 << 26, + OPC_SW = 0x2B << 26, + + OPC_SPECIAL = 0x00 << 26, + OPC_SLL = OPC_SPECIAL | 0x00, + OPC_SRL = OPC_SPECIAL | 0x02, + OPC_ROTR = OPC_SPECIAL | (0x01 << 21) | 0x02, + OPC_SRA = OPC_SPECIAL | 0x03, + OPC_SLLV = OPC_SPECIAL | 0x04, + OPC_SRLV = OPC_SPECIAL | 0x06, + OPC_ROTRV = OPC_SPECIAL | (0x01 << 6) | 0x06, + OPC_SRAV = OPC_SPECIAL | 0x07, + OPC_JR_R5 = OPC_SPECIAL | 0x08, + OPC_JALR = OPC_SPECIAL | 0x09, + OPC_MOVZ = OPC_SPECIAL | 0x0A, + OPC_MOVN = OPC_SPECIAL | 0x0B, + OPC_MFHI = OPC_SPECIAL | 0x10, + OPC_MFLO = OPC_SPECIAL | 
0x12, + OPC_MULT = OPC_SPECIAL | 0x18, + OPC_MUL_R6 = OPC_SPECIAL | (0x02 << 6) | 0x18, + OPC_MUH = OPC_SPECIAL | (0x03 << 6) | 0x18, + OPC_MULTU = OPC_SPECIAL | 0x19, + OPC_MULU = OPC_SPECIAL | (0x02 << 6) | 0x19, + OPC_MUHU = OPC_SPECIAL | (0x03 << 6) | 0x19, + OPC_DIV = OPC_SPECIAL | 0x1A, + OPC_DIV_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1A, + OPC_MOD = OPC_SPECIAL | (0x03 << 6) | 0x1A, + OPC_DIVU = OPC_SPECIAL | 0x1B, + OPC_DIVU_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1B, + OPC_MODU = OPC_SPECIAL | (0x03 << 6) | 0x1B, + OPC_ADDU = OPC_SPECIAL | 0x21, + OPC_SUBU = OPC_SPECIAL | 0x23, + OPC_AND = OPC_SPECIAL | 0x24, + OPC_OR = OPC_SPECIAL | 0x25, + OPC_XOR = OPC_SPECIAL | 0x26, + OPC_NOR = OPC_SPECIAL | 0x27, + OPC_SLT = OPC_SPECIAL | 0x2A, + OPC_SLTU = OPC_SPECIAL | 0x2B, + OPC_SELEQZ = OPC_SPECIAL | 0x35, + OPC_SELNEZ = OPC_SPECIAL | 0x37, + + OPC_REGIMM = 0x01 << 26, + OPC_BLTZ = OPC_REGIMM | (0x00 << 16), + OPC_BGEZ = OPC_REGIMM | (0x01 << 16), + + OPC_SPECIAL2 = 0x1c << 26, + OPC_MUL_R5 = OPC_SPECIAL2 | 0x002, + + OPC_SPECIAL3 = 0x1f << 26, + OPC_EXT = OPC_SPECIAL3 | 0x000, + OPC_INS = OPC_SPECIAL3 | 0x004, + OPC_WSBH = OPC_SPECIAL3 | 0x0a0, + OPC_SEB = OPC_SPECIAL3 | 0x420, + OPC_SEH = OPC_SPECIAL3 | 0x620, + + /* MIPS r6 doesn't have JR, JALR should be used instead */ + OPC_JR = use_mips32r6_instructions ? OPC_JALR : OPC_JR_R5, + + /* + * MIPS r6 replaces MUL with an alternative encoding which is + * backwards-compatible at the assembly level. + */ + OPC_MUL = use_mips32r6_instructions ? OPC_MUL_R6 : OPC_MUL_R5, +} MIPSInsn; + +/* + * Type reg + */ +static inline void tcg_out_opc_reg(TCGContext *s, MIPSInsn opc, + TCGReg rd, TCGReg rs, TCGReg rt) +{ + int32_t inst; + + inst = opc; + inst |= (rs & 0x1F) << 21; + inst |= (rt & 0x1F) << 16; + inst |= (rd & 0x1F) << 11; + tcg_out32(s, inst); +} + +/* + * Type immediate + */ +static inline void tcg_out_opc_imm(TCGContext *s, MIPSInsn opc, + TCGReg rt, TCGReg rs, TCGArg imm) +{ + int32_t inst; + + inst = opc; + inst |= (rs & 0x1F) << 21; + inst |= (rt & 0x1F) << 16; + inst |= (imm & 0xffff); + tcg_out32(s, inst); +} + +/* + * Type bitfield + */ +static inline void tcg_out_opc_bf(TCGContext *s, MIPSInsn opc, TCGReg rt, + TCGReg rs, int msb, int lsb) +{ + int32_t inst; + + inst = opc; + inst |= (rs & 0x1F) << 21; + inst |= (rt & 0x1F) << 16; + inst |= (msb & 0x1F) << 11; + inst |= (lsb & 0x1F) << 6; + tcg_out32(s, inst); +} + +/* + * Type branch + */ +static inline void tcg_out_opc_br(TCGContext *s, MIPSInsn opc, + TCGReg rt, TCGReg rs) +{ + /* We pay attention here to not modify the branch target by reading + the existing value and using it again. This ensure that caches and + memory are kept coherent during retranslation. */ + uint16_t offset = (uint16_t)*s->code_ptr; + + tcg_out_opc_imm(s, opc, rt, rs, offset); +} + +/* + * Type sa + */ +static inline void tcg_out_opc_sa(TCGContext *s, MIPSInsn opc, + TCGReg rd, TCGReg rt, TCGArg sa) +{ + int32_t inst; + + inst = opc; + inst |= (rt & 0x1F) << 16; + inst |= (rd & 0x1F) << 11; + inst |= (sa & 0x1F) << 6; + tcg_out32(s, inst); + +} + +/* + * Type jump. + * Returns true if the branch was in range and the insn was emitted. + */ +static bool tcg_out_opc_jmp(TCGContext *s, MIPSInsn opc, void *target) +{ + uintptr_t dest = (uintptr_t)target; + uintptr_t from = (uintptr_t)s->code_ptr + 4; + int32_t inst; + + /* The pc-region branch happens within the 256MB region of + the delay slot (thus the +4). 
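+       In other words the direct jump is only usable when FROM and DEST agree
+       in their top four address bits, which is what the XOR test below checks.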
*/ + if ((from ^ dest) & -(1 << 28)) { + return false; + } + assert((dest & 3) == 0); + + inst = opc; + inst |= (dest >> 2) & 0x3ffffff; + tcg_out32(s, inst); + return true; +} + +static inline void tcg_out_nop(TCGContext *s) +{ + tcg_out32(s, 0); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, + TCGReg ret, TCGReg arg) +{ + /* Simple reg-reg move, optimising out the 'do nothing' case */ + if (ret != arg) { + tcg_out_opc_reg(s, OPC_ADDU, ret, arg, TCG_REG_ZERO); + } +} + +static inline void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg reg, tcg_target_long arg) +{ + if (arg == (int16_t)arg) { + tcg_out_opc_imm(s, OPC_ADDIU, reg, TCG_REG_ZERO, arg); + } else if (arg == (uint16_t)arg) { + tcg_out_opc_imm(s, OPC_ORI, reg, TCG_REG_ZERO, arg); + } else { + tcg_out_opc_imm(s, OPC_LUI, reg, TCG_REG_ZERO, arg >> 16); + if (arg & 0xffff) { + tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff); + } + } +} + +static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg) +{ + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); + } else { + /* ret and arg can't be register at */ + if (ret == TCG_TMP0 || arg == TCG_TMP0) { + tcg_abort(); + } + + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); + tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); + } +} + +static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg) +{ + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); + tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); + } else { + /* ret and arg can't be register at */ + if (ret == TCG_TMP0 || arg == TCG_TMP0) { + tcg_abort(); + } + + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); + tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); + } +} + +static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg) +{ + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); + tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); + } else { + /* ret and arg must be different and can't be register at */ + if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0) { + tcg_abort(); + } + + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); + + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 24); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); + + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00); + tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); + + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0xff00); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); + } +} + +static inline void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg) +{ + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_SEB, ret, 0, arg); + } else { + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); + tcg_out_opc_sa(s, OPC_SRA, ret, ret, 24); + } +} + +static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg) +{ + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_SEH, ret, 0, arg); + } else { + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 16); + tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); + } +} + +static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data, + TCGReg addr, intptr_t ofs) +{ + int16_t lo = ofs; + if (ofs != lo) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - lo); + if (addr != TCG_REG_ZERO) { + tcg_out_opc_reg(s, OPC_ADDU, TCG_TMP0, TCG_TMP0, 
addr); + } + addr = TCG_TMP0; + } + tcg_out_opc_imm(s, opc, data, addr, lo); +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_ldst(s, OPC_LW, arg, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_ldst(s, OPC_SW, arg, arg1, arg2); +} + +static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val) +{ + if (val == (int16_t)val) { + tcg_out_opc_imm(s, OPC_ADDIU, reg, reg, val); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, val); + tcg_out_opc_reg(s, OPC_ADDU, reg, reg, TCG_TMP0); + } +} + +static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al, + TCGReg ah, TCGArg bl, TCGArg bh, bool cbl, + bool cbh, bool is_sub) +{ + TCGReg th = TCG_TMP1; + + /* If we have a negative constant such that negating it would + make the high part zero, we can (usually) eliminate one insn. */ + if (cbl && cbh && bh == -1 && bl != 0) { + bl = -bl; + bh = 0; + is_sub = !is_sub; + } + + /* By operating on the high part first, we get to use the final + carry operation to move back from the temporary. */ + if (!cbh) { + tcg_out_opc_reg(s, (is_sub ? OPC_SUBU : OPC_ADDU), th, ah, bh); + } else if (bh != 0 || ah == rl) { + tcg_out_opc_imm(s, OPC_ADDIU, th, ah, (is_sub ? -bh : bh)); + } else { + th = ah; + } + + /* Note that tcg optimization should eliminate the bl == 0 case. */ + if (is_sub) { + if (cbl) { + tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, al, bl); + tcg_out_opc_imm(s, OPC_ADDIU, rl, al, -bl); + } else { + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, al, bl); + tcg_out_opc_reg(s, OPC_SUBU, rl, al, bl); + } + tcg_out_opc_reg(s, OPC_SUBU, rh, th, TCG_TMP0); + } else { + if (cbl) { + tcg_out_opc_imm(s, OPC_ADDIU, rl, al, bl); + tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, rl, bl); + } else if (rl == al && rl == bl) { + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, al, 31); + tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); + } else { + tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, rl, (rl == bl ? al : bl)); + } + tcg_out_opc_reg(s, OPC_ADDU, rh, th, TCG_TMP0); + } +} + +/* Bit 0 set if inversion required; bit 1 set if swapping required. 
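+   For instance LE is realised as SLT with the operands swapped followed by
+   an XORI with 1 to invert the result, hence MIPS_CMP_INV | MIPS_CMP_SWAP.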
*/ +#define MIPS_CMP_INV 1 +#define MIPS_CMP_SWAP 2 + +static const uint8_t mips_cmp_map[16] = { + [TCG_COND_LT] = 0, + [TCG_COND_LTU] = 0, + [TCG_COND_GE] = MIPS_CMP_INV, + [TCG_COND_GEU] = MIPS_CMP_INV, + [TCG_COND_LE] = MIPS_CMP_INV | MIPS_CMP_SWAP, + [TCG_COND_LEU] = MIPS_CMP_INV | MIPS_CMP_SWAP, + [TCG_COND_GT] = MIPS_CMP_SWAP, + [TCG_COND_GTU] = MIPS_CMP_SWAP, +}; + +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, TCGReg arg2) +{ + MIPSInsn s_opc = OPC_SLTU; + int cmp_map; + + switch (cond) { + case TCG_COND_EQ: + if (arg2 != 0) { + tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); + arg1 = ret; + } + tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, 1); + break; + + case TCG_COND_NE: + if (arg2 != 0) { + tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); + arg1 = ret; + } + tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg1); + break; + + case TCG_COND_LT: + case TCG_COND_GE: + case TCG_COND_LE: + case TCG_COND_GT: + s_opc = OPC_SLT; + /* FALLTHRU */ + + case TCG_COND_LTU: + case TCG_COND_GEU: + case TCG_COND_LEU: + case TCG_COND_GTU: + cmp_map = mips_cmp_map[cond]; + if (cmp_map & MIPS_CMP_SWAP) { + TCGReg t = arg1; + arg1 = arg2; + arg2 = t; + } + tcg_out_opc_reg(s, s_opc, ret, arg1, arg2); + if (cmp_map & MIPS_CMP_INV) { + tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + } + break; + + default: + tcg_abort(); + break; + } +} + +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, + TCGReg arg2, TCGLabel *l) +{ + static const MIPSInsn b_zero[16] = { + [TCG_COND_LT] = OPC_BLTZ, + [TCG_COND_GT] = OPC_BGTZ, + [TCG_COND_LE] = OPC_BLEZ, + [TCG_COND_GE] = OPC_BGEZ, + }; + + MIPSInsn s_opc = OPC_SLTU; + MIPSInsn b_opc; + int cmp_map; + + switch (cond) { + case TCG_COND_EQ: + b_opc = OPC_BEQ; + break; + case TCG_COND_NE: + b_opc = OPC_BNE; + break; + + case TCG_COND_LT: + case TCG_COND_GT: + case TCG_COND_LE: + case TCG_COND_GE: + if (arg2 == 0) { + b_opc = b_zero[cond]; + arg2 = arg1; + arg1 = 0; + break; + } + s_opc = OPC_SLT; + /* FALLTHRU */ + + case TCG_COND_LTU: + case TCG_COND_GTU: + case TCG_COND_LEU: + case TCG_COND_GEU: + cmp_map = mips_cmp_map[cond]; + if (cmp_map & MIPS_CMP_SWAP) { + TCGReg t = arg1; + arg1 = arg2; + arg2 = t; + } + tcg_out_opc_reg(s, s_opc, TCG_TMP0, arg1, arg2); + b_opc = (cmp_map & MIPS_CMP_INV ? OPC_BEQ : OPC_BNE); + arg1 = TCG_TMP0; + arg2 = TCG_REG_ZERO; + break; + + default: + tcg_abort(); + break; + } + + tcg_out_opc_br(s, b_opc, arg1, arg2); + if (l->has_value) { + reloc_pc16(s->code_ptr - 1, l->u.value_ptr); + } else { + tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, l, 0); + } + tcg_out_nop(s); +} + +static TCGReg tcg_out_reduce_eq2(TCGContext *s, TCGReg tmp0, TCGReg tmp1, + TCGReg al, TCGReg ah, + TCGReg bl, TCGReg bh) +{ + /* Merge highpart comparison into AH. */ + if (bh != 0) { + if (ah != 0) { + tcg_out_opc_reg(s, OPC_XOR, tmp0, ah, bh); + ah = tmp0; + } else { + ah = bh; + } + } + /* Merge lowpart comparison into AL. */ + if (bl != 0) { + if (al != 0) { + tcg_out_opc_reg(s, OPC_XOR, tmp1, al, bl); + al = tmp1; + } else { + al = bl; + } + } + /* Merge high and low part comparisons into AL. 
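+       The merged value is zero exactly when the two double-word operands are
+       equal, so EQ/NE then reduce to a single compare against $zero.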
*/ + if (ah != 0) { + if (al != 0) { + tcg_out_opc_reg(s, OPC_OR, tmp0, ah, al); + al = tmp0; + } else { + al = ah; + } + } + return al; +} + +static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh) +{ + TCGReg tmp0 = TCG_TMP0; + TCGReg tmp1 = ret; + + assert(ret != TCG_TMP0); + if (ret == ah || ret == bh) { + assert(ret != TCG_TMP1); + tmp1 = TCG_TMP1; + } + + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_NE: + tmp1 = tcg_out_reduce_eq2(s, tmp0, tmp1, al, ah, bl, bh); + tcg_out_setcond(s, cond, ret, tmp1, TCG_REG_ZERO); + break; + + default: + tcg_out_setcond(s, TCG_COND_EQ, tmp0, ah, bh); + tcg_out_setcond(s, tcg_unsigned_cond(cond), tmp1, al, bl); + tcg_out_opc_reg(s, OPC_AND, tmp1, tmp1, tmp0); + tcg_out_setcond(s, tcg_high_cond(cond), tmp0, ah, bh); + tcg_out_opc_reg(s, OPC_OR, ret, tmp1, tmp0); + break; + } +} + +static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGReg bl, TCGReg bh, TCGLabel *l) +{ + TCGCond b_cond = TCG_COND_NE; + TCGReg tmp = TCG_TMP1; + + /* With branches, we emit between 4 and 9 insns with 2 or 3 branches. + With setcond, we emit between 3 and 10 insns and only 1 branch, + which ought to get better branch prediction. */ + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_NE: + b_cond = cond; + tmp = tcg_out_reduce_eq2(s, TCG_TMP0, TCG_TMP1, al, ah, bl, bh); + break; + + default: + /* Minimize code size by preferring a compare not requiring INV. */ + if (mips_cmp_map[cond] & MIPS_CMP_INV) { + cond = tcg_invert_cond(cond); + b_cond = TCG_COND_EQ; + } + tcg_out_setcond2(s, cond, tmp, al, ah, bl, bh); + break; + } + + tcg_out_brcond(s, b_cond, tmp, TCG_REG_ZERO, l); +} + +static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, TCGReg c2, TCGReg v1, TCGReg v2) +{ + bool eqz = false; + + /* If one of the values is zero, put it last to match SEL*Z instructions */ + if (use_mips32r6_instructions && v1 == 0) { + v1 = v2; + v2 = 0; + cond = tcg_invert_cond(cond); + } + + switch (cond) { + case TCG_COND_EQ: + eqz = true; + /* FALLTHRU */ + case TCG_COND_NE: + if (c2 != 0) { + tcg_out_opc_reg(s, OPC_XOR, TCG_TMP0, c1, c2); + c1 = TCG_TMP0; + } + break; + + default: + /* Minimize code size by preferring a compare not requiring INV. */ + if (mips_cmp_map[cond] & MIPS_CMP_INV) { + cond = tcg_invert_cond(cond); + eqz = true; + } + tcg_out_setcond(s, cond, TCG_TMP0, c1, c2); + c1 = TCG_TMP0; + break; + } + + if (use_mips32r6_instructions) { + MIPSInsn m_opc_t = eqz ? OPC_SELEQZ : OPC_SELNEZ; + MIPSInsn m_opc_f = eqz ? OPC_SELNEZ : OPC_SELEQZ; + + if (v2 != 0) { + tcg_out_opc_reg(s, m_opc_f, TCG_TMP1, v2, c1); + } + tcg_out_opc_reg(s, m_opc_t, ret, v1, c1); + if (v2 != 0) { + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1); + } + } else { + MIPSInsn m_opc = eqz ? OPC_MOVZ : OPC_MOVN; + + tcg_out_opc_reg(s, m_opc, ret, v1, c1); + + /* This should be guaranteed via constraints */ + tcg_debug_assert(v2 == ret); + } +} + +static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) +{ + /* Note that the ABI requires the called function's address to be + loaded into T9, even if a direct branch is in range. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (uintptr_t)arg); + + /* But do try a direct branch, allowing the cpu better insn prefetch. 
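+       The J/JAL forms only reach targets within the same 256MB region, so
+       when tcg_out_opc_jmp fails we fall back to an indirect JR/JALR via T9.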
*/ + if (tail) { + if (!tcg_out_opc_jmp(s, OPC_J, arg)) { + tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_T9, 0); + } + } else { + if (!tcg_out_opc_jmp(s, OPC_JAL, arg)) { + tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); + } + } +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +{ + tcg_out_call_int(s, arg, false); + tcg_out_nop(s); +} + +#if defined(CONFIG_SOFTMMU) +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, +}; + +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, +}; + +/* Helper routines for marshalling helper function arguments into + * the correct registers and stack. + * I is where we want to put this argument, and is updated and returned + * for the next call. ARG is the argument itself. + * + * We provide routines for arguments which are: immediate, 32 bit + * value in register, 16 and 8 bit values in register (which must be zero + * extended before use) and 64 bit value in a lo:hi register pair. + */ + +static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg) +{ + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg); + } else { + tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i); + } + return i + 1; +} + +static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg) +{ + TCGReg tmp = TCG_TMP0; + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xff); + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg) +{ + TCGReg tmp = TCG_TMP0; + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff); + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg) +{ + TCGReg tmp = TCG_TMP0; + if (arg == 0) { + tmp = TCG_REG_ZERO; + } else { + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_movi(s, TCG_TYPE_REG, tmp, arg); + } + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah) +{ + i = (i + 1) & ~1; + i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al)); + i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah)); + return i; +} + +/* Perform the tlb comparison operation. The complete host address is + placed in BASE. Clobbers AT, T0, A0. */ +static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, + TCGReg addrh, TCGMemOpIdx oi, + tcg_insn_unit *label_ptr[2], bool is_load) +{ + TCGMemOp s_bits = get_memop(oi) & MO_SIZE; + int mem_index = get_mmuidx(oi); + int cmp_off + = (is_load + ? 
offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + + tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addrl, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, + (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); + tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0); + + /* Compensate for very large offsets. */ + if (add_off >= 0x8000) { + /* Most target env are smaller than 32k; none are larger than 64k. + Simplify the logic here merely to offset by 0x7ff0, giving us a + range just shy of 64k. Check this assumption. */ + QEMU_BUILD_BUG_ON(offsetof(CPUArchState, + tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ff0 + 0x7fff); + tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_A0, TCG_REG_A0, 0x7ff0); + cmp_off -= 0x7ff0; + add_off -= 0x7ff0; + } + + /* Load the (low half) tlb comparator. */ + tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, + cmp_off + (TARGET_LONG_BITS == 64 ? LO_OFF : 0)); + + /* Mask the page bits, keeping the alignment bits to compare against. + In between on 32-bit targets, load the tlb addend for the fast path. */ + tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, + TARGET_PAGE_MASK | ((1 << s_bits) - 1)); + if (TARGET_LONG_BITS == 32) { + tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); + } + tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl); + + label_ptr[0] = s->code_ptr; + tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0); + + /* Load and test the high half tlb comparator. */ + if (TARGET_LONG_BITS == 64) { + /* delay slot */ + tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + HI_OFF); + + /* Load the tlb addend for the fast path. We can't do it earlier with + 64-bit targets or we'll clobber a0 before reading the high half tlb + comparator. */ + tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); + + label_ptr[1] = s->code_ptr; + tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0); + } + + /* delay slot */ + tcg_out_opc_reg(s, OPC_ADDU, base, TCG_REG_A0, addrl); +} + +static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo, TCGReg addrhi, + void *raddr, tcg_insn_unit *label_ptr[2]) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->oi = oi; + label->datalo_reg = datalo; + label->datahi_reg = datahi; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + label->raddr = raddr; + label->label_ptr[0] = label_ptr[0]; + if (TARGET_LONG_BITS == 64) { + label->label_ptr[1] = label_ptr[1]; + } +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOpIdx oi = l->oi; + TCGMemOp opc = get_memop(oi); + TCGReg v0; + int i; + + /* resolve label address */ + reloc_pc16(l->label_ptr[0], s->code_ptr); + if (TARGET_LONG_BITS == 64) { + reloc_pc16(l->label_ptr[1], s->code_ptr); + } + + i = 1; + if (TARGET_LONG_BITS == 64) { + i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); + } else { + i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); + } + i = tcg_out_call_iarg_imm(s, i, oi); + i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr); + tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false); + /* delay slot */ + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + + v0 = l->datalo_reg; + if ((opc & MO_SIZE) == MO_64) { + /* We eliminated V0 from the possible output registers, so it + cannot be clobbered here. 
So we must move V1 first. */ + if (MIPS_BE) { + tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1); + v0 = l->datahi_reg; + } else { + tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1); + } + } + + reloc_pc16(s->code_ptr, l->raddr); + tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); + /* delay slot */ + tcg_out_mov(s, TCG_TYPE_REG, v0, TCG_REG_V0); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOpIdx oi = l->oi; + TCGMemOp opc = get_memop(oi); + TCGMemOp s_bits = opc & MO_SIZE; + int i; + + /* resolve label address */ + reloc_pc16(l->label_ptr[0], s->code_ptr); + if (TARGET_LONG_BITS == 64) { + reloc_pc16(l->label_ptr[1], s->code_ptr); + } + + i = 1; + if (TARGET_LONG_BITS == 64) { + i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); + } else { + i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); + } + switch (s_bits) { + case MO_8: + i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg); + break; + case MO_16: + i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg); + break; + case MO_32: + i = tcg_out_call_iarg_reg(s, i, l->datalo_reg); + break; + case MO_64: + i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg); + break; + default: + tcg_abort(); + } + i = tcg_out_call_iarg_imm(s, i, oi); + + /* Tail call to the store helper. Thus force the return address + computation to take place in the return address register. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr); + i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA); + tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true); + /* delay slot */ + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); +} +#endif + +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, + TCGReg base, TCGMemOp opc) +{ + switch (opc & (MO_SSIZE | MO_BSWAP)) { + case MO_UB: + tcg_out_opc_imm(s, OPC_LBU, datalo, base, 0); + break; + case MO_SB: + tcg_out_opc_imm(s, OPC_LB, datalo, base, 0); + break; + case MO_UW | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); + tcg_out_bswap16(s, datalo, TCG_TMP1); + break; + case MO_UW: + tcg_out_opc_imm(s, OPC_LHU, datalo, base, 0); + break; + case MO_SW | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); + tcg_out_bswap16s(s, datalo, TCG_TMP1); + break; + case MO_SW: + tcg_out_opc_imm(s, OPC_LH, datalo, base, 0); + break; + case MO_UL | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 0); + tcg_out_bswap32(s, datalo, TCG_TMP1); + break; + case MO_UL: + tcg_out_opc_imm(s, OPC_LW, datalo, base, 0); + break; + case MO_Q | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, HI_OFF); + tcg_out_bswap32(s, datalo, TCG_TMP1); + tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, LO_OFF); + tcg_out_bswap32(s, datahi, TCG_TMP1); + break; + case MO_Q: + tcg_out_opc_imm(s, OPC_LW, datalo, base, LO_OFF); + tcg_out_opc_imm(s, OPC_LW, datahi, base, HI_OFF); + break; + default: + tcg_abort(); + } +} + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) +{ + TCGReg addr_regl, addr_regh __attribute__((unused)); + TCGReg data_regl, data_regh; + TCGMemOpIdx oi; + TCGMemOp opc; +#if defined(CONFIG_SOFTMMU) + tcg_insn_unit *label_ptr[2]; +#endif + /* Note that we've eliminated V0 from the output registers, + so we won't overwrite the base register during loading. */ + TCGReg base = TCG_REG_V0; + + data_regl = *args++; + data_regh = (is_64 ? *args++ : 0); + addr_regl = *args++; + addr_regh = (TARGET_LONG_BITS == 64 ? 
*args++ : 0); + oi = *args++; + opc = get_memop(oi); + +#if defined(CONFIG_SOFTMMU) + tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 1); + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); + add_qemu_ldst_label(s, 1, oi, data_regl, data_regh, addr_regl, addr_regh, + s->code_ptr, label_ptr); +#else + if (guest_base == 0 && data_regl != addr_regl) { + base = addr_regl; + } else if (guest_base == (int16_t)guest_base) { + tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base); + tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); + } + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); +#endif +} + +static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, + TCGReg base, TCGMemOp opc) +{ + switch (opc & (MO_SIZE | MO_BSWAP)) { + case MO_8: + tcg_out_opc_imm(s, OPC_SB, datalo, base, 0); + break; + + case MO_16 | MO_BSWAP: + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, datalo, 0xffff); + tcg_out_bswap16(s, TCG_TMP1, TCG_TMP1); + datalo = TCG_TMP1; + /* FALLTHRU */ + case MO_16: + tcg_out_opc_imm(s, OPC_SH, datalo, base, 0); + break; + + case MO_32 | MO_BSWAP: + tcg_out_bswap32(s, TCG_TMP1, datalo); + datalo = TCG_TMP1; + /* FALLTHRU */ + case MO_32: + tcg_out_opc_imm(s, OPC_SW, datalo, base, 0); + break; + + case MO_64 | MO_BSWAP: + tcg_out_bswap32(s, TCG_TMP1, datalo); + tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, HI_OFF); + tcg_out_bswap32(s, TCG_TMP1, datahi); + tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, LO_OFF); + break; + case MO_64: + tcg_out_opc_imm(s, OPC_SW, datalo, base, LO_OFF); + tcg_out_opc_imm(s, OPC_SW, datahi, base, HI_OFF); + break; + + default: + tcg_abort(); + } +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) +{ + TCGReg addr_regl, addr_regh __attribute__((unused)); + TCGReg data_regl, data_regh, base; + TCGMemOpIdx oi; + TCGMemOp opc; +#if defined(CONFIG_SOFTMMU) + tcg_insn_unit *label_ptr[2]; +#endif + + data_regl = *args++; + data_regh = (is_64 ? *args++ : 0); + addr_regl = *args++; + addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0); + oi = *args++; + opc = get_memop(oi); + +#if defined(CONFIG_SOFTMMU) + /* Note that we eliminated the helper's address argument, + so we can reuse that for the base. */ + base = (TARGET_LONG_BITS == 32 ? 
TCG_REG_A1 : TCG_REG_A2); + tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 0); + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); + add_qemu_ldst_label(s, 0, oi, data_regl, data_regh, addr_regl, addr_regh, + s->code_ptr, label_ptr); +#else + if (guest_base == 0) { + base = addr_regl; + } else { + base = TCG_REG_A0; + if (guest_base == (int16_t)guest_base) { + tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base); + tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); + } + } + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); +#endif +} + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg *args, const int *const_args) +{ + MIPSInsn i1, i2; + TCGArg a0, a1, a2; + int c2; + + a0 = args[0]; + a1 = args[1]; + a2 = args[2]; + c2 = const_args[2]; + + switch (opc) { + case INDEX_op_exit_tb: + { + TCGReg b0 = TCG_REG_ZERO; + + if (a0 & ~0xffff) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff); + b0 = TCG_REG_V0; + } + if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, + (uintptr_t)tb_ret_addr); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); + } + tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff); + } + break; + case INDEX_op_goto_tb: + if (s->tb_jmp_offset) { + /* direct jump method */ + s->tb_jmp_offset[a0] = tcg_current_code_size(s); + /* Avoid clobbering the address during retranslation. */ + tcg_out32(s, OPC_J | (*(uint32_t *)s->code_ptr & 0x3ffffff)); + } else { + /* indirect jump method */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, + (uintptr_t)(s->tb_next + a0)); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); + } + tcg_out_nop(s); + s->tb_next_offset[a0] = tcg_current_code_size(s); + break; + case INDEX_op_br: + tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, + arg_label(a0)); + break; + + case INDEX_op_ld8u_i32: + i1 = OPC_LBU; + goto do_ldst; + case INDEX_op_ld8s_i32: + i1 = OPC_LB; + goto do_ldst; + case INDEX_op_ld16u_i32: + i1 = OPC_LHU; + goto do_ldst; + case INDEX_op_ld16s_i32: + i1 = OPC_LH; + goto do_ldst; + case INDEX_op_ld_i32: + i1 = OPC_LW; + goto do_ldst; + case INDEX_op_st8_i32: + i1 = OPC_SB; + goto do_ldst; + case INDEX_op_st16_i32: + i1 = OPC_SH; + goto do_ldst; + case INDEX_op_st_i32: + i1 = OPC_SW; + do_ldst: + tcg_out_ldst(s, i1, a0, a1, a2); + break; + + case INDEX_op_add_i32: + i1 = OPC_ADDU, i2 = OPC_ADDIU; + goto do_binary; + case INDEX_op_or_i32: + i1 = OPC_OR, i2 = OPC_ORI; + goto do_binary; + case INDEX_op_xor_i32: + i1 = OPC_XOR, i2 = OPC_XORI; + do_binary: + if (c2) { + tcg_out_opc_imm(s, i2, a0, a1, a2); + break; + } + do_binaryv: + tcg_out_opc_reg(s, i1, a0, a1, a2); + break; + + case INDEX_op_sub_i32: + if (c2) { + tcg_out_opc_imm(s, OPC_ADDIU, a0, a1, -a2); + break; + } + i1 = OPC_SUBU; + goto do_binary; + case INDEX_op_and_i32: + if (c2 && a2 != (uint16_t)a2) { + int msb = ctz32(~a2) - 1; + assert(use_mips32r2_instructions); + assert(is_p2m1(a2)); + tcg_out_opc_bf(s, OPC_EXT, a0, a1, msb, 0); + break; + } + i1 = OPC_AND, i2 = OPC_ANDI; + goto do_binary; + case INDEX_op_nor_i32: + i1 = OPC_NOR; + goto do_binaryv; + + case INDEX_op_mul_i32: + if (use_mips32_instructions) { + tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); + break; + } + i1 = OPC_MULT, i2 = OPC_MFLO; + goto do_hilo1; + case INDEX_op_mulsh_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MUH, a0, a1, a2); + break; + } + i1 = OPC_MULT, i2 = OPC_MFHI; + goto do_hilo1; + case 
INDEX_op_muluh_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MUHU, a0, a1, a2); + break; + } + i1 = OPC_MULTU, i2 = OPC_MFHI; + goto do_hilo1; + case INDEX_op_div_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_DIV_R6, a0, a1, a2); + break; + } + i1 = OPC_DIV, i2 = OPC_MFLO; + goto do_hilo1; + case INDEX_op_divu_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_DIVU_R6, a0, a1, a2); + break; + } + i1 = OPC_DIVU, i2 = OPC_MFLO; + goto do_hilo1; + case INDEX_op_rem_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MOD, a0, a1, a2); + break; + } + i1 = OPC_DIV, i2 = OPC_MFHI; + goto do_hilo1; + case INDEX_op_remu_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MODU, a0, a1, a2); + break; + } + i1 = OPC_DIVU, i2 = OPC_MFHI; + do_hilo1: + tcg_out_opc_reg(s, i1, 0, a1, a2); + tcg_out_opc_reg(s, i2, a0, 0, 0); + break; + + case INDEX_op_muls2_i32: + i1 = OPC_MULT; + goto do_hilo2; + case INDEX_op_mulu2_i32: + i1 = OPC_MULTU; + do_hilo2: + tcg_out_opc_reg(s, i1, 0, a2, args[3]); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); + tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); + break; + + case INDEX_op_not_i32: + i1 = OPC_NOR; + goto do_unary; + case INDEX_op_bswap16_i32: + i1 = OPC_WSBH; + goto do_unary; + case INDEX_op_ext8s_i32: + i1 = OPC_SEB; + goto do_unary; + case INDEX_op_ext16s_i32: + i1 = OPC_SEH; + do_unary: + tcg_out_opc_reg(s, i1, a0, TCG_REG_ZERO, a1); + break; + + case INDEX_op_sar_i32: + i1 = OPC_SRAV, i2 = OPC_SRA; + goto do_shift; + case INDEX_op_shl_i32: + i1 = OPC_SLLV, i2 = OPC_SLL; + goto do_shift; + case INDEX_op_shr_i32: + i1 = OPC_SRLV, i2 = OPC_SRL; + goto do_shift; + case INDEX_op_rotr_i32: + i1 = OPC_ROTRV, i2 = OPC_ROTR; + do_shift: + if (c2) { + tcg_out_opc_sa(s, i2, a0, a1, a2); + } else { + tcg_out_opc_reg(s, i1, a0, a2, a1); + } + break; + case INDEX_op_rotl_i32: + if (c2) { + tcg_out_opc_sa(s, OPC_ROTR, a0, a1, 32 - a2); + } else { + tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, TCG_REG_ZERO, a2); + tcg_out_opc_reg(s, OPC_ROTRV, a0, TCG_TMP0, a1); + } + break; + + case INDEX_op_bswap32_i32: + tcg_out_opc_reg(s, OPC_WSBH, a0, 0, a1); + tcg_out_opc_sa(s, OPC_ROTR, a0, a0, 16); + break; + + case INDEX_op_deposit_i32: + tcg_out_opc_bf(s, OPC_INS, a0, a2, args[3] + args[4] - 1, args[3]); + break; + + case INDEX_op_brcond_i32: + tcg_out_brcond(s, a2, a0, a1, arg_label(args[3])); + break; + case INDEX_op_brcond2_i32: + tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], arg_label(args[5])); + break; + + case INDEX_op_movcond_i32: + tcg_out_movcond(s, args[5], a0, a1, a2, args[3], args[4]); + break; + + case INDEX_op_setcond_i32: + tcg_out_setcond(s, args[3], a0, a1, a2); + break; + case INDEX_op_setcond2_i32: + tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]); + break; + + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, args, false); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args, true); + break; + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, args, false); + break; + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args, true); + break; + + case INDEX_op_add2_i32: + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], + const_args[4], const_args[5], false); + break; + case INDEX_op_sub2_i32: + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], + const_args[4], const_args[5], true); + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. 
*/ + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + default: + tcg_abort(); + } +} + +static const TCGTargetOpDef mips_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_br, { } }, + + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "rZ", "r" } }, + { INDEX_op_st16_i32, { "rZ", "r" } }, + { INDEX_op_st_i32, { "rZ", "r" } }, + + { INDEX_op_add_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, +#if !use_mips32r6_instructions + { INDEX_op_muls2_i32, { "r", "r", "rZ", "rZ" } }, + { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } }, +#endif + { INDEX_op_mulsh_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_muluh_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_div_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_divu_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_rem_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_remu_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_sub_i32, { "r", "rZ", "rN" } }, + + { INDEX_op_and_i32, { "r", "rZ", "rIK" } }, + { INDEX_op_nor_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_not_i32, { "r", "rZ" } }, + { INDEX_op_or_i32, { "r", "rZ", "rIZ" } }, + { INDEX_op_xor_i32, { "r", "rZ", "rIZ" } }, + + { INDEX_op_shl_i32, { "r", "rZ", "ri" } }, + { INDEX_op_shr_i32, { "r", "rZ", "ri" } }, + { INDEX_op_sar_i32, { "r", "rZ", "ri" } }, + { INDEX_op_rotr_i32, { "r", "rZ", "ri" } }, + { INDEX_op_rotl_i32, { "r", "rZ", "ri" } }, + + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, + + { INDEX_op_ext8s_i32, { "r", "rZ" } }, + { INDEX_op_ext16s_i32, { "r", "rZ" } }, + + { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, + + { INDEX_op_brcond_i32, { "rZ", "rZ" } }, +#if use_mips32r6_instructions + { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, +#else + { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } }, +#endif + { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, + + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } }, + { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } }, + { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } }, + +#if TARGET_LONG_BITS == 32 + { INDEX_op_qemu_ld_i32, { "L", "lZ" } }, + { INDEX_op_qemu_st_i32, { "SZ", "SZ" } }, + { INDEX_op_qemu_ld_i64, { "L", "L", "lZ" } }, + { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ" } }, +#else + { INDEX_op_qemu_ld_i32, { "L", "lZ", "lZ" } }, + { INDEX_op_qemu_st_i32, { "SZ", "SZ", "SZ" } }, + { INDEX_op_qemu_ld_i64, { "L", "L", "lZ", "lZ" } }, + { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ", "SZ" } }, +#endif + { -1 }, +}; + +static int tcg_target_callee_save_regs[] = { + TCG_REG_S0, /* used for the global env (TCG_AREG0) */ + TCG_REG_S1, + TCG_REG_S2, + TCG_REG_S3, + TCG_REG_S4, + TCG_REG_S5, + TCG_REG_S6, + TCG_REG_S7, + TCG_REG_S8, + TCG_REG_RA, /* should be last for ABI compliance */ +}; + +/* The Linux kernel doesn't provide any information about the available + instruction set. Probe it using a signal handler. 
*/ + + +#ifndef use_movnz_instructions +bool use_movnz_instructions = false; +#endif + +#ifndef use_mips32_instructions +bool use_mips32_instructions = false; +#endif + +#ifndef use_mips32r2_instructions +bool use_mips32r2_instructions = false; +#endif + +static volatile sig_atomic_t got_sigill; + +static void sigill_handler(int signo, siginfo_t *si, void *data) +{ + /* Skip the faulty instruction */ + ucontext_t *uc = (ucontext_t *)data; + uc->uc_mcontext.pc += 4; + + got_sigill = 1; +} + +static void tcg_target_detect_isa(void) +{ + struct sigaction sa_old, sa_new; + + memset(&sa_new, 0, sizeof(sa_new)); + sa_new.sa_flags = SA_SIGINFO; + sa_new.sa_sigaction = sigill_handler; + sigaction(SIGILL, &sa_new, &sa_old); + + /* Probe for movn/movz, necessary to implement movcond. */ +#ifndef use_movnz_instructions + got_sigill = 0; + asm volatile(".set push\n" + ".set mips32\n" + "movn $zero, $zero, $zero\n" + "movz $zero, $zero, $zero\n" + ".set pop\n" + : : : ); + use_movnz_instructions = !got_sigill; +#endif + + /* Probe for MIPS32 instructions. As no subsetting is allowed + by the specification, it is only necessary to probe for one + of the instructions. */ +#ifndef use_mips32_instructions + got_sigill = 0; + asm volatile(".set push\n" + ".set mips32\n" + "mul $zero, $zero\n" + ".set pop\n" + : : : ); + use_mips32_instructions = !got_sigill; +#endif + + /* Probe for MIPS32r2 instructions if MIPS32 instructions are + available. As no subsetting is allowed by the specification, + it is only necessary to probe for one of the instructions. */ +#ifndef use_mips32r2_instructions + if (use_mips32_instructions) { + got_sigill = 0; + asm volatile(".set push\n" + ".set mips32r2\n" + "seb $zero, $zero\n" + ".set pop\n" + : : : ); + use_mips32r2_instructions = !got_sigill; + } +#endif + + sigaction(SIGILL, &sa_old, NULL); +} + +/* Generate global QEMU prologue and epilogue code */ +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int i, frame_size; + + /* reserve some stack space, also for TCG temps. 
*/ + frame_size = ARRAY_SIZE(tcg_target_callee_save_regs) * 4 + + TCG_STATIC_CALL_ARGS_SIZE + + CPU_TEMP_BUF_NLONGS * sizeof(long); + frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & + ~(TCG_TARGET_STACK_ALIGN - 1); + tcg_set_frame(s, TCG_REG_SP, ARRAY_SIZE(tcg_target_callee_save_regs) * 4 + + TCG_STATIC_CALL_ARGS_SIZE, + CPU_TEMP_BUF_NLONGS * sizeof(long)); + + /* TB prologue */ + tcg_out_addi(s, TCG_REG_SP, -frame_size); + for(i = 0 ; i < ARRAY_SIZE(tcg_target_callee_save_regs) ; i++) { + tcg_out_st(s, TCG_TYPE_I32, tcg_target_callee_save_regs[i], + TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE + i * 4); + } + + /* Call generated code */ + tcg_out_opc_reg(s, OPC_JR, 0, tcg_target_call_iarg_regs[1], 0); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + tb_ret_addr = s->code_ptr; + + /* TB epilogue */ + for(i = 0 ; i < ARRAY_SIZE(tcg_target_callee_save_regs) ; i++) { + tcg_out_ld(s, TCG_TYPE_I32, tcg_target_callee_save_regs[i], + TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE + i * 4); + } + + tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0); + tcg_out_addi(s, TCG_REG_SP, frame_size); +} + +static void tcg_target_init(TCGContext *s) +{ + tcg_target_detect_isa(); + tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I32], 0xffffffff); + tcg_regset_set(tcg_target_call_clobber_regs, + (1 << TCG_REG_V0) | + (1 << TCG_REG_V1) | + (1 << TCG_REG_A0) | + (1 << TCG_REG_A1) | + (1 << TCG_REG_A2) | + (1 << TCG_REG_A3) | + (1 << TCG_REG_T0) | + (1 << TCG_REG_T1) | + (1 << TCG_REG_T2) | + (1 << TCG_REG_T3) | + (1 << TCG_REG_T4) | + (1 << TCG_REG_T5) | + (1 << TCG_REG_T6) | + (1 << TCG_REG_T7) | + (1 << TCG_REG_T8) | + (1 << TCG_REG_T9)); + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO); /* zero register */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_K0); /* kernel use only */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_K1); /* kernel use only */ + tcg_regset_set_reg(s->reserved_regs, TCG_TMP0); /* internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_TMP1); /* internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return address */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); /* stack pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */ + + tcg_add_target_add_op_defs(mips_op_defs); +} + +void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) +{ + uint32_t *ptr = (uint32_t *)jmp_addr; + *ptr = deposit32(*ptr, 0, 26, addr >> 2); + flush_icache_range(jmp_addr, jmp_addr + 4); +} diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c deleted file mode 100644 index c593344db1..0000000000 --- a/tcg/ppc/tcg-target.c +++ /dev/null @@ -1,2762 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "tcg-be-ldst.h" - -#if defined _CALL_DARWIN || defined __APPLE__ -#define TCG_TARGET_CALL_DARWIN -#endif -#ifdef _CALL_SYSV -# define TCG_TARGET_CALL_ALIGN_ARGS 1 -#endif - -/* For some memory operations, we need a scratch that isn't R0. For the AIX - calling convention, we can re-use the TOC register since we'll be reloading - it at every call. Otherwise R12 will do nicely as neither a call-saved - register nor a parameter register. */ -#ifdef _CALL_AIX -# define TCG_REG_TMP1 TCG_REG_R2 -#else -# define TCG_REG_TMP1 TCG_REG_R12 -#endif - -/* For the 64-bit target, we don't like the 5 insn sequence needed to build - full 64-bit addresses. Better to have a base register to which we can - apply a 32-bit displacement. - - There are generally three items of interest: - (1) helper functions in the main executable, - (2) TranslationBlock data structures, - (3) the return address in the epilogue. - - For user-only, we USE_STATIC_CODE_GEN_BUFFER, so the code_gen_buffer - will be inside the main executable, and thus near enough to make a - pointer to the epilogue be within 2GB of all helper functions. - - For softmmu, we'll let the kernel choose the address of code_gen_buffer, - and odds are it'll be somewhere close to the main malloc arena, and so - a pointer to the epilogue will be within 2GB of the TranslationBlocks. - - For --enable-pie, everything will be kinda near everything else, - somewhere in high memory. - - Thus we choose to keep the return address in a call-saved register. */ -#define TCG_REG_RA TCG_REG_R31 -#define USE_REG_RA (TCG_TARGET_REG_BITS == 64) - -/* Shorthand for size of a pointer. Avoid promotion to unsigned. */ -#define SZP ((int)sizeof(void *)) - -/* Shorthand for size of a register. 
*/ -#define SZR (TCG_TARGET_REG_BITS / 8) - -#define TCG_CT_CONST_S16 0x100 -#define TCG_CT_CONST_U16 0x200 -#define TCG_CT_CONST_S32 0x400 -#define TCG_CT_CONST_U32 0x800 -#define TCG_CT_CONST_ZERO 0x1000 -#define TCG_CT_CONST_MONE 0x2000 - -static tcg_insn_unit *tb_ret_addr; - -#include "elf.h" -static bool have_isa_2_06; -#define HAVE_ISA_2_06 have_isa_2_06 -#define HAVE_ISEL have_isa_2_06 - -#ifndef CONFIG_SOFTMMU -#define TCG_GUEST_BASE_REG 30 -#endif - -#ifndef NDEBUG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "r0", - "r1", - "r2", - "r3", - "r4", - "r5", - "r6", - "r7", - "r8", - "r9", - "r10", - "r11", - "r12", - "r13", - "r14", - "r15", - "r16", - "r17", - "r18", - "r19", - "r20", - "r21", - "r22", - "r23", - "r24", - "r25", - "r26", - "r27", - "r28", - "r29", - "r30", - "r31" -}; -#endif - -static const int tcg_target_reg_alloc_order[] = { - TCG_REG_R14, /* call saved registers */ - TCG_REG_R15, - TCG_REG_R16, - TCG_REG_R17, - TCG_REG_R18, - TCG_REG_R19, - TCG_REG_R20, - TCG_REG_R21, - TCG_REG_R22, - TCG_REG_R23, - TCG_REG_R24, - TCG_REG_R25, - TCG_REG_R26, - TCG_REG_R27, - TCG_REG_R28, - TCG_REG_R29, - TCG_REG_R30, - TCG_REG_R31, - TCG_REG_R12, /* call clobbered, non-arguments */ - TCG_REG_R11, - TCG_REG_R2, - TCG_REG_R13, - TCG_REG_R10, /* call clobbered, arguments */ - TCG_REG_R9, - TCG_REG_R8, - TCG_REG_R7, - TCG_REG_R6, - TCG_REG_R5, - TCG_REG_R4, - TCG_REG_R3, -}; - -static const int tcg_target_call_iarg_regs[] = { - TCG_REG_R3, - TCG_REG_R4, - TCG_REG_R5, - TCG_REG_R6, - TCG_REG_R7, - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10 -}; - -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_R3, - TCG_REG_R4 -}; - -static const int tcg_target_callee_save_regs[] = { -#ifdef TCG_TARGET_CALL_DARWIN - TCG_REG_R11, -#endif - TCG_REG_R14, - TCG_REG_R15, - TCG_REG_R16, - TCG_REG_R17, - TCG_REG_R18, - TCG_REG_R19, - TCG_REG_R20, - TCG_REG_R21, - TCG_REG_R22, - TCG_REG_R23, - TCG_REG_R24, - TCG_REG_R25, - TCG_REG_R26, - TCG_REG_R27, /* currently used for the global env */ - TCG_REG_R28, - TCG_REG_R29, - TCG_REG_R30, - TCG_REG_R31 -}; - -static inline bool in_range_b(tcg_target_long target) -{ - return target == sextract64(target, 0, 26); -} - -static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); - assert(in_range_b(disp)); - return disp & 0x3fffffc; -} - -static void reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - *pc = (*pc & ~0x3fffffc) | reloc_pc24_val(pc, target); -} - -static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); - assert(disp == (int16_t) disp); - return disp & 0xfffc; -} - -static void reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - *pc = (*pc & ~0xfffc) | reloc_pc14_val(pc, target); -} - -static inline void tcg_out_b_noaddr(TCGContext *s, int insn) -{ - unsigned retrans = *s->code_ptr & 0x3fffffc; - tcg_out32(s, insn | retrans); -} - -static inline void tcg_out_bc_noaddr(TCGContext *s, int insn) -{ - unsigned retrans = *s->code_ptr & 0xfffc; - tcg_out32(s, insn | retrans); -} - -static void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - tcg_insn_unit *target = (tcg_insn_unit *)value; - - assert(addend == 0); - switch (type) { - case R_PPC_REL14: - reloc_pc14(code_ptr, target); - break; - case R_PPC_REL24: - reloc_pc24(code_ptr, target); - break; - default: - tcg_abort(); - } -} - -/* parse target specific constraints */ -static int 
target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str; - - ct_str = *pct_str; - switch (ct_str[0]) { - case 'A': case 'B': case 'C': case 'D': - ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A'); - break; - case 'r': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - break; - case 'L': /* qemu_ld constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); -#ifdef CONFIG_SOFTMMU - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); -#endif - break; - case 'S': /* qemu_st constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); -#ifdef CONFIG_SOFTMMU - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6); -#endif - break; - case 'I': - ct->ct |= TCG_CT_CONST_S16; - break; - case 'J': - ct->ct |= TCG_CT_CONST_U16; - break; - case 'M': - ct->ct |= TCG_CT_CONST_MONE; - break; - case 'T': - ct->ct |= TCG_CT_CONST_S32; - break; - case 'U': - ct->ct |= TCG_CT_CONST_U32; - break; - case 'Z': - ct->ct |= TCG_CT_CONST_ZERO; - break; - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - return 0; -} - -/* test if a constant matches the constraint */ -static int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - int ct = arg_ct->ct; - if (ct & TCG_CT_CONST) { - return 1; - } - - /* The only 32-bit constraint we use aside from - TCG_CT_CONST is TCG_CT_CONST_S16. */ - if (type == TCG_TYPE_I32) { - val = (int32_t)val; - } - - if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { - return 1; - } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { - return 1; - } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { - return 1; - } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { - return 1; - } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { - return 1; - } else if ((ct & TCG_CT_CONST_MONE) && val == -1) { - return 1; - } - return 0; -} - -#define OPCD(opc) ((opc)<<26) -#define XO19(opc) (OPCD(19)|((opc)<<1)) -#define MD30(opc) (OPCD(30)|((opc)<<2)) -#define MDS30(opc) (OPCD(30)|((opc)<<1)) -#define XO31(opc) (OPCD(31)|((opc)<<1)) -#define XO58(opc) (OPCD(58)|(opc)) -#define XO62(opc) (OPCD(62)|(opc)) - -#define B OPCD( 18) -#define BC OPCD( 16) -#define LBZ OPCD( 34) -#define LHZ OPCD( 40) -#define LHA OPCD( 42) -#define LWZ OPCD( 32) -#define STB OPCD( 38) -#define STH OPCD( 44) -#define STW OPCD( 36) - -#define STD XO62( 0) -#define STDU XO62( 1) -#define STDX XO31(149) - -#define LD XO58( 0) -#define LDX XO31( 21) -#define LDU XO58( 1) -#define LWA XO58( 2) -#define LWAX XO31(341) - -#define ADDIC OPCD( 12) -#define ADDI OPCD( 14) -#define ADDIS OPCD( 15) -#define ORI OPCD( 24) -#define ORIS OPCD( 25) -#define XORI OPCD( 26) -#define XORIS OPCD( 27) -#define ANDI OPCD( 28) -#define ANDIS OPCD( 29) -#define MULLI OPCD( 7) -#define CMPLI OPCD( 10) -#define CMPI OPCD( 11) -#define SUBFIC OPCD( 8) - -#define LWZU OPCD( 33) -#define STWU OPCD( 37) - -#define RLWIMI OPCD( 20) -#define RLWINM OPCD( 21) -#define RLWNM OPCD( 23) - -#define RLDICL MD30( 0) -#define RLDICR MD30( 1) -#define RLDIMI MD30( 3) -#define RLDCL MDS30( 8) - -#define BCLR XO19( 16) -#define BCCTR XO19(528) -#define CRAND XO19(257) -#define CRANDC XO19(129) -#define CRNAND XO19(225) -#define CROR 
XO19(449) -#define CRNOR XO19( 33) - -#define EXTSB XO31(954) -#define EXTSH XO31(922) -#define EXTSW XO31(986) -#define ADD XO31(266) -#define ADDE XO31(138) -#define ADDME XO31(234) -#define ADDZE XO31(202) -#define ADDC XO31( 10) -#define AND XO31( 28) -#define SUBF XO31( 40) -#define SUBFC XO31( 8) -#define SUBFE XO31(136) -#define SUBFME XO31(232) -#define SUBFZE XO31(200) -#define OR XO31(444) -#define XOR XO31(316) -#define MULLW XO31(235) -#define MULHW XO31( 75) -#define MULHWU XO31( 11) -#define DIVW XO31(491) -#define DIVWU XO31(459) -#define CMP XO31( 0) -#define CMPL XO31( 32) -#define LHBRX XO31(790) -#define LWBRX XO31(534) -#define LDBRX XO31(532) -#define STHBRX XO31(918) -#define STWBRX XO31(662) -#define STDBRX XO31(660) -#define MFSPR XO31(339) -#define MTSPR XO31(467) -#define SRAWI XO31(824) -#define NEG XO31(104) -#define MFCR XO31( 19) -#define MFOCRF (MFCR | (1u << 20)) -#define NOR XO31(124) -#define CNTLZW XO31( 26) -#define CNTLZD XO31( 58) -#define ANDC XO31( 60) -#define ORC XO31(412) -#define EQV XO31(284) -#define NAND XO31(476) -#define ISEL XO31( 15) - -#define MULLD XO31(233) -#define MULHD XO31( 73) -#define MULHDU XO31( 9) -#define DIVD XO31(489) -#define DIVDU XO31(457) - -#define LBZX XO31( 87) -#define LHZX XO31(279) -#define LHAX XO31(343) -#define LWZX XO31( 23) -#define STBX XO31(215) -#define STHX XO31(407) -#define STWX XO31(151) - -#define SPR(a, b) ((((a)<<5)|(b))<<11) -#define LR SPR(8, 0) -#define CTR SPR(9, 0) - -#define SLW XO31( 24) -#define SRW XO31(536) -#define SRAW XO31(792) - -#define SLD XO31( 27) -#define SRD XO31(539) -#define SRAD XO31(794) -#define SRADI XO31(413<<1) - -#define TW XO31( 4) -#define TRAP (TW | TO(31)) - -#define NOP ORI /* ori 0,0,0 */ - -#define RT(r) ((r)<<21) -#define RS(r) ((r)<<21) -#define RA(r) ((r)<<16) -#define RB(r) ((r)<<11) -#define TO(t) ((t)<<21) -#define SH(s) ((s)<<11) -#define MB(b) ((b)<<6) -#define ME(e) ((e)<<1) -#define BO(o) ((o)<<21) -#define MB64(b) ((b)<<5) -#define FXM(b) (1 << (19 - (b))) - -#define LK 1 - -#define TAB(t, a, b) (RT(t) | RA(a) | RB(b)) -#define SAB(s, a, b) (RS(s) | RA(a) | RB(b)) -#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff)) -#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff)) - -#define BF(n) ((n)<<23) -#define BI(n, c) (((c)+((n)*4))<<16) -#define BT(n, c) (((c)+((n)*4))<<21) -#define BA(n, c) (((c)+((n)*4))<<16) -#define BB(n, c) (((c)+((n)*4))<<11) -#define BC_(n, c) (((c)+((n)*4))<<6) - -#define BO_COND_TRUE BO(12) -#define BO_COND_FALSE BO( 4) -#define BO_ALWAYS BO(20) - -enum { - CR_LT, - CR_GT, - CR_EQ, - CR_SO -}; - -static const uint32_t tcg_to_bc[] = { - [TCG_COND_EQ] = BC | BI(7, CR_EQ) | BO_COND_TRUE, - [TCG_COND_NE] = BC | BI(7, CR_EQ) | BO_COND_FALSE, - [TCG_COND_LT] = BC | BI(7, CR_LT) | BO_COND_TRUE, - [TCG_COND_GE] = BC | BI(7, CR_LT) | BO_COND_FALSE, - [TCG_COND_LE] = BC | BI(7, CR_GT) | BO_COND_FALSE, - [TCG_COND_GT] = BC | BI(7, CR_GT) | BO_COND_TRUE, - [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE, - [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE, - [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE, - [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE, -}; - -/* The low bit here is set if the RA and RB fields must be inverted. 
*/ -static const uint32_t tcg_to_isel[] = { - [TCG_COND_EQ] = ISEL | BC_(7, CR_EQ), - [TCG_COND_NE] = ISEL | BC_(7, CR_EQ) | 1, - [TCG_COND_LT] = ISEL | BC_(7, CR_LT), - [TCG_COND_GE] = ISEL | BC_(7, CR_LT) | 1, - [TCG_COND_LE] = ISEL | BC_(7, CR_GT) | 1, - [TCG_COND_GT] = ISEL | BC_(7, CR_GT), - [TCG_COND_LTU] = ISEL | BC_(7, CR_LT), - [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1, - [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1, - [TCG_COND_GTU] = ISEL | BC_(7, CR_GT), -}; - -static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, - TCGReg base, tcg_target_long offset); - -static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) -{ - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); - if (ret != arg) { - tcg_out32(s, OR | SAB(arg, ret, arg)); - } -} - -static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, - int sh, int mb) -{ - assert(TCG_TARGET_REG_BITS == 64); - sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1); - mb = MB64((mb >> 5) | ((mb << 1) & 0x3f)); - tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb); -} - -static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs, - int sh, int mb, int me) -{ - tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me)); -} - -static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src) -{ - tcg_out_rld(s, RLDICL, dst, src, 0, 32); -} - -static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c) -{ - tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c); -} - -static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c) -{ - tcg_out_rld(s, RLDICR, dst, src, c, 63 - c); -} - -static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c) -{ - tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31); -} - -static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c) -{ - tcg_out_rld(s, RLDICL, dst, src, 64 - c, c); -} - -static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg) -{ - if (arg == (int16_t) arg) { - tcg_out32(s, ADDI | TAI(ret, 0, arg)); - } else { - tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16)); - if (arg & 0xffff) { - tcg_out32(s, ORI | SAI(ret, ret, arg)); - } - } -} - -static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, - tcg_target_long arg) -{ - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); - if (type == TCG_TYPE_I32 || arg == (int32_t)arg) { - tcg_out_movi32(s, ret, arg); - } else if (arg == (uint32_t)arg && !(arg & 0x8000)) { - tcg_out32(s, ADDI | TAI(ret, 0, arg)); - tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); - } else { - int32_t high; - - if (USE_REG_RA) { - intptr_t diff = arg - (intptr_t)tb_ret_addr; - if (diff == (int32_t)diff) { - tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_RA, diff); - return; - } - } - - high = arg >> 31 >> 1; - tcg_out_movi32(s, ret, high); - if (high) { - tcg_out_shli64(s, ret, ret, 32); - } - if (arg & 0xffff0000) { - tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); - } - if (arg & 0xffff) { - tcg_out32(s, ORI | SAI(ret, ret, arg)); - } - } -} - -static bool mask_operand(uint32_t c, int *mb, int *me) -{ - uint32_t lsb, test; - - /* Accept a bit pattern like: - 0....01....1 - 1....10....0 - 0..01..10..0 - Keep track of the transitions. */ - if (c == 0 || c == -1) { - return false; - } - test = c; - lsb = test & -test; - test += lsb; - if (test & (test - 1)) { - return false; - } - - *me = clz32(lsb); - *mb = test ? 
clz32(test & -test) + 1 : 0; - return true; -} - -static bool mask64_operand(uint64_t c, int *mb, int *me) -{ - uint64_t lsb; - - if (c == 0) { - return false; - } - - lsb = c & -c; - /* Accept 1..10..0. */ - if (c == -lsb) { - *mb = 0; - *me = clz64(lsb); - return true; - } - /* Accept 0..01..1. */ - if (lsb == 1 && (c & (c + 1)) == 0) { - *mb = clz64(c + 1) + 1; - *me = 63; - return true; - } - return false; -} - -static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) -{ - int mb, me; - - if (mask_operand(c, &mb, &me)) { - tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); - } else if ((c & 0xffff) == c) { - tcg_out32(s, ANDI | SAI(src, dst, c)); - return; - } else if ((c & 0xffff0000) == c) { - tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); - return; - } else { - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c); - tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); - } -} - -static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c) -{ - int mb, me; - - assert(TCG_TARGET_REG_BITS == 64); - if (mask64_operand(c, &mb, &me)) { - if (mb == 0) { - tcg_out_rld(s, RLDICR, dst, src, 0, me); - } else { - tcg_out_rld(s, RLDICL, dst, src, 0, mb); - } - } else if ((c & 0xffff) == c) { - tcg_out32(s, ANDI | SAI(src, dst, c)); - return; - } else if ((c & 0xffff0000) == c) { - tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); - return; - } else { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c); - tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); - } -} - -static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c, - int op_lo, int op_hi) -{ - if (c >> 16) { - tcg_out32(s, op_hi | SAI(src, dst, c >> 16)); - src = dst; - } - if (c & 0xffff) { - tcg_out32(s, op_lo | SAI(src, dst, c)); - src = dst; - } -} - -static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) -{ - tcg_out_zori32(s, dst, src, c, ORI, ORIS); -} - -static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) -{ - tcg_out_zori32(s, dst, src, c, XORI, XORIS); -} - -static void tcg_out_b(TCGContext *s, int mask, tcg_insn_unit *target) -{ - ptrdiff_t disp = tcg_pcrel_diff(s, target); - if (in_range_b(disp)) { - tcg_out32(s, B | (disp & 0x3fffffc) | mask); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target); - tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR); - tcg_out32(s, BCCTR | BO_ALWAYS | mask); - } -} - -static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, - TCGReg base, tcg_target_long offset) -{ - tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; - bool is_store = false; - TCGReg rs = TCG_REG_TMP1; - - switch (opi) { - case LD: case LWA: - align = 3; - /* FALLTHRU */ - default: - if (rt != TCG_REG_R0) { - rs = rt; - break; - } - break; - case STD: - align = 3; - /* FALLTHRU */ - case STB: case STH: case STW: - is_store = true; - break; - } - - /* For unaligned, or very large offsets, use the indexed form. 
*/ - if (offset & align || offset != (int32_t)offset) { - if (rs == base) { - rs = TCG_REG_R0; - } - tcg_debug_assert(!is_store || rs != rt); - tcg_out_movi(s, TCG_TYPE_PTR, rs, orig); - tcg_out32(s, opx | TAB(rt, base, rs)); - return; - } - - l0 = (int16_t)offset; - offset = (offset - l0) >> 16; - l1 = (int16_t)offset; - - if (l1 < 0 && orig >= 0) { - extra = 0x4000; - l1 = (int16_t)(offset - 0x4000); - } - if (l1) { - tcg_out32(s, ADDIS | TAI(rs, base, l1)); - base = rs; - } - if (extra) { - tcg_out32(s, ADDIS | TAI(rs, base, extra)); - base = rs; - } - if (opi != ADDI || base != rt || l0 != 0) { - tcg_out32(s, opi | TAI(rt, base, l0)); - } -} - -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, - TCGReg arg1, intptr_t arg2) -{ - int opi, opx; - - assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); - if (type == TCG_TYPE_I32) { - opi = LWZ, opx = LWZX; - } else { - opi = LD, opx = LDX; - } - tcg_out_mem_long(s, opi, opx, ret, arg1, arg2); -} - -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - int opi, opx; - - assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); - if (type == TCG_TYPE_I32) { - opi = STW, opx = STWX; - } else { - opi = STD, opx = STDX; - } - tcg_out_mem_long(s, opi, opx, arg, arg1, arg2); -} - -static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, - int const_arg2, int cr, TCGType type) -{ - int imm; - uint32_t op; - - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); - - /* Simplify the comparisons below wrt CMPI. */ - if (type == TCG_TYPE_I32) { - arg2 = (int32_t)arg2; - } - - switch (cond) { - case TCG_COND_EQ: - case TCG_COND_NE: - if (const_arg2) { - if ((int16_t) arg2 == arg2) { - op = CMPI; - imm = 1; - break; - } else if ((uint16_t) arg2 == arg2) { - op = CMPLI; - imm = 1; - break; - } - } - op = CMPL; - imm = 0; - break; - - case TCG_COND_LT: - case TCG_COND_GE: - case TCG_COND_LE: - case TCG_COND_GT: - if (const_arg2) { - if ((int16_t) arg2 == arg2) { - op = CMPI; - imm = 1; - break; - } - } - op = CMP; - imm = 0; - break; - - case TCG_COND_LTU: - case TCG_COND_GEU: - case TCG_COND_LEU: - case TCG_COND_GTU: - if (const_arg2) { - if ((uint16_t) arg2 == arg2) { - op = CMPLI; - imm = 1; - break; - } - } - op = CMPL; - imm = 0; - break; - - default: - tcg_abort(); - } - op |= BF(cr) | ((type == TCG_TYPE_I64) << 21); - - if (imm) { - tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff)); - } else { - if (const_arg2) { - tcg_out_movi(s, type, TCG_REG_R0, arg2); - arg2 = TCG_REG_R0; - } - tcg_out32(s, op | RA(arg1) | RB(arg2)); - } -} - -static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, - TCGReg dst, TCGReg src) -{ - if (type == TCG_TYPE_I32) { - tcg_out32(s, CNTLZW | RS(src) | RA(dst)); - tcg_out_shri32(s, dst, dst, 5); - } else { - tcg_out32(s, CNTLZD | RS(src) | RA(dst)); - tcg_out_shri64(s, dst, dst, 6); - } -} - -static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src) -{ - /* X != 0 implies X + -1 generates a carry. Extra addition - trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C. 
*/ - if (dst != src) { - tcg_out32(s, ADDIC | TAI(dst, src, -1)); - tcg_out32(s, SUBFE | TAB(dst, dst, src)); - } else { - tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); - tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src)); - } -} - -static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2, - bool const_arg2) -{ - if (const_arg2) { - if ((uint32_t)arg2 == arg2) { - tcg_out_xori32(s, TCG_REG_R0, arg1, arg2); - } else { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2); - tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0)); - } - } else { - tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2)); - } - return TCG_REG_R0; -} - -static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, - TCGArg arg0, TCGArg arg1, TCGArg arg2, - int const_arg2) -{ - int crop, sh; - - assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); - - /* Ignore high bits of a potential constant arg2. */ - if (type == TCG_TYPE_I32) { - arg2 = (uint32_t)arg2; - } - - /* Handle common and trivial cases before handling anything else. */ - if (arg2 == 0) { - switch (cond) { - case TCG_COND_EQ: - tcg_out_setcond_eq0(s, type, arg0, arg1); - return; - case TCG_COND_NE: - if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { - tcg_out_ext32u(s, TCG_REG_R0, arg1); - arg1 = TCG_REG_R0; - } - tcg_out_setcond_ne0(s, arg0, arg1); - return; - case TCG_COND_GE: - tcg_out32(s, NOR | SAB(arg1, arg0, arg1)); - arg1 = arg0; - /* FALLTHRU */ - case TCG_COND_LT: - /* Extract the sign bit. */ - if (type == TCG_TYPE_I32) { - tcg_out_shri32(s, arg0, arg1, 31); - } else { - tcg_out_shri64(s, arg0, arg1, 63); - } - return; - default: - break; - } - } - - /* If we have ISEL, we can implement everything with 3 or 4 insns. - All other cases below are also at least 3 insns, so speed up the - code generator by not considering them and always using ISEL. */ - if (HAVE_ISEL) { - int isel, tab; - - tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); - - isel = tcg_to_isel[cond]; - - tcg_out_movi(s, type, arg0, 1); - if (isel & 1) { - /* arg0 = (bc ? 0 : 1) */ - tab = TAB(arg0, 0, arg0); - isel &= ~1; - } else { - /* arg0 = (bc ? 1 : 0) */ - tcg_out_movi(s, type, TCG_REG_R0, 0); - tab = TAB(arg0, arg0, TCG_REG_R0); - } - tcg_out32(s, isel | tab); - return; - } - - switch (cond) { - case TCG_COND_EQ: - arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); - tcg_out_setcond_eq0(s, type, arg0, arg1); - return; - - case TCG_COND_NE: - arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); - /* Discard the high bits only once, rather than both inputs. 
*/ - if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { - tcg_out_ext32u(s, TCG_REG_R0, arg1); - arg1 = TCG_REG_R0; - } - tcg_out_setcond_ne0(s, arg0, arg1); - return; - - case TCG_COND_GT: - case TCG_COND_GTU: - sh = 30; - crop = 0; - goto crtest; - - case TCG_COND_LT: - case TCG_COND_LTU: - sh = 29; - crop = 0; - goto crtest; - - case TCG_COND_GE: - case TCG_COND_GEU: - sh = 31; - crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT); - goto crtest; - - case TCG_COND_LE: - case TCG_COND_LEU: - sh = 31; - crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT); - crtest: - tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); - if (crop) { - tcg_out32(s, crop); - } - tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); - tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31); - break; - - default: - tcg_abort(); - } -} - -static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l) -{ - if (l->has_value) { - tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value_ptr)); - } else { - tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0); - tcg_out_bc_noaddr(s, bc); - } -} - -static void tcg_out_brcond(TCGContext *s, TCGCond cond, - TCGArg arg1, TCGArg arg2, int const_arg2, - TCGLabel *l, TCGType type) -{ - tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); - tcg_out_bc(s, tcg_to_bc[cond], l); -} - -static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, - TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1, - TCGArg v2, bool const_c2) -{ - /* If for some reason both inputs are zero, don't produce bad code. */ - if (v1 == 0 && v2 == 0) { - tcg_out_movi(s, type, dest, 0); - return; - } - - tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type); - - if (HAVE_ISEL) { - int isel = tcg_to_isel[cond]; - - /* Swap the V operands if the operation indicates inversion. */ - if (isel & 1) { - int t = v1; - v1 = v2; - v2 = t; - isel &= ~1; - } - /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. 
*/ - if (v2 == 0) { - tcg_out_movi(s, type, TCG_REG_R0, 0); - } - tcg_out32(s, isel | TAB(dest, v1, v2)); - } else { - if (dest == v2) { - cond = tcg_invert_cond(cond); - v2 = v1; - } else if (dest != v1) { - if (v1 == 0) { - tcg_out_movi(s, type, dest, 0); - } else { - tcg_out_mov(s, type, dest, v1); - } - } - /* Branch forward over one insn */ - tcg_out32(s, tcg_to_bc[cond] | 8); - if (v2 == 0) { - tcg_out_movi(s, type, dest, 0); - } else { - tcg_out_mov(s, type, dest, v2); - } - } -} - -static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, - const int *const_args) -{ - static const struct { uint8_t bit1, bit2; } bits[] = { - [TCG_COND_LT ] = { CR_LT, CR_LT }, - [TCG_COND_LE ] = { CR_LT, CR_GT }, - [TCG_COND_GT ] = { CR_GT, CR_GT }, - [TCG_COND_GE ] = { CR_GT, CR_LT }, - [TCG_COND_LTU] = { CR_LT, CR_LT }, - [TCG_COND_LEU] = { CR_LT, CR_GT }, - [TCG_COND_GTU] = { CR_GT, CR_GT }, - [TCG_COND_GEU] = { CR_GT, CR_LT }, - }; - - TCGCond cond = args[4], cond2; - TCGArg al, ah, bl, bh; - int blconst, bhconst; - int op, bit1, bit2; - - al = args[0]; - ah = args[1]; - bl = args[2]; - bh = args[3]; - blconst = const_args[2]; - bhconst = const_args[3]; - - switch (cond) { - case TCG_COND_EQ: - op = CRAND; - goto do_equality; - case TCG_COND_NE: - op = CRNAND; - do_equality: - tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32); - tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32); - tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); - break; - - case TCG_COND_LT: - case TCG_COND_LE: - case TCG_COND_GT: - case TCG_COND_GE: - case TCG_COND_LTU: - case TCG_COND_LEU: - case TCG_COND_GTU: - case TCG_COND_GEU: - bit1 = bits[cond].bit1; - bit2 = bits[cond].bit2; - op = (bit1 != bit2 ? CRANDC : CRAND); - cond2 = tcg_unsigned_cond(cond); - - tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32); - tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32); - tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2)); - tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ)); - break; - - default: - tcg_abort(); - } -} - -static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, - const int *const_args) -{ - tcg_out_cmp2(s, args + 1, const_args + 1); - tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); - tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31); -} - -static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args, - const int *const_args) -{ - tcg_out_cmp2(s, args, const_args); - tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5])); -} - -void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) -{ - tcg_insn_unit i1, i2; - uint64_t pair; - intptr_t diff = addr - jmp_addr; - - if (in_range_b(diff)) { - i1 = B | (diff & 0x3fffffc); - i2 = NOP; - } else if (USE_REG_RA) { - intptr_t lo, hi; - diff = addr - (uintptr_t)tb_ret_addr; - lo = (int16_t)diff; - hi = (int32_t)(diff - lo); - assert(diff == hi + lo); - i1 = ADDIS | TAI(TCG_REG_TMP1, TCG_REG_RA, hi >> 16); - i2 = ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, lo); - } else { - assert(TCG_TARGET_REG_BITS == 32 || addr == (int32_t)addr); - i1 = ADDIS | TAI(TCG_REG_TMP1, 0, addr >> 16); - i2 = ORI | SAI(TCG_REG_TMP1, TCG_REG_TMP1, addr); - } -#ifdef HOST_WORDS_BIGENDIAN - pair = (uint64_t)i1 << 32 | i2; -#else - pair = (uint64_t)i2 << 32 | i1; -#endif - - /* ??? __atomic_store_8, presuming there's some way to do that - for 32-bit, otherwise this is good enough for 64-bit. 
*/ - *(uint64_t *)jmp_addr = pair; - flush_icache_range(jmp_addr, jmp_addr + 8); -} - -static void tcg_out_call(TCGContext *s, tcg_insn_unit *target) -{ -#ifdef _CALL_AIX - /* Look through the descriptor. If the branch is in range, and we - don't have to spend too much effort on building the toc. */ - void *tgt = ((void **)target)[0]; - uintptr_t toc = ((uintptr_t *)target)[1]; - intptr_t diff = tcg_pcrel_diff(s, tgt); - - if (in_range_b(diff) && toc == (uint32_t)toc) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc); - tcg_out_b(s, LK, tgt); - } else { - /* Fold the low bits of the constant into the addresses below. */ - intptr_t arg = (intptr_t)target; - int ofs = (int16_t)arg; - - if (ofs + 8 < 0x8000) { - arg -= ofs; - } else { - ofs = 0; - } - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs); - tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP); - tcg_out32(s, BCCTR | BO_ALWAYS | LK); - } -#elif defined(_CALL_ELF) && _CALL_ELF == 2 - intptr_t diff; - - /* In the ELFv2 ABI, we have to set up r12 to contain the destination - address, which the callee uses to compute its TOC address. */ - /* FIXME: when the branch is in range, we could avoid r12 load if we - knew that the destination uses the same TOC, and what its local - entry point offset is. */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target); - - diff = tcg_pcrel_diff(s, target); - if (in_range_b(diff)) { - tcg_out_b(s, LK, target); - } else { - tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR); - tcg_out32(s, BCCTR | BO_ALWAYS | LK); - } -#else - tcg_out_b(s, LK, target); -#endif -} - -static const uint32_t qemu_ldx_opc[16] = { - [MO_UB] = LBZX, - [MO_UW] = LHZX, - [MO_UL] = LWZX, - [MO_Q] = LDX, - [MO_SW] = LHAX, - [MO_SL] = LWAX, - [MO_BSWAP | MO_UB] = LBZX, - [MO_BSWAP | MO_UW] = LHBRX, - [MO_BSWAP | MO_UL] = LWBRX, - [MO_BSWAP | MO_Q] = LDBRX, -}; - -static const uint32_t qemu_stx_opc[16] = { - [MO_UB] = STBX, - [MO_UW] = STHX, - [MO_UL] = STWX, - [MO_Q] = STDX, - [MO_BSWAP | MO_UB] = STBX, - [MO_BSWAP | MO_UW] = STHBRX, - [MO_BSWAP | MO_UL] = STWBRX, - [MO_BSWAP | MO_Q] = STDBRX, -}; - -static const uint32_t qemu_exts_opc[4] = { - EXTSB, EXTSH, EXTSW, 0 -}; - -#if defined (CONFIG_SOFTMMU) -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - * int mmu_idx, uintptr_t ra) - */ -static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, -}; - -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - * uintxx_t val, int mmu_idx, uintptr_t ra) - */ -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, -}; - -/* Perform the TLB load and compare. Places the result of the comparison - in CR7, loads the addend of the TLB into R3, and returns the register - containing the guest address (zero-extended into R4). Clobbers R0 and R2. */ - -static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc, - TCGReg addrlo, TCGReg addrhi, - int mem_index, bool is_read) -{ - int cmp_off - = (is_read - ? 
offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) - : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); - int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); - TCGReg base = TCG_AREG0; - TCGMemOp s_bits = opc & MO_SIZE; - - /* Extract the page index, shifted into place for tlb index. */ - if (TCG_TARGET_REG_BITS == 64) { - if (TARGET_LONG_BITS == 32) { - /* Zero-extend the address into a place helpful for further use. */ - tcg_out_ext32u(s, TCG_REG_R4, addrlo); - addrlo = TCG_REG_R4; - } else { - tcg_out_rld(s, RLDICL, TCG_REG_R3, addrlo, - 64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS); - } - } - - /* Compensate for very large offsets. */ - if (add_off >= 0x8000) { - /* Most target env are smaller than 32k; none are larger than 64k. - Simplify the logic here merely to offset by 0x7ff0, giving us a - range just shy of 64k. Check this assumption. */ - QEMU_BUILD_BUG_ON(offsetof(CPUArchState, - tlb_table[NB_MMU_MODES - 1][1]) - > 0x7ff0 + 0x7fff); - tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, base, 0x7ff0)); - base = TCG_REG_TMP1; - cmp_off -= 0x7ff0; - add_off -= 0x7ff0; - } - - /* Extraction and shifting, part 2. */ - if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) { - tcg_out_rlw(s, RLWINM, TCG_REG_R3, addrlo, - 32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), - 32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS), - 31 - CPU_TLB_ENTRY_BITS); - } else { - tcg_out_shli64(s, TCG_REG_R3, TCG_REG_R3, CPU_TLB_ENTRY_BITS); - } - - tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, base)); - - /* Load the tlb comparator. */ - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off); - tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4); - } else { - tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off); - } - - /* Load the TLB addend for use on the fast path. Do this asap - to minimize any load use delay. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off); - - /* Clear the non-page, non-alignment bits from the address */ - if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) { - /* We don't support unaligned accesses on 32-bits, preserve - * the bottom bits and thus trigger a comparison failure on - * unaligned accesses - */ - tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, - (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS); - } else if (s_bits) { - /* > byte access, we need to handle alignment */ - if ((opc & MO_AMASK) == MO_ALIGN) { - /* Alignment required by the front-end, same as 32-bits */ - tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo, - 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits); - tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); - } else { - /* We support unaligned accesses, we need to make sure we fail - * if we cross a page boundary. The trick is to add the - * access_size-1 to the address before masking the low bits. - * That will make the address overflow to the next page if we - * cross a page boundary which will then force a mismatch of - * the TLB compare since the next page cannot possibly be in - * the same TLB index. 
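A small sketch of the add-then-mask check described in the comment above, not from the sources; PAGE_BITS and tlb_hit stand in for TARGET_PAGE_BITS and the real CR7 comparison:

#include <stdbool.h>
#include <stdint.h>

#define PAGE_BITS 12
#define PAGE_MASK (~((UINT64_C(1) << PAGE_BITS) - 1))

static bool tlb_hit(uint64_t addr, unsigned size, uint64_t tlb_tag)
{
    /* Adding size-1 first moves the computed tag into the next page
       whenever the access straddles a boundary, so the comparison with
       the tag of the first page fails and the slow path is taken. */
    return ((addr + size - 1) & PAGE_MASK) == tlb_tag;
}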
- */ - tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, (1 << s_bits) - 1)); - tcg_out_rld(s, RLDICR, TCG_REG_R0, TCG_REG_R0, - 0, 63 - TARGET_PAGE_BITS); - } - } else { - /* Byte access, just chop off the bits below the page index */ - tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, 0, 63 - TARGET_PAGE_BITS); - } - - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, - 0, 7, TCG_TYPE_I32); - tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32); - tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); - } else { - tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, - 0, 7, TCG_TYPE_TL); - } - - return addrlo; -} - -/* Record the context of a call to the out of line helper code for the slow - path for a load or store, so that we can later generate the correct - helper code. */ -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, - TCGReg datalo_reg, TCGReg datahi_reg, - TCGReg addrlo_reg, TCGReg addrhi_reg, - tcg_insn_unit *raddr, tcg_insn_unit *lptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->datalo_reg = datalo_reg; - label->datahi_reg = datahi_reg; - label->addrlo_reg = addrlo_reg; - label->addrhi_reg = addrhi_reg; - label->raddr = raddr; - label->label_ptr[0] = lptr; -} - -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGMemOpIdx oi = lb->oi; - TCGMemOp opc = get_memop(oi); - TCGReg hi, lo, arg = TCG_REG_R3; - - reloc_pc14(lb->label_ptr[0], s->code_ptr); - - tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); - - lo = lb->addrlo_reg; - hi = lb->addrhi_reg; - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { -#ifdef TCG_TARGET_CALL_ALIGN_ARGS - arg |= 1; -#endif - tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); - tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); - } else { - /* If the address needed to be zero-extended, we'll have already - placed it in R4. The only remaining case is 64-bit guest. */ - tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); - } - - tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); - tcg_out32(s, MFSPR | RT(arg) | LR); - - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); - - lo = lb->datalo_reg; - hi = lb->datahi_reg; - if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { - tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4); - tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3); - } else if (opc & MO_SIGN) { - uint32_t insn = qemu_exts_opc[opc & MO_SIZE]; - tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3)); - } else { - tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3); - } - - tcg_out_b(s, 0, lb->raddr); -} - -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGMemOpIdx oi = lb->oi; - TCGMemOp opc = get_memop(oi); - TCGMemOp s_bits = opc & MO_SIZE; - TCGReg hi, lo, arg = TCG_REG_R3; - - reloc_pc14(lb->label_ptr[0], s->code_ptr); - - tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); - - lo = lb->addrlo_reg; - hi = lb->addrhi_reg; - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { -#ifdef TCG_TARGET_CALL_ALIGN_ARGS - arg |= 1; -#endif - tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); - tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); - } else { - /* If the address needed to be zero-extended, we'll have already - placed it in R4. The only remaining case is 64-bit guest. 
*/ - tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); - } - - lo = lb->datalo_reg; - hi = lb->datahi_reg; - if (TCG_TARGET_REG_BITS == 32) { - switch (s_bits) { - case MO_64: -#ifdef TCG_TARGET_CALL_ALIGN_ARGS - arg |= 1; -#endif - tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); - /* FALLTHRU */ - case MO_32: - tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); - break; - default: - tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31); - break; - } - } else { - if (s_bits == MO_64) { - tcg_out_mov(s, TCG_TYPE_I64, arg++, lo); - } else { - tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits)); - } - } - - tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); - tcg_out32(s, MFSPR | RT(arg) | LR); - - tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); - - tcg_out_b(s, 0, lb->raddr); -} -#endif /* SOFTMMU */ - -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) -{ - TCGReg datalo, datahi, addrlo, rbase; - TCGReg addrhi __attribute__((unused)); - TCGMemOpIdx oi; - TCGMemOp opc, s_bits; -#ifdef CONFIG_SOFTMMU - int mem_index; - tcg_insn_unit *label_ptr; -#endif - - datalo = *args++; - datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); - oi = *args++; - opc = get_memop(oi); - s_bits = opc & MO_SIZE; - -#ifdef CONFIG_SOFTMMU - mem_index = get_mmuidx(oi); - addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true); - - /* Load a pointer into the current opcode w/conditional branch-link. */ - label_ptr = s->code_ptr; - tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); - - rbase = TCG_REG_R3; -#else /* !CONFIG_SOFTMMU */ - rbase = guest_base ? TCG_GUEST_BASE_REG : 0; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); - addrlo = TCG_REG_TMP1; - } -#endif - - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { - if (opc & MO_BSWAP) { - tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); - tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); - tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0)); - } else if (rbase != 0) { - tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); - tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo)); - tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0)); - } else if (addrlo == datahi) { - tcg_out32(s, LWZ | TAI(datalo, addrlo, 4)); - tcg_out32(s, LWZ | TAI(datahi, addrlo, 0)); - } else { - tcg_out32(s, LWZ | TAI(datahi, addrlo, 0)); - tcg_out32(s, LWZ | TAI(datalo, addrlo, 4)); - } - } else { - uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; - if (!HAVE_ISA_2_06 && insn == LDBRX) { - tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); - tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); - tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0)); - tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0); - } else if (insn) { - tcg_out32(s, insn | TAB(datalo, rbase, addrlo)); - } else { - insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)]; - tcg_out32(s, insn | TAB(datalo, rbase, addrlo)); - insn = qemu_exts_opc[s_bits]; - tcg_out32(s, insn | RA(datalo) | RS(datalo)); - } - } - -#ifdef CONFIG_SOFTMMU - add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, - s->code_ptr, label_ptr); -#endif -} - -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) -{ - TCGReg datalo, datahi, addrlo, rbase; - TCGReg addrhi __attribute__((unused)); - TCGMemOpIdx oi; - TCGMemOp opc, s_bits; -#ifdef CONFIG_SOFTMMU - int mem_index; - tcg_insn_unit *label_ptr; -#endif - - datalo = *args++; - datahi = 
(TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); - oi = *args++; - opc = get_memop(oi); - s_bits = opc & MO_SIZE; - -#ifdef CONFIG_SOFTMMU - mem_index = get_mmuidx(oi); - addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false); - - /* Load a pointer into the current opcode w/conditional branch-link. */ - label_ptr = s->code_ptr; - tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); - - rbase = TCG_REG_R3; -#else /* !CONFIG_SOFTMMU */ - rbase = guest_base ? TCG_GUEST_BASE_REG : 0; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); - addrlo = TCG_REG_TMP1; - } -#endif - - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { - if (opc & MO_BSWAP) { - tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); - tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); - tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0)); - } else if (rbase != 0) { - tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); - tcg_out32(s, STWX | SAB(datahi, rbase, addrlo)); - tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0)); - } else { - tcg_out32(s, STW | TAI(datahi, addrlo, 0)); - tcg_out32(s, STW | TAI(datalo, addrlo, 4)); - } - } else { - uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; - if (!HAVE_ISA_2_06 && insn == STDBRX) { - tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); - tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4)); - tcg_out_shri64(s, TCG_REG_R0, datalo, 32); - tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1)); - } else { - tcg_out32(s, insn | SAB(datalo, rbase, addrlo)); - } - } - -#ifdef CONFIG_SOFTMMU - add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, - s->code_ptr, label_ptr); -#endif -} - -/* Parameters for function call generation, used in tcg.c. */ -#define TCG_TARGET_STACK_ALIGN 16 -#define TCG_TARGET_EXTEND_ARGS 1 - -#ifdef _CALL_AIX -# define LINK_AREA_SIZE (6 * SZR) -# define LR_OFFSET (1 * SZR) -# define TCG_TARGET_CALL_STACK_OFFSET (LINK_AREA_SIZE + 8 * SZR) -#elif defined(TCG_TARGET_CALL_DARWIN) -# define LINK_AREA_SIZE (6 * SZR) -# define LR_OFFSET (2 * SZR) -#elif TCG_TARGET_REG_BITS == 64 -# if defined(_CALL_ELF) && _CALL_ELF == 2 -# define LINK_AREA_SIZE (4 * SZR) -# define LR_OFFSET (1 * SZR) -# endif -#else /* TCG_TARGET_REG_BITS == 32 */ -# if defined(_CALL_SYSV) -# define LINK_AREA_SIZE (2 * SZR) -# define LR_OFFSET (1 * SZR) -# endif -#endif -#ifndef LR_OFFSET -# error "Unhandled abi" -#endif -#ifndef TCG_TARGET_CALL_STACK_OFFSET -# define TCG_TARGET_CALL_STACK_OFFSET LINK_AREA_SIZE -#endif - -#define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) -#define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) - -#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \ - + TCG_STATIC_CALL_ARGS_SIZE \ - + CPU_TEMP_BUF_SIZE \ - + REG_SAVE_SIZE \ - + TCG_TARGET_STACK_ALIGN - 1) \ - & -TCG_TARGET_STACK_ALIGN) - -#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) - -static void tcg_target_qemu_prologue(TCGContext *s) -{ - int i; - -#ifdef _CALL_AIX - void **desc = (void **)s->code_ptr; - desc[0] = desc + 2; /* entry point */ - desc[1] = 0; /* environment pointer */ - s->code_ptr = (void *)(desc + 2); /* skip over descriptor */ -#endif - - tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE, - CPU_TEMP_BUF_SIZE); - - /* Prologue */ - tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); - tcg_out32(s, (SZR == 8 ? 
STDU : STWU) - | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE)); - - for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { - tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], - TCG_REG_R1, REG_SAVE_BOT + i * SZR); - } - tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); - -#ifndef CONFIG_SOFTMMU - if (guest_base) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); - tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); - } -#endif - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR); - - if (USE_REG_RA) { -#ifdef _CALL_AIX - /* Make the caller load the value as the TOC into R2. */ - tb_ret_addr = s->code_ptr + 2; - desc[1] = tb_ret_addr; - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2); - tcg_out32(s, BCCTR | BO_ALWAYS); -#elif defined(_CALL_ELF) && _CALL_ELF == 2 - /* Compute from the incoming R12 value. */ - tb_ret_addr = s->code_ptr + 2; - tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12, - tcg_ptr_byte_diff(tb_ret_addr, s->code_buf))); - tcg_out32(s, BCCTR | BO_ALWAYS); -#else - /* Reserve max 5 insns for the constant load. */ - tb_ret_addr = s->code_ptr + 6; - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr); - tcg_out32(s, BCCTR | BO_ALWAYS); - while (s->code_ptr < tb_ret_addr) { - tcg_out32(s, NOP); - } -#endif - } else { - tcg_out32(s, BCCTR | BO_ALWAYS); - tb_ret_addr = s->code_ptr; - } - - /* Epilogue */ - assert(tb_ret_addr == s->code_ptr); - - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); - for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { - tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], - TCG_REG_R1, REG_SAVE_BOT + i * SZR); - } - tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); - tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE)); - tcg_out32(s, BCLR | BO_ALWAYS); -} - -static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, - const int *const_args) -{ - TCGArg a0, a1, a2; - int c; - - switch (opc) { - case INDEX_op_exit_tb: - if (USE_REG_RA) { - ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr); - - /* Use a direct branch if we can, otherwise use the value in RA. - Note that the direct branch is always backward, thus we need - to account for the possibility of 5 insns from the movi. */ - if (!in_range_b(disp - 20)) { - tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); - tcg_out32(s, BCCTR | BO_ALWAYS); - break; - } - } - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); - tcg_out_b(s, 0, tb_ret_addr); - break; - case INDEX_op_goto_tb: - tcg_debug_assert(s->tb_jmp_offset); - /* Direct jump. Ensure the next insns are 8-byte aligned. */ - if ((uintptr_t)s->code_ptr & 7) { - tcg_out32(s, NOP); - } - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); - /* To be replaced by either a branch+nop or a load into TMP1. 
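The NOP padding above keeps the two patchable goto_tb instructions on an 8-byte boundary so that ppc_tb_set_jmp_target, earlier in this file, can rewrite both with one aligned 64-bit store. A rough sketch of that idea, assuming a 64-bit host where such a store is single-copy atomic; patch_insn_pair is an invented name and the real code also flushes the icache afterwards:

#include <stdint.h>

static void patch_insn_pair(uint32_t *slot, uint32_t i1, uint32_t i2)
{
#ifdef HOST_WORDS_BIGENDIAN
    uint64_t pair = (uint64_t)i1 << 32 | i2;   /* i1 is executed first */
#else
    uint64_t pair = (uint64_t)i2 << 32 | i1;
#endif
    *(uint64_t *)slot = pair;       /* slot must be 8-byte aligned */
}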
*/ - s->code_ptr += 2; - tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); - tcg_out32(s, BCCTR | BO_ALWAYS); - s->tb_next_offset[args[0]] = tcg_current_code_size(s); - break; - case INDEX_op_br: - { - TCGLabel *l = arg_label(args[0]); - - if (l->has_value) { - tcg_out_b(s, 0, l->u.value_ptr); - } else { - tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); - tcg_out_b_noaddr(s, B); - } - } - break; - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); - break; - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); - tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0])); - break; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); - break; - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); - break; - case INDEX_op_ld32s_i64: - tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); - break; - case INDEX_op_ld_i64: - tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); - break; - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); - break; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); - break; - case INDEX_op_st_i64: - tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); - break; - - case INDEX_op_add_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - do_addi_32: - tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); - } else { - tcg_out32(s, ADD | TAB(a0, a1, a2)); - } - break; - case INDEX_op_sub_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[1]) { - if (const_args[2]) { - tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2); - } else { - tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); - } - } else if (const_args[2]) { - a2 = -a2; - goto do_addi_32; - } else { - tcg_out32(s, SUBF | TAB(a0, a2, a1)); - } - break; - - case INDEX_op_and_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi32(s, a0, a1, a2); - } else { - tcg_out32(s, AND | SAB(a1, a0, a2)); - } - break; - case INDEX_op_and_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi64(s, a0, a1, a2); - } else { - tcg_out32(s, AND | SAB(a1, a0, a2)); - } - break; - case INDEX_op_or_i64: - case INDEX_op_or_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_ori32(s, a0, a1, a2); - } else { - tcg_out32(s, OR | SAB(a1, a0, a2)); - } - break; - case INDEX_op_xor_i64: - case INDEX_op_xor_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_xori32(s, a0, a1, a2); - } else { - tcg_out32(s, XOR | SAB(a1, a0, a2)); - } - break; - case INDEX_op_andc_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi32(s, a0, a1, ~a2); - } else { - tcg_out32(s, ANDC | SAB(a1, a0, a2)); - } - break; - case INDEX_op_andc_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi64(s, a0, a1, ~a2); - } else { - tcg_out32(s, ANDC | SAB(a1, a0, a2)); - } - break; - case INDEX_op_orc_i32: - if (const_args[2]) { - tcg_out_ori32(s, args[0], args[1], ~args[2]); - 
break; - } - /* FALLTHRU */ - case INDEX_op_orc_i64: - tcg_out32(s, ORC | SAB(args[1], args[0], args[2])); - break; - case INDEX_op_eqv_i32: - if (const_args[2]) { - tcg_out_xori32(s, args[0], args[1], ~args[2]); - break; - } - /* FALLTHRU */ - case INDEX_op_eqv_i64: - tcg_out32(s, EQV | SAB(args[1], args[0], args[2])); - break; - case INDEX_op_nand_i32: - case INDEX_op_nand_i64: - tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); - break; - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); - break; - - case INDEX_op_mul_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out32(s, MULLI | TAI(a0, a1, a2)); - } else { - tcg_out32(s, MULLW | TAB(a0, a1, a2)); - } - break; - - case INDEX_op_div_i32: - tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); - break; - - case INDEX_op_divu_i32: - tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); - break; - - case INDEX_op_shl_i32: - if (const_args[2]) { - tcg_out_shli32(s, args[0], args[1], args[2]); - } else { - tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_shr_i32: - if (const_args[2]) { - tcg_out_shri32(s, args[0], args[1], args[2]); - } else { - tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_sar_i32: - if (const_args[2]) { - tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2])); - } else { - tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_rotl_i32: - if (const_args[2]) { - tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); - } else { - tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) - | MB(0) | ME(31)); - } - break; - case INDEX_op_rotr_i32: - if (const_args[2]) { - tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); - } else { - tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); - tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) - | MB(0) | ME(31)); - } - break; - - case INDEX_op_brcond_i32: - tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], - arg_label(args[3]), TCG_TYPE_I32); - break; - case INDEX_op_brcond_i64: - tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], - arg_label(args[3]), TCG_TYPE_I64); - break; - case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args, const_args); - break; - - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: - tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); - break; - - case INDEX_op_not_i32: - case INDEX_op_not_i64: - tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); - break; - - case INDEX_op_add_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - do_addi_64: - tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); - } else { - tcg_out32(s, ADD | TAB(a0, a1, a2)); - } - break; - case INDEX_op_sub_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[1]) { - if (const_args[2]) { - tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2); - } else { - tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); - } - } else if (const_args[2]) { - a2 = -a2; - goto do_addi_64; - } else { - tcg_out32(s, SUBF | TAB(a0, a2, a1)); - } - break; - - case INDEX_op_shl_i64: - if (const_args[2]) { - tcg_out_shli64(s, args[0], args[1], args[2]); - } else { - tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_shr_i64: - if (const_args[2]) { - tcg_out_shri64(s, args[0], args[1], args[2]); - } else { - tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_sar_i64: - if (const_args[2]) { - int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) 
<< 1); - tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh); - } else { - tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_rotl_i64: - if (const_args[2]) { - tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); - } else { - tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0)); - } - break; - case INDEX_op_rotr_i64: - if (const_args[2]) { - tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); - } else { - tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); - tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); - } - break; - - case INDEX_op_mul_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out32(s, MULLI | TAI(a0, a1, a2)); - } else { - tcg_out32(s, MULLD | TAB(a0, a1, a2)); - } - break; - case INDEX_op_div_i64: - tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_divu_i64: - tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); - break; - - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args, false); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args, true); - break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args, false); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args, true); - break; - - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - c = EXTSB; - goto gen_ext; - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - c = EXTSH; - goto gen_ext; - case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: - c = EXTSW; - goto gen_ext; - gen_ext: - tcg_out32(s, c | RS(args[1]) | RA(args[0])); - break; - case INDEX_op_extu_i32_i64: - tcg_out_ext32u(s, args[0], args[1]); - break; - - case INDEX_op_setcond_i32: - tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], - const_args[2]); - break; - case INDEX_op_setcond_i64: - tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], - const_args[2]); - break; - case INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args, const_args); - break; - - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - a0 = args[0], a1 = args[1]; - /* a1 = abcd */ - if (a0 != a1) { - /* a0 = (a1 r<< 24) & 0xff # 000c */ - tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); - /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */ - tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23); - } else { - /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */ - tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23); - /* a0 = (a1 r<< 24) & 0xff # 000c */ - tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); - /* a0 = a0 | r0 # 00dc */ - tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0)); - } - break; - - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: - /* Stolen from gcc's builtin_bswap32 */ - a1 = args[1]; - a0 = args[0] == a1 ? 
TCG_REG_R0 : args[0]; - - /* a1 = args[1] # abcd */ - /* a0 = rotate_left (a1, 8) # bcda */ - tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); - /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); - /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); - - if (a0 == TCG_REG_R0) { - tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); - } - break; - - case INDEX_op_bswap64_i64: - a0 = args[0], a1 = args[1], a2 = TCG_REG_R0; - if (a0 == a1) { - a0 = TCG_REG_R0; - a2 = a1; - } - - /* a1 = # abcd efgh */ - /* a0 = rl32(a1, 8) # 0000 fghe */ - tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); - /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); - /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); - - /* a0 = rl64(a0, 32) # hgfe 0000 */ - /* a2 = rl64(a1, 32) # efgh abcd */ - tcg_out_rld(s, RLDICL, a0, a0, 32, 0); - tcg_out_rld(s, RLDICL, a2, a1, 32, 0); - - /* a0 = dep(a0, rl32(a2, 8), 0xffffffff) # hgfe bcda */ - tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31); - /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */ - tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7); - /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */ - tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23); - - if (a0 == 0) { - tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); - } - break; - - case INDEX_op_deposit_i32: - if (const_args[2]) { - uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; - tcg_out_andi32(s, args[0], args[0], ~mask); - } else { - tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], - 32 - args[3] - args[4], 31 - args[3]); - } - break; - case INDEX_op_deposit_i64: - if (const_args[2]) { - uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; - tcg_out_andi64(s, args[0], args[0], ~mask); - } else { - tcg_out_rld(s, RLDIMI, args[0], args[2], args[3], - 64 - args[3] - args[4]); - } - break; - - case INDEX_op_movcond_i32: - tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], - args[3], args[4], const_args[2]); - break; - case INDEX_op_movcond_i64: - tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], - args[3], args[4], const_args[2]); - break; - -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_add2_i64: -#else - case INDEX_op_add2_i32: -#endif - /* Note that the CA bit is defined based on the word size of the - environment. So in 64-bit mode it's always carry-out of bit 63. - The fallback code using deposit works just as well for 32-bit. */ - a0 = args[0], a1 = args[1]; - if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { - a0 = TCG_REG_R0; - } - if (const_args[4]) { - tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); - } else { - tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); - } - if (const_args[5]) { - tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3])); - } else { - tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); - } - if (a0 != args[0]) { - tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); - } - break; - -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_sub2_i64: -#else - case INDEX_op_sub2_i32: -#endif - a0 = args[0], a1 = args[1]; - if (a0 == args[5] || (!const_args[3] && a0 == args[3])) { - a0 = TCG_REG_R0; - } - if (const_args[2]) { - tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2])); - } else { - tcg_out32(s, SUBFC | TAB(a0, args[4], args[2])); - } - if (const_args[3]) { - tcg_out32(s, (args[3] ? 
SUBFME : SUBFZE) | RT(a1) | RA(args[5])); - } else { - tcg_out32(s, SUBFE | TAB(a1, args[5], args[3])); - } - if (a0 != args[0]) { - tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); - } - break; - - case INDEX_op_muluh_i32: - tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_mulsh_i32: - tcg_out32(s, MULHW | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_muluh_i64: - tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_mulsh_i64: - tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); - break; - - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ - case INDEX_op_movi_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - default: - tcg_abort(); - } -} - -static const TCGTargetOpDef ppc_op_defs[] = { - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - { INDEX_op_br, { } }, - - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - - { INDEX_op_st8_i32, { "r", "r" } }, - { INDEX_op_st16_i32, { "r", "r" } }, - { INDEX_op_st_i32, { "r", "r" } }, - - { INDEX_op_add_i32, { "r", "r", "ri" } }, - { INDEX_op_mul_i32, { "r", "r", "rI" } }, - { INDEX_op_div_i32, { "r", "r", "r" } }, - { INDEX_op_divu_i32, { "r", "r", "r" } }, - { INDEX_op_sub_i32, { "r", "rI", "ri" } }, - { INDEX_op_and_i32, { "r", "r", "ri" } }, - { INDEX_op_or_i32, { "r", "r", "ri" } }, - { INDEX_op_xor_i32, { "r", "r", "ri" } }, - { INDEX_op_andc_i32, { "r", "r", "ri" } }, - { INDEX_op_orc_i32, { "r", "r", "ri" } }, - { INDEX_op_eqv_i32, { "r", "r", "ri" } }, - { INDEX_op_nand_i32, { "r", "r", "r" } }, - { INDEX_op_nor_i32, { "r", "r", "r" } }, - - { INDEX_op_shl_i32, { "r", "r", "ri" } }, - { INDEX_op_shr_i32, { "r", "r", "ri" } }, - { INDEX_op_sar_i32, { "r", "r", "ri" } }, - { INDEX_op_rotl_i32, { "r", "r", "ri" } }, - { INDEX_op_rotr_i32, { "r", "r", "ri" } }, - - { INDEX_op_neg_i32, { "r", "r" } }, - { INDEX_op_not_i32, { "r", "r" } }, - { INDEX_op_ext8s_i32, { "r", "r" } }, - { INDEX_op_ext16s_i32, { "r", "r" } }, - { INDEX_op_bswap16_i32, { "r", "r" } }, - { INDEX_op_bswap32_i32, { "r", "r" } }, - - { INDEX_op_brcond_i32, { "r", "ri" } }, - { INDEX_op_setcond_i32, { "r", "r", "ri" } }, - { INDEX_op_movcond_i32, { "r", "r", "ri", "rZ", "rZ" } }, - - { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, - - { INDEX_op_muluh_i32, { "r", "r", "r" } }, - { INDEX_op_mulsh_i32, { "r", "r", "r" } }, - -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_ld8u_i64, { "r", "r" } }, - { INDEX_op_ld8s_i64, { "r", "r" } }, - { INDEX_op_ld16u_i64, { "r", "r" } }, - { INDEX_op_ld16s_i64, { "r", "r" } }, - { INDEX_op_ld32u_i64, { "r", "r" } }, - { INDEX_op_ld32s_i64, { "r", "r" } }, - { INDEX_op_ld_i64, { "r", "r" } }, - - { INDEX_op_st8_i64, { "r", "r" } }, - { INDEX_op_st16_i64, { "r", "r" } }, - { INDEX_op_st32_i64, { "r", "r" } }, - { INDEX_op_st_i64, { "r", "r" } }, - - { INDEX_op_add_i64, { "r", "r", "rT" } }, - { INDEX_op_sub_i64, { "r", "rI", "rT" } }, - { INDEX_op_and_i64, { "r", "r", "ri" } }, - { INDEX_op_or_i64, { "r", "r", "rU" } }, - { INDEX_op_xor_i64, { "r", "r", "rU" } }, - { INDEX_op_andc_i64, { "r", "r", "ri" } }, - { INDEX_op_orc_i64, { "r", "r", "r" } }, - { INDEX_op_eqv_i64, { "r", "r", "r" } }, - { INDEX_op_nand_i64, { "r", "r", "r" } }, - { INDEX_op_nor_i64, { "r", "r", "r" } }, - - { INDEX_op_shl_i64, { "r", "r", 
"ri" } }, - { INDEX_op_shr_i64, { "r", "r", "ri" } }, - { INDEX_op_sar_i64, { "r", "r", "ri" } }, - { INDEX_op_rotl_i64, { "r", "r", "ri" } }, - { INDEX_op_rotr_i64, { "r", "r", "ri" } }, - - { INDEX_op_mul_i64, { "r", "r", "rI" } }, - { INDEX_op_div_i64, { "r", "r", "r" } }, - { INDEX_op_divu_i64, { "r", "r", "r" } }, - - { INDEX_op_neg_i64, { "r", "r" } }, - { INDEX_op_not_i64, { "r", "r" } }, - { INDEX_op_ext8s_i64, { "r", "r" } }, - { INDEX_op_ext16s_i64, { "r", "r" } }, - { INDEX_op_ext32s_i64, { "r", "r" } }, - { INDEX_op_ext_i32_i64, { "r", "r" } }, - { INDEX_op_extu_i32_i64, { "r", "r" } }, - { INDEX_op_bswap16_i64, { "r", "r" } }, - { INDEX_op_bswap32_i64, { "r", "r" } }, - { INDEX_op_bswap64_i64, { "r", "r" } }, - - { INDEX_op_brcond_i64, { "r", "ri" } }, - { INDEX_op_setcond_i64, { "r", "r", "ri" } }, - { INDEX_op_movcond_i64, { "r", "r", "ri", "rZ", "rZ" } }, - - { INDEX_op_deposit_i64, { "r", "0", "rZ" } }, - - { INDEX_op_mulsh_i64, { "r", "r", "r" } }, - { INDEX_op_muluh_i64, { "r", "r", "r" } }, -#endif - -#if TCG_TARGET_REG_BITS == 32 - { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, - { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, -#endif - -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_add2_i64, { "r", "r", "r", "r", "rI", "rZM" } }, - { INDEX_op_sub2_i64, { "r", "r", "rI", "rZM", "r", "r" } }, -#else - { INDEX_op_add2_i32, { "r", "r", "r", "r", "rI", "rZM" } }, - { INDEX_op_sub2_i32, { "r", "r", "rI", "rZM", "r", "r" } }, -#endif - -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_qemu_ld_i32, { "r", "L" } }, - { INDEX_op_qemu_st_i32, { "S", "S" } }, - { INDEX_op_qemu_ld_i64, { "r", "L" } }, - { INDEX_op_qemu_st_i64, { "S", "S" } }, -#elif TARGET_LONG_BITS == 32 - { INDEX_op_qemu_ld_i32, { "r", "L" } }, - { INDEX_op_qemu_st_i32, { "S", "S" } }, - { INDEX_op_qemu_ld_i64, { "L", "L", "L" } }, - { INDEX_op_qemu_st_i64, { "S", "S", "S" } }, -#else - { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, - { INDEX_op_qemu_st_i32, { "S", "S", "S" } }, - { INDEX_op_qemu_ld_i64, { "L", "L", "L", "L" } }, - { INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } }, -#endif - - { -1 }, -}; - -static void tcg_target_init(TCGContext *s) -{ - unsigned long hwcap = qemu_getauxval(AT_HWCAP); - if (hwcap & PPC_FEATURE_ARCH_2_06) { - have_isa_2_06 = true; - } - - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); - tcg_regset_set32(tcg_target_call_clobber_regs, 0, - (1 << TCG_REG_R0) | - (1 << TCG_REG_R2) | - (1 << TCG_REG_R3) | - (1 << TCG_REG_R4) | - (1 << TCG_REG_R5) | - (1 << TCG_REG_R6) | - (1 << TCG_REG_R7) | - (1 << TCG_REG_R8) | - (1 << TCG_REG_R9) | - (1 << TCG_REG_R10) | - (1 << TCG_REG_R11) | - (1 << TCG_REG_R12)); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ -#if defined(_CALL_SYSV) - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */ -#endif -#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64 - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ -#endif - tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */ - if (USE_REG_RA) { - tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return addr */ - } - - tcg_add_target_add_op_defs(ppc_op_defs); -} - -#ifdef __ELF__ -typedef struct { - DebugFrameCIE cie; - DebugFrameFDEHeader fde; - uint8_t fde_def_cfa[4]; - uint8_t 
fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3]; -} DebugFrame; - -/* We're expecting a 2 byte uleb128 encoded value. */ -QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); - -#if TCG_TARGET_REG_BITS == 64 -# define ELF_HOST_MACHINE EM_PPC64 -#else -# define ELF_HOST_MACHINE EM_PPC -#endif - -static DebugFrame debug_frame = { - .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .cie.id = -1, - .cie.version = 1, - .cie.code_align = 1, - .cie.data_align = (-SZR & 0x7f), /* sleb128 -SZR */ - .cie.return_column = 65, - - /* Total FDE size does not include the "len" member. */ - .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), - - .fde_def_cfa = { - 12, TCG_REG_R1, /* DW_CFA_def_cfa r1, ... */ - (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ - (FRAME_SIZE >> 7) - }, - .fde_reg_ofs = { - /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */ - 0x11, 65, (LR_OFFSET / -SZR) & 0x7f, - } -}; - -void tcg_register_jit(void *buf, size_t buf_size) -{ - uint8_t *p = &debug_frame.fde_reg_ofs[3]; - int i; - - for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) { - p[0] = 0x80 + tcg_target_callee_save_regs[i]; - p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR; - } - - debug_frame.fde.func_start = (uintptr_t)buf; - debug_frame.fde.func_len = buf_size; - - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); -} -#endif /* __ELF__ */ - -static size_t dcache_bsize = 16; -static size_t icache_bsize = 16; - -void flush_icache_range(uintptr_t start, uintptr_t stop) -{ - uintptr_t p, start1, stop1; - size_t dsize = dcache_bsize; - size_t isize = icache_bsize; - - start1 = start & ~(dsize - 1); - stop1 = (stop + dsize - 1) & ~(dsize - 1); - for (p = start1; p < stop1; p += dsize) { - asm volatile ("dcbst 0,%0" : : "r"(p) : "memory"); - } - asm volatile ("sync" : : : "memory"); - - start &= start & ~(isize - 1); - stop1 = (stop + isize - 1) & ~(isize - 1); - for (p = start1; p < stop1; p += isize) { - asm volatile ("icbi 0,%0" : : "r"(p) : "memory"); - } - asm volatile ("sync" : : : "memory"); - asm volatile ("isync" : : : "memory"); -} - -#if defined _AIX -#include - -static void __attribute__((constructor)) tcg_cache_init(void) -{ - icache_bsize = _system_configuration.icache_line; - dcache_bsize = _system_configuration.dcache_line; -} - -#elif defined __linux__ -static void __attribute__((constructor)) tcg_cache_init(void) -{ - unsigned long dsize = qemu_getauxval(AT_DCACHEBSIZE); - unsigned long isize = qemu_getauxval(AT_ICACHEBSIZE); - - if (dsize == 0 || isize == 0) { - if (dsize == 0) { - fprintf(stderr, "getauxval AT_DCACHEBSIZE failed\n"); - } - if (isize == 0) { - fprintf(stderr, "getauxval AT_ICACHEBSIZE failed\n"); - } - exit(1); - } - dcache_bsize = dsize; - icache_bsize = isize; -} - -#elif defined __APPLE__ -#include - -static void __attribute__((constructor)) tcg_cache_init(void) -{ - size_t len; - unsigned cacheline; - int name[2] = { CTL_HW, HW_CACHELINE }; - - len = sizeof(cacheline); - if (sysctl(name, 2, &cacheline, &len, NULL, 0)) { - perror("sysctl CTL_HW HW_CACHELINE failed"); - exit(1); - } - dcache_bsize = cacheline; - icache_bsize = cacheline; -} - -#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) -#include - -static void __attribute__((constructor)) tcg_cache_init(void) -{ - size_t len = 4; - unsigned cacheline; - - if (sysctlbyname ("machdep.cacheline_size", &cacheline, &len, NULL, 0)) { - fprintf(stderr, "sysctlbyname machdep.cacheline_size failed: %s\n", - strerror(errno)); - exit(1); - } - 
dcache_bsize = cacheline; - icache_bsize = cacheline; -} -#endif diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c new file mode 100644 index 0000000000..c593344db1 --- /dev/null +++ b/tcg/ppc/tcg-target.inc.c @@ -0,0 +1,2762 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "tcg-be-ldst.h" + +#if defined _CALL_DARWIN || defined __APPLE__ +#define TCG_TARGET_CALL_DARWIN +#endif +#ifdef _CALL_SYSV +# define TCG_TARGET_CALL_ALIGN_ARGS 1 +#endif + +/* For some memory operations, we need a scratch that isn't R0. For the AIX + calling convention, we can re-use the TOC register since we'll be reloading + it at every call. Otherwise R12 will do nicely as neither a call-saved + register nor a parameter register. */ +#ifdef _CALL_AIX +# define TCG_REG_TMP1 TCG_REG_R2 +#else +# define TCG_REG_TMP1 TCG_REG_R12 +#endif + +/* For the 64-bit target, we don't like the 5 insn sequence needed to build + full 64-bit addresses. Better to have a base register to which we can + apply a 32-bit displacement. + + There are generally three items of interest: + (1) helper functions in the main executable, + (2) TranslationBlock data structures, + (3) the return address in the epilogue. + + For user-only, we USE_STATIC_CODE_GEN_BUFFER, so the code_gen_buffer + will be inside the main executable, and thus near enough to make a + pointer to the epilogue be within 2GB of all helper functions. + + For softmmu, we'll let the kernel choose the address of code_gen_buffer, + and odds are it'll be somewhere close to the main malloc arena, and so + a pointer to the epilogue will be within 2GB of the TranslationBlocks. + + For --enable-pie, everything will be kinda near everything else, + somewhere in high memory. + + Thus we choose to keep the return address in a call-saved register. */ +#define TCG_REG_RA TCG_REG_R31 +#define USE_REG_RA (TCG_TARGET_REG_BITS == 64) + +/* Shorthand for size of a pointer. Avoid promotion to unsigned. */ +#define SZP ((int)sizeof(void *)) + +/* Shorthand for size of a register. 
*/ +#define SZR (TCG_TARGET_REG_BITS / 8) + +#define TCG_CT_CONST_S16 0x100 +#define TCG_CT_CONST_U16 0x200 +#define TCG_CT_CONST_S32 0x400 +#define TCG_CT_CONST_U32 0x800 +#define TCG_CT_CONST_ZERO 0x1000 +#define TCG_CT_CONST_MONE 0x2000 + +static tcg_insn_unit *tb_ret_addr; + +#include "elf.h" +static bool have_isa_2_06; +#define HAVE_ISA_2_06 have_isa_2_06 +#define HAVE_ISEL have_isa_2_06 + +#ifndef CONFIG_SOFTMMU +#define TCG_GUEST_BASE_REG 30 +#endif + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "r0", + "r1", + "r2", + "r3", + "r4", + "r5", + "r6", + "r7", + "r8", + "r9", + "r10", + "r11", + "r12", + "r13", + "r14", + "r15", + "r16", + "r17", + "r18", + "r19", + "r20", + "r21", + "r22", + "r23", + "r24", + "r25", + "r26", + "r27", + "r28", + "r29", + "r30", + "r31" +}; +#endif + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_R14, /* call saved registers */ + TCG_REG_R15, + TCG_REG_R16, + TCG_REG_R17, + TCG_REG_R18, + TCG_REG_R19, + TCG_REG_R20, + TCG_REG_R21, + TCG_REG_R22, + TCG_REG_R23, + TCG_REG_R24, + TCG_REG_R25, + TCG_REG_R26, + TCG_REG_R27, + TCG_REG_R28, + TCG_REG_R29, + TCG_REG_R30, + TCG_REG_R31, + TCG_REG_R12, /* call clobbered, non-arguments */ + TCG_REG_R11, + TCG_REG_R2, + TCG_REG_R13, + TCG_REG_R10, /* call clobbered, arguments */ + TCG_REG_R9, + TCG_REG_R8, + TCG_REG_R7, + TCG_REG_R6, + TCG_REG_R5, + TCG_REG_R4, + TCG_REG_R3, +}; + +static const int tcg_target_call_iarg_regs[] = { + TCG_REG_R3, + TCG_REG_R4, + TCG_REG_R5, + TCG_REG_R6, + TCG_REG_R7, + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10 +}; + +static const int tcg_target_call_oarg_regs[] = { + TCG_REG_R3, + TCG_REG_R4 +}; + +static const int tcg_target_callee_save_regs[] = { +#ifdef TCG_TARGET_CALL_DARWIN + TCG_REG_R11, +#endif + TCG_REG_R14, + TCG_REG_R15, + TCG_REG_R16, + TCG_REG_R17, + TCG_REG_R18, + TCG_REG_R19, + TCG_REG_R20, + TCG_REG_R21, + TCG_REG_R22, + TCG_REG_R23, + TCG_REG_R24, + TCG_REG_R25, + TCG_REG_R26, + TCG_REG_R27, /* currently used for the global env */ + TCG_REG_R28, + TCG_REG_R29, + TCG_REG_R30, + TCG_REG_R31 +}; + +static inline bool in_range_b(tcg_target_long target) +{ + return target == sextract64(target, 0, 26); +} + +static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); + assert(in_range_b(disp)); + return disp & 0x3fffffc; +} + +static void reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + *pc = (*pc & ~0x3fffffc) | reloc_pc24_val(pc, target); +} + +static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); + assert(disp == (int16_t) disp); + return disp & 0xfffc; +} + +static void reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + *pc = (*pc & ~0xfffc) | reloc_pc14_val(pc, target); +} + +static inline void tcg_out_b_noaddr(TCGContext *s, int insn) +{ + unsigned retrans = *s->code_ptr & 0x3fffffc; + tcg_out32(s, insn | retrans); +} + +static inline void tcg_out_bc_noaddr(TCGContext *s, int insn) +{ + unsigned retrans = *s->code_ptr & 0xfffc; + tcg_out32(s, insn | retrans); +} + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + tcg_insn_unit *target = (tcg_insn_unit *)value; + + assert(addend == 0); + switch (type) { + case R_PPC_REL14: + reloc_pc14(code_ptr, target); + break; + case R_PPC_REL24: + reloc_pc24(code_ptr, target); + break; + default: + tcg_abort(); + } +} + +/* parse target specific constraints */ +static int 
target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str; + + ct_str = *pct_str; + switch (ct_str[0]) { + case 'A': case 'B': case 'C': case 'D': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A'); + break; + case 'r': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + break; + case 'L': /* qemu_ld constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); +#ifdef CONFIG_SOFTMMU + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); +#endif + break; + case 'S': /* qemu_st constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); +#ifdef CONFIG_SOFTMMU + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6); +#endif + break; + case 'I': + ct->ct |= TCG_CT_CONST_S16; + break; + case 'J': + ct->ct |= TCG_CT_CONST_U16; + break; + case 'M': + ct->ct |= TCG_CT_CONST_MONE; + break; + case 'T': + ct->ct |= TCG_CT_CONST_S32; + break; + case 'U': + ct->ct |= TCG_CT_CONST_U32; + break; + case 'Z': + ct->ct |= TCG_CT_CONST_ZERO; + break; + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + return 0; +} + +/* test if a constant matches the constraint */ +static int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { + return 1; + } + + /* The only 32-bit constraint we use aside from + TCG_CT_CONST is TCG_CT_CONST_S16. */ + if (type == TCG_TYPE_I32) { + val = (int32_t)val; + } + + if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return 1; + } else if ((ct & TCG_CT_CONST_MONE) && val == -1) { + return 1; + } + return 0; +} + +#define OPCD(opc) ((opc)<<26) +#define XO19(opc) (OPCD(19)|((opc)<<1)) +#define MD30(opc) (OPCD(30)|((opc)<<2)) +#define MDS30(opc) (OPCD(30)|((opc)<<1)) +#define XO31(opc) (OPCD(31)|((opc)<<1)) +#define XO58(opc) (OPCD(58)|(opc)) +#define XO62(opc) (OPCD(62)|(opc)) + +#define B OPCD( 18) +#define BC OPCD( 16) +#define LBZ OPCD( 34) +#define LHZ OPCD( 40) +#define LHA OPCD( 42) +#define LWZ OPCD( 32) +#define STB OPCD( 38) +#define STH OPCD( 44) +#define STW OPCD( 36) + +#define STD XO62( 0) +#define STDU XO62( 1) +#define STDX XO31(149) + +#define LD XO58( 0) +#define LDX XO31( 21) +#define LDU XO58( 1) +#define LWA XO58( 2) +#define LWAX XO31(341) + +#define ADDIC OPCD( 12) +#define ADDI OPCD( 14) +#define ADDIS OPCD( 15) +#define ORI OPCD( 24) +#define ORIS OPCD( 25) +#define XORI OPCD( 26) +#define XORIS OPCD( 27) +#define ANDI OPCD( 28) +#define ANDIS OPCD( 29) +#define MULLI OPCD( 7) +#define CMPLI OPCD( 10) +#define CMPI OPCD( 11) +#define SUBFIC OPCD( 8) + +#define LWZU OPCD( 33) +#define STWU OPCD( 37) + +#define RLWIMI OPCD( 20) +#define RLWINM OPCD( 21) +#define RLWNM OPCD( 23) + +#define RLDICL MD30( 0) +#define RLDICR MD30( 1) +#define RLDIMI MD30( 3) +#define RLDCL MDS30( 8) + +#define BCLR XO19( 16) +#define BCCTR XO19(528) +#define CRAND XO19(257) +#define CRANDC XO19(129) +#define CRNAND XO19(225) +#define CROR 
XO19(449) +#define CRNOR XO19( 33) + +#define EXTSB XO31(954) +#define EXTSH XO31(922) +#define EXTSW XO31(986) +#define ADD XO31(266) +#define ADDE XO31(138) +#define ADDME XO31(234) +#define ADDZE XO31(202) +#define ADDC XO31( 10) +#define AND XO31( 28) +#define SUBF XO31( 40) +#define SUBFC XO31( 8) +#define SUBFE XO31(136) +#define SUBFME XO31(232) +#define SUBFZE XO31(200) +#define OR XO31(444) +#define XOR XO31(316) +#define MULLW XO31(235) +#define MULHW XO31( 75) +#define MULHWU XO31( 11) +#define DIVW XO31(491) +#define DIVWU XO31(459) +#define CMP XO31( 0) +#define CMPL XO31( 32) +#define LHBRX XO31(790) +#define LWBRX XO31(534) +#define LDBRX XO31(532) +#define STHBRX XO31(918) +#define STWBRX XO31(662) +#define STDBRX XO31(660) +#define MFSPR XO31(339) +#define MTSPR XO31(467) +#define SRAWI XO31(824) +#define NEG XO31(104) +#define MFCR XO31( 19) +#define MFOCRF (MFCR | (1u << 20)) +#define NOR XO31(124) +#define CNTLZW XO31( 26) +#define CNTLZD XO31( 58) +#define ANDC XO31( 60) +#define ORC XO31(412) +#define EQV XO31(284) +#define NAND XO31(476) +#define ISEL XO31( 15) + +#define MULLD XO31(233) +#define MULHD XO31( 73) +#define MULHDU XO31( 9) +#define DIVD XO31(489) +#define DIVDU XO31(457) + +#define LBZX XO31( 87) +#define LHZX XO31(279) +#define LHAX XO31(343) +#define LWZX XO31( 23) +#define STBX XO31(215) +#define STHX XO31(407) +#define STWX XO31(151) + +#define SPR(a, b) ((((a)<<5)|(b))<<11) +#define LR SPR(8, 0) +#define CTR SPR(9, 0) + +#define SLW XO31( 24) +#define SRW XO31(536) +#define SRAW XO31(792) + +#define SLD XO31( 27) +#define SRD XO31(539) +#define SRAD XO31(794) +#define SRADI XO31(413<<1) + +#define TW XO31( 4) +#define TRAP (TW | TO(31)) + +#define NOP ORI /* ori 0,0,0 */ + +#define RT(r) ((r)<<21) +#define RS(r) ((r)<<21) +#define RA(r) ((r)<<16) +#define RB(r) ((r)<<11) +#define TO(t) ((t)<<21) +#define SH(s) ((s)<<11) +#define MB(b) ((b)<<6) +#define ME(e) ((e)<<1) +#define BO(o) ((o)<<21) +#define MB64(b) ((b)<<5) +#define FXM(b) (1 << (19 - (b))) + +#define LK 1 + +#define TAB(t, a, b) (RT(t) | RA(a) | RB(b)) +#define SAB(s, a, b) (RS(s) | RA(a) | RB(b)) +#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff)) +#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff)) + +#define BF(n) ((n)<<23) +#define BI(n, c) (((c)+((n)*4))<<16) +#define BT(n, c) (((c)+((n)*4))<<21) +#define BA(n, c) (((c)+((n)*4))<<16) +#define BB(n, c) (((c)+((n)*4))<<11) +#define BC_(n, c) (((c)+((n)*4))<<6) + +#define BO_COND_TRUE BO(12) +#define BO_COND_FALSE BO( 4) +#define BO_ALWAYS BO(20) + +enum { + CR_LT, + CR_GT, + CR_EQ, + CR_SO +}; + +static const uint32_t tcg_to_bc[] = { + [TCG_COND_EQ] = BC | BI(7, CR_EQ) | BO_COND_TRUE, + [TCG_COND_NE] = BC | BI(7, CR_EQ) | BO_COND_FALSE, + [TCG_COND_LT] = BC | BI(7, CR_LT) | BO_COND_TRUE, + [TCG_COND_GE] = BC | BI(7, CR_LT) | BO_COND_FALSE, + [TCG_COND_LE] = BC | BI(7, CR_GT) | BO_COND_FALSE, + [TCG_COND_GT] = BC | BI(7, CR_GT) | BO_COND_TRUE, + [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE, + [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE, + [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE, + [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE, +}; + +/* The low bit here is set if the RA and RB fields must be inverted. 
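The BI, BT, BA, BB and BC_ macros above all place the same quantity, a condition-register bit index, into different instruction fields. A one-line sketch of that index, using the CR_* enumerators defined above; cr_bit is an invented name:

/* CR field n occupies bits n*4 .. n*4+3, so the CR7 EQ bit used
   throughout the tcg_to_bc table is 7*4 + CR_EQ = 30. */
static int cr_bit(int field, int cond)
{
    return field * 4 + cond;        /* cond is one of CR_LT, CR_GT, CR_EQ, CR_SO */
}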
*/ +static const uint32_t tcg_to_isel[] = { + [TCG_COND_EQ] = ISEL | BC_(7, CR_EQ), + [TCG_COND_NE] = ISEL | BC_(7, CR_EQ) | 1, + [TCG_COND_LT] = ISEL | BC_(7, CR_LT), + [TCG_COND_GE] = ISEL | BC_(7, CR_LT) | 1, + [TCG_COND_LE] = ISEL | BC_(7, CR_GT) | 1, + [TCG_COND_GT] = ISEL | BC_(7, CR_GT), + [TCG_COND_LTU] = ISEL | BC_(7, CR_LT), + [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1, + [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1, + [TCG_COND_GTU] = ISEL | BC_(7, CR_GT), +}; + +static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, + TCGReg base, tcg_target_long offset); + +static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + if (ret != arg) { + tcg_out32(s, OR | SAB(arg, ret, arg)); + } +} + +static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, + int sh, int mb) +{ + assert(TCG_TARGET_REG_BITS == 64); + sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1); + mb = MB64((mb >> 5) | ((mb << 1) & 0x3f)); + tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb); +} + +static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs, + int sh, int mb, int me) +{ + tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me)); +} + +static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src) +{ + tcg_out_rld(s, RLDICL, dst, src, 0, 32); +} + +static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ + tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c); +} + +static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ + tcg_out_rld(s, RLDICR, dst, src, c, 63 - c); +} + +static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ + tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31); +} + +static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ + tcg_out_rld(s, RLDICL, dst, src, 64 - c, c); +} + +static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg) +{ + if (arg == (int16_t) arg) { + tcg_out32(s, ADDI | TAI(ret, 0, arg)); + } else { + tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16)); + if (arg & 0xffff) { + tcg_out32(s, ORI | SAI(ret, ret, arg)); + } + } +} + +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, + tcg_target_long arg) +{ + tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + if (type == TCG_TYPE_I32 || arg == (int32_t)arg) { + tcg_out_movi32(s, ret, arg); + } else if (arg == (uint32_t)arg && !(arg & 0x8000)) { + tcg_out32(s, ADDI | TAI(ret, 0, arg)); + tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); + } else { + int32_t high; + + if (USE_REG_RA) { + intptr_t diff = arg - (intptr_t)tb_ret_addr; + if (diff == (int32_t)diff) { + tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_RA, diff); + return; + } + } + + high = arg >> 31 >> 1; + tcg_out_movi32(s, ret, high); + if (high) { + tcg_out_shli64(s, ret, ret, 32); + } + if (arg & 0xffff0000) { + tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); + } + if (arg & 0xffff) { + tcg_out32(s, ORI | SAI(ret, ret, arg)); + } + } +} + +static bool mask_operand(uint32_t c, int *mb, int *me) +{ + uint32_t lsb, test; + + /* Accept a bit pattern like: + 0....01....1 + 1....10....0 + 0..01..10..0 + Keep track of the transitions. */ + if (c == 0 || c == -1) { + return false; + } + test = c; + lsb = test & -test; + test += lsb; + if (test & (test - 1)) { + return false; + } + + *me = clz32(lsb); + *mb = test ? 
clz32(test & -test) + 1 : 0; + return true; +} + +static bool mask64_operand(uint64_t c, int *mb, int *me) +{ + uint64_t lsb; + + if (c == 0) { + return false; + } + + lsb = c & -c; + /* Accept 1..10..0. */ + if (c == -lsb) { + *mb = 0; + *me = clz64(lsb); + return true; + } + /* Accept 0..01..1. */ + if (lsb == 1 && (c & (c + 1)) == 0) { + *mb = clz64(c + 1) + 1; + *me = 63; + return true; + } + return false; +} + +static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) +{ + int mb, me; + + if (mask_operand(c, &mb, &me)) { + tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); + } else if ((c & 0xffff) == c) { + tcg_out32(s, ANDI | SAI(src, dst, c)); + return; + } else if ((c & 0xffff0000) == c) { + tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); + return; + } else { + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c); + tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); + } +} + +static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c) +{ + int mb, me; + + assert(TCG_TARGET_REG_BITS == 64); + if (mask64_operand(c, &mb, &me)) { + if (mb == 0) { + tcg_out_rld(s, RLDICR, dst, src, 0, me); + } else { + tcg_out_rld(s, RLDICL, dst, src, 0, mb); + } + } else if ((c & 0xffff) == c) { + tcg_out32(s, ANDI | SAI(src, dst, c)); + return; + } else if ((c & 0xffff0000) == c) { + tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); + return; + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c); + tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); + } +} + +static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c, + int op_lo, int op_hi) +{ + if (c >> 16) { + tcg_out32(s, op_hi | SAI(src, dst, c >> 16)); + src = dst; + } + if (c & 0xffff) { + tcg_out32(s, op_lo | SAI(src, dst, c)); + src = dst; + } +} + +static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) +{ + tcg_out_zori32(s, dst, src, c, ORI, ORIS); +} + +static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) +{ + tcg_out_zori32(s, dst, src, c, XORI, XORIS); +} + +static void tcg_out_b(TCGContext *s, int mask, tcg_insn_unit *target) +{ + ptrdiff_t disp = tcg_pcrel_diff(s, target); + if (in_range_b(disp)) { + tcg_out32(s, B | (disp & 0x3fffffc) | mask); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target); + tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS | mask); + } +} + +static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, + TCGReg base, tcg_target_long offset) +{ + tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; + bool is_store = false; + TCGReg rs = TCG_REG_TMP1; + + switch (opi) { + case LD: case LWA: + align = 3; + /* FALLTHRU */ + default: + if (rt != TCG_REG_R0) { + rs = rt; + break; + } + break; + case STD: + align = 3; + /* FALLTHRU */ + case STB: case STH: case STW: + is_store = true; + break; + } + + /* For unaligned, or very large offsets, use the indexed form. 
*/ + if (offset & align || offset != (int32_t)offset) { + if (rs == base) { + rs = TCG_REG_R0; + } + tcg_debug_assert(!is_store || rs != rt); + tcg_out_movi(s, TCG_TYPE_PTR, rs, orig); + tcg_out32(s, opx | TAB(rt, base, rs)); + return; + } + + l0 = (int16_t)offset; + offset = (offset - l0) >> 16; + l1 = (int16_t)offset; + + if (l1 < 0 && orig >= 0) { + extra = 0x4000; + l1 = (int16_t)(offset - 0x4000); + } + if (l1) { + tcg_out32(s, ADDIS | TAI(rs, base, l1)); + base = rs; + } + if (extra) { + tcg_out32(s, ADDIS | TAI(rs, base, extra)); + base = rs; + } + if (opi != ADDI || base != rt || l0 != 0) { + tcg_out32(s, opi | TAI(rt, base, l0)); + } +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, + TCGReg arg1, intptr_t arg2) +{ + int opi, opx; + + assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + if (type == TCG_TYPE_I32) { + opi = LWZ, opx = LWZX; + } else { + opi = LD, opx = LDX; + } + tcg_out_mem_long(s, opi, opx, ret, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + int opi, opx; + + assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + if (type == TCG_TYPE_I32) { + opi = STW, opx = STWX; + } else { + opi = STD, opx = STDX; + } + tcg_out_mem_long(s, opi, opx, arg, arg1, arg2); +} + +static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, + int const_arg2, int cr, TCGType type) +{ + int imm; + uint32_t op; + + tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + + /* Simplify the comparisons below wrt CMPI. */ + if (type == TCG_TYPE_I32) { + arg2 = (int32_t)arg2; + } + + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_NE: + if (const_arg2) { + if ((int16_t) arg2 == arg2) { + op = CMPI; + imm = 1; + break; + } else if ((uint16_t) arg2 == arg2) { + op = CMPLI; + imm = 1; + break; + } + } + op = CMPL; + imm = 0; + break; + + case TCG_COND_LT: + case TCG_COND_GE: + case TCG_COND_LE: + case TCG_COND_GT: + if (const_arg2) { + if ((int16_t) arg2 == arg2) { + op = CMPI; + imm = 1; + break; + } + } + op = CMP; + imm = 0; + break; + + case TCG_COND_LTU: + case TCG_COND_GEU: + case TCG_COND_LEU: + case TCG_COND_GTU: + if (const_arg2) { + if ((uint16_t) arg2 == arg2) { + op = CMPLI; + imm = 1; + break; + } + } + op = CMPL; + imm = 0; + break; + + default: + tcg_abort(); + } + op |= BF(cr) | ((type == TCG_TYPE_I64) << 21); + + if (imm) { + tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff)); + } else { + if (const_arg2) { + tcg_out_movi(s, type, TCG_REG_R0, arg2); + arg2 = TCG_REG_R0; + } + tcg_out32(s, op | RA(arg1) | RB(arg2)); + } +} + +static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, + TCGReg dst, TCGReg src) +{ + if (type == TCG_TYPE_I32) { + tcg_out32(s, CNTLZW | RS(src) | RA(dst)); + tcg_out_shri32(s, dst, dst, 5); + } else { + tcg_out32(s, CNTLZD | RS(src) | RA(dst)); + tcg_out_shri64(s, dst, dst, 6); + } +} + +static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src) +{ + /* X != 0 implies X + -1 generates a carry. Extra addition + trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C. 
*/ + if (dst != src) { + tcg_out32(s, ADDIC | TAI(dst, src, -1)); + tcg_out32(s, SUBFE | TAB(dst, dst, src)); + } else { + tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); + tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src)); + } +} + +static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2, + bool const_arg2) +{ + if (const_arg2) { + if ((uint32_t)arg2 == arg2) { + tcg_out_xori32(s, TCG_REG_R0, arg1, arg2); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2); + tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0)); + } + } else { + tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2)); + } + return TCG_REG_R0; +} + +static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGArg arg0, TCGArg arg1, TCGArg arg2, + int const_arg2) +{ + int crop, sh; + + assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + + /* Ignore high bits of a potential constant arg2. */ + if (type == TCG_TYPE_I32) { + arg2 = (uint32_t)arg2; + } + + /* Handle common and trivial cases before handling anything else. */ + if (arg2 == 0) { + switch (cond) { + case TCG_COND_EQ: + tcg_out_setcond_eq0(s, type, arg0, arg1); + return; + case TCG_COND_NE: + if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { + tcg_out_ext32u(s, TCG_REG_R0, arg1); + arg1 = TCG_REG_R0; + } + tcg_out_setcond_ne0(s, arg0, arg1); + return; + case TCG_COND_GE: + tcg_out32(s, NOR | SAB(arg1, arg0, arg1)); + arg1 = arg0; + /* FALLTHRU */ + case TCG_COND_LT: + /* Extract the sign bit. */ + if (type == TCG_TYPE_I32) { + tcg_out_shri32(s, arg0, arg1, 31); + } else { + tcg_out_shri64(s, arg0, arg1, 63); + } + return; + default: + break; + } + } + + /* If we have ISEL, we can implement everything with 3 or 4 insns. + All other cases below are also at least 3 insns, so speed up the + code generator by not considering them and always using ISEL. */ + if (HAVE_ISEL) { + int isel, tab; + + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); + + isel = tcg_to_isel[cond]; + + tcg_out_movi(s, type, arg0, 1); + if (isel & 1) { + /* arg0 = (bc ? 0 : 1) */ + tab = TAB(arg0, 0, arg0); + isel &= ~1; + } else { + /* arg0 = (bc ? 1 : 0) */ + tcg_out_movi(s, type, TCG_REG_R0, 0); + tab = TAB(arg0, arg0, TCG_REG_R0); + } + tcg_out32(s, isel | tab); + return; + } + + switch (cond) { + case TCG_COND_EQ: + arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); + tcg_out_setcond_eq0(s, type, arg0, arg1); + return; + + case TCG_COND_NE: + arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); + /* Discard the high bits only once, rather than both inputs. 
*/ + if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { + tcg_out_ext32u(s, TCG_REG_R0, arg1); + arg1 = TCG_REG_R0; + } + tcg_out_setcond_ne0(s, arg0, arg1); + return; + + case TCG_COND_GT: + case TCG_COND_GTU: + sh = 30; + crop = 0; + goto crtest; + + case TCG_COND_LT: + case TCG_COND_LTU: + sh = 29; + crop = 0; + goto crtest; + + case TCG_COND_GE: + case TCG_COND_GEU: + sh = 31; + crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT); + goto crtest; + + case TCG_COND_LE: + case TCG_COND_LEU: + sh = 31; + crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT); + crtest: + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); + if (crop) { + tcg_out32(s, crop); + } + tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); + tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31); + break; + + default: + tcg_abort(); + } +} + +static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l) +{ + if (l->has_value) { + tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value_ptr)); + } else { + tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0); + tcg_out_bc_noaddr(s, bc); + } +} + +static void tcg_out_brcond(TCGContext *s, TCGCond cond, + TCGArg arg1, TCGArg arg2, int const_arg2, + TCGLabel *l, TCGType type) +{ + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); + tcg_out_bc(s, tcg_to_bc[cond], l); +} + +static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1, + TCGArg v2, bool const_c2) +{ + /* If for some reason both inputs are zero, don't produce bad code. */ + if (v1 == 0 && v2 == 0) { + tcg_out_movi(s, type, dest, 0); + return; + } + + tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type); + + if (HAVE_ISEL) { + int isel = tcg_to_isel[cond]; + + /* Swap the V operands if the operation indicates inversion. */ + if (isel & 1) { + int t = v1; + v1 = v2; + v2 = t; + isel &= ~1; + } + /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. 
*/ + if (v2 == 0) { + tcg_out_movi(s, type, TCG_REG_R0, 0); + } + tcg_out32(s, isel | TAB(dest, v1, v2)); + } else { + if (dest == v2) { + cond = tcg_invert_cond(cond); + v2 = v1; + } else if (dest != v1) { + if (v1 == 0) { + tcg_out_movi(s, type, dest, 0); + } else { + tcg_out_mov(s, type, dest, v1); + } + } + /* Branch forward over one insn */ + tcg_out32(s, tcg_to_bc[cond] | 8); + if (v2 == 0) { + tcg_out_movi(s, type, dest, 0); + } else { + tcg_out_mov(s, type, dest, v2); + } + } +} + +static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, + const int *const_args) +{ + static const struct { uint8_t bit1, bit2; } bits[] = { + [TCG_COND_LT ] = { CR_LT, CR_LT }, + [TCG_COND_LE ] = { CR_LT, CR_GT }, + [TCG_COND_GT ] = { CR_GT, CR_GT }, + [TCG_COND_GE ] = { CR_GT, CR_LT }, + [TCG_COND_LTU] = { CR_LT, CR_LT }, + [TCG_COND_LEU] = { CR_LT, CR_GT }, + [TCG_COND_GTU] = { CR_GT, CR_GT }, + [TCG_COND_GEU] = { CR_GT, CR_LT }, + }; + + TCGCond cond = args[4], cond2; + TCGArg al, ah, bl, bh; + int blconst, bhconst; + int op, bit1, bit2; + + al = args[0]; + ah = args[1]; + bl = args[2]; + bh = args[3]; + blconst = const_args[2]; + bhconst = const_args[3]; + + switch (cond) { + case TCG_COND_EQ: + op = CRAND; + goto do_equality; + case TCG_COND_NE: + op = CRNAND; + do_equality: + tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32); + tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32); + tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); + break; + + case TCG_COND_LT: + case TCG_COND_LE: + case TCG_COND_GT: + case TCG_COND_GE: + case TCG_COND_LTU: + case TCG_COND_LEU: + case TCG_COND_GTU: + case TCG_COND_GEU: + bit1 = bits[cond].bit1; + bit2 = bits[cond].bit2; + op = (bit1 != bit2 ? CRANDC : CRAND); + cond2 = tcg_unsigned_cond(cond); + + tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32); + tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32); + tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2)); + tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ)); + break; + + default: + tcg_abort(); + } +} + +static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, + const int *const_args) +{ + tcg_out_cmp2(s, args + 1, const_args + 1); + tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); + tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31); +} + +static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args, + const int *const_args) +{ + tcg_out_cmp2(s, args, const_args); + tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5])); +} + +void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) +{ + tcg_insn_unit i1, i2; + uint64_t pair; + intptr_t diff = addr - jmp_addr; + + if (in_range_b(diff)) { + i1 = B | (diff & 0x3fffffc); + i2 = NOP; + } else if (USE_REG_RA) { + intptr_t lo, hi; + diff = addr - (uintptr_t)tb_ret_addr; + lo = (int16_t)diff; + hi = (int32_t)(diff - lo); + assert(diff == hi + lo); + i1 = ADDIS | TAI(TCG_REG_TMP1, TCG_REG_RA, hi >> 16); + i2 = ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, lo); + } else { + assert(TCG_TARGET_REG_BITS == 32 || addr == (int32_t)addr); + i1 = ADDIS | TAI(TCG_REG_TMP1, 0, addr >> 16); + i2 = ORI | SAI(TCG_REG_TMP1, TCG_REG_TMP1, addr); + } +#ifdef HOST_WORDS_BIGENDIAN + pair = (uint64_t)i1 << 32 | i2; +#else + pair = (uint64_t)i2 << 32 | i1; +#endif + + /* ??? __atomic_store_8, presuming there's some way to do that + for 32-bit, otherwise this is good enough for 64-bit. 
*/ + *(uint64_t *)jmp_addr = pair; + flush_icache_range(jmp_addr, jmp_addr + 8); +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *target) +{ +#ifdef _CALL_AIX + /* Look through the descriptor. If the branch is in range, and we + don't have to spend too much effort on building the toc. */ + void *tgt = ((void **)target)[0]; + uintptr_t toc = ((uintptr_t *)target)[1]; + intptr_t diff = tcg_pcrel_diff(s, tgt); + + if (in_range_b(diff) && toc == (uint32_t)toc) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc); + tcg_out_b(s, LK, tgt); + } else { + /* Fold the low bits of the constant into the addresses below. */ + intptr_t arg = (intptr_t)target; + int ofs = (int16_t)arg; + + if (ofs + 8 < 0x8000) { + arg -= ofs; + } else { + ofs = 0; + } + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs); + tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP); + tcg_out32(s, BCCTR | BO_ALWAYS | LK); + } +#elif defined(_CALL_ELF) && _CALL_ELF == 2 + intptr_t diff; + + /* In the ELFv2 ABI, we have to set up r12 to contain the destination + address, which the callee uses to compute its TOC address. */ + /* FIXME: when the branch is in range, we could avoid r12 load if we + knew that the destination uses the same TOC, and what its local + entry point offset is. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target); + + diff = tcg_pcrel_diff(s, target); + if (in_range_b(diff)) { + tcg_out_b(s, LK, target); + } else { + tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS | LK); + } +#else + tcg_out_b(s, LK, target); +#endif +} + +static const uint32_t qemu_ldx_opc[16] = { + [MO_UB] = LBZX, + [MO_UW] = LHZX, + [MO_UL] = LWZX, + [MO_Q] = LDX, + [MO_SW] = LHAX, + [MO_SL] = LWAX, + [MO_BSWAP | MO_UB] = LBZX, + [MO_BSWAP | MO_UW] = LHBRX, + [MO_BSWAP | MO_UL] = LWBRX, + [MO_BSWAP | MO_Q] = LDBRX, +}; + +static const uint32_t qemu_stx_opc[16] = { + [MO_UB] = STBX, + [MO_UW] = STHX, + [MO_UL] = STWX, + [MO_Q] = STDX, + [MO_BSWAP | MO_UB] = STBX, + [MO_BSWAP | MO_UW] = STHBRX, + [MO_BSWAP | MO_UL] = STWBRX, + [MO_BSWAP | MO_Q] = STDBRX, +}; + +static const uint32_t qemu_exts_opc[4] = { + EXTSB, EXTSH, EXTSW, 0 +}; + +#if defined (CONFIG_SOFTMMU) +/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, + * int mmu_idx, uintptr_t ra) + */ +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, +}; + +/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, + * uintxx_t val, int mmu_idx, uintptr_t ra) + */ +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, +}; + +/* Perform the TLB load and compare. Places the result of the comparison + in CR7, loads the addend of the TLB into R3, and returns the register + containing the guest address (zero-extended into R4). Clobbers R0 and R2. */ + +static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc, + TCGReg addrlo, TCGReg addrhi, + int mem_index, bool is_read) +{ + int cmp_off + = (is_read + ? 
offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + TCGReg base = TCG_AREG0; + TCGMemOp s_bits = opc & MO_SIZE; + + /* Extract the page index, shifted into place for tlb index. */ + if (TCG_TARGET_REG_BITS == 64) { + if (TARGET_LONG_BITS == 32) { + /* Zero-extend the address into a place helpful for further use. */ + tcg_out_ext32u(s, TCG_REG_R4, addrlo); + addrlo = TCG_REG_R4; + } else { + tcg_out_rld(s, RLDICL, TCG_REG_R3, addrlo, + 64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS); + } + } + + /* Compensate for very large offsets. */ + if (add_off >= 0x8000) { + /* Most target env are smaller than 32k; none are larger than 64k. + Simplify the logic here merely to offset by 0x7ff0, giving us a + range just shy of 64k. Check this assumption. */ + QEMU_BUILD_BUG_ON(offsetof(CPUArchState, + tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ff0 + 0x7fff); + tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, base, 0x7ff0)); + base = TCG_REG_TMP1; + cmp_off -= 0x7ff0; + add_off -= 0x7ff0; + } + + /* Extraction and shifting, part 2. */ + if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) { + tcg_out_rlw(s, RLWINM, TCG_REG_R3, addrlo, + 32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), + 32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS), + 31 - CPU_TLB_ENTRY_BITS); + } else { + tcg_out_shli64(s, TCG_REG_R3, TCG_REG_R3, CPU_TLB_ENTRY_BITS); + } + + tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, base)); + + /* Load the tlb comparator. */ + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off); + tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4); + } else { + tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off); + } + + /* Load the TLB addend for use on the fast path. Do this asap + to minimize any load use delay. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off); + + /* Clear the non-page, non-alignment bits from the address */ + if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) { + /* We don't support unaligned accesses on 32-bits, preserve + * the bottom bits and thus trigger a comparison failure on + * unaligned accesses + */ + tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, + (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS); + } else if (s_bits) { + /* > byte access, we need to handle alignment */ + if ((opc & MO_AMASK) == MO_ALIGN) { + /* Alignment required by the front-end, same as 32-bits */ + tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo, + 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits); + tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); + } else { + /* We support unaligned accesses, we need to make sure we fail + * if we cross a page boundary. The trick is to add the + * access_size-1 to the address before masking the low bits. + * That will make the address overflow to the next page if we + * cross a page boundary which will then force a mismatch of + * the TLB compare since the next page cannot possibly be in + * the same TLB index. 
+ */ + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, (1 << s_bits) - 1)); + tcg_out_rld(s, RLDICR, TCG_REG_R0, TCG_REG_R0, + 0, 63 - TARGET_PAGE_BITS); + } + } else { + /* Byte access, just chop off the bits below the page index */ + tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, 0, 63 - TARGET_PAGE_BITS); + } + + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, + 0, 7, TCG_TYPE_I32); + tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32); + tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); + } else { + tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, + 0, 7, TCG_TYPE_TL); + } + + return addrlo; +} + +/* Record the context of a call to the out of line helper code for the slow + path for a load or store, so that we can later generate the correct + helper code. */ +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, + TCGReg datalo_reg, TCGReg datahi_reg, + TCGReg addrlo_reg, TCGReg addrhi_reg, + tcg_insn_unit *raddr, tcg_insn_unit *lptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->oi = oi; + label->datalo_reg = datalo_reg; + label->datahi_reg = datahi_reg; + label->addrlo_reg = addrlo_reg; + label->addrhi_reg = addrhi_reg; + label->raddr = raddr; + label->label_ptr[0] = lptr; +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGMemOpIdx oi = lb->oi; + TCGMemOp opc = get_memop(oi); + TCGReg hi, lo, arg = TCG_REG_R3; + + reloc_pc14(lb->label_ptr[0], s->code_ptr); + + tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); + + lo = lb->addrlo_reg; + hi = lb->addrhi_reg; + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { +#ifdef TCG_TARGET_CALL_ALIGN_ARGS + arg |= 1; +#endif + tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); + tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); + } else { + /* If the address needed to be zero-extended, we'll have already + placed it in R4. The only remaining case is 64-bit guest. */ + tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); + } + + tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); + tcg_out32(s, MFSPR | RT(arg) | LR); + + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); + + lo = lb->datalo_reg; + hi = lb->datahi_reg; + if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { + tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4); + tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3); + } else if (opc & MO_SIGN) { + uint32_t insn = qemu_exts_opc[opc & MO_SIZE]; + tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3)); + } else { + tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3); + } + + tcg_out_b(s, 0, lb->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGMemOpIdx oi = lb->oi; + TCGMemOp opc = get_memop(oi); + TCGMemOp s_bits = opc & MO_SIZE; + TCGReg hi, lo, arg = TCG_REG_R3; + + reloc_pc14(lb->label_ptr[0], s->code_ptr); + + tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); + + lo = lb->addrlo_reg; + hi = lb->addrhi_reg; + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { +#ifdef TCG_TARGET_CALL_ALIGN_ARGS + arg |= 1; +#endif + tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); + tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); + } else { + /* If the address needed to be zero-extended, we'll have already + placed it in R4. The only remaining case is 64-bit guest. 
*/ + tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); + } + + lo = lb->datalo_reg; + hi = lb->datahi_reg; + if (TCG_TARGET_REG_BITS == 32) { + switch (s_bits) { + case MO_64: +#ifdef TCG_TARGET_CALL_ALIGN_ARGS + arg |= 1; +#endif + tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); + /* FALLTHRU */ + case MO_32: + tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); + break; + default: + tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31); + break; + } + } else { + if (s_bits == MO_64) { + tcg_out_mov(s, TCG_TYPE_I64, arg++, lo); + } else { + tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits)); + } + } + + tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); + tcg_out32(s, MFSPR | RT(arg) | LR); + + tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + + tcg_out_b(s, 0, lb->raddr); +} +#endif /* SOFTMMU */ + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) +{ + TCGReg datalo, datahi, addrlo, rbase; + TCGReg addrhi __attribute__((unused)); + TCGMemOpIdx oi; + TCGMemOp opc, s_bits; +#ifdef CONFIG_SOFTMMU + int mem_index; + tcg_insn_unit *label_ptr; +#endif + + datalo = *args++; + datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); + oi = *args++; + opc = get_memop(oi); + s_bits = opc & MO_SIZE; + +#ifdef CONFIG_SOFTMMU + mem_index = get_mmuidx(oi); + addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true); + + /* Load a pointer into the current opcode w/conditional branch-link. */ + label_ptr = s->code_ptr; + tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); + + rbase = TCG_REG_R3; +#else /* !CONFIG_SOFTMMU */ + rbase = guest_base ? TCG_GUEST_BASE_REG : 0; + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); + addrlo = TCG_REG_TMP1; + } +#endif + + if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { + if (opc & MO_BSWAP) { + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); + tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); + tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0)); + } else if (rbase != 0) { + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); + tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo)); + tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0)); + } else if (addrlo == datahi) { + tcg_out32(s, LWZ | TAI(datalo, addrlo, 4)); + tcg_out32(s, LWZ | TAI(datahi, addrlo, 0)); + } else { + tcg_out32(s, LWZ | TAI(datahi, addrlo, 0)); + tcg_out32(s, LWZ | TAI(datalo, addrlo, 4)); + } + } else { + uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; + if (!HAVE_ISA_2_06 && insn == LDBRX) { + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); + tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); + tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0)); + tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0); + } else if (insn) { + tcg_out32(s, insn | TAB(datalo, rbase, addrlo)); + } else { + insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)]; + tcg_out32(s, insn | TAB(datalo, rbase, addrlo)); + insn = qemu_exts_opc[s_bits]; + tcg_out32(s, insn | RA(datalo) | RS(datalo)); + } + } + +#ifdef CONFIG_SOFTMMU + add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, + s->code_ptr, label_ptr); +#endif +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) +{ + TCGReg datalo, datahi, addrlo, rbase; + TCGReg addrhi __attribute__((unused)); + TCGMemOpIdx oi; + TCGMemOp opc, s_bits; +#ifdef CONFIG_SOFTMMU + int mem_index; + tcg_insn_unit *label_ptr; +#endif + + datalo = *args++; + datahi = 
(TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); + oi = *args++; + opc = get_memop(oi); + s_bits = opc & MO_SIZE; + +#ifdef CONFIG_SOFTMMU + mem_index = get_mmuidx(oi); + addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false); + + /* Load a pointer into the current opcode w/conditional branch-link. */ + label_ptr = s->code_ptr; + tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); + + rbase = TCG_REG_R3; +#else /* !CONFIG_SOFTMMU */ + rbase = guest_base ? TCG_GUEST_BASE_REG : 0; + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); + addrlo = TCG_REG_TMP1; + } +#endif + + if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { + if (opc & MO_BSWAP) { + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); + tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); + tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0)); + } else if (rbase != 0) { + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); + tcg_out32(s, STWX | SAB(datahi, rbase, addrlo)); + tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0)); + } else { + tcg_out32(s, STW | TAI(datahi, addrlo, 0)); + tcg_out32(s, STW | TAI(datalo, addrlo, 4)); + } + } else { + uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; + if (!HAVE_ISA_2_06 && insn == STDBRX) { + tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); + tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4)); + tcg_out_shri64(s, TCG_REG_R0, datalo, 32); + tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1)); + } else { + tcg_out32(s, insn | SAB(datalo, rbase, addrlo)); + } + } + +#ifdef CONFIG_SOFTMMU + add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, + s->code_ptr, label_ptr); +#endif +} + +/* Parameters for function call generation, used in tcg.c. */ +#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_TARGET_EXTEND_ARGS 1 + +#ifdef _CALL_AIX +# define LINK_AREA_SIZE (6 * SZR) +# define LR_OFFSET (1 * SZR) +# define TCG_TARGET_CALL_STACK_OFFSET (LINK_AREA_SIZE + 8 * SZR) +#elif defined(TCG_TARGET_CALL_DARWIN) +# define LINK_AREA_SIZE (6 * SZR) +# define LR_OFFSET (2 * SZR) +#elif TCG_TARGET_REG_BITS == 64 +# if defined(_CALL_ELF) && _CALL_ELF == 2 +# define LINK_AREA_SIZE (4 * SZR) +# define LR_OFFSET (1 * SZR) +# endif +#else /* TCG_TARGET_REG_BITS == 32 */ +# if defined(_CALL_SYSV) +# define LINK_AREA_SIZE (2 * SZR) +# define LR_OFFSET (1 * SZR) +# endif +#endif +#ifndef LR_OFFSET +# error "Unhandled abi" +#endif +#ifndef TCG_TARGET_CALL_STACK_OFFSET +# define TCG_TARGET_CALL_STACK_OFFSET LINK_AREA_SIZE +#endif + +#define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) +#define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) + +#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_SIZE \ + + REG_SAVE_SIZE \ + + TCG_TARGET_STACK_ALIGN - 1) \ + & -TCG_TARGET_STACK_ALIGN) + +#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) + +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int i; + +#ifdef _CALL_AIX + void **desc = (void **)s->code_ptr; + desc[0] = desc + 2; /* entry point */ + desc[1] = 0; /* environment pointer */ + s->code_ptr = (void *)(desc + 2); /* skip over descriptor */ +#endif + + tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE, + CPU_TEMP_BUF_SIZE); + + /* Prologue */ + tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); + tcg_out32(s, (SZR == 8 ? 
STDU : STWU) + | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE)); + + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { + tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], + TCG_REG_R1, REG_SAVE_BOT + i * SZR); + } + tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); + +#ifndef CONFIG_SOFTMMU + if (guest_base) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); + } +#endif + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR); + + if (USE_REG_RA) { +#ifdef _CALL_AIX + /* Make the caller load the value as the TOC into R2. */ + tb_ret_addr = s->code_ptr + 2; + desc[1] = tb_ret_addr; + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2); + tcg_out32(s, BCCTR | BO_ALWAYS); +#elif defined(_CALL_ELF) && _CALL_ELF == 2 + /* Compute from the incoming R12 value. */ + tb_ret_addr = s->code_ptr + 2; + tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12, + tcg_ptr_byte_diff(tb_ret_addr, s->code_buf))); + tcg_out32(s, BCCTR | BO_ALWAYS); +#else + /* Reserve max 5 insns for the constant load. */ + tb_ret_addr = s->code_ptr + 6; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr); + tcg_out32(s, BCCTR | BO_ALWAYS); + while (s->code_ptr < tb_ret_addr) { + tcg_out32(s, NOP); + } +#endif + } else { + tcg_out32(s, BCCTR | BO_ALWAYS); + tb_ret_addr = s->code_ptr; + } + + /* Epilogue */ + assert(tb_ret_addr == s->code_ptr); + + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { + tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], + TCG_REG_R1, REG_SAVE_BOT + i * SZR); + } + tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); + tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE)); + tcg_out32(s, BCLR | BO_ALWAYS); +} + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, + const int *const_args) +{ + TCGArg a0, a1, a2; + int c; + + switch (opc) { + case INDEX_op_exit_tb: + if (USE_REG_RA) { + ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr); + + /* Use a direct branch if we can, otherwise use the value in RA. + Note that the direct branch is always backward, thus we need + to account for the possibility of 5 insns from the movi. */ + if (!in_range_b(disp - 20)) { + tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); + tcg_out32(s, BCCTR | BO_ALWAYS); + break; + } + } + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); + tcg_out_b(s, 0, tb_ret_addr); + break; + case INDEX_op_goto_tb: + tcg_debug_assert(s->tb_jmp_offset); + /* Direct jump. Ensure the next insns are 8-byte aligned. */ + if ((uintptr_t)s->code_ptr & 7) { + tcg_out32(s, NOP); + } + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + /* To be replaced by either a branch+nop or a load into TMP1. 
*/ + s->code_ptr += 2; + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); + s->tb_next_offset[args[0]] = tcg_current_code_size(s); + break; + case INDEX_op_br: + { + TCGLabel *l = arg_label(args[0]); + + if (l->has_value) { + tcg_out_b(s, 0, l->u.value_ptr); + } else { + tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); + tcg_out_b_noaddr(s, B); + } + } + break; + case INDEX_op_ld8u_i32: + case INDEX_op_ld8u_i64: + tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); + break; + case INDEX_op_ld8s_i32: + case INDEX_op_ld8s_i64: + tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); + tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0])); + break; + case INDEX_op_ld16u_i32: + case INDEX_op_ld16u_i64: + tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); + break; + case INDEX_op_ld16s_i32: + case INDEX_op_ld16s_i64: + tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); + break; + case INDEX_op_ld_i32: + case INDEX_op_ld32u_i64: + tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); + break; + case INDEX_op_ld32s_i64: + tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); + break; + case INDEX_op_ld_i64: + tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); + break; + case INDEX_op_st8_i32: + case INDEX_op_st8_i64: + tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); + break; + case INDEX_op_st16_i32: + case INDEX_op_st16_i64: + tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); + break; + case INDEX_op_st_i32: + case INDEX_op_st32_i64: + tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); + break; + case INDEX_op_st_i64: + tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); + break; + + case INDEX_op_add_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + do_addi_32: + tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); + } else { + tcg_out32(s, ADD | TAB(a0, a1, a2)); + } + break; + case INDEX_op_sub_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[1]) { + if (const_args[2]) { + tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2); + } else { + tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); + } + } else if (const_args[2]) { + a2 = -a2; + goto do_addi_32; + } else { + tcg_out32(s, SUBF | TAB(a0, a2, a1)); + } + break; + + case INDEX_op_and_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_andi32(s, a0, a1, a2); + } else { + tcg_out32(s, AND | SAB(a1, a0, a2)); + } + break; + case INDEX_op_and_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_andi64(s, a0, a1, a2); + } else { + tcg_out32(s, AND | SAB(a1, a0, a2)); + } + break; + case INDEX_op_or_i64: + case INDEX_op_or_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_ori32(s, a0, a1, a2); + } else { + tcg_out32(s, OR | SAB(a1, a0, a2)); + } + break; + case INDEX_op_xor_i64: + case INDEX_op_xor_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_xori32(s, a0, a1, a2); + } else { + tcg_out32(s, XOR | SAB(a1, a0, a2)); + } + break; + case INDEX_op_andc_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_andi32(s, a0, a1, ~a2); + } else { + tcg_out32(s, ANDC | SAB(a1, a0, a2)); + } + break; + case INDEX_op_andc_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_andi64(s, a0, a1, ~a2); + } else { + tcg_out32(s, ANDC | SAB(a1, a0, a2)); + } + break; + case INDEX_op_orc_i32: + if (const_args[2]) { + tcg_out_ori32(s, args[0], args[1], ~args[2]); + 
break; + } + /* FALLTHRU */ + case INDEX_op_orc_i64: + tcg_out32(s, ORC | SAB(args[1], args[0], args[2])); + break; + case INDEX_op_eqv_i32: + if (const_args[2]) { + tcg_out_xori32(s, args[0], args[1], ~args[2]); + break; + } + /* FALLTHRU */ + case INDEX_op_eqv_i64: + tcg_out32(s, EQV | SAB(args[1], args[0], args[2])); + break; + case INDEX_op_nand_i32: + case INDEX_op_nand_i64: + tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); + break; + case INDEX_op_nor_i32: + case INDEX_op_nor_i64: + tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); + break; + + case INDEX_op_mul_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out32(s, MULLI | TAI(a0, a1, a2)); + } else { + tcg_out32(s, MULLW | TAB(a0, a1, a2)); + } + break; + + case INDEX_op_div_i32: + tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); + break; + + case INDEX_op_divu_i32: + tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); + break; + + case INDEX_op_shl_i32: + if (const_args[2]) { + tcg_out_shli32(s, args[0], args[1], args[2]); + } else { + tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); + } + break; + case INDEX_op_shr_i32: + if (const_args[2]) { + tcg_out_shri32(s, args[0], args[1], args[2]); + } else { + tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); + } + break; + case INDEX_op_sar_i32: + if (const_args[2]) { + tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2])); + } else { + tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); + } + break; + case INDEX_op_rotl_i32: + if (const_args[2]) { + tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); + } else { + tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) + | MB(0) | ME(31)); + } + break; + case INDEX_op_rotr_i32: + if (const_args[2]) { + tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); + } else { + tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); + tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) + | MB(0) | ME(31)); + } + break; + + case INDEX_op_brcond_i32: + tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], + arg_label(args[3]), TCG_TYPE_I32); + break; + case INDEX_op_brcond_i64: + tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], + arg_label(args[3]), TCG_TYPE_I64); + break; + case INDEX_op_brcond2_i32: + tcg_out_brcond2(s, args, const_args); + break; + + case INDEX_op_neg_i32: + case INDEX_op_neg_i64: + tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); + break; + + case INDEX_op_not_i32: + case INDEX_op_not_i64: + tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); + break; + + case INDEX_op_add_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + do_addi_64: + tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); + } else { + tcg_out32(s, ADD | TAB(a0, a1, a2)); + } + break; + case INDEX_op_sub_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[1]) { + if (const_args[2]) { + tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2); + } else { + tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); + } + } else if (const_args[2]) { + a2 = -a2; + goto do_addi_64; + } else { + tcg_out32(s, SUBF | TAB(a0, a2, a1)); + } + break; + + case INDEX_op_shl_i64: + if (const_args[2]) { + tcg_out_shli64(s, args[0], args[1], args[2]); + } else { + tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); + } + break; + case INDEX_op_shr_i64: + if (const_args[2]) { + tcg_out_shri64(s, args[0], args[1], args[2]); + } else { + tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); + } + break; + case INDEX_op_sar_i64: + if (const_args[2]) { + int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) 
<< 1); + tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh); + } else { + tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); + } + break; + case INDEX_op_rotl_i64: + if (const_args[2]) { + tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); + } else { + tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0)); + } + break; + case INDEX_op_rotr_i64: + if (const_args[2]) { + tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); + } else { + tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); + tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); + } + break; + + case INDEX_op_mul_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out32(s, MULLI | TAI(a0, a1, a2)); + } else { + tcg_out32(s, MULLD | TAB(a0, a1, a2)); + } + break; + case INDEX_op_div_i64: + tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); + break; + case INDEX_op_divu_i64: + tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); + break; + + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, args, false); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args, true); + break; + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, args, false); + break; + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args, true); + break; + + case INDEX_op_ext8s_i32: + case INDEX_op_ext8s_i64: + c = EXTSB; + goto gen_ext; + case INDEX_op_ext16s_i32: + case INDEX_op_ext16s_i64: + c = EXTSH; + goto gen_ext; + case INDEX_op_ext_i32_i64: + case INDEX_op_ext32s_i64: + c = EXTSW; + goto gen_ext; + gen_ext: + tcg_out32(s, c | RS(args[1]) | RA(args[0])); + break; + case INDEX_op_extu_i32_i64: + tcg_out_ext32u(s, args[0], args[1]); + break; + + case INDEX_op_setcond_i32: + tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], + const_args[2]); + break; + case INDEX_op_setcond_i64: + tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], + const_args[2]); + break; + case INDEX_op_setcond2_i32: + tcg_out_setcond2(s, args, const_args); + break; + + case INDEX_op_bswap16_i32: + case INDEX_op_bswap16_i64: + a0 = args[0], a1 = args[1]; + /* a1 = abcd */ + if (a0 != a1) { + /* a0 = (a1 r<< 24) & 0xff # 000c */ + tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); + /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */ + tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23); + } else { + /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */ + tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23); + /* a0 = (a1 r<< 24) & 0xff # 000c */ + tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); + /* a0 = a0 | r0 # 00dc */ + tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0)); + } + break; + + case INDEX_op_bswap32_i32: + case INDEX_op_bswap32_i64: + /* Stolen from gcc's builtin_bswap32 */ + a1 = args[1]; + a0 = args[0] == a1 ? 
TCG_REG_R0 : args[0]; + + /* a1 = args[1] # abcd */ + /* a0 = rotate_left (a1, 8) # bcda */ + tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); + /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */ + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); + /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */ + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); + + if (a0 == TCG_REG_R0) { + tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); + } + break; + + case INDEX_op_bswap64_i64: + a0 = args[0], a1 = args[1], a2 = TCG_REG_R0; + if (a0 == a1) { + a0 = TCG_REG_R0; + a2 = a1; + } + + /* a1 = # abcd efgh */ + /* a0 = rl32(a1, 8) # 0000 fghe */ + tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); + /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */ + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); + /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */ + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); + + /* a0 = rl64(a0, 32) # hgfe 0000 */ + /* a2 = rl64(a1, 32) # efgh abcd */ + tcg_out_rld(s, RLDICL, a0, a0, 32, 0); + tcg_out_rld(s, RLDICL, a2, a1, 32, 0); + + /* a0 = dep(a0, rl32(a2, 8), 0xffffffff) # hgfe bcda */ + tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31); + /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */ + tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7); + /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */ + tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23); + + if (a0 == 0) { + tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); + } + break; + + case INDEX_op_deposit_i32: + if (const_args[2]) { + uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; + tcg_out_andi32(s, args[0], args[0], ~mask); + } else { + tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], + 32 - args[3] - args[4], 31 - args[3]); + } + break; + case INDEX_op_deposit_i64: + if (const_args[2]) { + uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; + tcg_out_andi64(s, args[0], args[0], ~mask); + } else { + tcg_out_rld(s, RLDIMI, args[0], args[2], args[3], + 64 - args[3] - args[4]); + } + break; + + case INDEX_op_movcond_i32: + tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], + args[3], args[4], const_args[2]); + break; + case INDEX_op_movcond_i64: + tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], + args[3], args[4], const_args[2]); + break; + +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_add2_i64: +#else + case INDEX_op_add2_i32: +#endif + /* Note that the CA bit is defined based on the word size of the + environment. So in 64-bit mode it's always carry-out of bit 63. + The fallback code using deposit works just as well for 32-bit. */ + a0 = args[0], a1 = args[1]; + if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { + a0 = TCG_REG_R0; + } + if (const_args[4]) { + tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); + } else { + tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); + } + if (const_args[5]) { + tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3])); + } else { + tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); + } + if (a0 != args[0]) { + tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); + } + break; + +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_sub2_i64: +#else + case INDEX_op_sub2_i32: +#endif + a0 = args[0], a1 = args[1]; + if (a0 == args[5] || (!const_args[3] && a0 == args[3])) { + a0 = TCG_REG_R0; + } + if (const_args[2]) { + tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2])); + } else { + tcg_out32(s, SUBFC | TAB(a0, args[4], args[2])); + } + if (const_args[3]) { + tcg_out32(s, (args[3] ? 
SUBFME : SUBFZE) | RT(a1) | RA(args[5])); + } else { + tcg_out32(s, SUBFE | TAB(a1, args[5], args[3])); + } + if (a0 != args[0]) { + tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); + } + break; + + case INDEX_op_muluh_i32: + tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2])); + break; + case INDEX_op_mulsh_i32: + tcg_out32(s, MULHW | TAB(args[0], args[1], args[2])); + break; + case INDEX_op_muluh_i64: + tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); + break; + case INDEX_op_mulsh_i64: + tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + default: + tcg_abort(); + } +} + +static const TCGTargetOpDef ppc_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_br, { } }, + + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + + { INDEX_op_st8_i32, { "r", "r" } }, + { INDEX_op_st16_i32, { "r", "r" } }, + { INDEX_op_st_i32, { "r", "r" } }, + + { INDEX_op_add_i32, { "r", "r", "ri" } }, + { INDEX_op_mul_i32, { "r", "r", "rI" } }, + { INDEX_op_div_i32, { "r", "r", "r" } }, + { INDEX_op_divu_i32, { "r", "r", "r" } }, + { INDEX_op_sub_i32, { "r", "rI", "ri" } }, + { INDEX_op_and_i32, { "r", "r", "ri" } }, + { INDEX_op_or_i32, { "r", "r", "ri" } }, + { INDEX_op_xor_i32, { "r", "r", "ri" } }, + { INDEX_op_andc_i32, { "r", "r", "ri" } }, + { INDEX_op_orc_i32, { "r", "r", "ri" } }, + { INDEX_op_eqv_i32, { "r", "r", "ri" } }, + { INDEX_op_nand_i32, { "r", "r", "r" } }, + { INDEX_op_nor_i32, { "r", "r", "r" } }, + + { INDEX_op_shl_i32, { "r", "r", "ri" } }, + { INDEX_op_shr_i32, { "r", "r", "ri" } }, + { INDEX_op_sar_i32, { "r", "r", "ri" } }, + { INDEX_op_rotl_i32, { "r", "r", "ri" } }, + { INDEX_op_rotr_i32, { "r", "r", "ri" } }, + + { INDEX_op_neg_i32, { "r", "r" } }, + { INDEX_op_not_i32, { "r", "r" } }, + { INDEX_op_ext8s_i32, { "r", "r" } }, + { INDEX_op_ext16s_i32, { "r", "r" } }, + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, + + { INDEX_op_brcond_i32, { "r", "ri" } }, + { INDEX_op_setcond_i32, { "r", "r", "ri" } }, + { INDEX_op_movcond_i32, { "r", "r", "ri", "rZ", "rZ" } }, + + { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, + + { INDEX_op_muluh_i32, { "r", "r", "r" } }, + { INDEX_op_mulsh_i32, { "r", "r", "r" } }, + +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_ld8u_i64, { "r", "r" } }, + { INDEX_op_ld8s_i64, { "r", "r" } }, + { INDEX_op_ld16u_i64, { "r", "r" } }, + { INDEX_op_ld16s_i64, { "r", "r" } }, + { INDEX_op_ld32u_i64, { "r", "r" } }, + { INDEX_op_ld32s_i64, { "r", "r" } }, + { INDEX_op_ld_i64, { "r", "r" } }, + + { INDEX_op_st8_i64, { "r", "r" } }, + { INDEX_op_st16_i64, { "r", "r" } }, + { INDEX_op_st32_i64, { "r", "r" } }, + { INDEX_op_st_i64, { "r", "r" } }, + + { INDEX_op_add_i64, { "r", "r", "rT" } }, + { INDEX_op_sub_i64, { "r", "rI", "rT" } }, + { INDEX_op_and_i64, { "r", "r", "ri" } }, + { INDEX_op_or_i64, { "r", "r", "rU" } }, + { INDEX_op_xor_i64, { "r", "r", "rU" } }, + { INDEX_op_andc_i64, { "r", "r", "ri" } }, + { INDEX_op_orc_i64, { "r", "r", "r" } }, + { INDEX_op_eqv_i64, { "r", "r", "r" } }, + { INDEX_op_nand_i64, { "r", "r", "r" } }, + { INDEX_op_nor_i64, { "r", "r", "r" } }, + + { INDEX_op_shl_i64, { "r", "r", 
"ri" } }, + { INDEX_op_shr_i64, { "r", "r", "ri" } }, + { INDEX_op_sar_i64, { "r", "r", "ri" } }, + { INDEX_op_rotl_i64, { "r", "r", "ri" } }, + { INDEX_op_rotr_i64, { "r", "r", "ri" } }, + + { INDEX_op_mul_i64, { "r", "r", "rI" } }, + { INDEX_op_div_i64, { "r", "r", "r" } }, + { INDEX_op_divu_i64, { "r", "r", "r" } }, + + { INDEX_op_neg_i64, { "r", "r" } }, + { INDEX_op_not_i64, { "r", "r" } }, + { INDEX_op_ext8s_i64, { "r", "r" } }, + { INDEX_op_ext16s_i64, { "r", "r" } }, + { INDEX_op_ext32s_i64, { "r", "r" } }, + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, + { INDEX_op_bswap16_i64, { "r", "r" } }, + { INDEX_op_bswap32_i64, { "r", "r" } }, + { INDEX_op_bswap64_i64, { "r", "r" } }, + + { INDEX_op_brcond_i64, { "r", "ri" } }, + { INDEX_op_setcond_i64, { "r", "r", "ri" } }, + { INDEX_op_movcond_i64, { "r", "r", "ri", "rZ", "rZ" } }, + + { INDEX_op_deposit_i64, { "r", "0", "rZ" } }, + + { INDEX_op_mulsh_i64, { "r", "r", "r" } }, + { INDEX_op_muluh_i64, { "r", "r", "r" } }, +#endif + +#if TCG_TARGET_REG_BITS == 32 + { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, + { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, +#endif + +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_add2_i64, { "r", "r", "r", "r", "rI", "rZM" } }, + { INDEX_op_sub2_i64, { "r", "r", "rI", "rZM", "r", "r" } }, +#else + { INDEX_op_add2_i32, { "r", "r", "r", "r", "rI", "rZM" } }, + { INDEX_op_sub2_i32, { "r", "r", "rI", "rZM", "r", "r" } }, +#endif + +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_st_i32, { "S", "S" } }, + { INDEX_op_qemu_ld_i64, { "r", "L" } }, + { INDEX_op_qemu_st_i64, { "S", "S" } }, +#elif TARGET_LONG_BITS == 32 + { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_st_i32, { "S", "S" } }, + { INDEX_op_qemu_ld_i64, { "L", "L", "L" } }, + { INDEX_op_qemu_st_i64, { "S", "S", "S" } }, +#else + { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, + { INDEX_op_qemu_st_i32, { "S", "S", "S" } }, + { INDEX_op_qemu_ld_i64, { "L", "L", "L", "L" } }, + { INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } }, +#endif + + { -1 }, +}; + +static void tcg_target_init(TCGContext *s) +{ + unsigned long hwcap = qemu_getauxval(AT_HWCAP); + if (hwcap & PPC_FEATURE_ARCH_2_06) { + have_isa_2_06 = true; + } + + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); + tcg_regset_set32(tcg_target_call_clobber_regs, 0, + (1 << TCG_REG_R0) | + (1 << TCG_REG_R2) | + (1 << TCG_REG_R3) | + (1 << TCG_REG_R4) | + (1 << TCG_REG_R5) | + (1 << TCG_REG_R6) | + (1 << TCG_REG_R7) | + (1 << TCG_REG_R8) | + (1 << TCG_REG_R9) | + (1 << TCG_REG_R10) | + (1 << TCG_REG_R11) | + (1 << TCG_REG_R12)); + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ +#if defined(_CALL_SYSV) + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */ +#endif +#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64 + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ +#endif + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */ + if (USE_REG_RA) { + tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return addr */ + } + + tcg_add_target_add_op_defs(ppc_op_defs); +} + +#ifdef __ELF__ +typedef struct { + DebugFrameCIE cie; + DebugFrameFDEHeader fde; + uint8_t fde_def_cfa[4]; + uint8_t 
fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3]; +} DebugFrame; + +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +#if TCG_TARGET_REG_BITS == 64 +# define ELF_HOST_MACHINE EM_PPC64 +#else +# define ELF_HOST_MACHINE EM_PPC +#endif + +static DebugFrame debug_frame = { + .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .cie.id = -1, + .cie.version = 1, + .cie.code_align = 1, + .cie.data_align = (-SZR & 0x7f), /* sleb128 -SZR */ + .cie.return_column = 65, + + /* Total FDE size does not include the "len" member. */ + .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), + + .fde_def_cfa = { + 12, TCG_REG_R1, /* DW_CFA_def_cfa r1, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */ + 0x11, 65, (LR_OFFSET / -SZR) & 0x7f, + } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + uint8_t *p = &debug_frame.fde_reg_ofs[3]; + int i; + + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) { + p[0] = 0x80 + tcg_target_callee_save_regs[i]; + p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR; + } + + debug_frame.fde.func_start = (uintptr_t)buf; + debug_frame.fde.func_len = buf_size; + + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} +#endif /* __ELF__ */ + +static size_t dcache_bsize = 16; +static size_t icache_bsize = 16; + +void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + uintptr_t p, start1, stop1; + size_t dsize = dcache_bsize; + size_t isize = icache_bsize; + + start1 = start & ~(dsize - 1); + stop1 = (stop + dsize - 1) & ~(dsize - 1); + for (p = start1; p < stop1; p += dsize) { + asm volatile ("dcbst 0,%0" : : "r"(p) : "memory"); + } + asm volatile ("sync" : : : "memory"); + + start &= start & ~(isize - 1); + stop1 = (stop + isize - 1) & ~(isize - 1); + for (p = start1; p < stop1; p += isize) { + asm volatile ("icbi 0,%0" : : "r"(p) : "memory"); + } + asm volatile ("sync" : : : "memory"); + asm volatile ("isync" : : : "memory"); +} + +#if defined _AIX +#include + +static void __attribute__((constructor)) tcg_cache_init(void) +{ + icache_bsize = _system_configuration.icache_line; + dcache_bsize = _system_configuration.dcache_line; +} + +#elif defined __linux__ +static void __attribute__((constructor)) tcg_cache_init(void) +{ + unsigned long dsize = qemu_getauxval(AT_DCACHEBSIZE); + unsigned long isize = qemu_getauxval(AT_ICACHEBSIZE); + + if (dsize == 0 || isize == 0) { + if (dsize == 0) { + fprintf(stderr, "getauxval AT_DCACHEBSIZE failed\n"); + } + if (isize == 0) { + fprintf(stderr, "getauxval AT_ICACHEBSIZE failed\n"); + } + exit(1); + } + dcache_bsize = dsize; + icache_bsize = isize; +} + +#elif defined __APPLE__ +#include + +static void __attribute__((constructor)) tcg_cache_init(void) +{ + size_t len; + unsigned cacheline; + int name[2] = { CTL_HW, HW_CACHELINE }; + + len = sizeof(cacheline); + if (sysctl(name, 2, &cacheline, &len, NULL, 0)) { + perror("sysctl CTL_HW HW_CACHELINE failed"); + exit(1); + } + dcache_bsize = cacheline; + icache_bsize = cacheline; +} + +#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +#include + +static void __attribute__((constructor)) tcg_cache_init(void) +{ + size_t len = 4; + unsigned cacheline; + + if (sysctlbyname ("machdep.cacheline_size", &cacheline, &len, NULL, 0)) { + fprintf(stderr, "sysctlbyname machdep.cacheline_size failed: %s\n", + strerror(errno)); + exit(1); + } + 
dcache_bsize = cacheline; + icache_bsize = cacheline; +} +#endif diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c deleted file mode 100644 index 58520fa22b..0000000000 --- a/tcg/s390/tcg-target.c +++ /dev/null @@ -1,2410 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2009 Ulrich Hecht - * Copyright (c) 2009 Alexander Graf - * Copyright (c) 2010 Richard Henderson - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "tcg-be-ldst.h" - -/* We only support generating code for 64-bit mode. */ -#if TCG_TARGET_REG_BITS != 64 -#error "unsupported code generation mode" -#endif - -#include "elf.h" - -/* ??? The translation blocks produced by TCG are generally small enough to - be entirely reachable with a 16-bit displacement. Leaving the option for - a 32-bit displacement here Just In Case. */ -#define USE_LONG_BRANCHES 0 - -#define TCG_CT_CONST_MULI 0x100 -#define TCG_CT_CONST_ORI 0x200 -#define TCG_CT_CONST_XORI 0x400 -#define TCG_CT_CONST_CMPI 0x800 -#define TCG_CT_CONST_ADLI 0x1000 - -/* Several places within the instruction set 0 means "no register" - rather than TCG_REG_R0. */ -#define TCG_REG_NONE 0 - -/* A scratch register that may be used throughout the backend. */ -#define TCG_TMP0 TCG_REG_R14 - -#ifndef CONFIG_SOFTMMU -#define TCG_GUEST_BASE_REG TCG_REG_R13 -#endif - -/* All of the following instructions are prefixed with their instruction - format, and are defined as 8- or 16-bit quantities, even when the two - halves of the 16-bit quantity may appear 32 bits apart in the insn. - This makes it easy to copy the values from the tables in Appendix B. 
*/ -typedef enum S390Opcode { - RIL_AFI = 0xc209, - RIL_AGFI = 0xc208, - RIL_ALFI = 0xc20b, - RIL_ALGFI = 0xc20a, - RIL_BRASL = 0xc005, - RIL_BRCL = 0xc004, - RIL_CFI = 0xc20d, - RIL_CGFI = 0xc20c, - RIL_CLFI = 0xc20f, - RIL_CLGFI = 0xc20e, - RIL_IIHF = 0xc008, - RIL_IILF = 0xc009, - RIL_LARL = 0xc000, - RIL_LGFI = 0xc001, - RIL_LGRL = 0xc408, - RIL_LLIHF = 0xc00e, - RIL_LLILF = 0xc00f, - RIL_LRL = 0xc40d, - RIL_MSFI = 0xc201, - RIL_MSGFI = 0xc200, - RIL_NIHF = 0xc00a, - RIL_NILF = 0xc00b, - RIL_OIHF = 0xc00c, - RIL_OILF = 0xc00d, - RIL_SLFI = 0xc205, - RIL_SLGFI = 0xc204, - RIL_XIHF = 0xc006, - RIL_XILF = 0xc007, - - RI_AGHI = 0xa70b, - RI_AHI = 0xa70a, - RI_BRC = 0xa704, - RI_IIHH = 0xa500, - RI_IIHL = 0xa501, - RI_IILH = 0xa502, - RI_IILL = 0xa503, - RI_LGHI = 0xa709, - RI_LLIHH = 0xa50c, - RI_LLIHL = 0xa50d, - RI_LLILH = 0xa50e, - RI_LLILL = 0xa50f, - RI_MGHI = 0xa70d, - RI_MHI = 0xa70c, - RI_NIHH = 0xa504, - RI_NIHL = 0xa505, - RI_NILH = 0xa506, - RI_NILL = 0xa507, - RI_OIHH = 0xa508, - RI_OIHL = 0xa509, - RI_OILH = 0xa50a, - RI_OILL = 0xa50b, - - RIE_CGIJ = 0xec7c, - RIE_CGRJ = 0xec64, - RIE_CIJ = 0xec7e, - RIE_CLGRJ = 0xec65, - RIE_CLIJ = 0xec7f, - RIE_CLGIJ = 0xec7d, - RIE_CLRJ = 0xec77, - RIE_CRJ = 0xec76, - RIE_RISBG = 0xec55, - - RRE_AGR = 0xb908, - RRE_ALGR = 0xb90a, - RRE_ALCR = 0xb998, - RRE_ALCGR = 0xb988, - RRE_CGR = 0xb920, - RRE_CLGR = 0xb921, - RRE_DLGR = 0xb987, - RRE_DLR = 0xb997, - RRE_DSGFR = 0xb91d, - RRE_DSGR = 0xb90d, - RRE_LGBR = 0xb906, - RRE_LCGR = 0xb903, - RRE_LGFR = 0xb914, - RRE_LGHR = 0xb907, - RRE_LGR = 0xb904, - RRE_LLGCR = 0xb984, - RRE_LLGFR = 0xb916, - RRE_LLGHR = 0xb985, - RRE_LRVR = 0xb91f, - RRE_LRVGR = 0xb90f, - RRE_LTGR = 0xb902, - RRE_MLGR = 0xb986, - RRE_MSGR = 0xb90c, - RRE_MSR = 0xb252, - RRE_NGR = 0xb980, - RRE_OGR = 0xb981, - RRE_SGR = 0xb909, - RRE_SLGR = 0xb90b, - RRE_SLBR = 0xb999, - RRE_SLBGR = 0xb989, - RRE_XGR = 0xb982, - - RRF_LOCR = 0xb9f2, - RRF_LOCGR = 0xb9e2, - - RR_AR = 0x1a, - RR_ALR = 0x1e, - RR_BASR = 0x0d, - RR_BCR = 0x07, - RR_CLR = 0x15, - RR_CR = 0x19, - RR_DR = 0x1d, - RR_LCR = 0x13, - RR_LR = 0x18, - RR_LTR = 0x12, - RR_NR = 0x14, - RR_OR = 0x16, - RR_SR = 0x1b, - RR_SLR = 0x1f, - RR_XR = 0x17, - - RSY_RLL = 0xeb1d, - RSY_RLLG = 0xeb1c, - RSY_SLLG = 0xeb0d, - RSY_SRAG = 0xeb0a, - RSY_SRLG = 0xeb0c, - - RS_SLL = 0x89, - RS_SRA = 0x8a, - RS_SRL = 0x88, - - RXY_AG = 0xe308, - RXY_AY = 0xe35a, - RXY_CG = 0xe320, - RXY_CY = 0xe359, - RXY_LAY = 0xe371, - RXY_LB = 0xe376, - RXY_LG = 0xe304, - RXY_LGB = 0xe377, - RXY_LGF = 0xe314, - RXY_LGH = 0xe315, - RXY_LHY = 0xe378, - RXY_LLGC = 0xe390, - RXY_LLGF = 0xe316, - RXY_LLGH = 0xe391, - RXY_LMG = 0xeb04, - RXY_LRV = 0xe31e, - RXY_LRVG = 0xe30f, - RXY_LRVH = 0xe31f, - RXY_LY = 0xe358, - RXY_STCY = 0xe372, - RXY_STG = 0xe324, - RXY_STHY = 0xe370, - RXY_STMG = 0xeb24, - RXY_STRV = 0xe33e, - RXY_STRVG = 0xe32f, - RXY_STRVH = 0xe33f, - RXY_STY = 0xe350, - - RX_A = 0x5a, - RX_C = 0x59, - RX_L = 0x58, - RX_LA = 0x41, - RX_LH = 0x48, - RX_ST = 0x50, - RX_STC = 0x42, - RX_STH = 0x40, -} S390Opcode; - -#ifndef NDEBUG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", - "%r8", "%r9", "%r10" "%r11" "%r12" "%r13" "%r14" "%r15" -}; -#endif - -/* Since R6 is a potential argument register, choose it last of the - call-saved registers. Likewise prefer the call-clobbered registers - in reverse order to maximize the chance of avoiding the arguments. 
*/ -static const int tcg_target_reg_alloc_order[] = { - /* Call saved registers. */ - TCG_REG_R13, - TCG_REG_R12, - TCG_REG_R11, - TCG_REG_R10, - TCG_REG_R9, - TCG_REG_R8, - TCG_REG_R7, - TCG_REG_R6, - /* Call clobbered registers. */ - TCG_REG_R14, - TCG_REG_R0, - TCG_REG_R1, - /* Argument registers, in reverse order of allocation. */ - TCG_REG_R5, - TCG_REG_R4, - TCG_REG_R3, - TCG_REG_R2, -}; - -static const int tcg_target_call_iarg_regs[] = { - TCG_REG_R2, - TCG_REG_R3, - TCG_REG_R4, - TCG_REG_R5, - TCG_REG_R6, -}; - -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_R2, -}; - -#define S390_CC_EQ 8 -#define S390_CC_LT 4 -#define S390_CC_GT 2 -#define S390_CC_OV 1 -#define S390_CC_NE (S390_CC_LT | S390_CC_GT) -#define S390_CC_LE (S390_CC_LT | S390_CC_EQ) -#define S390_CC_GE (S390_CC_GT | S390_CC_EQ) -#define S390_CC_NEVER 0 -#define S390_CC_ALWAYS 15 - -/* Condition codes that result from a COMPARE and COMPARE LOGICAL. */ -static const uint8_t tcg_cond_to_s390_cond[] = { - [TCG_COND_EQ] = S390_CC_EQ, - [TCG_COND_NE] = S390_CC_NE, - [TCG_COND_LT] = S390_CC_LT, - [TCG_COND_LE] = S390_CC_LE, - [TCG_COND_GT] = S390_CC_GT, - [TCG_COND_GE] = S390_CC_GE, - [TCG_COND_LTU] = S390_CC_LT, - [TCG_COND_LEU] = S390_CC_LE, - [TCG_COND_GTU] = S390_CC_GT, - [TCG_COND_GEU] = S390_CC_GE, -}; - -/* Condition codes that result from a LOAD AND TEST. Here, we have no - unsigned instruction variation, however since the test is vs zero we - can re-map the outcomes appropriately. */ -static const uint8_t tcg_cond_to_ltr_cond[] = { - [TCG_COND_EQ] = S390_CC_EQ, - [TCG_COND_NE] = S390_CC_NE, - [TCG_COND_LT] = S390_CC_LT, - [TCG_COND_LE] = S390_CC_LE, - [TCG_COND_GT] = S390_CC_GT, - [TCG_COND_GE] = S390_CC_GE, - [TCG_COND_LTU] = S390_CC_NEVER, - [TCG_COND_LEU] = S390_CC_EQ, - [TCG_COND_GTU] = S390_CC_NE, - [TCG_COND_GEU] = S390_CC_ALWAYS, -}; - -#ifdef CONFIG_SOFTMMU -static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_SB] = helper_ret_ldsb_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LESW] = helper_le_ldsw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LESL] = helper_le_ldsl_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BESW] = helper_be_ldsw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BESL] = helper_be_ldsl_mmu, - [MO_BEQ] = helper_be_ldq_mmu, -}; - -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, -}; -#endif - -static tcg_insn_unit *tb_ret_addr; - -/* A list of relevant facilities used by this translator. Some of these - are required for proper operation, and these are checked at startup. 
*/ - -#define FACILITY_ZARCH_ACTIVE (1ULL << (63 - 2)) -#define FACILITY_LONG_DISP (1ULL << (63 - 18)) -#define FACILITY_EXT_IMM (1ULL << (63 - 21)) -#define FACILITY_GEN_INST_EXT (1ULL << (63 - 34)) -#define FACILITY_LOAD_ON_COND (1ULL << (63 - 45)) - -static uint64_t facilities; - -static void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - intptr_t pcrel2 = (tcg_insn_unit *)value - (code_ptr - 1); - assert(addend == -2); - - switch (type) { - case R_390_PC16DBL: - assert(pcrel2 == (int16_t)pcrel2); - tcg_patch16(code_ptr, pcrel2); - break; - case R_390_PC32DBL: - assert(pcrel2 == (int32_t)pcrel2); - tcg_patch32(code_ptr, pcrel2); - break; - default: - tcg_abort(); - break; - } -} - -/* parse target specific constraints */ -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str = *pct_str; - - switch (ct_str[0]) { - case 'r': /* all registers */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffff); - break; - case 'R': /* not R0 */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); - break; - case 'L': /* qemu_ld/st constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffff); - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R2); - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3); - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4); - break; - case 'a': /* force R2 for division */ - ct->ct |= TCG_CT_REG; - tcg_regset_clear(ct->u.regs); - tcg_regset_set_reg(ct->u.regs, TCG_REG_R2); - break; - case 'b': /* force R3 for division */ - ct->ct |= TCG_CT_REG; - tcg_regset_clear(ct->u.regs); - tcg_regset_set_reg(ct->u.regs, TCG_REG_R3); - break; - case 'A': - ct->ct |= TCG_CT_CONST_ADLI; - break; - case 'K': - ct->ct |= TCG_CT_CONST_MULI; - break; - case 'O': - ct->ct |= TCG_CT_CONST_ORI; - break; - case 'X': - ct->ct |= TCG_CT_CONST_XORI; - break; - case 'C': - ct->ct |= TCG_CT_CONST_CMPI; - break; - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - - return 0; -} - -/* Immediates to be used with logical OR. This is an optimization only, - since a full 64-bit immediate OR can always be performed with 4 sequential - OI[LH][LH] instructions. What we're looking for is immediates that we - can load efficiently, and the immediate load plus the reg-reg OR is - smaller than the sequential OI's. */ - -static int tcg_match_ori(TCGType type, tcg_target_long val) -{ - if (facilities & FACILITY_EXT_IMM) { - if (type == TCG_TYPE_I32) { - /* All 32-bit ORs can be performed with 1 48-bit insn. */ - return 1; - } - } - - /* Look for negative values. These are best to load with LGHI. */ - if (val < 0) { - if (val == (int16_t)val) { - return 0; - } - if (facilities & FACILITY_EXT_IMM) { - if (val == (int32_t)val) { - return 0; - } - } - } - - return 1; -} - -/* Immediates to be used with logical XOR. This is almost, but not quite, - only an optimization. XOR with immediate is only supported with the - extended-immediate facility. That said, there are a few patterns for - which it is better to load the value into a register first. */ - -static int tcg_match_xori(TCGType type, tcg_target_long val) -{ - if ((facilities & FACILITY_EXT_IMM) == 0) { - return 0; - } - - if (type == TCG_TYPE_I32) { - /* All 32-bit XORs can be performed with 1 48-bit insn. */ - return 1; - } - - /* Look for negative values. These are best to load with LGHI. 
*/ - if (val < 0 && val == (int32_t)val) { - return 0; - } - - return 1; -} - -/* Immediates to be used with comparisons. */ - -static int tcg_match_cmpi(TCGType type, tcg_target_long val) -{ - if (facilities & FACILITY_EXT_IMM) { - /* The COMPARE IMMEDIATE instruction is available. */ - if (type == TCG_TYPE_I32) { - /* We have a 32-bit immediate and can compare against anything. */ - return 1; - } else { - /* ??? We have no insight here into whether the comparison is - signed or unsigned. The COMPARE IMMEDIATE insn uses a 32-bit - signed immediate, and the COMPARE LOGICAL IMMEDIATE insn uses - a 32-bit unsigned immediate. If we were to use the (semi) - obvious "val == (int32_t)val" we would be enabling unsigned - comparisons vs very large numbers. The only solution is to - take the intersection of the ranges. */ - /* ??? Another possible solution is to simply lie and allow all - constants here and force the out-of-range values into a temp - register in tgen_cmp when we have knowledge of the actual - comparison code in use. */ - return val >= 0 && val <= 0x7fffffff; - } - } else { - /* Only the LOAD AND TEST instruction is available. */ - return val == 0; - } -} - -/* Immediates to be used with add2/sub2. */ - -static int tcg_match_add2i(TCGType type, tcg_target_long val) -{ - if (facilities & FACILITY_EXT_IMM) { - if (type == TCG_TYPE_I32) { - return 1; - } else if (val >= -0xffffffffll && val <= 0xffffffffll) { - return 1; - } - } - return 0; -} - -/* Test if a constant matches the constraint. */ -static int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - int ct = arg_ct->ct; - - if (ct & TCG_CT_CONST) { - return 1; - } - - if (type == TCG_TYPE_I32) { - val = (int32_t)val; - } - - /* The following are mutually exclusive. */ - if (ct & TCG_CT_CONST_MULI) { - /* Immediates that may be used with multiply. If we have the - general-instruction-extensions, then we have MULTIPLY SINGLE - IMMEDIATE with a signed 32-bit, otherwise we have only - MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit. */ - if (facilities & FACILITY_GEN_INST_EXT) { - return val == (int32_t)val; - } else { - return val == (int16_t)val; - } - } else if (ct & TCG_CT_CONST_ADLI) { - return tcg_match_add2i(type, val); - } else if (ct & TCG_CT_CONST_ORI) { - return tcg_match_ori(type, val); - } else if (ct & TCG_CT_CONST_XORI) { - return tcg_match_xori(type, val); - } else if (ct & TCG_CT_CONST_CMPI) { - return tcg_match_cmpi(type, val); - } - - return 0; -} - -/* Emit instructions according to the given instruction format. 
*/ - -static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2) -{ - tcg_out16(s, (op << 8) | (r1 << 4) | r2); -} - -static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op, - TCGReg r1, TCGReg r2) -{ - tcg_out32(s, (op << 16) | (r1 << 4) | r2); -} - -static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op, - TCGReg r1, TCGReg r2, int m3) -{ - tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2); -} - -static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2) -{ - tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff)); -} - -static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2) -{ - tcg_out16(s, op | (r1 << 4)); - tcg_out32(s, i2); -} - -static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1, - TCGReg b2, TCGReg r3, int disp) -{ - tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12) - | (disp & 0xfff)); -} - -static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1, - TCGReg b2, TCGReg r3, int disp) -{ - tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3); - tcg_out32(s, (op & 0xff) | (b2 << 28) - | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4)); -} - -#define tcg_out_insn_RX tcg_out_insn_RS -#define tcg_out_insn_RXY tcg_out_insn_RSY - -/* Emit an opcode with "type-checking" of the format. */ -#define tcg_out_insn(S, FMT, OP, ...) \ - glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__) - - -/* emit 64-bit shifts */ -static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest, - TCGReg src, TCGReg sh_reg, int sh_imm) -{ - tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm); -} - -/* emit 32-bit shifts */ -static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest, - TCGReg sh_reg, int sh_imm) -{ - tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm); -} - -static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) -{ - if (src != dst) { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, LR, dst, src); - } else { - tcg_out_insn(s, RRE, LGR, dst, src); - } - } -} - -/* load a register with an immediate value */ -static void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg ret, tcg_target_long sval) -{ - static const S390Opcode lli_insns[4] = { - RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH - }; - - tcg_target_ulong uval = sval; - int i; - - if (type == TCG_TYPE_I32) { - uval = (uint32_t)sval; - sval = (int32_t)sval; - } - - /* Try all 32-bit insns that can load it in one go. */ - if (sval >= -0x8000 && sval < 0x8000) { - tcg_out_insn(s, RI, LGHI, ret, sval); - return; - } - - for (i = 0; i < 4; i++) { - tcg_target_long mask = 0xffffull << i*16; - if ((uval & mask) == uval) { - tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16); - return; - } - } - - /* Try all 48-bit insns that can load it in one go. */ - if (facilities & FACILITY_EXT_IMM) { - if (sval == (int32_t)sval) { - tcg_out_insn(s, RIL, LGFI, ret, sval); - return; - } - if (uval <= 0xffffffff) { - tcg_out_insn(s, RIL, LLILF, ret, uval); - return; - } - if ((uval & 0xffffffff) == 0) { - tcg_out_insn(s, RIL, LLIHF, ret, uval >> 31 >> 1); - return; - } - } - - /* Try for PC-relative address load. */ - if ((sval & 1) == 0) { - ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1; - if (off == (int32_t)off) { - tcg_out_insn(s, RIL, LARL, ret, off); - return; - } - } - - /* If extended immediates are not present, then we may have to issue - several instructions to load the low 32 bits. */ - if (!(facilities & FACILITY_EXT_IMM)) { - /* A 32-bit unsigned value can be loaded in 2 insns. 
And given - that the lli_insns loop above did not succeed, we know that - both insns are required. */ - if (uval <= 0xffffffff) { - tcg_out_insn(s, RI, LLILL, ret, uval); - tcg_out_insn(s, RI, IILH, ret, uval >> 16); - return; - } - - /* If all high bits are set, the value can be loaded in 2 or 3 insns. - We first want to make sure that all the high bits get set. With - luck the low 16-bits can be considered negative to perform that for - free, otherwise we load an explicit -1. */ - if (sval >> 31 >> 1 == -1) { - if (uval & 0x8000) { - tcg_out_insn(s, RI, LGHI, ret, uval); - } else { - tcg_out_insn(s, RI, LGHI, ret, -1); - tcg_out_insn(s, RI, IILL, ret, uval); - } - tcg_out_insn(s, RI, IILH, ret, uval >> 16); - return; - } - } - - /* If we get here, both the high and low parts have non-zero bits. */ - - /* Recurse to load the lower 32-bits. */ - tcg_out_movi(s, TCG_TYPE_I64, ret, uval & 0xffffffff); - - /* Insert data into the high 32-bits. */ - uval = uval >> 31 >> 1; - if (facilities & FACILITY_EXT_IMM) { - if (uval < 0x10000) { - tcg_out_insn(s, RI, IIHL, ret, uval); - } else if ((uval & 0xffff) == 0) { - tcg_out_insn(s, RI, IIHH, ret, uval >> 16); - } else { - tcg_out_insn(s, RIL, IIHF, ret, uval); - } - } else { - if (uval & 0xffff) { - tcg_out_insn(s, RI, IIHL, ret, uval); - } - if (uval & 0xffff0000) { - tcg_out_insn(s, RI, IIHH, ret, uval >> 16); - } - } -} - - -/* Emit a load/store type instruction. Inputs are: - DATA: The register to be loaded or stored. - BASE+OFS: The effective address. - OPC_RX: If the operation has an RX format opcode (e.g. STC), otherwise 0. - OPC_RXY: The RXY format opcode for the operation (e.g. STCY). */ - -static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy, - TCGReg data, TCGReg base, TCGReg index, - tcg_target_long ofs) -{ - if (ofs < -0x80000 || ofs >= 0x80000) { - /* Combine the low 20 bits of the offset with the actual load insn; - the high 44 bits must come from an immediate load. */ - tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000; - tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low); - ofs = low; - - /* If we were already given an index register, add it in. 
*/ - if (index != TCG_REG_NONE) { - tcg_out_insn(s, RRE, AGR, TCG_TMP0, index); - } - index = TCG_TMP0; - } - - if (opc_rx && ofs >= 0 && ofs < 0x1000) { - tcg_out_insn_RX(s, opc_rx, data, base, index, ofs); - } else { - tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs); - } -} - - -/* load data without address translation or endianness conversion */ -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data, - TCGReg base, intptr_t ofs) -{ - if (type == TCG_TYPE_I32) { - tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs); - } else { - tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs); - } -} - -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg data, - TCGReg base, intptr_t ofs) -{ - if (type == TCG_TYPE_I32) { - tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs); - } else { - tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs); - } -} - -/* load data from an absolute host address */ -static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs) -{ - intptr_t addr = (intptr_t)abs; - - if ((facilities & FACILITY_GEN_INST_EXT) && !(addr & 1)) { - ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1; - if (disp == (int32_t)disp) { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RIL, LRL, dest, disp); - } else { - tcg_out_insn(s, RIL, LGRL, dest, disp); - } - return; - } - } - - tcg_out_movi(s, TCG_TYPE_PTR, dest, addr & ~0xffff); - tcg_out_ld(s, type, dest, dest, addr & 0xffff); -} - -static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src, - int msb, int lsb, int ofs, int z) -{ - /* Format RIE-f */ - tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src); - tcg_out16(s, (msb << 8) | (z << 7) | lsb); - tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff)); -} - -static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) -{ - if (facilities & FACILITY_EXT_IMM) { - tcg_out_insn(s, RRE, LGBR, dest, src); - return; - } - - if (type == TCG_TYPE_I32) { - if (dest == src) { - tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 24); - } else { - tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 24); - } - tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 24); - } else { - tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 56); - tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 56); - } -} - -static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) -{ - if (facilities & FACILITY_EXT_IMM) { - tcg_out_insn(s, RRE, LLGCR, dest, src); - return; - } - - if (dest == src) { - tcg_out_movi(s, type, TCG_TMP0, 0xff); - src = TCG_TMP0; - } else { - tcg_out_movi(s, type, dest, 0xff); - } - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, NR, dest, src); - } else { - tcg_out_insn(s, RRE, NGR, dest, src); - } -} - -static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) -{ - if (facilities & FACILITY_EXT_IMM) { - tcg_out_insn(s, RRE, LGHR, dest, src); - return; - } - - if (type == TCG_TYPE_I32) { - if (dest == src) { - tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 16); - } else { - tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 16); - } - tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 16); - } else { - tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 48); - tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 48); - } -} - -static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) -{ - if (facilities & FACILITY_EXT_IMM) { - tcg_out_insn(s, RRE, LLGHR, dest, src); - return; - } - - if (dest == src) { - tcg_out_movi(s, type, TCG_TMP0, 0xffff); - src = TCG_TMP0; - } 
else { - tcg_out_movi(s, type, dest, 0xffff); - } - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, NR, dest, src); - } else { - tcg_out_insn(s, RRE, NGR, dest, src); - } -} - -static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src) -{ - tcg_out_insn(s, RRE, LGFR, dest, src); -} - -static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src) -{ - tcg_out_insn(s, RRE, LLGFR, dest, src); -} - -/* Accept bit patterns like these: - 0....01....1 - 1....10....0 - 1..10..01..1 - 0..01..10..0 - Copied from gcc sources. */ -static inline bool risbg_mask(uint64_t c) -{ - uint64_t lsb; - /* We don't change the number of transitions by inverting, - so make sure we start with the LSB zero. */ - if (c & 1) { - c = ~c; - } - /* Reject all zeros or all ones. */ - if (c == 0) { - return false; - } - /* Find the first transition. */ - lsb = c & -c; - /* Invert to look for a second transition. */ - c = ~c; - /* Erase the first transition. */ - c &= -lsb; - /* Find the second transition, if any. */ - lsb = c & -c; - /* Match if all the bits are 1's, or if c is zero. */ - return c == -lsb; -} - -static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val) -{ - int msb, lsb; - if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) { - /* Achieve wraparound by swapping msb and lsb. */ - msb = 64 - ctz64(~val); - lsb = clz64(~val) - 1; - } else { - msb = clz64(val); - lsb = 63 - ctz64(val); - } - tcg_out_risbg(s, out, in, msb, lsb, 0, 1); -} - -static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) -{ - static const S390Opcode ni_insns[4] = { - RI_NILL, RI_NILH, RI_NIHL, RI_NIHH - }; - static const S390Opcode nif_insns[2] = { - RIL_NILF, RIL_NIHF - }; - uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull); - int i; - - /* Look for the zero-extensions. */ - if ((val & valid) == 0xffffffff) { - tgen_ext32u(s, dest, dest); - return; - } - if (facilities & FACILITY_EXT_IMM) { - if ((val & valid) == 0xff) { - tgen_ext8u(s, TCG_TYPE_I64, dest, dest); - return; - } - if ((val & valid) == 0xffff) { - tgen_ext16u(s, TCG_TYPE_I64, dest, dest); - return; - } - } - - /* Try all 32-bit insns that can perform it in one go. */ - for (i = 0; i < 4; i++) { - tcg_target_ulong mask = ~(0xffffull << i*16); - if (((val | ~valid) & mask) == mask) { - tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16); - return; - } - } - - /* Try all 48-bit insns that can perform it in one go. */ - if (facilities & FACILITY_EXT_IMM) { - for (i = 0; i < 2; i++) { - tcg_target_ulong mask = ~(0xffffffffull << i*32); - if (((val | ~valid) & mask) == mask) { - tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32); - return; - } - } - } - if ((facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) { - tgen_andi_risbg(s, dest, dest, val); - return; - } - - /* Fall back to loading the constant. */ - tcg_out_movi(s, type, TCG_TMP0, val); - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, NR, dest, TCG_TMP0); - } else { - tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0); - } -} - -static void tgen64_ori(TCGContext *s, TCGReg dest, tcg_target_ulong val) -{ - static const S390Opcode oi_insns[4] = { - RI_OILL, RI_OILH, RI_OIHL, RI_OIHH - }; - static const S390Opcode nif_insns[2] = { - RIL_OILF, RIL_OIHF - }; - - int i; - - /* Look for no-op. */ - if (val == 0) { - return; - } - - if (facilities & FACILITY_EXT_IMM) { - /* Try all 32-bit insns that can perform it in one go. 
*/ - for (i = 0; i < 4; i++) { - tcg_target_ulong mask = (0xffffull << i*16); - if ((val & mask) != 0 && (val & ~mask) == 0) { - tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16); - return; - } - } - - /* Try all 48-bit insns that can perform it in one go. */ - for (i = 0; i < 2; i++) { - tcg_target_ulong mask = (0xffffffffull << i*32); - if ((val & mask) != 0 && (val & ~mask) == 0) { - tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32); - return; - } - } - - /* Perform the OR via sequential modifications to the high and - low parts. Do this via recursion to handle 16-bit vs 32-bit - masks in each half. */ - tgen64_ori(s, dest, val & 0x00000000ffffffffull); - tgen64_ori(s, dest, val & 0xffffffff00000000ull); - } else { - /* With no extended-immediate facility, we don't need to be so - clever. Just iterate over the insns and mask in the constant. */ - for (i = 0; i < 4; i++) { - tcg_target_ulong mask = (0xffffull << i*16); - if ((val & mask) != 0) { - tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16); - } - } - } -} - -static void tgen64_xori(TCGContext *s, TCGReg dest, tcg_target_ulong val) -{ - /* Perform the xor by parts. */ - if (val & 0xffffffff) { - tcg_out_insn(s, RIL, XILF, dest, val); - } - if (val > 0xffffffff) { - tcg_out_insn(s, RIL, XIHF, dest, val >> 31 >> 1); - } -} - -static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, - TCGArg c2, int c2const) -{ - bool is_unsigned = is_unsigned_cond(c); - if (c2const) { - if (c2 == 0) { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, LTR, r1, r1); - } else { - tcg_out_insn(s, RRE, LTGR, r1, r1); - } - return tcg_cond_to_ltr_cond[c]; - } else { - if (is_unsigned) { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RIL, CLFI, r1, c2); - } else { - tcg_out_insn(s, RIL, CLGFI, r1, c2); - } - } else { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RIL, CFI, r1, c2); - } else { - tcg_out_insn(s, RIL, CGFI, r1, c2); - } - } - } - } else { - if (is_unsigned) { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, CLR, r1, c2); - } else { - tcg_out_insn(s, RRE, CLGR, r1, c2); - } - } else { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, CR, r1, c2); - } else { - tcg_out_insn(s, RRE, CGR, r1, c2); - } - } - } - return tcg_cond_to_s390_cond[c]; -} - -static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, - TCGReg dest, TCGReg c1, TCGArg c2, int c2const) -{ - int cc; - - switch (cond) { - case TCG_COND_GTU: - case TCG_COND_GT: - do_greater: - /* The result of a compare has CC=2 for GT and CC=3 unused. - ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit. */ - tgen_cmp(s, type, cond, c1, c2, c2const); - tcg_out_movi(s, type, dest, 0); - tcg_out_insn(s, RRE, ALCGR, dest, dest); - return; - - case TCG_COND_GEU: - do_geu: - /* We need "real" carry semantics, so use SUBTRACT LOGICAL - instead of COMPARE LOGICAL. This needs an extra move. */ - tcg_out_mov(s, type, TCG_TMP0, c1); - if (c2const) { - tcg_out_movi(s, TCG_TYPE_I64, dest, 0); - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RIL, SLFI, TCG_TMP0, c2); - } else { - tcg_out_insn(s, RIL, SLGFI, TCG_TMP0, c2); - } - } else { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, SLR, TCG_TMP0, c2); - } else { - tcg_out_insn(s, RRE, SLGR, TCG_TMP0, c2); - } - tcg_out_movi(s, TCG_TYPE_I64, dest, 0); - } - tcg_out_insn(s, RRE, ALCGR, dest, dest); - return; - - case TCG_COND_LEU: - case TCG_COND_LTU: - case TCG_COND_LT: - /* Swap operands so that we can use GEU/GTU/GT. 
*/ - if (c2const) { - tcg_out_movi(s, type, TCG_TMP0, c2); - c2 = c1; - c2const = 0; - c1 = TCG_TMP0; - } else { - TCGReg t = c1; - c1 = c2; - c2 = t; - } - if (cond == TCG_COND_LEU) { - goto do_geu; - } - cond = tcg_swap_cond(cond); - goto do_greater; - - case TCG_COND_NE: - /* X != 0 is X > 0. */ - if (c2const && c2 == 0) { - cond = TCG_COND_GTU; - goto do_greater; - } - break; - - case TCG_COND_EQ: - /* X == 0 is X <= 0 is 0 >= X. */ - if (c2const && c2 == 0) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0); - c2 = c1; - c2const = 0; - c1 = TCG_TMP0; - goto do_geu; - } - break; - - default: - break; - } - - cc = tgen_cmp(s, type, cond, c1, c2, c2const); - if (facilities & FACILITY_LOAD_ON_COND) { - /* Emit: d = 0, t = 1, d = (cc ? t : d). */ - tcg_out_movi(s, TCG_TYPE_I64, dest, 0); - tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1); - tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc); - } else { - /* Emit: d = 1; if (cc) goto over; d = 0; over: */ - tcg_out_movi(s, type, dest, 1); - tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1); - tcg_out_movi(s, type, dest, 0); - } -} - -static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest, - TCGReg c1, TCGArg c2, int c2const, TCGReg r3) -{ - int cc; - if (facilities & FACILITY_LOAD_ON_COND) { - cc = tgen_cmp(s, type, c, c1, c2, c2const); - tcg_out_insn(s, RRF, LOCGR, dest, r3, cc); - } else { - c = tcg_invert_cond(c); - cc = tgen_cmp(s, type, c, c1, c2, c2const); - - /* Emit: if (cc) goto over; dest = r3; over: */ - tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1); - tcg_out_insn(s, RRE, LGR, dest, r3); - } -} - -bool tcg_target_deposit_valid(int ofs, int len) -{ - return (facilities & FACILITY_GEN_INST_EXT) != 0; -} - -static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src, - int ofs, int len) -{ - int lsb = (63 - ofs); - int msb = lsb - (len - 1); - tcg_out_risbg(s, dest, src, msb, lsb, ofs, 0); -} - -static void tgen_gotoi(TCGContext *s, int cc, tcg_insn_unit *dest) -{ - ptrdiff_t off = dest - s->code_ptr; - if (off == (int16_t)off) { - tcg_out_insn(s, RI, BRC, cc, off); - } else if (off == (int32_t)off) { - tcg_out_insn(s, RIL, BRCL, cc, off); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest); - tcg_out_insn(s, RR, BCR, cc, TCG_TMP0); - } -} - -static void tgen_branch(TCGContext *s, int cc, TCGLabel *l) -{ - if (l->has_value) { - tgen_gotoi(s, cc, l->u.value_ptr); - } else if (USE_LONG_BRANCHES) { - tcg_out16(s, RIL_BRCL | (cc << 4)); - tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, l, -2); - s->code_ptr += 2; - } else { - tcg_out16(s, RI_BRC | (cc << 4)); - tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, -2); - s->code_ptr += 1; - } -} - -static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc, - TCGReg r1, TCGReg r2, TCGLabel *l) -{ - intptr_t off; - - if (l->has_value) { - off = l->u.value_ptr - s->code_ptr; - } else { - /* We need to keep the offset unchanged for retranslation. */ - off = s->code_ptr[1]; - tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, -2); - } - - tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2); - tcg_out16(s, off); - tcg_out16(s, cc << 12 | (opc & 0xff)); -} - -static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc, - TCGReg r1, int i2, TCGLabel *l) -{ - tcg_target_long off; - - if (l->has_value) { - off = l->u.value_ptr - s->code_ptr; - } else { - /* We need to keep the offset unchanged for retranslation. 
*/ - off = s->code_ptr[1]; - tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, -2); - } - - tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc); - tcg_out16(s, off); - tcg_out16(s, (i2 << 8) | (opc & 0xff)); -} - -static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c, - TCGReg r1, TCGArg c2, int c2const, TCGLabel *l) -{ - int cc; - - if (facilities & FACILITY_GEN_INST_EXT) { - bool is_unsigned = is_unsigned_cond(c); - bool in_range; - S390Opcode opc; - - cc = tcg_cond_to_s390_cond[c]; - - if (!c2const) { - opc = (type == TCG_TYPE_I32 - ? (is_unsigned ? RIE_CLRJ : RIE_CRJ) - : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ)); - tgen_compare_branch(s, opc, cc, r1, c2, l); - return; - } - - /* COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field. - If the immediate we've been given does not fit that range, we'll - fall back to separate compare and branch instructions using the - larger comparison range afforded by COMPARE IMMEDIATE. */ - if (type == TCG_TYPE_I32) { - if (is_unsigned) { - opc = RIE_CLIJ; - in_range = (uint32_t)c2 == (uint8_t)c2; - } else { - opc = RIE_CIJ; - in_range = (int32_t)c2 == (int8_t)c2; - } - } else { - if (is_unsigned) { - opc = RIE_CLGIJ; - in_range = (uint64_t)c2 == (uint8_t)c2; - } else { - opc = RIE_CGIJ; - in_range = (int64_t)c2 == (int8_t)c2; - } - } - if (in_range) { - tgen_compare_imm_branch(s, opc, cc, r1, c2, l); - return; - } - } - - cc = tgen_cmp(s, type, c, r1, c2, c2const); - tgen_branch(s, cc, l); -} - -static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) -{ - ptrdiff_t off = dest - s->code_ptr; - if (off == (int32_t)off) { - tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest); - tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0); - } -} - -static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, TCGReg data, - TCGReg base, TCGReg index, int disp) -{ - switch (opc & (MO_SSIZE | MO_BSWAP)) { - case MO_UB: - tcg_out_insn(s, RXY, LLGC, data, base, index, disp); - break; - case MO_SB: - tcg_out_insn(s, RXY, LGB, data, base, index, disp); - break; - - case MO_UW | MO_BSWAP: - /* swapped unsigned halfword load with upper bits zeroed */ - tcg_out_insn(s, RXY, LRVH, data, base, index, disp); - tgen_ext16u(s, TCG_TYPE_I64, data, data); - break; - case MO_UW: - tcg_out_insn(s, RXY, LLGH, data, base, index, disp); - break; - - case MO_SW | MO_BSWAP: - /* swapped sign-extended halfword load */ - tcg_out_insn(s, RXY, LRVH, data, base, index, disp); - tgen_ext16s(s, TCG_TYPE_I64, data, data); - break; - case MO_SW: - tcg_out_insn(s, RXY, LGH, data, base, index, disp); - break; - - case MO_UL | MO_BSWAP: - /* swapped unsigned int load with upper bits zeroed */ - tcg_out_insn(s, RXY, LRV, data, base, index, disp); - tgen_ext32u(s, data, data); - break; - case MO_UL: - tcg_out_insn(s, RXY, LLGF, data, base, index, disp); - break; - - case MO_SL | MO_BSWAP: - /* swapped sign-extended int load */ - tcg_out_insn(s, RXY, LRV, data, base, index, disp); - tgen_ext32s(s, data, data); - break; - case MO_SL: - tcg_out_insn(s, RXY, LGF, data, base, index, disp); - break; - - case MO_Q | MO_BSWAP: - tcg_out_insn(s, RXY, LRVG, data, base, index, disp); - break; - case MO_Q: - tcg_out_insn(s, RXY, LG, data, base, index, disp); - break; - - default: - tcg_abort(); - } -} - -static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, TCGReg data, - TCGReg base, TCGReg index, int disp) -{ - switch (opc & (MO_SIZE | MO_BSWAP)) { - case MO_UB: - if (disp >= 0 && disp < 0x1000) { - tcg_out_insn(s, 
RX, STC, data, base, index, disp); - } else { - tcg_out_insn(s, RXY, STCY, data, base, index, disp); - } - break; - - case MO_UW | MO_BSWAP: - tcg_out_insn(s, RXY, STRVH, data, base, index, disp); - break; - case MO_UW: - if (disp >= 0 && disp < 0x1000) { - tcg_out_insn(s, RX, STH, data, base, index, disp); - } else { - tcg_out_insn(s, RXY, STHY, data, base, index, disp); - } - break; - - case MO_UL | MO_BSWAP: - tcg_out_insn(s, RXY, STRV, data, base, index, disp); - break; - case MO_UL: - if (disp >= 0 && disp < 0x1000) { - tcg_out_insn(s, RX, ST, data, base, index, disp); - } else { - tcg_out_insn(s, RXY, STY, data, base, index, disp); - } - break; - - case MO_Q | MO_BSWAP: - tcg_out_insn(s, RXY, STRVG, data, base, index, disp); - break; - case MO_Q: - tcg_out_insn(s, RXY, STG, data, base, index, disp); - break; - - default: - tcg_abort(); - } -} - -#if defined(CONFIG_SOFTMMU) -/* We're expecting to use a 20-bit signed offset on the tlb memory ops. - Using the offset of the second entry in the last tlb table ensures - that we can index all of the elements of the first entry. */ -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) - > 0x7ffff); - -/* Load and compare a TLB entry, leaving the flags set. Loads the TLB - addend into R2. Returns a register with the santitized guest address. */ -static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc, - int mem_index, bool is_ld) -{ - int s_mask = (1 << (opc & MO_SIZE)) - 1; - int ofs, a_off; - uint64_t tlb_mask; - - /* For aligned accesses, we check the first byte and include the alignment - bits within the address. For unaligned access, we check that we don't - cross pages using the address of the last byte of the access. */ - if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) { - a_off = 0; - tlb_mask = TARGET_PAGE_MASK | s_mask; - } else { - a_off = s_mask; - tlb_mask = TARGET_PAGE_MASK; - } - - if (facilities & FACILITY_GEN_INST_EXT) { - tcg_out_risbg(s, TCG_REG_R2, addr_reg, - 64 - CPU_TLB_BITS - CPU_TLB_ENTRY_BITS, - 63 - CPU_TLB_ENTRY_BITS, - 64 + CPU_TLB_ENTRY_BITS - TARGET_PAGE_BITS, 1); - if (a_off) { - tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); - tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); - } else { - tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask); - } - } else { - tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); - tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2, - (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); - tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); - } - - if (is_ld) { - ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read); - } else { - ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); - } - if (TARGET_LONG_BITS == 32) { - tcg_out_mem(s, RX_C, RXY_CY, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs); - } else { - tcg_out_mem(s, 0, RXY_CG, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs); - } - - ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend); - tcg_out_mem(s, 0, RXY_LG, TCG_REG_R2, TCG_REG_R2, TCG_AREG0, ofs); - - if (TARGET_LONG_BITS == 32) { - tgen_ext32u(s, TCG_REG_R3, addr_reg); - return TCG_REG_R3; - } - return addr_reg; -} - -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, - TCGReg data, TCGReg addr, - tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->datalo_reg = data; - 
label->addrlo_reg = addr; - label->raddr = raddr; - label->label_ptr[0] = label_ptr; -} - -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGReg addr_reg = lb->addrlo_reg; - TCGReg data_reg = lb->datalo_reg; - TCGMemOpIdx oi = lb->oi; - TCGMemOp opc = get_memop(oi); - - patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2); - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); - } - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr); - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); - tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); - - tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); -} - -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGReg addr_reg = lb->addrlo_reg; - TCGReg data_reg = lb->datalo_reg; - TCGMemOpIdx oi = lb->oi; - TCGMemOp opc = get_memop(oi); - - patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2); - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); - } - switch (opc & MO_SIZE) { - case MO_UB: - tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); - break; - case MO_UW: - tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); - break; - case MO_UL: - tgen_ext32u(s, TCG_REG_R4, data_reg); - break; - case MO_Q: - tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); - break; - default: - tcg_abort(); - } - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr); - tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); - - tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); -} -#else -static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, - TCGReg *index_reg, tcg_target_long *disp) -{ - if (TARGET_LONG_BITS == 32) { - tgen_ext32u(s, TCG_TMP0, *addr_reg); - *addr_reg = TCG_TMP0; - } - if (guest_base < 0x80000) { - *index_reg = TCG_REG_NONE; - *disp = guest_base; - } else { - *index_reg = TCG_GUEST_BASE_REG; - *disp = 0; - } -} -#endif /* CONFIG_SOFTMMU */ - -static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, - TCGMemOpIdx oi) -{ - TCGMemOp opc = get_memop(oi); -#ifdef CONFIG_SOFTMMU - unsigned mem_index = get_mmuidx(oi); - tcg_insn_unit *label_ptr; - TCGReg base_reg; - - base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1); - - /* We need to keep the offset unchanged for retranslation. */ - tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); - label_ptr = s->code_ptr; - s->code_ptr += 1; - - tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); - - add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr); -#else - TCGReg index_reg; - tcg_target_long disp; - - tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); - tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp); -#endif -} - -static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, - TCGMemOpIdx oi) -{ - TCGMemOp opc = get_memop(oi); -#ifdef CONFIG_SOFTMMU - unsigned mem_index = get_mmuidx(oi); - tcg_insn_unit *label_ptr; - TCGReg base_reg; - - base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0); - - /* We need to keep the offset unchanged for retranslation. 
*/ - tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); - label_ptr = s->code_ptr; - s->code_ptr += 1; - - tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); - - add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr); -#else - TCGReg index_reg; - tcg_target_long disp; - - tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); - tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp); -#endif -} - -# define OP_32_64(x) \ - case glue(glue(INDEX_op_,x),_i32): \ - case glue(glue(INDEX_op_,x),_i64) - -static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, - const TCGArg *args, const int *const_args) -{ - S390Opcode op; - TCGArg a0, a1, a2; - - switch (opc) { - case INDEX_op_exit_tb: - /* return value */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, args[0]); - tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr); - break; - - case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { - tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4)); - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); - s->code_ptr += 2; - } else { - /* load address stored at s->tb_next + args[0] */ - tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_TMP0, s->tb_next + args[0]); - /* and go there */ - tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_TMP0); - } - s->tb_next_offset[args[0]] = tcg_current_code_size(s); - break; - - OP_32_64(ld8u): - /* ??? LLC (RXY format) is only present with the extended-immediate - facility, whereas LLGC is always present. */ - tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]); - break; - - OP_32_64(ld8s): - /* ??? LB is no smaller than LGB, so no point to using it. */ - tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]); - break; - - OP_32_64(ld16u): - /* ??? LLH (RXY format) is only present with the extended-immediate - facility, whereas LLGH is always present. 
*/ - tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]); - break; - - case INDEX_op_ld16s_i32: - tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]); - break; - - case INDEX_op_ld_i32: - tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]); - break; - - OP_32_64(st8): - tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1], - TCG_REG_NONE, args[2]); - break; - - OP_32_64(st16): - tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1], - TCG_REG_NONE, args[2]); - break; - - case INDEX_op_st_i32: - tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); - break; - - case INDEX_op_add_i32: - a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; - if (const_args[2]) { - do_addi_32: - if (a0 == a1) { - if (a2 == (int16_t)a2) { - tcg_out_insn(s, RI, AHI, a0, a2); - break; - } - if (facilities & FACILITY_EXT_IMM) { - tcg_out_insn(s, RIL, AFI, a0, a2); - break; - } - } - tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); - } else if (a0 == a1) { - tcg_out_insn(s, RR, AR, a0, a2); - } else { - tcg_out_insn(s, RX, LA, a0, a1, a2, 0); - } - break; - case INDEX_op_sub_i32: - a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; - if (const_args[2]) { - a2 = -a2; - goto do_addi_32; - } - tcg_out_insn(s, RR, SR, args[0], args[2]); - break; - - case INDEX_op_and_i32: - if (const_args[2]) { - tgen_andi(s, TCG_TYPE_I32, args[0], args[2]); - } else { - tcg_out_insn(s, RR, NR, args[0], args[2]); - } - break; - case INDEX_op_or_i32: - if (const_args[2]) { - tgen64_ori(s, args[0], args[2] & 0xffffffff); - } else { - tcg_out_insn(s, RR, OR, args[0], args[2]); - } - break; - case INDEX_op_xor_i32: - if (const_args[2]) { - tgen64_xori(s, args[0], args[2] & 0xffffffff); - } else { - tcg_out_insn(s, RR, XR, args[0], args[2]); - } - break; - - case INDEX_op_neg_i32: - tcg_out_insn(s, RR, LCR, args[0], args[1]); - break; - - case INDEX_op_mul_i32: - if (const_args[2]) { - if ((int32_t)args[2] == (int16_t)args[2]) { - tcg_out_insn(s, RI, MHI, args[0], args[2]); - } else { - tcg_out_insn(s, RIL, MSFI, args[0], args[2]); - } - } else { - tcg_out_insn(s, RRE, MSR, args[0], args[2]); - } - break; - - case INDEX_op_div2_i32: - tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]); - break; - case INDEX_op_divu2_i32: - tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]); - break; - - case INDEX_op_shl_i32: - op = RS_SLL; - do_shift32: - if (const_args[2]) { - tcg_out_sh32(s, op, args[0], TCG_REG_NONE, args[2]); - } else { - tcg_out_sh32(s, op, args[0], args[2], 0); - } - break; - case INDEX_op_shr_i32: - op = RS_SRL; - goto do_shift32; - case INDEX_op_sar_i32: - op = RS_SRA; - goto do_shift32; - - case INDEX_op_rotl_i32: - /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol. 
*/ - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]); - } else { - tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0); - } - break; - case INDEX_op_rotr_i32: - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLL, args[0], args[1], - TCG_REG_NONE, (32 - args[2]) & 31); - } else { - tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]); - tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0); - } - break; - - case INDEX_op_ext8s_i32: - tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]); - break; - case INDEX_op_ext16s_i32: - tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]); - break; - case INDEX_op_ext8u_i32: - tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]); - break; - case INDEX_op_ext16u_i32: - tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]); - break; - - OP_32_64(bswap16): - /* The TCG bswap definition requires bits 0-47 already be zero. - Thus we don't need the G-type insns to implement bswap16_i64. */ - tcg_out_insn(s, RRE, LRVR, args[0], args[1]); - tcg_out_sh32(s, RS_SRL, args[0], TCG_REG_NONE, 16); - break; - OP_32_64(bswap32): - tcg_out_insn(s, RRE, LRVR, args[0], args[1]); - break; - - case INDEX_op_add2_i32: - if (const_args[4]) { - tcg_out_insn(s, RIL, ALFI, args[0], args[4]); - } else { - tcg_out_insn(s, RR, ALR, args[0], args[4]); - } - tcg_out_insn(s, RRE, ALCR, args[1], args[5]); - break; - case INDEX_op_sub2_i32: - if (const_args[4]) { - tcg_out_insn(s, RIL, SLFI, args[0], args[4]); - } else { - tcg_out_insn(s, RR, SLR, args[0], args[4]); - } - tcg_out_insn(s, RRE, SLBR, args[1], args[5]); - break; - - case INDEX_op_br: - tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0])); - break; - - case INDEX_op_brcond_i32: - tgen_brcond(s, TCG_TYPE_I32, args[2], args[0], - args[1], const_args[1], arg_label(args[3])); - break; - case INDEX_op_setcond_i32: - tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], - args[2], const_args[2]); - break; - case INDEX_op_movcond_i32: - tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], - args[2], const_args[2], args[3]); - break; - - case INDEX_op_qemu_ld_i32: - /* ??? Technically we can use a non-extending instruction. 
*/ - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args[0], args[1], args[2]); - break; - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args[0], args[1], args[2]); - break; - - case INDEX_op_ld16s_i64: - tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]); - break; - case INDEX_op_ld32u_i64: - tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]); - break; - case INDEX_op_ld32s_i64: - tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]); - break; - case INDEX_op_ld_i64: - tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); - break; - - case INDEX_op_st32_i64: - tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); - break; - case INDEX_op_st_i64: - tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); - break; - - case INDEX_op_add_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - do_addi_64: - if (a0 == a1) { - if (a2 == (int16_t)a2) { - tcg_out_insn(s, RI, AGHI, a0, a2); - break; - } - if (facilities & FACILITY_EXT_IMM) { - if (a2 == (int32_t)a2) { - tcg_out_insn(s, RIL, AGFI, a0, a2); - break; - } else if (a2 == (uint32_t)a2) { - tcg_out_insn(s, RIL, ALGFI, a0, a2); - break; - } else if (-a2 == (uint32_t)-a2) { - tcg_out_insn(s, RIL, SLGFI, a0, -a2); - break; - } - } - } - tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); - } else if (a0 == a1) { - tcg_out_insn(s, RRE, AGR, a0, a2); - } else { - tcg_out_insn(s, RX, LA, a0, a1, a2, 0); - } - break; - case INDEX_op_sub_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - a2 = -a2; - goto do_addi_64; - } else { - tcg_out_insn(s, RRE, SGR, args[0], args[2]); - } - break; - - case INDEX_op_and_i64: - if (const_args[2]) { - tgen_andi(s, TCG_TYPE_I64, args[0], args[2]); - } else { - tcg_out_insn(s, RRE, NGR, args[0], args[2]); - } - break; - case INDEX_op_or_i64: - if (const_args[2]) { - tgen64_ori(s, args[0], args[2]); - } else { - tcg_out_insn(s, RRE, OGR, args[0], args[2]); - } - break; - case INDEX_op_xor_i64: - if (const_args[2]) { - tgen64_xori(s, args[0], args[2]); - } else { - tcg_out_insn(s, RRE, XGR, args[0], args[2]); - } - break; - - case INDEX_op_neg_i64: - tcg_out_insn(s, RRE, LCGR, args[0], args[1]); - break; - case INDEX_op_bswap64_i64: - tcg_out_insn(s, RRE, LRVGR, args[0], args[1]); - break; - - case INDEX_op_mul_i64: - if (const_args[2]) { - if (args[2] == (int16_t)args[2]) { - tcg_out_insn(s, RI, MGHI, args[0], args[2]); - } else { - tcg_out_insn(s, RIL, MSGFI, args[0], args[2]); - } - } else { - tcg_out_insn(s, RRE, MSGR, args[0], args[2]); - } - break; - - case INDEX_op_div2_i64: - /* ??? We get an unnecessary sign-extension of the dividend - into R3 with this definition, but as we do in fact always - produce both quotient and remainder using INDEX_op_div_i64 - instead requires jumping through even more hoops. 
*/ - tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]); - break; - case INDEX_op_divu2_i64: - tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]); - break; - case INDEX_op_mulu2_i64: - tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]); - break; - - case INDEX_op_shl_i64: - op = RSY_SLLG; - do_shift64: - if (const_args[2]) { - tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]); - } else { - tcg_out_sh64(s, op, args[0], args[1], args[2], 0); - } - break; - case INDEX_op_shr_i64: - op = RSY_SRLG; - goto do_shift64; - case INDEX_op_sar_i64: - op = RSY_SRAG; - goto do_shift64; - - case INDEX_op_rotl_i64: - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], - TCG_REG_NONE, args[2]); - } else { - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0); - } - break; - case INDEX_op_rotr_i64: - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], - TCG_REG_NONE, (64 - args[2]) & 63); - } else { - /* We can use the smaller 32-bit negate because only the - low 6 bits are examined for the rotate. */ - tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]); - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0); - } - break; - - case INDEX_op_ext8s_i64: - tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]); - break; - case INDEX_op_ext16s_i64: - tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]); - break; - case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: - tgen_ext32s(s, args[0], args[1]); - break; - case INDEX_op_ext8u_i64: - tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]); - break; - case INDEX_op_ext16u_i64: - tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]); - break; - case INDEX_op_extu_i32_i64: - case INDEX_op_ext32u_i64: - tgen_ext32u(s, args[0], args[1]); - break; - - case INDEX_op_add2_i64: - if (const_args[4]) { - if ((int64_t)args[4] >= 0) { - tcg_out_insn(s, RIL, ALGFI, args[0], args[4]); - } else { - tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]); - } - } else { - tcg_out_insn(s, RRE, ALGR, args[0], args[4]); - } - tcg_out_insn(s, RRE, ALCGR, args[1], args[5]); - break; - case INDEX_op_sub2_i64: - if (const_args[4]) { - if ((int64_t)args[4] >= 0) { - tcg_out_insn(s, RIL, SLGFI, args[0], args[4]); - } else { - tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]); - } - } else { - tcg_out_insn(s, RRE, SLGR, args[0], args[4]); - } - tcg_out_insn(s, RRE, SLBGR, args[1], args[5]); - break; - - case INDEX_op_brcond_i64: - tgen_brcond(s, TCG_TYPE_I64, args[2], args[0], - args[1], const_args[1], arg_label(args[3])); - break; - case INDEX_op_setcond_i64: - tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], - args[2], const_args[2]); - break; - case INDEX_op_movcond_i64: - tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], - args[2], const_args[2], args[3]); - break; - - OP_32_64(deposit): - tgen_deposit(s, args[0], args[2], args[3], args[4]); - break; - - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ - case INDEX_op_movi_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. 
*/ - default: - tcg_abort(); - } -} - -static const TCGTargetOpDef s390_op_defs[] = { - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - { INDEX_op_br, { } }, - - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_st8_i32, { "r", "r" } }, - { INDEX_op_st16_i32, { "r", "r" } }, - { INDEX_op_st_i32, { "r", "r" } }, - - { INDEX_op_add_i32, { "r", "r", "ri" } }, - { INDEX_op_sub_i32, { "r", "0", "ri" } }, - { INDEX_op_mul_i32, { "r", "0", "rK" } }, - - { INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } }, - { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } }, - - { INDEX_op_and_i32, { "r", "0", "ri" } }, - { INDEX_op_or_i32, { "r", "0", "rO" } }, - { INDEX_op_xor_i32, { "r", "0", "rX" } }, - - { INDEX_op_neg_i32, { "r", "r" } }, - - { INDEX_op_shl_i32, { "r", "0", "Ri" } }, - { INDEX_op_shr_i32, { "r", "0", "Ri" } }, - { INDEX_op_sar_i32, { "r", "0", "Ri" } }, - - { INDEX_op_rotl_i32, { "r", "r", "Ri" } }, - { INDEX_op_rotr_i32, { "r", "r", "Ri" } }, - - { INDEX_op_ext8s_i32, { "r", "r" } }, - { INDEX_op_ext8u_i32, { "r", "r" } }, - { INDEX_op_ext16s_i32, { "r", "r" } }, - { INDEX_op_ext16u_i32, { "r", "r" } }, - - { INDEX_op_bswap16_i32, { "r", "r" } }, - { INDEX_op_bswap32_i32, { "r", "r" } }, - - { INDEX_op_add2_i32, { "r", "r", "0", "1", "rA", "r" } }, - { INDEX_op_sub2_i32, { "r", "r", "0", "1", "rA", "r" } }, - - { INDEX_op_brcond_i32, { "r", "rC" } }, - { INDEX_op_setcond_i32, { "r", "r", "rC" } }, - { INDEX_op_movcond_i32, { "r", "r", "rC", "r", "0" } }, - { INDEX_op_deposit_i32, { "r", "0", "r" } }, - - { INDEX_op_qemu_ld_i32, { "r", "L" } }, - { INDEX_op_qemu_ld_i64, { "r", "L" } }, - { INDEX_op_qemu_st_i32, { "L", "L" } }, - { INDEX_op_qemu_st_i64, { "L", "L" } }, - - { INDEX_op_ld8u_i64, { "r", "r" } }, - { INDEX_op_ld8s_i64, { "r", "r" } }, - { INDEX_op_ld16u_i64, { "r", "r" } }, - { INDEX_op_ld16s_i64, { "r", "r" } }, - { INDEX_op_ld32u_i64, { "r", "r" } }, - { INDEX_op_ld32s_i64, { "r", "r" } }, - { INDEX_op_ld_i64, { "r", "r" } }, - - { INDEX_op_st8_i64, { "r", "r" } }, - { INDEX_op_st16_i64, { "r", "r" } }, - { INDEX_op_st32_i64, { "r", "r" } }, - { INDEX_op_st_i64, { "r", "r" } }, - - { INDEX_op_add_i64, { "r", "r", "ri" } }, - { INDEX_op_sub_i64, { "r", "0", "ri" } }, - { INDEX_op_mul_i64, { "r", "0", "rK" } }, - - { INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } }, - { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } }, - { INDEX_op_mulu2_i64, { "b", "a", "0", "r" } }, - - { INDEX_op_and_i64, { "r", "0", "ri" } }, - { INDEX_op_or_i64, { "r", "0", "rO" } }, - { INDEX_op_xor_i64, { "r", "0", "rX" } }, - - { INDEX_op_neg_i64, { "r", "r" } }, - - { INDEX_op_shl_i64, { "r", "r", "Ri" } }, - { INDEX_op_shr_i64, { "r", "r", "Ri" } }, - { INDEX_op_sar_i64, { "r", "r", "Ri" } }, - - { INDEX_op_rotl_i64, { "r", "r", "Ri" } }, - { INDEX_op_rotr_i64, { "r", "r", "Ri" } }, - - { INDEX_op_ext8s_i64, { "r", "r" } }, - { INDEX_op_ext8u_i64, { "r", "r" } }, - { INDEX_op_ext16s_i64, { "r", "r" } }, - { INDEX_op_ext16u_i64, { "r", "r" } }, - { INDEX_op_ext32s_i64, { "r", "r" } }, - { INDEX_op_ext32u_i64, { "r", "r" } }, - - { INDEX_op_ext_i32_i64, { "r", "r" } }, - { INDEX_op_extu_i32_i64, { "r", "r" } }, - - { INDEX_op_bswap16_i64, { "r", "r" } }, - { INDEX_op_bswap32_i64, { "r", "r" } }, - { INDEX_op_bswap64_i64, { "r", "r" } }, - - { INDEX_op_add2_i64, { "r", "r", "0", "1", "rA", "r" } }, - { INDEX_op_sub2_i64, { "r", "r", "0", 
"1", "rA", "r" } }, - - { INDEX_op_brcond_i64, { "r", "rC" } }, - { INDEX_op_setcond_i64, { "r", "r", "rC" } }, - { INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } }, - { INDEX_op_deposit_i64, { "r", "0", "r" } }, - - { -1 }, -}; - -static void query_facilities(void) -{ - unsigned long hwcap = qemu_getauxval(AT_HWCAP); - - /* Is STORE FACILITY LIST EXTENDED available? Honestly, I believe this - is present on all 64-bit systems, but let's check for it anyway. */ - if (hwcap & HWCAP_S390_STFLE) { - register int r0 __asm__("0"); - register void *r1 __asm__("1"); - - /* stfle 0(%r1) */ - r1 = &facilities; - asm volatile(".word 0xb2b0,0x1000" - : "=r"(r0) : "0"(0), "r"(r1) : "memory", "cc"); - } -} - -static void tcg_target_init(TCGContext *s) -{ - query_facilities(); - - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff); - - tcg_regset_clear(tcg_target_call_clobber_regs); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5); - /* The r6 register is technically call-saved, but it's also a parameter - register, so it can get killed by setup for the qemu_st helper. */ - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6); - /* The return register can be considered call-clobbered. */ - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_TMP0); - /* XXX many insns can't be used with R0, so we better avoid it for now */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); - - tcg_add_target_add_op_defs(s390_op_defs); -} - -#define FRAME_SIZE ((int)(TCG_TARGET_CALL_STACK_OFFSET \ - + TCG_STATIC_CALL_ARGS_SIZE \ - + CPU_TEMP_BUF_NLONGS * sizeof(long))) - -static void tcg_target_qemu_prologue(TCGContext *s) -{ - /* stmg %r6,%r15,48(%r15) (save registers) */ - tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48); - - /* aghi %r15,-frame_size */ - tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE); - - tcg_set_frame(s, TCG_REG_CALL_STACK, - TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET, - CPU_TEMP_BUF_NLONGS * sizeof(long)); - -#ifndef CONFIG_SOFTMMU - if (guest_base >= 0x80000) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); - tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); - } -#endif - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - /* br %r3 (go to TB) */ - tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]); - - tb_ret_addr = s->code_ptr; - - /* lmg %r6,%r15,fs+48(%r15) (restore registers) */ - tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, - FRAME_SIZE + 48); - - /* br %r14 (return) */ - tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14); -} - -typedef struct { - DebugFrameHeader h; - uint8_t fde_def_cfa[4]; - uint8_t fde_reg_ofs[18]; -} DebugFrame; - -/* We're expecting a 2 byte uleb128 encoded value. 
*/ -QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); - -#define ELF_HOST_MACHINE EM_S390 - -static const DebugFrame debug_frame = { - .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .h.cie.id = -1, - .h.cie.version = 1, - .h.cie.code_align = 1, - .h.cie.data_align = 8, /* sleb128 8 */ - .h.cie.return_column = TCG_REG_R14, - - /* Total FDE size does not include the "len" member. */ - .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), - - .fde_def_cfa = { - 12, TCG_REG_CALL_STACK, /* DW_CFA_def_cfa %r15, ... */ - (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ - (FRAME_SIZE >> 7) - }, - .fde_reg_ofs = { - 0x86, 6, /* DW_CFA_offset, %r6, 48 */ - 0x87, 7, /* DW_CFA_offset, %r7, 56 */ - 0x88, 8, /* DW_CFA_offset, %r8, 64 */ - 0x89, 9, /* DW_CFA_offset, %r92, 72 */ - 0x8a, 10, /* DW_CFA_offset, %r10, 80 */ - 0x8b, 11, /* DW_CFA_offset, %r11, 88 */ - 0x8c, 12, /* DW_CFA_offset, %r12, 96 */ - 0x8d, 13, /* DW_CFA_offset, %r13, 104 */ - 0x8e, 14, /* DW_CFA_offset, %r14, 112 */ - } -}; - -void tcg_register_jit(void *buf, size_t buf_size) -{ - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); -} diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c new file mode 100644 index 0000000000..58520fa22b --- /dev/null +++ b/tcg/s390/tcg-target.inc.c @@ -0,0 +1,2410 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2009 Ulrich Hecht + * Copyright (c) 2009 Alexander Graf + * Copyright (c) 2010 Richard Henderson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "tcg-be-ldst.h" + +/* We only support generating code for 64-bit mode. */ +#if TCG_TARGET_REG_BITS != 64 +#error "unsupported code generation mode" +#endif + +#include "elf.h" + +/* ??? The translation blocks produced by TCG are generally small enough to + be entirely reachable with a 16-bit displacement. Leaving the option for + a 32-bit displacement here Just In Case. */ +#define USE_LONG_BRANCHES 0 + +#define TCG_CT_CONST_MULI 0x100 +#define TCG_CT_CONST_ORI 0x200 +#define TCG_CT_CONST_XORI 0x400 +#define TCG_CT_CONST_CMPI 0x800 +#define TCG_CT_CONST_ADLI 0x1000 + +/* Several places within the instruction set 0 means "no register" + rather than TCG_REG_R0. */ +#define TCG_REG_NONE 0 + +/* A scratch register that may be be used throughout the backend. 
*/ +#define TCG_TMP0 TCG_REG_R14 + +#ifndef CONFIG_SOFTMMU +#define TCG_GUEST_BASE_REG TCG_REG_R13 +#endif + +/* All of the following instructions are prefixed with their instruction + format, and are defined as 8- or 16-bit quantities, even when the two + halves of the 16-bit quantity may appear 32 bits apart in the insn. + This makes it easy to copy the values from the tables in Appendix B. */ +typedef enum S390Opcode { + RIL_AFI = 0xc209, + RIL_AGFI = 0xc208, + RIL_ALFI = 0xc20b, + RIL_ALGFI = 0xc20a, + RIL_BRASL = 0xc005, + RIL_BRCL = 0xc004, + RIL_CFI = 0xc20d, + RIL_CGFI = 0xc20c, + RIL_CLFI = 0xc20f, + RIL_CLGFI = 0xc20e, + RIL_IIHF = 0xc008, + RIL_IILF = 0xc009, + RIL_LARL = 0xc000, + RIL_LGFI = 0xc001, + RIL_LGRL = 0xc408, + RIL_LLIHF = 0xc00e, + RIL_LLILF = 0xc00f, + RIL_LRL = 0xc40d, + RIL_MSFI = 0xc201, + RIL_MSGFI = 0xc200, + RIL_NIHF = 0xc00a, + RIL_NILF = 0xc00b, + RIL_OIHF = 0xc00c, + RIL_OILF = 0xc00d, + RIL_SLFI = 0xc205, + RIL_SLGFI = 0xc204, + RIL_XIHF = 0xc006, + RIL_XILF = 0xc007, + + RI_AGHI = 0xa70b, + RI_AHI = 0xa70a, + RI_BRC = 0xa704, + RI_IIHH = 0xa500, + RI_IIHL = 0xa501, + RI_IILH = 0xa502, + RI_IILL = 0xa503, + RI_LGHI = 0xa709, + RI_LLIHH = 0xa50c, + RI_LLIHL = 0xa50d, + RI_LLILH = 0xa50e, + RI_LLILL = 0xa50f, + RI_MGHI = 0xa70d, + RI_MHI = 0xa70c, + RI_NIHH = 0xa504, + RI_NIHL = 0xa505, + RI_NILH = 0xa506, + RI_NILL = 0xa507, + RI_OIHH = 0xa508, + RI_OIHL = 0xa509, + RI_OILH = 0xa50a, + RI_OILL = 0xa50b, + + RIE_CGIJ = 0xec7c, + RIE_CGRJ = 0xec64, + RIE_CIJ = 0xec7e, + RIE_CLGRJ = 0xec65, + RIE_CLIJ = 0xec7f, + RIE_CLGIJ = 0xec7d, + RIE_CLRJ = 0xec77, + RIE_CRJ = 0xec76, + RIE_RISBG = 0xec55, + + RRE_AGR = 0xb908, + RRE_ALGR = 0xb90a, + RRE_ALCR = 0xb998, + RRE_ALCGR = 0xb988, + RRE_CGR = 0xb920, + RRE_CLGR = 0xb921, + RRE_DLGR = 0xb987, + RRE_DLR = 0xb997, + RRE_DSGFR = 0xb91d, + RRE_DSGR = 0xb90d, + RRE_LGBR = 0xb906, + RRE_LCGR = 0xb903, + RRE_LGFR = 0xb914, + RRE_LGHR = 0xb907, + RRE_LGR = 0xb904, + RRE_LLGCR = 0xb984, + RRE_LLGFR = 0xb916, + RRE_LLGHR = 0xb985, + RRE_LRVR = 0xb91f, + RRE_LRVGR = 0xb90f, + RRE_LTGR = 0xb902, + RRE_MLGR = 0xb986, + RRE_MSGR = 0xb90c, + RRE_MSR = 0xb252, + RRE_NGR = 0xb980, + RRE_OGR = 0xb981, + RRE_SGR = 0xb909, + RRE_SLGR = 0xb90b, + RRE_SLBR = 0xb999, + RRE_SLBGR = 0xb989, + RRE_XGR = 0xb982, + + RRF_LOCR = 0xb9f2, + RRF_LOCGR = 0xb9e2, + + RR_AR = 0x1a, + RR_ALR = 0x1e, + RR_BASR = 0x0d, + RR_BCR = 0x07, + RR_CLR = 0x15, + RR_CR = 0x19, + RR_DR = 0x1d, + RR_LCR = 0x13, + RR_LR = 0x18, + RR_LTR = 0x12, + RR_NR = 0x14, + RR_OR = 0x16, + RR_SR = 0x1b, + RR_SLR = 0x1f, + RR_XR = 0x17, + + RSY_RLL = 0xeb1d, + RSY_RLLG = 0xeb1c, + RSY_SLLG = 0xeb0d, + RSY_SRAG = 0xeb0a, + RSY_SRLG = 0xeb0c, + + RS_SLL = 0x89, + RS_SRA = 0x8a, + RS_SRL = 0x88, + + RXY_AG = 0xe308, + RXY_AY = 0xe35a, + RXY_CG = 0xe320, + RXY_CY = 0xe359, + RXY_LAY = 0xe371, + RXY_LB = 0xe376, + RXY_LG = 0xe304, + RXY_LGB = 0xe377, + RXY_LGF = 0xe314, + RXY_LGH = 0xe315, + RXY_LHY = 0xe378, + RXY_LLGC = 0xe390, + RXY_LLGF = 0xe316, + RXY_LLGH = 0xe391, + RXY_LMG = 0xeb04, + RXY_LRV = 0xe31e, + RXY_LRVG = 0xe30f, + RXY_LRVH = 0xe31f, + RXY_LY = 0xe358, + RXY_STCY = 0xe372, + RXY_STG = 0xe324, + RXY_STHY = 0xe370, + RXY_STMG = 0xeb24, + RXY_STRV = 0xe33e, + RXY_STRVG = 0xe32f, + RXY_STRVH = 0xe33f, + RXY_STY = 0xe350, + + RX_A = 0x5a, + RX_C = 0x59, + RX_L = 0x58, + RX_LA = 0x41, + RX_LH = 0x48, + RX_ST = 0x50, + RX_STC = 0x42, + RX_STH = 0x40, +} S390Opcode; + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "%r0", 
"%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", + "%r8", "%r9", "%r10" "%r11" "%r12" "%r13" "%r14" "%r15" +}; +#endif + +/* Since R6 is a potential argument register, choose it last of the + call-saved registers. Likewise prefer the call-clobbered registers + in reverse order to maximize the chance of avoiding the arguments. */ +static const int tcg_target_reg_alloc_order[] = { + /* Call saved registers. */ + TCG_REG_R13, + TCG_REG_R12, + TCG_REG_R11, + TCG_REG_R10, + TCG_REG_R9, + TCG_REG_R8, + TCG_REG_R7, + TCG_REG_R6, + /* Call clobbered registers. */ + TCG_REG_R14, + TCG_REG_R0, + TCG_REG_R1, + /* Argument registers, in reverse order of allocation. */ + TCG_REG_R5, + TCG_REG_R4, + TCG_REG_R3, + TCG_REG_R2, +}; + +static const int tcg_target_call_iarg_regs[] = { + TCG_REG_R2, + TCG_REG_R3, + TCG_REG_R4, + TCG_REG_R5, + TCG_REG_R6, +}; + +static const int tcg_target_call_oarg_regs[] = { + TCG_REG_R2, +}; + +#define S390_CC_EQ 8 +#define S390_CC_LT 4 +#define S390_CC_GT 2 +#define S390_CC_OV 1 +#define S390_CC_NE (S390_CC_LT | S390_CC_GT) +#define S390_CC_LE (S390_CC_LT | S390_CC_EQ) +#define S390_CC_GE (S390_CC_GT | S390_CC_EQ) +#define S390_CC_NEVER 0 +#define S390_CC_ALWAYS 15 + +/* Condition codes that result from a COMPARE and COMPARE LOGICAL. */ +static const uint8_t tcg_cond_to_s390_cond[] = { + [TCG_COND_EQ] = S390_CC_EQ, + [TCG_COND_NE] = S390_CC_NE, + [TCG_COND_LT] = S390_CC_LT, + [TCG_COND_LE] = S390_CC_LE, + [TCG_COND_GT] = S390_CC_GT, + [TCG_COND_GE] = S390_CC_GE, + [TCG_COND_LTU] = S390_CC_LT, + [TCG_COND_LEU] = S390_CC_LE, + [TCG_COND_GTU] = S390_CC_GT, + [TCG_COND_GEU] = S390_CC_GE, +}; + +/* Condition codes that result from a LOAD AND TEST. Here, we have no + unsigned instruction variation, however since the test is vs zero we + can re-map the outcomes appropriately. */ +static const uint8_t tcg_cond_to_ltr_cond[] = { + [TCG_COND_EQ] = S390_CC_EQ, + [TCG_COND_NE] = S390_CC_NE, + [TCG_COND_LT] = S390_CC_LT, + [TCG_COND_LE] = S390_CC_LE, + [TCG_COND_GT] = S390_CC_GT, + [TCG_COND_GE] = S390_CC_GE, + [TCG_COND_LTU] = S390_CC_NEVER, + [TCG_COND_LEU] = S390_CC_EQ, + [TCG_COND_GTU] = S390_CC_NE, + [TCG_COND_GEU] = S390_CC_ALWAYS, +}; + +#ifdef CONFIG_SOFTMMU +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LESL] = helper_le_ldsl_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BESL] = helper_be_ldsl_mmu, + [MO_BEQ] = helper_be_ldq_mmu, +}; + +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, +}; +#endif + +static tcg_insn_unit *tb_ret_addr; + +/* A list of relevant facilities used by this translator. Some of these + are required for proper operation, and these are checked at startup. 
*/ + +#define FACILITY_ZARCH_ACTIVE (1ULL << (63 - 2)) +#define FACILITY_LONG_DISP (1ULL << (63 - 18)) +#define FACILITY_EXT_IMM (1ULL << (63 - 21)) +#define FACILITY_GEN_INST_EXT (1ULL << (63 - 34)) +#define FACILITY_LOAD_ON_COND (1ULL << (63 - 45)) + +static uint64_t facilities; + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + intptr_t pcrel2 = (tcg_insn_unit *)value - (code_ptr - 1); + assert(addend == -2); + + switch (type) { + case R_390_PC16DBL: + assert(pcrel2 == (int16_t)pcrel2); + tcg_patch16(code_ptr, pcrel2); + break; + case R_390_PC32DBL: + assert(pcrel2 == (int32_t)pcrel2); + tcg_patch32(code_ptr, pcrel2); + break; + default: + tcg_abort(); + break; + } +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str = *pct_str; + + switch (ct_str[0]) { + case 'r': /* all registers */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffff); + break; + case 'R': /* not R0 */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); + break; + case 'L': /* qemu_ld/st constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffff); + tcg_regset_reset_reg (ct->u.regs, TCG_REG_R2); + tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3); + tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4); + break; + case 'a': /* force R2 for division */ + ct->ct |= TCG_CT_REG; + tcg_regset_clear(ct->u.regs); + tcg_regset_set_reg(ct->u.regs, TCG_REG_R2); + break; + case 'b': /* force R3 for division */ + ct->ct |= TCG_CT_REG; + tcg_regset_clear(ct->u.regs); + tcg_regset_set_reg(ct->u.regs, TCG_REG_R3); + break; + case 'A': + ct->ct |= TCG_CT_CONST_ADLI; + break; + case 'K': + ct->ct |= TCG_CT_CONST_MULI; + break; + case 'O': + ct->ct |= TCG_CT_CONST_ORI; + break; + case 'X': + ct->ct |= TCG_CT_CONST_XORI; + break; + case 'C': + ct->ct |= TCG_CT_CONST_CMPI; + break; + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + + return 0; +} + +/* Immediates to be used with logical OR. This is an optimization only, + since a full 64-bit immediate OR can always be performed with 4 sequential + OI[LH][LH] instructions. What we're looking for is immediates that we + can load efficiently, and the immediate load plus the reg-reg OR is + smaller than the sequential OI's. */ + +static int tcg_match_ori(TCGType type, tcg_target_long val) +{ + if (facilities & FACILITY_EXT_IMM) { + if (type == TCG_TYPE_I32) { + /* All 32-bit ORs can be performed with 1 48-bit insn. */ + return 1; + } + } + + /* Look for negative values. These are best to load with LGHI. */ + if (val < 0) { + if (val == (int16_t)val) { + return 0; + } + if (facilities & FACILITY_EXT_IMM) { + if (val == (int32_t)val) { + return 0; + } + } + } + + return 1; +} + +/* Immediates to be used with logical XOR. This is almost, but not quite, + only an optimization. XOR with immediate is only supported with the + extended-immediate facility. That said, there are a few patterns for + which it is better to load the value into a register first. */ + +static int tcg_match_xori(TCGType type, tcg_target_long val) +{ + if ((facilities & FACILITY_EXT_IMM) == 0) { + return 0; + } + + if (type == TCG_TYPE_I32) { + /* All 32-bit XORs can be performed with 1 48-bit insn. */ + return 1; + } + + /* Look for negative values. These are best to load with LGHI. 
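+       (With the extended-immediate facility such a value fits a single LGFI, and that load plus an XGR is a shorter sequence than the XILF/XIHF pair.)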
*/ + if (val < 0 && val == (int32_t)val) { + return 0; + } + + return 1; +} + +/* Imediates to be used with comparisons. */ + +static int tcg_match_cmpi(TCGType type, tcg_target_long val) +{ + if (facilities & FACILITY_EXT_IMM) { + /* The COMPARE IMMEDIATE instruction is available. */ + if (type == TCG_TYPE_I32) { + /* We have a 32-bit immediate and can compare against anything. */ + return 1; + } else { + /* ??? We have no insight here into whether the comparison is + signed or unsigned. The COMPARE IMMEDIATE insn uses a 32-bit + signed immediate, and the COMPARE LOGICAL IMMEDIATE insn uses + a 32-bit unsigned immediate. If we were to use the (semi) + obvious "val == (int32_t)val" we would be enabling unsigned + comparisons vs very large numbers. The only solution is to + take the intersection of the ranges. */ + /* ??? Another possible solution is to simply lie and allow all + constants here and force the out-of-range values into a temp + register in tgen_cmp when we have knowledge of the actual + comparison code in use. */ + return val >= 0 && val <= 0x7fffffff; + } + } else { + /* Only the LOAD AND TEST instruction is available. */ + return val == 0; + } +} + +/* Immediates to be used with add2/sub2. */ + +static int tcg_match_add2i(TCGType type, tcg_target_long val) +{ + if (facilities & FACILITY_EXT_IMM) { + if (type == TCG_TYPE_I32) { + return 1; + } else if (val >= -0xffffffffll && val <= 0xffffffffll) { + return 1; + } + } + return 0; +} + +/* Test if a constant matches the constraint. */ +static int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct = arg_ct->ct; + + if (ct & TCG_CT_CONST) { + return 1; + } + + if (type == TCG_TYPE_I32) { + val = (int32_t)val; + } + + /* The following are mutually exclusive. */ + if (ct & TCG_CT_CONST_MULI) { + /* Immediates that may be used with multiply. If we have the + general-instruction-extensions, then we have MULTIPLY SINGLE + IMMEDIATE with a signed 32-bit, otherwise we have only + MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit. */ + if (facilities & FACILITY_GEN_INST_EXT) { + return val == (int32_t)val; + } else { + return val == (int16_t)val; + } + } else if (ct & TCG_CT_CONST_ADLI) { + return tcg_match_add2i(type, val); + } else if (ct & TCG_CT_CONST_ORI) { + return tcg_match_ori(type, val); + } else if (ct & TCG_CT_CONST_XORI) { + return tcg_match_xori(type, val); + } else if (ct & TCG_CT_CONST_CMPI) { + return tcg_match_cmpi(type, val); + } + + return 0; +} + +/* Emit instructions according to the given instruction format. 
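+   Each helper packs the operand fields of one format; the tcg_out_insn macro defined below pastes the format name onto the mnemonic, so e.g. tcg_out_insn(s, RI, AGHI, r, i) expands to tcg_out_insn_RI(s, RI_AGHI, r, i).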
*/ + +static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2) +{ + tcg_out16(s, (op << 8) | (r1 << 4) | r2); +} + +static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op, + TCGReg r1, TCGReg r2) +{ + tcg_out32(s, (op << 16) | (r1 << 4) | r2); +} + +static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op, + TCGReg r1, TCGReg r2, int m3) +{ + tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2); +} + +static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2) +{ + tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff)); +} + +static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2) +{ + tcg_out16(s, op | (r1 << 4)); + tcg_out32(s, i2); +} + +static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1, + TCGReg b2, TCGReg r3, int disp) +{ + tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12) + | (disp & 0xfff)); +} + +static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1, + TCGReg b2, TCGReg r3, int disp) +{ + tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3); + tcg_out32(s, (op & 0xff) | (b2 << 28) + | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4)); +} + +#define tcg_out_insn_RX tcg_out_insn_RS +#define tcg_out_insn_RXY tcg_out_insn_RSY + +/* Emit an opcode with "type-checking" of the format. */ +#define tcg_out_insn(S, FMT, OP, ...) \ + glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__) + + +/* emit 64-bit shifts */ +static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest, + TCGReg src, TCGReg sh_reg, int sh_imm) +{ + tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm); +} + +/* emit 32-bit shifts */ +static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest, + TCGReg sh_reg, int sh_imm) +{ + tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm); +} + +static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) +{ + if (src != dst) { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, LR, dst, src); + } else { + tcg_out_insn(s, RRE, LGR, dst, src); + } + } +} + +/* load a register with an immediate value */ +static void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long sval) +{ + static const S390Opcode lli_insns[4] = { + RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH + }; + + tcg_target_ulong uval = sval; + int i; + + if (type == TCG_TYPE_I32) { + uval = (uint32_t)sval; + sval = (int32_t)sval; + } + + /* Try all 32-bit insns that can load it in one go. */ + if (sval >= -0x8000 && sval < 0x8000) { + tcg_out_insn(s, RI, LGHI, ret, sval); + return; + } + + for (i = 0; i < 4; i++) { + tcg_target_long mask = 0xffffull << i*16; + if ((uval & mask) == uval) { + tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16); + return; + } + } + + /* Try all 48-bit insns that can load it in one go. */ + if (facilities & FACILITY_EXT_IMM) { + if (sval == (int32_t)sval) { + tcg_out_insn(s, RIL, LGFI, ret, sval); + return; + } + if (uval <= 0xffffffff) { + tcg_out_insn(s, RIL, LLILF, ret, uval); + return; + } + if ((uval & 0xffffffff) == 0) { + tcg_out_insn(s, RIL, LLIHF, ret, uval >> 31 >> 1); + return; + } + } + + /* Try for PC-relative address load. */ + if ((sval & 1) == 0) { + ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1; + if (off == (int32_t)off) { + tcg_out_insn(s, RIL, LARL, ret, off); + return; + } + } + + /* If extended immediates are not present, then we may have to issue + several instructions to load the low 32 bits. */ + if (!(facilities & FACILITY_EXT_IMM)) { + /* A 32-bit unsigned value can be loaded in 2 insns. 
And given + that the lli_insns loop above did not succeed, we know that + both insns are required. */ + if (uval <= 0xffffffff) { + tcg_out_insn(s, RI, LLILL, ret, uval); + tcg_out_insn(s, RI, IILH, ret, uval >> 16); + return; + } + + /* If all high bits are set, the value can be loaded in 2 or 3 insns. + We first want to make sure that all the high bits get set. With + luck the low 16-bits can be considered negative to perform that for + free, otherwise we load an explicit -1. */ + if (sval >> 31 >> 1 == -1) { + if (uval & 0x8000) { + tcg_out_insn(s, RI, LGHI, ret, uval); + } else { + tcg_out_insn(s, RI, LGHI, ret, -1); + tcg_out_insn(s, RI, IILL, ret, uval); + } + tcg_out_insn(s, RI, IILH, ret, uval >> 16); + return; + } + } + + /* If we get here, both the high and low parts have non-zero bits. */ + + /* Recurse to load the lower 32-bits. */ + tcg_out_movi(s, TCG_TYPE_I64, ret, uval & 0xffffffff); + + /* Insert data into the high 32-bits. */ + uval = uval >> 31 >> 1; + if (facilities & FACILITY_EXT_IMM) { + if (uval < 0x10000) { + tcg_out_insn(s, RI, IIHL, ret, uval); + } else if ((uval & 0xffff) == 0) { + tcg_out_insn(s, RI, IIHH, ret, uval >> 16); + } else { + tcg_out_insn(s, RIL, IIHF, ret, uval); + } + } else { + if (uval & 0xffff) { + tcg_out_insn(s, RI, IIHL, ret, uval); + } + if (uval & 0xffff0000) { + tcg_out_insn(s, RI, IIHH, ret, uval >> 16); + } + } +} + + +/* Emit a load/store type instruction. Inputs are: + DATA: The register to be loaded or stored. + BASE+OFS: The effective address. + OPC_RX: If the operation has an RX format opcode (e.g. STC), otherwise 0. + OPC_RXY: The RXY format opcode for the operation (e.g. STCY). */ + +static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy, + TCGReg data, TCGReg base, TCGReg index, + tcg_target_long ofs) +{ + if (ofs < -0x80000 || ofs >= 0x80000) { + /* Combine the low 20 bits of the offset with the actual load insn; + the high 44 bits must come from an immediate load. */ + tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low); + ofs = low; + + /* If we were already given an index register, add it in. 
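+           For example, ofs = 0x123456789 splits into an immediate load of 0x123400000 into TCG_TMP0 plus a remaining displacement of 0x56789, with TCG_TMP0 then serving as (part of) the index.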
*/ + if (index != TCG_REG_NONE) { + tcg_out_insn(s, RRE, AGR, TCG_TMP0, index); + } + index = TCG_TMP0; + } + + if (opc_rx && ofs >= 0 && ofs < 0x1000) { + tcg_out_insn_RX(s, opc_rx, data, base, index, ofs); + } else { + tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs); + } +} + + +/* load data without address translation or endianness conversion */ +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, intptr_t ofs) +{ + if (type == TCG_TYPE_I32) { + tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs); + } else { + tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs); + } +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, intptr_t ofs) +{ + if (type == TCG_TYPE_I32) { + tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs); + } else { + tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs); + } +} + +/* load data from an absolute host address */ +static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs) +{ + intptr_t addr = (intptr_t)abs; + + if ((facilities & FACILITY_GEN_INST_EXT) && !(addr & 1)) { + ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1; + if (disp == (int32_t)disp) { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, LRL, dest, disp); + } else { + tcg_out_insn(s, RIL, LGRL, dest, disp); + } + return; + } + } + + tcg_out_movi(s, TCG_TYPE_PTR, dest, addr & ~0xffff); + tcg_out_ld(s, type, dest, dest, addr & 0xffff); +} + +static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src, + int msb, int lsb, int ofs, int z) +{ + /* Format RIE-f */ + tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src); + tcg_out16(s, (msb << 8) | (z << 7) | lsb); + tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff)); +} + +static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) +{ + if (facilities & FACILITY_EXT_IMM) { + tcg_out_insn(s, RRE, LGBR, dest, src); + return; + } + + if (type == TCG_TYPE_I32) { + if (dest == src) { + tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 24); + } else { + tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 24); + } + tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 24); + } else { + tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 56); + tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 56); + } +} + +static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) +{ + if (facilities & FACILITY_EXT_IMM) { + tcg_out_insn(s, RRE, LLGCR, dest, src); + return; + } + + if (dest == src) { + tcg_out_movi(s, type, TCG_TMP0, 0xff); + src = TCG_TMP0; + } else { + tcg_out_movi(s, type, dest, 0xff); + } + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, NR, dest, src); + } else { + tcg_out_insn(s, RRE, NGR, dest, src); + } +} + +static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) +{ + if (facilities & FACILITY_EXT_IMM) { + tcg_out_insn(s, RRE, LGHR, dest, src); + return; + } + + if (type == TCG_TYPE_I32) { + if (dest == src) { + tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 16); + } else { + tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 16); + } + tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 16); + } else { + tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 48); + tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 48); + } +} + +static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) +{ + if (facilities & FACILITY_EXT_IMM) { + tcg_out_insn(s, RRE, LLGHR, dest, src); + return; + } + + if (dest == src) { + tcg_out_movi(s, type, TCG_TMP0, 0xffff); + src = TCG_TMP0; + } 
else { + tcg_out_movi(s, type, dest, 0xffff); + } + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, NR, dest, src); + } else { + tcg_out_insn(s, RRE, NGR, dest, src); + } +} + +static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src) +{ + tcg_out_insn(s, RRE, LGFR, dest, src); +} + +static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src) +{ + tcg_out_insn(s, RRE, LLGFR, dest, src); +} + +/* Accept bit patterns like these: + 0....01....1 + 1....10....0 + 1..10..01..1 + 0..01..10..0 + Copied from gcc sources. */ +static inline bool risbg_mask(uint64_t c) +{ + uint64_t lsb; + /* We don't change the number of transitions by inverting, + so make sure we start with the LSB zero. */ + if (c & 1) { + c = ~c; + } + /* Reject all zeros or all ones. */ + if (c == 0) { + return false; + } + /* Find the first transition. */ + lsb = c & -c; + /* Invert to look for a second transition. */ + c = ~c; + /* Erase the first transition. */ + c &= -lsb; + /* Find the second transition, if any. */ + lsb = c & -c; + /* Match if all the bits are 1's, or if c is zero. */ + return c == -lsb; +} + +static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val) +{ + int msb, lsb; + if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) { + /* Achieve wraparound by swapping msb and lsb. */ + msb = 64 - ctz64(~val); + lsb = clz64(~val) - 1; + } else { + msb = clz64(val); + lsb = 63 - ctz64(val); + } + tcg_out_risbg(s, out, in, msb, lsb, 0, 1); +} + +static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) +{ + static const S390Opcode ni_insns[4] = { + RI_NILL, RI_NILH, RI_NIHL, RI_NIHH + }; + static const S390Opcode nif_insns[2] = { + RIL_NILF, RIL_NIHF + }; + uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull); + int i; + + /* Look for the zero-extensions. */ + if ((val & valid) == 0xffffffff) { + tgen_ext32u(s, dest, dest); + return; + } + if (facilities & FACILITY_EXT_IMM) { + if ((val & valid) == 0xff) { + tgen_ext8u(s, TCG_TYPE_I64, dest, dest); + return; + } + if ((val & valid) == 0xffff) { + tgen_ext16u(s, TCG_TYPE_I64, dest, dest); + return; + } + } + + /* Try all 32-bit insns that can perform it in one go. */ + for (i = 0; i < 4; i++) { + tcg_target_ulong mask = ~(0xffffull << i*16); + if (((val | ~valid) & mask) == mask) { + tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16); + return; + } + } + + /* Try all 48-bit insns that can perform it in one go. */ + if (facilities & FACILITY_EXT_IMM) { + for (i = 0; i < 2; i++) { + tcg_target_ulong mask = ~(0xffffffffull << i*32); + if (((val | ~valid) & mask) == mask) { + tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32); + return; + } + } + } + if ((facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) { + tgen_andi_risbg(s, dest, dest, val); + return; + } + + /* Fall back to loading the constant. */ + tcg_out_movi(s, type, TCG_TMP0, val); + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, NR, dest, TCG_TMP0); + } else { + tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0); + } +} + +static void tgen64_ori(TCGContext *s, TCGReg dest, tcg_target_ulong val) +{ + static const S390Opcode oi_insns[4] = { + RI_OILL, RI_OILH, RI_OIHL, RI_OIHH + }; + static const S390Opcode nif_insns[2] = { + RIL_OILF, RIL_OIHF + }; + + int i; + + /* Look for no-op. */ + if (val == 0) { + return; + } + + if (facilities & FACILITY_EXT_IMM) { + /* Try all 32-bit insns that can perform it in one go. 
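+           That is, one of the OI[LH][LH] halfword immediates, usable whenever the constant has non-zero bits in only a single 16-bit slice.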
*/ + for (i = 0; i < 4; i++) { + tcg_target_ulong mask = (0xffffull << i*16); + if ((val & mask) != 0 && (val & ~mask) == 0) { + tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16); + return; + } + } + + /* Try all 48-bit insns that can perform it in one go. */ + for (i = 0; i < 2; i++) { + tcg_target_ulong mask = (0xffffffffull << i*32); + if ((val & mask) != 0 && (val & ~mask) == 0) { + tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32); + return; + } + } + + /* Perform the OR via sequential modifications to the high and + low parts. Do this via recursion to handle 16-bit vs 32-bit + masks in each half. */ + tgen64_ori(s, dest, val & 0x00000000ffffffffull); + tgen64_ori(s, dest, val & 0xffffffff00000000ull); + } else { + /* With no extended-immediate facility, we don't need to be so + clever. Just iterate over the insns and mask in the constant. */ + for (i = 0; i < 4; i++) { + tcg_target_ulong mask = (0xffffull << i*16); + if ((val & mask) != 0) { + tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16); + } + } + } +} + +static void tgen64_xori(TCGContext *s, TCGReg dest, tcg_target_ulong val) +{ + /* Perform the xor by parts. */ + if (val & 0xffffffff) { + tcg_out_insn(s, RIL, XILF, dest, val); + } + if (val > 0xffffffff) { + tcg_out_insn(s, RIL, XIHF, dest, val >> 31 >> 1); + } +} + +static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, + TCGArg c2, int c2const) +{ + bool is_unsigned = is_unsigned_cond(c); + if (c2const) { + if (c2 == 0) { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, LTR, r1, r1); + } else { + tcg_out_insn(s, RRE, LTGR, r1, r1); + } + return tcg_cond_to_ltr_cond[c]; + } else { + if (is_unsigned) { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, CLFI, r1, c2); + } else { + tcg_out_insn(s, RIL, CLGFI, r1, c2); + } + } else { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, CFI, r1, c2); + } else { + tcg_out_insn(s, RIL, CGFI, r1, c2); + } + } + } + } else { + if (is_unsigned) { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, CLR, r1, c2); + } else { + tcg_out_insn(s, RRE, CLGR, r1, c2); + } + } else { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, CR, r1, c2); + } else { + tcg_out_insn(s, RRE, CGR, r1, c2); + } + } + } + return tcg_cond_to_s390_cond[c]; +} + +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg c1, TCGArg c2, int c2const) +{ + int cc; + + switch (cond) { + case TCG_COND_GTU: + case TCG_COND_GT: + do_greater: + /* The result of a compare has CC=2 for GT and CC=3 unused. + ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit. */ + tgen_cmp(s, type, cond, c1, c2, c2const); + tcg_out_movi(s, type, dest, 0); + tcg_out_insn(s, RRE, ALCGR, dest, dest); + return; + + case TCG_COND_GEU: + do_geu: + /* We need "real" carry semantics, so use SUBTRACT LOGICAL + instead of COMPARE LOGICAL. This needs an extra move. */ + tcg_out_mov(s, type, TCG_TMP0, c1); + if (c2const) { + tcg_out_movi(s, TCG_TYPE_I64, dest, 0); + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, SLFI, TCG_TMP0, c2); + } else { + tcg_out_insn(s, RIL, SLGFI, TCG_TMP0, c2); + } + } else { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, SLR, TCG_TMP0, c2); + } else { + tcg_out_insn(s, RRE, SLGR, TCG_TMP0, c2); + } + tcg_out_movi(s, TCG_TYPE_I64, dest, 0); + } + tcg_out_insn(s, RRE, ALCGR, dest, dest); + return; + + case TCG_COND_LEU: + case TCG_COND_LTU: + case TCG_COND_LT: + /* Swap operands so that we can use GEU/GTU/GT. 
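+           A constant second operand is first forced into TCG_TMP0, since after the swap it becomes the first operand of the compare.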
*/ + if (c2const) { + tcg_out_movi(s, type, TCG_TMP0, c2); + c2 = c1; + c2const = 0; + c1 = TCG_TMP0; + } else { + TCGReg t = c1; + c1 = c2; + c2 = t; + } + if (cond == TCG_COND_LEU) { + goto do_geu; + } + cond = tcg_swap_cond(cond); + goto do_greater; + + case TCG_COND_NE: + /* X != 0 is X > 0. */ + if (c2const && c2 == 0) { + cond = TCG_COND_GTU; + goto do_greater; + } + break; + + case TCG_COND_EQ: + /* X == 0 is X <= 0 is 0 >= X. */ + if (c2const && c2 == 0) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0); + c2 = c1; + c2const = 0; + c1 = TCG_TMP0; + goto do_geu; + } + break; + + default: + break; + } + + cc = tgen_cmp(s, type, cond, c1, c2, c2const); + if (facilities & FACILITY_LOAD_ON_COND) { + /* Emit: d = 0, t = 1, d = (cc ? t : d). */ + tcg_out_movi(s, TCG_TYPE_I64, dest, 0); + tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1); + tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc); + } else { + /* Emit: d = 1; if (cc) goto over; d = 0; over: */ + tcg_out_movi(s, type, dest, 1); + tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1); + tcg_out_movi(s, type, dest, 0); + } +} + +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest, + TCGReg c1, TCGArg c2, int c2const, TCGReg r3) +{ + int cc; + if (facilities & FACILITY_LOAD_ON_COND) { + cc = tgen_cmp(s, type, c, c1, c2, c2const); + tcg_out_insn(s, RRF, LOCGR, dest, r3, cc); + } else { + c = tcg_invert_cond(c); + cc = tgen_cmp(s, type, c, c1, c2, c2const); + + /* Emit: if (cc) goto over; dest = r3; over: */ + tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1); + tcg_out_insn(s, RRE, LGR, dest, r3); + } +} + +bool tcg_target_deposit_valid(int ofs, int len) +{ + return (facilities & FACILITY_GEN_INST_EXT) != 0; +} + +static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src, + int ofs, int len) +{ + int lsb = (63 - ofs); + int msb = lsb - (len - 1); + tcg_out_risbg(s, dest, src, msb, lsb, ofs, 0); +} + +static void tgen_gotoi(TCGContext *s, int cc, tcg_insn_unit *dest) +{ + ptrdiff_t off = dest - s->code_ptr; + if (off == (int16_t)off) { + tcg_out_insn(s, RI, BRC, cc, off); + } else if (off == (int32_t)off) { + tcg_out_insn(s, RIL, BRCL, cc, off); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest); + tcg_out_insn(s, RR, BCR, cc, TCG_TMP0); + } +} + +static void tgen_branch(TCGContext *s, int cc, TCGLabel *l) +{ + if (l->has_value) { + tgen_gotoi(s, cc, l->u.value_ptr); + } else if (USE_LONG_BRANCHES) { + tcg_out16(s, RIL_BRCL | (cc << 4)); + tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, l, -2); + s->code_ptr += 2; + } else { + tcg_out16(s, RI_BRC | (cc << 4)); + tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, -2); + s->code_ptr += 1; + } +} + +static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc, + TCGReg r1, TCGReg r2, TCGLabel *l) +{ + intptr_t off; + + if (l->has_value) { + off = l->u.value_ptr - s->code_ptr; + } else { + /* We need to keep the offset unchanged for retranslation. */ + off = s->code_ptr[1]; + tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, -2); + } + + tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2); + tcg_out16(s, off); + tcg_out16(s, cc << 12 | (opc & 0xff)); +} + +static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc, + TCGReg r1, int i2, TCGLabel *l) +{ + tcg_target_long off; + + if (l->has_value) { + off = l->u.value_ptr - s->code_ptr; + } else { + /* We need to keep the offset unchanged for retranslation. 
*/ + off = s->code_ptr[1]; + tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, -2); + } + + tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc); + tcg_out16(s, off); + tcg_out16(s, (i2 << 8) | (opc & 0xff)); +} + +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c, + TCGReg r1, TCGArg c2, int c2const, TCGLabel *l) +{ + int cc; + + if (facilities & FACILITY_GEN_INST_EXT) { + bool is_unsigned = is_unsigned_cond(c); + bool in_range; + S390Opcode opc; + + cc = tcg_cond_to_s390_cond[c]; + + if (!c2const) { + opc = (type == TCG_TYPE_I32 + ? (is_unsigned ? RIE_CLRJ : RIE_CRJ) + : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ)); + tgen_compare_branch(s, opc, cc, r1, c2, l); + return; + } + + /* COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field. + If the immediate we've been given does not fit that range, we'll + fall back to separate compare and branch instructions using the + larger comparison range afforded by COMPARE IMMEDIATE. */ + if (type == TCG_TYPE_I32) { + if (is_unsigned) { + opc = RIE_CLIJ; + in_range = (uint32_t)c2 == (uint8_t)c2; + } else { + opc = RIE_CIJ; + in_range = (int32_t)c2 == (int8_t)c2; + } + } else { + if (is_unsigned) { + opc = RIE_CLGIJ; + in_range = (uint64_t)c2 == (uint8_t)c2; + } else { + opc = RIE_CGIJ; + in_range = (int64_t)c2 == (int8_t)c2; + } + } + if (in_range) { + tgen_compare_imm_branch(s, opc, cc, r1, c2, l); + return; + } + } + + cc = tgen_cmp(s, type, c, r1, c2, c2const); + tgen_branch(s, cc, l); +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) +{ + ptrdiff_t off = dest - s->code_ptr; + if (off == (int32_t)off) { + tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest); + tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0); + } +} + +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, TCGReg data, + TCGReg base, TCGReg index, int disp) +{ + switch (opc & (MO_SSIZE | MO_BSWAP)) { + case MO_UB: + tcg_out_insn(s, RXY, LLGC, data, base, index, disp); + break; + case MO_SB: + tcg_out_insn(s, RXY, LGB, data, base, index, disp); + break; + + case MO_UW | MO_BSWAP: + /* swapped unsigned halfword load with upper bits zeroed */ + tcg_out_insn(s, RXY, LRVH, data, base, index, disp); + tgen_ext16u(s, TCG_TYPE_I64, data, data); + break; + case MO_UW: + tcg_out_insn(s, RXY, LLGH, data, base, index, disp); + break; + + case MO_SW | MO_BSWAP: + /* swapped sign-extended halfword load */ + tcg_out_insn(s, RXY, LRVH, data, base, index, disp); + tgen_ext16s(s, TCG_TYPE_I64, data, data); + break; + case MO_SW: + tcg_out_insn(s, RXY, LGH, data, base, index, disp); + break; + + case MO_UL | MO_BSWAP: + /* swapped unsigned int load with upper bits zeroed */ + tcg_out_insn(s, RXY, LRV, data, base, index, disp); + tgen_ext32u(s, data, data); + break; + case MO_UL: + tcg_out_insn(s, RXY, LLGF, data, base, index, disp); + break; + + case MO_SL | MO_BSWAP: + /* swapped sign-extended int load */ + tcg_out_insn(s, RXY, LRV, data, base, index, disp); + tgen_ext32s(s, data, data); + break; + case MO_SL: + tcg_out_insn(s, RXY, LGF, data, base, index, disp); + break; + + case MO_Q | MO_BSWAP: + tcg_out_insn(s, RXY, LRVG, data, base, index, disp); + break; + case MO_Q: + tcg_out_insn(s, RXY, LG, data, base, index, disp); + break; + + default: + tcg_abort(); + } +} + +static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, TCGReg data, + TCGReg base, TCGReg index, int disp) +{ + switch (opc & (MO_SIZE | MO_BSWAP)) { + case MO_UB: + if (disp >= 0 && disp < 0x1000) { + tcg_out_insn(s, 
RX, STC, data, base, index, disp); + } else { + tcg_out_insn(s, RXY, STCY, data, base, index, disp); + } + break; + + case MO_UW | MO_BSWAP: + tcg_out_insn(s, RXY, STRVH, data, base, index, disp); + break; + case MO_UW: + if (disp >= 0 && disp < 0x1000) { + tcg_out_insn(s, RX, STH, data, base, index, disp); + } else { + tcg_out_insn(s, RXY, STHY, data, base, index, disp); + } + break; + + case MO_UL | MO_BSWAP: + tcg_out_insn(s, RXY, STRV, data, base, index, disp); + break; + case MO_UL: + if (disp >= 0 && disp < 0x1000) { + tcg_out_insn(s, RX, ST, data, base, index, disp); + } else { + tcg_out_insn(s, RXY, STY, data, base, index, disp); + } + break; + + case MO_Q | MO_BSWAP: + tcg_out_insn(s, RXY, STRVG, data, base, index, disp); + break; + case MO_Q: + tcg_out_insn(s, RXY, STG, data, base, index, disp); + break; + + default: + tcg_abort(); + } +} + +#if defined(CONFIG_SOFTMMU) +/* We're expecting to use a 20-bit signed offset on the tlb memory ops. + Using the offset of the second entry in the last tlb table ensures + that we can index all of the elements of the first entry. */ +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ffff); + +/* Load and compare a TLB entry, leaving the flags set. Loads the TLB + addend into R2. Returns a register with the santitized guest address. */ +static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc, + int mem_index, bool is_ld) +{ + int s_mask = (1 << (opc & MO_SIZE)) - 1; + int ofs, a_off; + uint64_t tlb_mask; + + /* For aligned accesses, we check the first byte and include the alignment + bits within the address. For unaligned access, we check that we don't + cross pages using the address of the last byte of the access. */ + if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) { + a_off = 0; + tlb_mask = TARGET_PAGE_MASK | s_mask; + } else { + a_off = s_mask; + tlb_mask = TARGET_PAGE_MASK; + } + + if (facilities & FACILITY_GEN_INST_EXT) { + tcg_out_risbg(s, TCG_REG_R2, addr_reg, + 64 - CPU_TLB_BITS - CPU_TLB_ENTRY_BITS, + 63 - CPU_TLB_ENTRY_BITS, + 64 + CPU_TLB_ENTRY_BITS - TARGET_PAGE_BITS, 1); + if (a_off) { + tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); + tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); + } else { + tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask); + } + } else { + tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); + tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2, + (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); + tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); + } + + if (is_ld) { + ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read); + } else { + ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); + } + if (TARGET_LONG_BITS == 32) { + tcg_out_mem(s, RX_C, RXY_CY, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs); + } else { + tcg_out_mem(s, 0, RXY_CG, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs); + } + + ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + tcg_out_mem(s, 0, RXY_LG, TCG_REG_R2, TCG_REG_R2, TCG_AREG0, ofs); + + if (TARGET_LONG_BITS == 32) { + tgen_ext32u(s, TCG_REG_R3, addr_reg); + return TCG_REG_R3; + } + return addr_reg; +} + +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, + TCGReg data, TCGReg addr, + tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->oi = oi; + label->datalo_reg = data; + 
label->addrlo_reg = addr; + label->raddr = raddr; + label->label_ptr[0] = label_ptr; +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGReg addr_reg = lb->addrlo_reg; + TCGReg data_reg = lb->datalo_reg; + TCGMemOpIdx oi = lb->oi; + TCGMemOp opc = get_memop(oi); + + patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2); + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); + } + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr); + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); + tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); + + tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGReg addr_reg = lb->addrlo_reg; + TCGReg data_reg = lb->datalo_reg; + TCGMemOpIdx oi = lb->oi; + TCGMemOp opc = get_memop(oi); + + patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2); + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); + } + switch (opc & MO_SIZE) { + case MO_UB: + tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); + break; + case MO_UW: + tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); + break; + case MO_UL: + tgen_ext32u(s, TCG_REG_R4, data_reg); + break; + case MO_Q: + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); + break; + default: + tcg_abort(); + } + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr); + tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + + tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); +} +#else +static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, + TCGReg *index_reg, tcg_target_long *disp) +{ + if (TARGET_LONG_BITS == 32) { + tgen_ext32u(s, TCG_TMP0, *addr_reg); + *addr_reg = TCG_TMP0; + } + if (guest_base < 0x80000) { + *index_reg = TCG_REG_NONE; + *disp = guest_base; + } else { + *index_reg = TCG_GUEST_BASE_REG; + *disp = 0; + } +} +#endif /* CONFIG_SOFTMMU */ + +static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOpIdx oi) +{ + TCGMemOp opc = get_memop(oi); +#ifdef CONFIG_SOFTMMU + unsigned mem_index = get_mmuidx(oi); + tcg_insn_unit *label_ptr; + TCGReg base_reg; + + base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1); + + /* We need to keep the offset unchanged for retranslation. */ + tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); + label_ptr = s->code_ptr; + s->code_ptr += 1; + + tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); + + add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr); +#else + TCGReg index_reg; + tcg_target_long disp; + + tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); + tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp); +#endif +} + +static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOpIdx oi) +{ + TCGMemOp opc = get_memop(oi); +#ifdef CONFIG_SOFTMMU + unsigned mem_index = get_mmuidx(oi); + tcg_insn_unit *label_ptr; + TCGReg base_reg; + + base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0); + + /* We need to keep the offset unchanged for retranslation. 
*/ + tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); + label_ptr = s->code_ptr; + s->code_ptr += 1; + + tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); + + add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr); +#else + TCGReg index_reg; + tcg_target_long disp; + + tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); + tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp); +#endif +} + +# define OP_32_64(x) \ + case glue(glue(INDEX_op_,x),_i32): \ + case glue(glue(INDEX_op_,x),_i64) + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg *args, const int *const_args) +{ + S390Opcode op; + TCGArg a0, a1, a2; + + switch (opc) { + case INDEX_op_exit_tb: + /* return value */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, args[0]); + tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr); + break; + + case INDEX_op_goto_tb: + if (s->tb_jmp_offset) { + tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4)); + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + s->code_ptr += 2; + } else { + /* load address stored at s->tb_next + args[0] */ + tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_TMP0, s->tb_next + args[0]); + /* and go there */ + tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_TMP0); + } + s->tb_next_offset[args[0]] = tcg_current_code_size(s); + break; + + OP_32_64(ld8u): + /* ??? LLC (RXY format) is only present with the extended-immediate + facility, whereas LLGC is always present. */ + tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]); + break; + + OP_32_64(ld8s): + /* ??? LB is no smaller than LGB, so no point to using it. */ + tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]); + break; + + OP_32_64(ld16u): + /* ??? LLH (RXY format) is only present with the extended-immediate + facility, whereas LLGH is always present. 
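+           So both the _i32 and _i64 opcodes use the 64-bit zero-extending load; the zeroed high bits are harmless in the 32-bit case.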
*/ + tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]); + break; + + case INDEX_op_ld16s_i32: + tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]); + break; + + case INDEX_op_ld_i32: + tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]); + break; + + OP_32_64(st8): + tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1], + TCG_REG_NONE, args[2]); + break; + + OP_32_64(st16): + tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1], + TCG_REG_NONE, args[2]); + break; + + case INDEX_op_st_i32: + tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); + break; + + case INDEX_op_add_i32: + a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; + if (const_args[2]) { + do_addi_32: + if (a0 == a1) { + if (a2 == (int16_t)a2) { + tcg_out_insn(s, RI, AHI, a0, a2); + break; + } + if (facilities & FACILITY_EXT_IMM) { + tcg_out_insn(s, RIL, AFI, a0, a2); + break; + } + } + tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RR, AR, a0, a2); + } else { + tcg_out_insn(s, RX, LA, a0, a1, a2, 0); + } + break; + case INDEX_op_sub_i32: + a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; + if (const_args[2]) { + a2 = -a2; + goto do_addi_32; + } + tcg_out_insn(s, RR, SR, args[0], args[2]); + break; + + case INDEX_op_and_i32: + if (const_args[2]) { + tgen_andi(s, TCG_TYPE_I32, args[0], args[2]); + } else { + tcg_out_insn(s, RR, NR, args[0], args[2]); + } + break; + case INDEX_op_or_i32: + if (const_args[2]) { + tgen64_ori(s, args[0], args[2] & 0xffffffff); + } else { + tcg_out_insn(s, RR, OR, args[0], args[2]); + } + break; + case INDEX_op_xor_i32: + if (const_args[2]) { + tgen64_xori(s, args[0], args[2] & 0xffffffff); + } else { + tcg_out_insn(s, RR, XR, args[0], args[2]); + } + break; + + case INDEX_op_neg_i32: + tcg_out_insn(s, RR, LCR, args[0], args[1]); + break; + + case INDEX_op_mul_i32: + if (const_args[2]) { + if ((int32_t)args[2] == (int16_t)args[2]) { + tcg_out_insn(s, RI, MHI, args[0], args[2]); + } else { + tcg_out_insn(s, RIL, MSFI, args[0], args[2]); + } + } else { + tcg_out_insn(s, RRE, MSR, args[0], args[2]); + } + break; + + case INDEX_op_div2_i32: + tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]); + break; + case INDEX_op_divu2_i32: + tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]); + break; + + case INDEX_op_shl_i32: + op = RS_SLL; + do_shift32: + if (const_args[2]) { + tcg_out_sh32(s, op, args[0], TCG_REG_NONE, args[2]); + } else { + tcg_out_sh32(s, op, args[0], args[2], 0); + } + break; + case INDEX_op_shr_i32: + op = RS_SRL; + goto do_shift32; + case INDEX_op_sar_i32: + op = RS_SRA; + goto do_shift32; + + case INDEX_op_rotl_i32: + /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol. 
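+           RLL is an RSY-format insn just like RLLG, so the 64-bit emitter lays out the fields correctly; only the opcode differs.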
*/ + if (const_args[2]) { + tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]); + } else { + tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0); + } + break; + case INDEX_op_rotr_i32: + if (const_args[2]) { + tcg_out_sh64(s, RSY_RLL, args[0], args[1], + TCG_REG_NONE, (32 - args[2]) & 31); + } else { + tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]); + tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0); + } + break; + + case INDEX_op_ext8s_i32: + tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]); + break; + case INDEX_op_ext16s_i32: + tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]); + break; + case INDEX_op_ext8u_i32: + tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]); + break; + case INDEX_op_ext16u_i32: + tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]); + break; + + OP_32_64(bswap16): + /* The TCG bswap definition requires bits 0-47 already be zero. + Thus we don't need the G-type insns to implement bswap16_i64. */ + tcg_out_insn(s, RRE, LRVR, args[0], args[1]); + tcg_out_sh32(s, RS_SRL, args[0], TCG_REG_NONE, 16); + break; + OP_32_64(bswap32): + tcg_out_insn(s, RRE, LRVR, args[0], args[1]); + break; + + case INDEX_op_add2_i32: + if (const_args[4]) { + tcg_out_insn(s, RIL, ALFI, args[0], args[4]); + } else { + tcg_out_insn(s, RR, ALR, args[0], args[4]); + } + tcg_out_insn(s, RRE, ALCR, args[1], args[5]); + break; + case INDEX_op_sub2_i32: + if (const_args[4]) { + tcg_out_insn(s, RIL, SLFI, args[0], args[4]); + } else { + tcg_out_insn(s, RR, SLR, args[0], args[4]); + } + tcg_out_insn(s, RRE, SLBR, args[1], args[5]); + break; + + case INDEX_op_br: + tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0])); + break; + + case INDEX_op_brcond_i32: + tgen_brcond(s, TCG_TYPE_I32, args[2], args[0], + args[1], const_args[1], arg_label(args[3])); + break; + case INDEX_op_setcond_i32: + tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], + args[2], const_args[2]); + break; + case INDEX_op_movcond_i32: + tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], + args[2], const_args[2], args[3]); + break; + + case INDEX_op_qemu_ld_i32: + /* ??? Technically we can use a non-extending instruction. 
*/ + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args[0], args[1], args[2]); + break; + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args[0], args[1], args[2]); + break; + + case INDEX_op_ld16s_i64: + tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]); + break; + case INDEX_op_ld32u_i64: + tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]); + break; + case INDEX_op_ld32s_i64: + tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]); + break; + case INDEX_op_ld_i64: + tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); + break; + + case INDEX_op_st32_i64: + tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); + break; + case INDEX_op_st_i64: + tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); + break; + + case INDEX_op_add_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + do_addi_64: + if (a0 == a1) { + if (a2 == (int16_t)a2) { + tcg_out_insn(s, RI, AGHI, a0, a2); + break; + } + if (facilities & FACILITY_EXT_IMM) { + if (a2 == (int32_t)a2) { + tcg_out_insn(s, RIL, AGFI, a0, a2); + break; + } else if (a2 == (uint32_t)a2) { + tcg_out_insn(s, RIL, ALGFI, a0, a2); + break; + } else if (-a2 == (uint32_t)-a2) { + tcg_out_insn(s, RIL, SLGFI, a0, -a2); + break; + } + } + } + tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RRE, AGR, a0, a2); + } else { + tcg_out_insn(s, RX, LA, a0, a1, a2, 0); + } + break; + case INDEX_op_sub_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + a2 = -a2; + goto do_addi_64; + } else { + tcg_out_insn(s, RRE, SGR, args[0], args[2]); + } + break; + + case INDEX_op_and_i64: + if (const_args[2]) { + tgen_andi(s, TCG_TYPE_I64, args[0], args[2]); + } else { + tcg_out_insn(s, RRE, NGR, args[0], args[2]); + } + break; + case INDEX_op_or_i64: + if (const_args[2]) { + tgen64_ori(s, args[0], args[2]); + } else { + tcg_out_insn(s, RRE, OGR, args[0], args[2]); + } + break; + case INDEX_op_xor_i64: + if (const_args[2]) { + tgen64_xori(s, args[0], args[2]); + } else { + tcg_out_insn(s, RRE, XGR, args[0], args[2]); + } + break; + + case INDEX_op_neg_i64: + tcg_out_insn(s, RRE, LCGR, args[0], args[1]); + break; + case INDEX_op_bswap64_i64: + tcg_out_insn(s, RRE, LRVGR, args[0], args[1]); + break; + + case INDEX_op_mul_i64: + if (const_args[2]) { + if (args[2] == (int16_t)args[2]) { + tcg_out_insn(s, RI, MGHI, args[0], args[2]); + } else { + tcg_out_insn(s, RIL, MSGFI, args[0], args[2]); + } + } else { + tcg_out_insn(s, RRE, MSGR, args[0], args[2]); + } + break; + + case INDEX_op_div2_i64: + /* ??? We get an unnecessary sign-extension of the dividend + into R3 with this definition, but as we do in fact always + produce both quotient and remainder using INDEX_op_div_i64 + instead requires jumping through even more hoops. 
*/ + tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]); + break; + case INDEX_op_divu2_i64: + tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]); + break; + case INDEX_op_mulu2_i64: + tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]); + break; + + case INDEX_op_shl_i64: + op = RSY_SLLG; + do_shift64: + if (const_args[2]) { + tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]); + } else { + tcg_out_sh64(s, op, args[0], args[1], args[2], 0); + } + break; + case INDEX_op_shr_i64: + op = RSY_SRLG; + goto do_shift64; + case INDEX_op_sar_i64: + op = RSY_SRAG; + goto do_shift64; + + case INDEX_op_rotl_i64: + if (const_args[2]) { + tcg_out_sh64(s, RSY_RLLG, args[0], args[1], + TCG_REG_NONE, args[2]); + } else { + tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0); + } + break; + case INDEX_op_rotr_i64: + if (const_args[2]) { + tcg_out_sh64(s, RSY_RLLG, args[0], args[1], + TCG_REG_NONE, (64 - args[2]) & 63); + } else { + /* We can use the smaller 32-bit negate because only the + low 6 bits are examined for the rotate. */ + tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]); + tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0); + } + break; + + case INDEX_op_ext8s_i64: + tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_ext16s_i64: + tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_ext_i32_i64: + case INDEX_op_ext32s_i64: + tgen_ext32s(s, args[0], args[1]); + break; + case INDEX_op_ext8u_i64: + tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_ext16u_i64: + tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_extu_i32_i64: + case INDEX_op_ext32u_i64: + tgen_ext32u(s, args[0], args[1]); + break; + + case INDEX_op_add2_i64: + if (const_args[4]) { + if ((int64_t)args[4] >= 0) { + tcg_out_insn(s, RIL, ALGFI, args[0], args[4]); + } else { + tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]); + } + } else { + tcg_out_insn(s, RRE, ALGR, args[0], args[4]); + } + tcg_out_insn(s, RRE, ALCGR, args[1], args[5]); + break; + case INDEX_op_sub2_i64: + if (const_args[4]) { + if ((int64_t)args[4] >= 0) { + tcg_out_insn(s, RIL, SLGFI, args[0], args[4]); + } else { + tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]); + } + } else { + tcg_out_insn(s, RRE, SLGR, args[0], args[4]); + } + tcg_out_insn(s, RRE, SLBGR, args[1], args[5]); + break; + + case INDEX_op_brcond_i64: + tgen_brcond(s, TCG_TYPE_I64, args[2], args[0], + args[1], const_args[1], arg_label(args[3])); + break; + case INDEX_op_setcond_i64: + tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], + args[2], const_args[2]); + break; + case INDEX_op_movcond_i64: + tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], + args[2], const_args[2], args[3]); + break; + + OP_32_64(deposit): + tgen_deposit(s, args[0], args[2], args[3], args[4]); + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. 
*/ + default: + tcg_abort(); + } +} + +static const TCGTargetOpDef s390_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_br, { } }, + + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "r", "r" } }, + { INDEX_op_st16_i32, { "r", "r" } }, + { INDEX_op_st_i32, { "r", "r" } }, + + { INDEX_op_add_i32, { "r", "r", "ri" } }, + { INDEX_op_sub_i32, { "r", "0", "ri" } }, + { INDEX_op_mul_i32, { "r", "0", "rK" } }, + + { INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } }, + { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } }, + + { INDEX_op_and_i32, { "r", "0", "ri" } }, + { INDEX_op_or_i32, { "r", "0", "rO" } }, + { INDEX_op_xor_i32, { "r", "0", "rX" } }, + + { INDEX_op_neg_i32, { "r", "r" } }, + + { INDEX_op_shl_i32, { "r", "0", "Ri" } }, + { INDEX_op_shr_i32, { "r", "0", "Ri" } }, + { INDEX_op_sar_i32, { "r", "0", "Ri" } }, + + { INDEX_op_rotl_i32, { "r", "r", "Ri" } }, + { INDEX_op_rotr_i32, { "r", "r", "Ri" } }, + + { INDEX_op_ext8s_i32, { "r", "r" } }, + { INDEX_op_ext8u_i32, { "r", "r" } }, + { INDEX_op_ext16s_i32, { "r", "r" } }, + { INDEX_op_ext16u_i32, { "r", "r" } }, + + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, + + { INDEX_op_add2_i32, { "r", "r", "0", "1", "rA", "r" } }, + { INDEX_op_sub2_i32, { "r", "r", "0", "1", "rA", "r" } }, + + { INDEX_op_brcond_i32, { "r", "rC" } }, + { INDEX_op_setcond_i32, { "r", "r", "rC" } }, + { INDEX_op_movcond_i32, { "r", "r", "rC", "r", "0" } }, + { INDEX_op_deposit_i32, { "r", "0", "r" } }, + + { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_ld_i64, { "r", "L" } }, + { INDEX_op_qemu_st_i32, { "L", "L" } }, + { INDEX_op_qemu_st_i64, { "L", "L" } }, + + { INDEX_op_ld8u_i64, { "r", "r" } }, + { INDEX_op_ld8s_i64, { "r", "r" } }, + { INDEX_op_ld16u_i64, { "r", "r" } }, + { INDEX_op_ld16s_i64, { "r", "r" } }, + { INDEX_op_ld32u_i64, { "r", "r" } }, + { INDEX_op_ld32s_i64, { "r", "r" } }, + { INDEX_op_ld_i64, { "r", "r" } }, + + { INDEX_op_st8_i64, { "r", "r" } }, + { INDEX_op_st16_i64, { "r", "r" } }, + { INDEX_op_st32_i64, { "r", "r" } }, + { INDEX_op_st_i64, { "r", "r" } }, + + { INDEX_op_add_i64, { "r", "r", "ri" } }, + { INDEX_op_sub_i64, { "r", "0", "ri" } }, + { INDEX_op_mul_i64, { "r", "0", "rK" } }, + + { INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } }, + { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } }, + { INDEX_op_mulu2_i64, { "b", "a", "0", "r" } }, + + { INDEX_op_and_i64, { "r", "0", "ri" } }, + { INDEX_op_or_i64, { "r", "0", "rO" } }, + { INDEX_op_xor_i64, { "r", "0", "rX" } }, + + { INDEX_op_neg_i64, { "r", "r" } }, + + { INDEX_op_shl_i64, { "r", "r", "Ri" } }, + { INDEX_op_shr_i64, { "r", "r", "Ri" } }, + { INDEX_op_sar_i64, { "r", "r", "Ri" } }, + + { INDEX_op_rotl_i64, { "r", "r", "Ri" } }, + { INDEX_op_rotr_i64, { "r", "r", "Ri" } }, + + { INDEX_op_ext8s_i64, { "r", "r" } }, + { INDEX_op_ext8u_i64, { "r", "r" } }, + { INDEX_op_ext16s_i64, { "r", "r" } }, + { INDEX_op_ext16u_i64, { "r", "r" } }, + { INDEX_op_ext32s_i64, { "r", "r" } }, + { INDEX_op_ext32u_i64, { "r", "r" } }, + + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, + + { INDEX_op_bswap16_i64, { "r", "r" } }, + { INDEX_op_bswap32_i64, { "r", "r" } }, + { INDEX_op_bswap64_i64, { "r", "r" } }, + + { INDEX_op_add2_i64, { "r", "r", "0", "1", "rA", "r" } }, + { INDEX_op_sub2_i64, { "r", "r", "0", 
"1", "rA", "r" } }, + + { INDEX_op_brcond_i64, { "r", "rC" } }, + { INDEX_op_setcond_i64, { "r", "r", "rC" } }, + { INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } }, + { INDEX_op_deposit_i64, { "r", "0", "r" } }, + + { -1 }, +}; + +static void query_facilities(void) +{ + unsigned long hwcap = qemu_getauxval(AT_HWCAP); + + /* Is STORE FACILITY LIST EXTENDED available? Honestly, I believe this + is present on all 64-bit systems, but let's check for it anyway. */ + if (hwcap & HWCAP_S390_STFLE) { + register int r0 __asm__("0"); + register void *r1 __asm__("1"); + + /* stfle 0(%r1) */ + r1 = &facilities; + asm volatile(".word 0xb2b0,0x1000" + : "=r"(r0) : "0"(0), "r"(r1) : "memory", "cc"); + } +} + +static void tcg_target_init(TCGContext *s) +{ + query_facilities(); + + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff); + + tcg_regset_clear(tcg_target_call_clobber_regs); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5); + /* The r6 register is technically call-saved, but it's also a parameter + register, so it can get killed by setup for the qemu_st helper. */ + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6); + /* The return register can be considered call-clobbered. */ + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14); + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_TMP0); + /* XXX many insns can't be used with R0, so we better avoid it for now */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); + + tcg_add_target_add_op_defs(s390_op_defs); +} + +#define FRAME_SIZE ((int)(TCG_TARGET_CALL_STACK_OFFSET \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_NLONGS * sizeof(long))) + +static void tcg_target_qemu_prologue(TCGContext *s) +{ + /* stmg %r6,%r15,48(%r15) (save registers) */ + tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48); + + /* aghi %r15,-frame_size */ + tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE); + + tcg_set_frame(s, TCG_REG_CALL_STACK, + TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET, + CPU_TEMP_BUF_NLONGS * sizeof(long)); + +#ifndef CONFIG_SOFTMMU + if (guest_base >= 0x80000) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); + } +#endif + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + /* br %r3 (go to TB) */ + tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]); + + tb_ret_addr = s->code_ptr; + + /* lmg %r6,%r15,fs+48(%r15) (restore registers) */ + tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, + FRAME_SIZE + 48); + + /* br %r14 (return) */ + tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14); +} + +typedef struct { + DebugFrameHeader h; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[18]; +} DebugFrame; + +/* We're expecting a 2 byte uleb128 encoded value. 
*/ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +#define ELF_HOST_MACHINE EM_S390 + +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = 8, /* sleb128 8 */ + .h.cie.return_column = TCG_REG_R14, + + /* Total FDE size does not include the "len" member. */ + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + + .fde_def_cfa = { + 12, TCG_REG_CALL_STACK, /* DW_CFA_def_cfa %r15, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + 0x86, 6, /* DW_CFA_offset, %r6, 48 */ + 0x87, 7, /* DW_CFA_offset, %r7, 56 */ + 0x88, 8, /* DW_CFA_offset, %r8, 64 */ + 0x89, 9, /* DW_CFA_offset, %r9, 72 */ + 0x8a, 10, /* DW_CFA_offset, %r10, 80 */ + 0x8b, 11, /* DW_CFA_offset, %r11, 88 */ + 0x8c, 12, /* DW_CFA_offset, %r12, 96 */ + 0x8d, 13, /* DW_CFA_offset, %r13, 104 */ + 0x8e, 14, /* DW_CFA_offset, %r14, 112 */ + } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c deleted file mode 100644 index d3100ab557..0000000000 --- a/tcg/sparc/tcg-target.c +++ /dev/null @@ -1,1653 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "tcg-be-null.h" - -#ifndef NDEBUG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "%g0", - "%g1", - "%g2", - "%g3", - "%g4", - "%g5", - "%g6", - "%g7", - "%o0", - "%o1", - "%o2", - "%o3", - "%o4", - "%o5", - "%o6", - "%o7", - "%l0", - "%l1", - "%l2", - "%l3", - "%l4", - "%l5", - "%l6", - "%l7", - "%i0", - "%i1", - "%i2", - "%i3", - "%i4", - "%i5", - "%i6", - "%i7", -}; -#endif - -#ifdef __arch64__ -# define SPARC64 1 -#else -# define SPARC64 0 -#endif - -/* Note that sparcv8plus can only hold 64 bit quantities in %g and %o - registers. These are saved manually by the kernel in full 64-bit - slots. The %i and %l registers are saved by the register window - mechanism, which only allocates space for 32 bits. Given that this - window spill/fill can happen on any signal, we must consider the - high bits of the %i and %l registers garbage at all times. 
*/ -#if SPARC64 -# define ALL_64 0xffffffffu -#else -# define ALL_64 0xffffu -#endif - -/* Define some temporary registers. T2 is used for constant generation. */ -#define TCG_REG_T1 TCG_REG_G1 -#define TCG_REG_T2 TCG_REG_O7 - -#ifndef CONFIG_SOFTMMU -# define TCG_GUEST_BASE_REG TCG_REG_I5 -#endif - -static const int tcg_target_reg_alloc_order[] = { - TCG_REG_L0, - TCG_REG_L1, - TCG_REG_L2, - TCG_REG_L3, - TCG_REG_L4, - TCG_REG_L5, - TCG_REG_L6, - TCG_REG_L7, - - TCG_REG_I0, - TCG_REG_I1, - TCG_REG_I2, - TCG_REG_I3, - TCG_REG_I4, - TCG_REG_I5, - - TCG_REG_G2, - TCG_REG_G3, - TCG_REG_G4, - TCG_REG_G5, - - TCG_REG_O0, - TCG_REG_O1, - TCG_REG_O2, - TCG_REG_O3, - TCG_REG_O4, - TCG_REG_O5, -}; - -static const int tcg_target_call_iarg_regs[6] = { - TCG_REG_O0, - TCG_REG_O1, - TCG_REG_O2, - TCG_REG_O3, - TCG_REG_O4, - TCG_REG_O5, -}; - -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_O0, - TCG_REG_O1, - TCG_REG_O2, - TCG_REG_O3, -}; - -#define INSN_OP(x) ((x) << 30) -#define INSN_OP2(x) ((x) << 22) -#define INSN_OP3(x) ((x) << 19) -#define INSN_OPF(x) ((x) << 5) -#define INSN_RD(x) ((x) << 25) -#define INSN_RS1(x) ((x) << 14) -#define INSN_RS2(x) (x) -#define INSN_ASI(x) ((x) << 5) - -#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff)) -#define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff)) -#define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff)) -#define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20)) -#define INSN_OFF19(x) (((x) >> 2) & 0x07ffff) -#define INSN_COND(x) ((x) << 25) - -#define COND_N 0x0 -#define COND_E 0x1 -#define COND_LE 0x2 -#define COND_L 0x3 -#define COND_LEU 0x4 -#define COND_CS 0x5 -#define COND_NEG 0x6 -#define COND_VS 0x7 -#define COND_A 0x8 -#define COND_NE 0x9 -#define COND_G 0xa -#define COND_GE 0xb -#define COND_GU 0xc -#define COND_CC 0xd -#define COND_POS 0xe -#define COND_VC 0xf -#define BA (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2)) - -#define RCOND_Z 1 -#define RCOND_LEZ 2 -#define RCOND_LZ 3 -#define RCOND_NZ 5 -#define RCOND_GZ 6 -#define RCOND_GEZ 7 - -#define MOVCC_ICC (1 << 18) -#define MOVCC_XCC (1 << 18 | 1 << 12) - -#define BPCC_ICC 0 -#define BPCC_XCC (2 << 20) -#define BPCC_PT (1 << 19) -#define BPCC_PN 0 -#define BPCC_A (1 << 29) - -#define BPR_PT BPCC_PT - -#define ARITH_ADD (INSN_OP(2) | INSN_OP3(0x00)) -#define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10)) -#define ARITH_AND (INSN_OP(2) | INSN_OP3(0x01)) -#define ARITH_ANDN (INSN_OP(2) | INSN_OP3(0x05)) -#define ARITH_OR (INSN_OP(2) | INSN_OP3(0x02)) -#define ARITH_ORCC (INSN_OP(2) | INSN_OP3(0x12)) -#define ARITH_ORN (INSN_OP(2) | INSN_OP3(0x06)) -#define ARITH_XOR (INSN_OP(2) | INSN_OP3(0x03)) -#define ARITH_SUB (INSN_OP(2) | INSN_OP3(0x04)) -#define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14)) -#define ARITH_ADDC (INSN_OP(2) | INSN_OP3(0x08)) -#define ARITH_SUBC (INSN_OP(2) | INSN_OP3(0x0c)) -#define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a)) -#define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b)) -#define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e)) -#define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f)) -#define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09)) -#define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d)) -#define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d)) -#define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c)) -#define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f)) - -#define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11)) -#define ARITH_UMULXHI (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x16)) - -#define SHIFT_SLL (INSN_OP(2) | INSN_OP3(0x25)) -#define SHIFT_SRL (INSN_OP(2) | INSN_OP3(0x26)) -#define 
SHIFT_SRA (INSN_OP(2) | INSN_OP3(0x27)) - -#define SHIFT_SLLX (INSN_OP(2) | INSN_OP3(0x25) | (1 << 12)) -#define SHIFT_SRLX (INSN_OP(2) | INSN_OP3(0x26) | (1 << 12)) -#define SHIFT_SRAX (INSN_OP(2) | INSN_OP3(0x27) | (1 << 12)) - -#define RDY (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0)) -#define WRY (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0)) -#define JMPL (INSN_OP(2) | INSN_OP3(0x38)) -#define RETURN (INSN_OP(2) | INSN_OP3(0x39)) -#define SAVE (INSN_OP(2) | INSN_OP3(0x3c)) -#define RESTORE (INSN_OP(2) | INSN_OP3(0x3d)) -#define SETHI (INSN_OP(0) | INSN_OP2(0x4)) -#define CALL INSN_OP(1) -#define LDUB (INSN_OP(3) | INSN_OP3(0x01)) -#define LDSB (INSN_OP(3) | INSN_OP3(0x09)) -#define LDUH (INSN_OP(3) | INSN_OP3(0x02)) -#define LDSH (INSN_OP(3) | INSN_OP3(0x0a)) -#define LDUW (INSN_OP(3) | INSN_OP3(0x00)) -#define LDSW (INSN_OP(3) | INSN_OP3(0x08)) -#define LDX (INSN_OP(3) | INSN_OP3(0x0b)) -#define STB (INSN_OP(3) | INSN_OP3(0x05)) -#define STH (INSN_OP(3) | INSN_OP3(0x06)) -#define STW (INSN_OP(3) | INSN_OP3(0x04)) -#define STX (INSN_OP(3) | INSN_OP3(0x0e)) -#define LDUBA (INSN_OP(3) | INSN_OP3(0x11)) -#define LDSBA (INSN_OP(3) | INSN_OP3(0x19)) -#define LDUHA (INSN_OP(3) | INSN_OP3(0x12)) -#define LDSHA (INSN_OP(3) | INSN_OP3(0x1a)) -#define LDUWA (INSN_OP(3) | INSN_OP3(0x10)) -#define LDSWA (INSN_OP(3) | INSN_OP3(0x18)) -#define LDXA (INSN_OP(3) | INSN_OP3(0x1b)) -#define STBA (INSN_OP(3) | INSN_OP3(0x15)) -#define STHA (INSN_OP(3) | INSN_OP3(0x16)) -#define STWA (INSN_OP(3) | INSN_OP3(0x14)) -#define STXA (INSN_OP(3) | INSN_OP3(0x1e)) - -#ifndef ASI_PRIMARY_LITTLE -#define ASI_PRIMARY_LITTLE 0x88 -#endif - -#define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define LDSW_LE (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE)) - -#define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE)) - -#ifndef use_vis3_instructions -bool use_vis3_instructions; -#endif - -static inline int check_fit_i64(int64_t val, unsigned int bits) -{ - return val == sextract64(val, 0, bits); -} - -static inline int check_fit_i32(int32_t val, unsigned int bits) -{ - return val == sextract32(val, 0, bits); -} - -#define check_fit_tl check_fit_i64 -#if SPARC64 -# define check_fit_ptr check_fit_i64 -#else -# define check_fit_ptr check_fit_i32 -#endif - -static void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - uint32_t insn; - - assert(addend == 0); - value = tcg_ptr_byte_diff((tcg_insn_unit *)value, code_ptr); - - switch (type) { - case R_SPARC_WDISP16: - if (!check_fit_ptr(value >> 2, 16)) { - tcg_abort(); - } - insn = *code_ptr; - insn &= ~INSN_OFF16(-1); - insn |= INSN_OFF16(value); - *code_ptr = insn; - break; - case R_SPARC_WDISP19: - if (!check_fit_ptr(value >> 2, 19)) { - tcg_abort(); - } - insn = *code_ptr; - insn &= ~INSN_OFF19(-1); - insn |= INSN_OFF19(value); - *code_ptr = insn; - break; - default: - tcg_abort(); - } -} - -/* parse target specific constraints */ -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str; - - ct_str = *pct_str; - switch (ct_str[0]) { - case 'r': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - break; - case 'R': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, ALL_64); - break; - case 'A': /* qemu_ld/st 
address constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, - TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff); - reserve_helpers: - tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2); - break; - case 's': /* qemu_st data 32-bit constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - goto reserve_helpers; - case 'S': /* qemu_st data 64-bit constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, ALL_64); - goto reserve_helpers; - case 'I': - ct->ct |= TCG_CT_CONST_S11; - break; - case 'J': - ct->ct |= TCG_CT_CONST_S13; - break; - case 'Z': - ct->ct |= TCG_CT_CONST_ZERO; - break; - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - return 0; -} - -/* test if a constant matches the constraint */ -static inline int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - int ct = arg_ct->ct; - - if (ct & TCG_CT_CONST) { - return 1; - } - - if (type == TCG_TYPE_I32) { - val = (int32_t)val; - } - - if ((ct & TCG_CT_CONST_ZERO) && val == 0) { - return 1; - } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) { - return 1; - } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) { - return 1; - } else { - return 0; - } -} - -static inline void tcg_out_arith(TCGContext *s, TCGReg rd, TCGReg rs1, - TCGReg rs2, int op) -{ - tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_RS2(rs2)); -} - -static inline void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1, - int32_t offset, int op) -{ - tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_IMM13(offset)); -} - -static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1, - int32_t val2, int val2const, int op) -{ - tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) - | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2))); -} - -static inline void tcg_out_mov(TCGContext *s, TCGType type, - TCGReg ret, TCGReg arg) -{ - if (ret != arg) { - tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR); - } -} - -static inline void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg) -{ - tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10)); -} - -static inline void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg) -{ - tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR); -} - -static void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg ret, tcg_target_long arg) -{ - tcg_target_long hi, lo = (int32_t)arg; - - /* Make sure we test 32-bit constants for imm13 properly. */ - if (type == TCG_TYPE_I32) { - arg = lo; - } - - /* A 13-bit constant sign-extended to 64-bits. */ - if (check_fit_tl(arg, 13)) { - tcg_out_movi_imm13(s, ret, arg); - return; - } - - /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */ - if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) { - tcg_out_sethi(s, ret, arg); - if (arg & 0x3ff) { - tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR); - } - return; - } - - /* A 32-bit constant sign-extended to 64-bits. */ - if (arg == lo) { - tcg_out_sethi(s, ret, ~arg); - tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR); - return; - } - - /* A 64-bit constant decomposed into 2 32-bit pieces. 
*/ - if (check_fit_i32(lo, 13)) { - hi = (arg - lo) >> 32; - tcg_out_movi(s, TCG_TYPE_I32, ret, hi); - tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); - tcg_out_arithi(s, ret, ret, lo, ARITH_ADD); - } else { - hi = arg >> 32; - tcg_out_movi(s, TCG_TYPE_I32, ret, hi); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo); - tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); - tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR); - } -} - -static inline void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1, - TCGReg a2, int op) -{ - tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2)); -} - -static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr, - intptr_t offset, int op) -{ - if (check_fit_ptr(offset, 13)) { - tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) | - INSN_IMM13(offset)); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset); - tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op); - } -} - -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX)); -} - -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX)); -} - -static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg) -{ - tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff); - tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff); -} - -static inline void tcg_out_sety(TCGContext *s, TCGReg rs) -{ - tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); -} - -static inline void tcg_out_rdy(TCGContext *s, TCGReg rd) -{ - tcg_out32(s, RDY | INSN_RD(rd)); -} - -static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1, - int32_t val2, int val2const, int uns) -{ - /* Load Y with the sign/zero extension of RS1 to 64-bits. */ - if (uns) { - tcg_out_sety(s, TCG_REG_G0); - } else { - tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA); - tcg_out_sety(s, TCG_REG_T1); - } - - tcg_out_arithc(s, rd, rs1, val2, val2const, - uns ? ARITH_UDIV : ARITH_SDIV); -} - -static inline void tcg_out_nop(TCGContext *s) -{ - tcg_out_sethi(s, TCG_REG_G0, 0); -} - -static const uint8_t tcg_cond_to_bcond[] = { - [TCG_COND_EQ] = COND_E, - [TCG_COND_NE] = COND_NE, - [TCG_COND_LT] = COND_L, - [TCG_COND_GE] = COND_GE, - [TCG_COND_LE] = COND_LE, - [TCG_COND_GT] = COND_G, - [TCG_COND_LTU] = COND_CS, - [TCG_COND_GEU] = COND_CC, - [TCG_COND_LEU] = COND_LEU, - [TCG_COND_GTU] = COND_GU, -}; - -static const uint8_t tcg_cond_to_rcond[] = { - [TCG_COND_EQ] = RCOND_Z, - [TCG_COND_NE] = RCOND_NZ, - [TCG_COND_LT] = RCOND_LZ, - [TCG_COND_GT] = RCOND_GZ, - [TCG_COND_LE] = RCOND_LEZ, - [TCG_COND_GE] = RCOND_GEZ -}; - -static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19) -{ - tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19); -} - -static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l) -{ - int off19; - - if (l->has_value) { - off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr)); - } else { - /* Make sure to preserve destinations during retranslation. 
*/ - off19 = *s->code_ptr & INSN_OFF19(-1); - tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, l, 0); - } - tcg_out_bpcc0(s, scond, flags, off19); -} - -static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const) -{ - tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC); -} - -static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1, - int32_t arg2, int const_arg2, TCGLabel *l) -{ - tcg_out_cmp(s, arg1, arg2, const_arg2); - tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, l); - tcg_out_nop(s); -} - -static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret, - int32_t v1, int v1const) -{ - tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret) - | INSN_RS1(tcg_cond_to_bcond[cond]) - | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1))); -} - -static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const, - int32_t v1, int v1const) -{ - tcg_out_cmp(s, c1, c2, c2const); - tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const); -} - -static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1, - int32_t arg2, int const_arg2, TCGLabel *l) -{ - /* For 64-bit signed comparisons vs zero, we can avoid the compare. */ - if (arg2 == 0 && !is_unsigned_cond(cond)) { - int off16; - - if (l->has_value) { - off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr)); - } else { - /* Make sure to preserve destinations during retranslation. */ - off16 = *s->code_ptr & INSN_OFF16(-1); - tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0); - } - tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1) - | INSN_COND(tcg_cond_to_rcond[cond]) | off16); - } else { - tcg_out_cmp(s, arg1, arg2, const_arg2); - tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, l); - } - tcg_out_nop(s); -} - -static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1, - int32_t v1, int v1const) -{ - tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1) - | (tcg_cond_to_rcond[cond] << 10) - | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1))); -} - -static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const, - int32_t v1, int v1const) -{ - /* For 64-bit signed comparisons vs zero, we can avoid the compare. - Note that the immediate range is one bit smaller, so we must check - for that as well. */ - if (c2 == 0 && !is_unsigned_cond(cond) - && (!v1const || check_fit_i32(v1, 10))) { - tcg_out_movr(s, cond, ret, c1, v1, v1const); - } else { - tcg_out_cmp(s, c1, c2, c2const); - tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const); - } -} - -static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const) -{ - /* For 32-bit comparisons, we can play games with ADDC/SUBC. */ - switch (cond) { - case TCG_COND_LTU: - case TCG_COND_GEU: - /* The result of the comparison is in the carry bit. */ - break; - - case TCG_COND_EQ: - case TCG_COND_NE: - /* For equality, we can transform to inequality vs zero. */ - if (c2 != 0) { - tcg_out_arithc(s, TCG_REG_T1, c1, c2, c2const, ARITH_XOR); - c2 = TCG_REG_T1; - } else { - c2 = c1; - } - c1 = TCG_REG_G0, c2const = 0; - cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU); - break; - - case TCG_COND_GTU: - case TCG_COND_LEU: - /* If we don't need to load a constant into a register, we can - swap the operands on GTU/LEU. There's no benefit to loading - the constant into a temporary register. 
*/ - if (!c2const || c2 == 0) { - TCGReg t = c1; - c1 = c2; - c2 = t; - c2const = 0; - cond = tcg_swap_cond(cond); - break; - } - /* FALLTHRU */ - - default: - tcg_out_cmp(s, c1, c2, c2const); - tcg_out_movi_imm13(s, ret, 0); - tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1); - return; - } - - tcg_out_cmp(s, c1, c2, c2const); - if (cond == TCG_COND_LTU) { - tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_ADDC); - } else { - tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_SUBC); - } -} - -static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const) -{ - if (use_vis3_instructions) { - switch (cond) { - case TCG_COND_NE: - if (c2 != 0) { - break; - } - c2 = c1, c2const = 0, c1 = TCG_REG_G0; - /* FALLTHRU */ - case TCG_COND_LTU: - tcg_out_cmp(s, c1, c2, c2const); - tcg_out_arith(s, ret, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC); - return; - default: - break; - } - } - - /* For 64-bit signed comparisons vs zero, we can avoid the compare - if the input does not overlap the output. */ - if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) { - tcg_out_movi_imm13(s, ret, 0); - tcg_out_movr(s, cond, ret, c1, 1, 1); - } else { - tcg_out_cmp(s, c1, c2, c2const); - tcg_out_movi_imm13(s, ret, 0); - tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1); - } -} - -static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh, - TCGReg al, TCGReg ah, int32_t bl, int blconst, - int32_t bh, int bhconst, int opl, int oph) -{ - TCGReg tmp = TCG_REG_T1; - - /* Note that the low parts are fully consumed before tmp is set. */ - if (rl != ah && (bhconst || rl != bh)) { - tmp = rl; - } - - tcg_out_arithc(s, tmp, al, bl, blconst, opl); - tcg_out_arithc(s, rh, ah, bh, bhconst, oph); - tcg_out_mov(s, TCG_TYPE_I32, rl, tmp); -} - -static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, - TCGReg al, TCGReg ah, int32_t bl, int blconst, - int32_t bh, int bhconst, bool is_sub) -{ - TCGReg tmp = TCG_REG_T1; - - /* Note that the low parts are fully consumed before tmp is set. */ - if (rl != ah && (bhconst || rl != bh)) { - tmp = rl; - } - - tcg_out_arithc(s, tmp, al, bl, blconst, is_sub ? ARITH_SUBCC : ARITH_ADDCC); - - if (use_vis3_instructions && !is_sub) { - /* Note that ADDXC doesn't accept immediates. */ - if (bhconst && bh != 0) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh); - bh = TCG_REG_T2; - } - tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC); - } else if (bh == TCG_REG_G0) { - /* If we have a zero, we can perform the operation in two insns, - with the arithmetic first, and a conditional move into place. */ - if (rh == ah) { - tcg_out_arithi(s, TCG_REG_T2, ah, 1, - is_sub ? ARITH_SUB : ARITH_ADD); - tcg_out_movcc(s, TCG_COND_LTU, MOVCC_XCC, rh, TCG_REG_T2, 0); - } else { - tcg_out_arithi(s, rh, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD); - tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0); - } - } else { - /* Otherwise adjust BH as if there is carry into T2 ... */ - if (bhconst) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh + (is_sub ? -1 : 1)); - } else { - tcg_out_arithi(s, TCG_REG_T2, bh, 1, - is_sub ? ARITH_SUB : ARITH_ADD); - } - /* ... smoosh T2 back to original BH if carry is clear ... */ - tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, TCG_REG_T2, bh, bhconst); - /* ... and finally perform the arithmetic with the new operand. */ - tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? 
ARITH_SUB : ARITH_ADD); - } - - tcg_out_mov(s, TCG_TYPE_I64, rl, tmp); -} - -static void tcg_out_call_nodelay(TCGContext *s, tcg_insn_unit *dest) -{ - ptrdiff_t disp = tcg_pcrel_diff(s, dest); - - if (disp == (int32_t)disp) { - tcg_out32(s, CALL | (uint32_t)disp >> 2); - } else { - uintptr_t desti = (uintptr_t)dest; - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, desti & ~0xfff); - tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL); - } -} - -static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) -{ - tcg_out_call_nodelay(s, dest); - tcg_out_nop(s); -} - -#ifdef CONFIG_SOFTMMU -static tcg_insn_unit *qemu_ld_trampoline[16]; -static tcg_insn_unit *qemu_st_trampoline[16]; - -static void build_trampolines(TCGContext *s) -{ - static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_SB] = helper_ret_ldsb_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LESW] = helper_le_ldsw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BESW] = helper_be_ldsw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, - }; - static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, - }; - - int i; - TCGReg ra; - - for (i = 0; i < 16; ++i) { - if (qemu_ld_helpers[i] == NULL) { - continue; - } - - /* May as well align the trampoline. */ - while ((uintptr_t)s->code_ptr & 15) { - tcg_out_nop(s); - } - qemu_ld_trampoline[i] = s->code_ptr; - - if (SPARC64 || TARGET_LONG_BITS == 32) { - ra = TCG_REG_O3; - } else { - /* Install the high part of the address. */ - tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX); - ra = TCG_REG_O4; - } - - /* Set the retaddr operand. */ - tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); - /* Set the env operand. */ - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); - /* Tail call. */ - tcg_out_call_nodelay(s, qemu_ld_helpers[i]); - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); - } - - for (i = 0; i < 16; ++i) { - if (qemu_st_helpers[i] == NULL) { - continue; - } - - /* May as well align the trampoline. */ - while ((uintptr_t)s->code_ptr & 15) { - tcg_out_nop(s); - } - qemu_st_trampoline[i] = s->code_ptr; - - if (SPARC64) { - ra = TCG_REG_O4; - } else { - ra = TCG_REG_O1; - if (TARGET_LONG_BITS == 64) { - /* Install the high part of the address. */ - tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); - ra += 2; - } else { - ra += 1; - } - if ((i & MO_SIZE) == MO_64) { - /* Install the high part of the data. */ - tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); - ra += 2; - } else { - ra += 1; - } - /* Skip the oi argument. */ - ra += 1; - } - - /* Set the retaddr operand. */ - if (ra >= TCG_REG_O6) { - tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK, - TCG_TARGET_CALL_STACK_OFFSET); - ra = TCG_REG_G1; - } - tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); - /* Set the env operand. */ - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); - /* Tail call. */ - tcg_out_call_nodelay(s, qemu_st_helpers[i]); - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); - } -} -#endif - -/* Generate global QEMU prologue and epilogue code */ -static void tcg_target_qemu_prologue(TCGContext *s) -{ - int tmp_buf_size, frame_size; - - /* The TCG temp buffer is at the top of the frame, immediately - below the frame pointer. 
*/ - tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long); - tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size, - tmp_buf_size); - - /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is - otherwise the minimal frame usable by callees. */ - frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS; - frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size; - frame_size += TCG_TARGET_STACK_ALIGN - 1; - frame_size &= -TCG_TARGET_STACK_ALIGN; - tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) | - INSN_IMM13(-frame_size)); - -#ifndef CONFIG_SOFTMMU - if (guest_base != 0) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); - tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); - } -#endif - - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL); - /* delay slot */ - tcg_out_nop(s); - - /* No epilogue required. We issue ret + restore directly in the TB. */ - -#ifdef CONFIG_SOFTMMU - build_trampolines(s); -#endif -} - -#if defined(CONFIG_SOFTMMU) -/* Perform the TLB load and compare. - - Inputs: - ADDRLO and ADDRHI contain the possible two parts of the address. - - MEM_INDEX and S_BITS are the memory context and log2 size of the load. - - WHICH is the offset into the CPUTLBEntry structure of the slot to read. - This should be offsetof addr_read or addr_write. - - The result of the TLB comparison is in %[ix]cc. The sanitized address - is in the returned register, maybe %o0. The TLB addend is in %o1. */ - -static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, - TCGMemOp s_bits, int which) -{ - const TCGReg r0 = TCG_REG_O0; - const TCGReg r1 = TCG_REG_O1; - const TCGReg r2 = TCG_REG_O2; - int tlb_ofs; - - /* Shift the page number down. */ - tcg_out_arithi(s, r1, addr, TARGET_PAGE_BITS, SHIFT_SRL); - - /* Mask out the page offset, except for the required alignment. */ - tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_T1, - TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - - /* Mask the tlb index. */ - tcg_out_arithi(s, r1, r1, CPU_TLB_SIZE - 1, ARITH_AND); - - /* Mask page, part 2. */ - tcg_out_arith(s, r0, addr, TCG_REG_T1, ARITH_AND); - - /* Shift the tlb index into place. */ - tcg_out_arithi(s, r1, r1, CPU_TLB_ENTRY_BITS, SHIFT_SLL); - - /* Relative to the current ENV. */ - tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD); - - /* Find a base address that can load both tlb comparator and addend. */ - tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]); - if (!check_fit_ptr(tlb_ofs + sizeof(CPUTLBEntry), 13)) { - if (tlb_ofs & ~0x3ff) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, tlb_ofs & ~0x3ff); - tcg_out_arith(s, r1, r1, TCG_REG_T1, ARITH_ADD); - } - tlb_ofs &= 0x3ff; - } - - /* Load the tlb comparator and the addend. */ - tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which); - tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend)); - - /* subcc arg0, arg2, %g0 */ - tcg_out_cmp(s, r0, r2, 0); - - /* If the guest address must be zero-extended, do so now. 
*/ - if (SPARC64 && TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL); - return r0; - } - return addr; -} -#endif /* CONFIG_SOFTMMU */ - -static const int qemu_ld_opc[16] = { - [MO_UB] = LDUB, - [MO_SB] = LDSB, - - [MO_BEUW] = LDUH, - [MO_BESW] = LDSH, - [MO_BEUL] = LDUW, - [MO_BESL] = LDSW, - [MO_BEQ] = LDX, - - [MO_LEUW] = LDUH_LE, - [MO_LESW] = LDSH_LE, - [MO_LEUL] = LDUW_LE, - [MO_LESL] = LDSW_LE, - [MO_LEQ] = LDX_LE, -}; - -static const int qemu_st_opc[16] = { - [MO_UB] = STB, - - [MO_BEUW] = STH, - [MO_BEUL] = STW, - [MO_BEQ] = STX, - - [MO_LEUW] = STH_LE, - [MO_LEUL] = STW_LE, - [MO_LEQ] = STX_LE, -}; - -static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, - TCGMemOpIdx oi, bool is_64) -{ - TCGMemOp memop = get_memop(oi); -#ifdef CONFIG_SOFTMMU - unsigned memi = get_mmuidx(oi); - TCGReg addrz, param; - tcg_insn_unit *func; - tcg_insn_unit *label_ptr; - - addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE, - offsetof(CPUTLBEntry, addr_read)); - - /* The fast path is exactly one insn. Thus we can perform the - entire TLB Hit in the (annulled) delay slot of the branch - over the TLB Miss case. */ - - /* beq,a,pt %[xi]cc, label0 */ - label_ptr = s->code_ptr; - tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT - | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); - /* delay slot */ - tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, - qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); - - /* TLB Miss. */ - - param = TCG_REG_O1; - if (!SPARC64 && TARGET_LONG_BITS == 64) { - /* Skip the high-part; we'll perform the extract in the trampoline. */ - param++; - } - tcg_out_mov(s, TCG_TYPE_REG, param++, addr); - - /* We use the helpers to extend SB and SW data, leaving the case - of SL needing explicit extending below. */ - if ((memop & MO_SSIZE) == MO_SL) { - func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)]; - } else { - func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)]; - } - assert(func != NULL); - tcg_out_call_nodelay(s, func); - /* delay slot */ - tcg_out_movi(s, TCG_TYPE_I32, param, oi); - - /* Recall that all of the helpers return 64-bit results. - Which complicates things for sparcv8plus. */ - if (SPARC64) { - /* We let the helper sign-extend SB and SW, but leave SL for here. */ - if (is_64 && (memop & MO_SSIZE) == MO_SL) { - tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); - } else { - tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); - } - } else { - if ((memop & MO_SIZE) == MO_64) { - tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX); - tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL); - tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR); - } else if (is_64) { - /* Re-extend from 32-bit rather than reassembling when we - know the high register must be an extension. */ - tcg_out_arithi(s, data, TCG_REG_O1, 0, - memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL); - } else { - tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1); - } - } - - *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); -#else - if (SPARC64 && TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); - addr = TCG_REG_T1; - } - tcg_out_ldst_rr(s, data, addr, - (guest_base ? 
TCG_GUEST_BASE_REG : TCG_REG_G0), - qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); -#endif /* CONFIG_SOFTMMU */ -} - -static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, - TCGMemOpIdx oi) -{ - TCGMemOp memop = get_memop(oi); -#ifdef CONFIG_SOFTMMU - unsigned memi = get_mmuidx(oi); - TCGReg addrz, param; - tcg_insn_unit *func; - tcg_insn_unit *label_ptr; - - addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE, - offsetof(CPUTLBEntry, addr_write)); - - /* The fast path is exactly one insn. Thus we can perform the entire - TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */ - /* beq,a,pt %[xi]cc, label0 */ - label_ptr = s->code_ptr; - tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT - | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); - /* delay slot */ - tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, - qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); - - /* TLB Miss. */ - - param = TCG_REG_O1; - if (!SPARC64 && TARGET_LONG_BITS == 64) { - /* Skip the high-part; we'll perform the extract in the trampoline. */ - param++; - } - tcg_out_mov(s, TCG_TYPE_REG, param++, addr); - if (!SPARC64 && (memop & MO_SIZE) == MO_64) { - /* Skip the high-part; we'll perform the extract in the trampoline. */ - param++; - } - tcg_out_mov(s, TCG_TYPE_REG, param++, data); - - func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)]; - assert(func != NULL); - tcg_out_call_nodelay(s, func); - /* delay slot */ - tcg_out_movi(s, TCG_TYPE_I32, param, oi); - - *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); -#else - if (SPARC64 && TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); - addr = TCG_REG_T1; - } - tcg_out_ldst_rr(s, data, addr, - (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0), - qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); -#endif /* CONFIG_SOFTMMU */ -} - -static void tcg_out_op(TCGContext *s, TCGOpcode opc, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) -{ - TCGArg a0, a1, a2; - int c, c2; - - /* Hoist the loads of the most common arguments. */ - a0 = args[0]; - a1 = args[1]; - a2 = args[2]; - c2 = const_args[2]; - - switch (opc) { - case INDEX_op_exit_tb: - if (check_fit_ptr(a0, 13)) { - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); - tcg_out_movi_imm13(s, TCG_REG_O0, a0); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff); - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); - tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR); - } - break; - case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { - /* direct jump method */ - s->tb_jmp_offset[a0] = tcg_current_code_size(s); - /* Make sure to preserve links during retranslation. 
*/ - tcg_out32(s, CALL | (*s->code_ptr & ~INSN_OP(-1))); - } else { - /* indirect jump method */ - tcg_out_ld_ptr(s, TCG_REG_T1, (uintptr_t)(s->tb_next + a0)); - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_T1, 0, JMPL); - } - tcg_out_nop(s); - s->tb_next_offset[a0] = tcg_current_code_size(s); - break; - case INDEX_op_br: - tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0)); - tcg_out_nop(s); - break; - -#define OP_32_64(x) \ - glue(glue(case INDEX_op_, x), _i32): \ - glue(glue(case INDEX_op_, x), _i64) - - OP_32_64(ld8u): - tcg_out_ldst(s, a0, a1, a2, LDUB); - break; - OP_32_64(ld8s): - tcg_out_ldst(s, a0, a1, a2, LDSB); - break; - OP_32_64(ld16u): - tcg_out_ldst(s, a0, a1, a2, LDUH); - break; - OP_32_64(ld16s): - tcg_out_ldst(s, a0, a1, a2, LDSH); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - tcg_out_ldst(s, a0, a1, a2, LDUW); - break; - OP_32_64(st8): - tcg_out_ldst(s, a0, a1, a2, STB); - break; - OP_32_64(st16): - tcg_out_ldst(s, a0, a1, a2, STH); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_ldst(s, a0, a1, a2, STW); - break; - OP_32_64(add): - c = ARITH_ADD; - goto gen_arith; - OP_32_64(sub): - c = ARITH_SUB; - goto gen_arith; - OP_32_64(and): - c = ARITH_AND; - goto gen_arith; - OP_32_64(andc): - c = ARITH_ANDN; - goto gen_arith; - OP_32_64(or): - c = ARITH_OR; - goto gen_arith; - OP_32_64(orc): - c = ARITH_ORN; - goto gen_arith; - OP_32_64(xor): - c = ARITH_XOR; - goto gen_arith; - case INDEX_op_shl_i32: - c = SHIFT_SLL; - do_shift32: - /* Limit immediate shift count lest we create an illegal insn. */ - tcg_out_arithc(s, a0, a1, a2 & 31, c2, c); - break; - case INDEX_op_shr_i32: - c = SHIFT_SRL; - goto do_shift32; - case INDEX_op_sar_i32: - c = SHIFT_SRA; - goto do_shift32; - case INDEX_op_mul_i32: - c = ARITH_UMUL; - goto gen_arith; - - OP_32_64(neg): - c = ARITH_SUB; - goto gen_arith1; - OP_32_64(not): - c = ARITH_ORN; - goto gen_arith1; - - case INDEX_op_div_i32: - tcg_out_div32(s, a0, a1, a2, c2, 0); - break; - case INDEX_op_divu_i32: - tcg_out_div32(s, a0, a1, a2, c2, 1); - break; - - case INDEX_op_brcond_i32: - tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3])); - break; - case INDEX_op_setcond_i32: - tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2); - break; - case INDEX_op_movcond_i32: - tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); - break; - - case INDEX_op_add2_i32: - tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], - args[4], const_args[4], args[5], const_args[5], - ARITH_ADDCC, ARITH_ADDC); - break; - case INDEX_op_sub2_i32: - tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], - args[4], const_args[4], args[5], const_args[5], - ARITH_SUBCC, ARITH_SUBC); - break; - case INDEX_op_mulu2_i32: - c = ARITH_UMUL; - goto do_mul2; - case INDEX_op_muls2_i32: - c = ARITH_SMUL; - do_mul2: - /* The 32-bit multiply insns produce a full 64-bit result. If the - destination register can hold it, we can avoid the slower RDY. 
*/ - tcg_out_arithc(s, a0, a2, args[3], const_args[3], c); - if (SPARC64 || a0 <= TCG_REG_O7) { - tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); - } else { - tcg_out_rdy(s, a1); - } - break; - - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, a0, a1, a2, false); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, a0, a1, a2, true); - break; - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, a0, a1, a2); - break; - - case INDEX_op_ld32s_i64: - tcg_out_ldst(s, a0, a1, a2, LDSW); - break; - case INDEX_op_ld_i64: - tcg_out_ldst(s, a0, a1, a2, LDX); - break; - case INDEX_op_st_i64: - tcg_out_ldst(s, a0, a1, a2, STX); - break; - case INDEX_op_shl_i64: - c = SHIFT_SLLX; - do_shift64: - /* Limit immediate shift count lest we create an illegal insn. */ - tcg_out_arithc(s, a0, a1, a2 & 63, c2, c); - break; - case INDEX_op_shr_i64: - c = SHIFT_SRLX; - goto do_shift64; - case INDEX_op_sar_i64: - c = SHIFT_SRAX; - goto do_shift64; - case INDEX_op_mul_i64: - c = ARITH_MULX; - goto gen_arith; - case INDEX_op_div_i64: - c = ARITH_SDIVX; - goto gen_arith; - case INDEX_op_divu_i64: - c = ARITH_UDIVX; - goto gen_arith; - case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: - tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA); - break; - case INDEX_op_extu_i32_i64: - case INDEX_op_ext32u_i64: - tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL); - break; - case INDEX_op_extrl_i64_i32: - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - break; - case INDEX_op_extrh_i64_i32: - tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX); - break; - - case INDEX_op_brcond_i64: - tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3])); - break; - case INDEX_op_setcond_i64: - tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2); - break; - case INDEX_op_movcond_i64: - tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); - break; - case INDEX_op_add2_i64: - tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], - const_args[4], args[5], const_args[5], false); - break; - case INDEX_op_sub2_i64: - tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], - const_args[4], args[5], const_args[5], true); - break; - case INDEX_op_muluh_i64: - tcg_out_arith(s, args[0], args[1], args[2], ARITH_UMULXHI); - break; - - gen_arith: - tcg_out_arithc(s, a0, a1, a2, c2, c); - break; - - gen_arith1: - tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c); - break; - - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ - case INDEX_op_movi_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. 
*/ - default: - tcg_abort(); - } -} - -static const TCGTargetOpDef sparc_op_defs[] = { - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - { INDEX_op_br, { } }, - - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_st8_i32, { "rZ", "r" } }, - { INDEX_op_st16_i32, { "rZ", "r" } }, - { INDEX_op_st_i32, { "rZ", "r" } }, - - { INDEX_op_add_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_mul_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_div_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_divu_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_sub_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_and_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_andc_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_or_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_orc_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_xor_i32, { "r", "rZ", "rJ" } }, - - { INDEX_op_shl_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_shr_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_sar_i32, { "r", "rZ", "rJ" } }, - - { INDEX_op_neg_i32, { "r", "rJ" } }, - { INDEX_op_not_i32, { "r", "rJ" } }, - - { INDEX_op_brcond_i32, { "rZ", "rJ" } }, - { INDEX_op_setcond_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_movcond_i32, { "r", "rZ", "rJ", "rI", "0" } }, - - { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, - { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, - { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } }, - { INDEX_op_muls2_i32, { "r", "r", "rZ", "rJ" } }, - - { INDEX_op_ld8u_i64, { "R", "r" } }, - { INDEX_op_ld8s_i64, { "R", "r" } }, - { INDEX_op_ld16u_i64, { "R", "r" } }, - { INDEX_op_ld16s_i64, { "R", "r" } }, - { INDEX_op_ld32u_i64, { "R", "r" } }, - { INDEX_op_ld32s_i64, { "R", "r" } }, - { INDEX_op_ld_i64, { "R", "r" } }, - { INDEX_op_st8_i64, { "RZ", "r" } }, - { INDEX_op_st16_i64, { "RZ", "r" } }, - { INDEX_op_st32_i64, { "RZ", "r" } }, - { INDEX_op_st_i64, { "RZ", "r" } }, - - { INDEX_op_add_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_mul_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_div_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_divu_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_sub_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_and_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_andc_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_or_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_orc_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_xor_i64, { "R", "RZ", "RJ" } }, - - { INDEX_op_shl_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_shr_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_sar_i64, { "R", "RZ", "RJ" } }, - - { INDEX_op_neg_i64, { "R", "RJ" } }, - { INDEX_op_not_i64, { "R", "RJ" } }, - - { INDEX_op_ext32s_i64, { "R", "R" } }, - { INDEX_op_ext32u_i64, { "R", "R" } }, - { INDEX_op_ext_i32_i64, { "R", "r" } }, - { INDEX_op_extu_i32_i64, { "R", "r" } }, - { INDEX_op_extrl_i64_i32, { "r", "R" } }, - { INDEX_op_extrh_i64_i32, { "r", "R" } }, - - { INDEX_op_brcond_i64, { "RZ", "RJ" } }, - { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_movcond_i64, { "R", "RZ", "RJ", "RI", "0" } }, - - { INDEX_op_add2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } }, - { INDEX_op_sub2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } }, - { INDEX_op_muluh_i64, { "R", "RZ", "RZ" } }, - - { INDEX_op_qemu_ld_i32, { "r", "A" } }, - { INDEX_op_qemu_ld_i64, { "R", "A" } }, - { INDEX_op_qemu_st_i32, { "sZ", "A" } }, - { INDEX_op_qemu_st_i64, { "SZ", "A" } }, - - { -1 }, -}; - -static void tcg_target_init(TCGContext *s) -{ - /* Only probe for the platform and capabilities if we havn't already - determined 
maximum values at compile time. */ -#ifndef use_vis3_instructions - { - unsigned long hwcap = qemu_getauxval(AT_HWCAP); - use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0; - } -#endif - - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, ALL_64); - - tcg_regset_set32(tcg_target_call_clobber_regs, 0, - (1 << TCG_REG_G1) | - (1 << TCG_REG_G2) | - (1 << TCG_REG_G3) | - (1 << TCG_REG_G4) | - (1 << TCG_REG_G5) | - (1 << TCG_REG_G6) | - (1 << TCG_REG_G7) | - (1 << TCG_REG_O0) | - (1 << TCG_REG_O1) | - (1 << TCG_REG_O2) | - (1 << TCG_REG_O3) | - (1 << TCG_REG_O4) | - (1 << TCG_REG_O5) | - (1 << TCG_REG_O7)); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */ - - tcg_add_target_add_op_defs(sparc_op_defs); -} - -#if SPARC64 -# define ELF_HOST_MACHINE EM_SPARCV9 -#else -# define ELF_HOST_MACHINE EM_SPARC32PLUS -# define ELF_HOST_FLAGS EF_SPARC_32PLUS -#endif - -typedef struct { - DebugFrameHeader h; - uint8_t fde_def_cfa[SPARC64 ? 4 : 2]; - uint8_t fde_win_save; - uint8_t fde_ret_save[3]; -} DebugFrame; - -static const DebugFrame debug_frame = { - .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .h.cie.id = -1, - .h.cie.version = 1, - .h.cie.code_align = 1, - .h.cie.data_align = -sizeof(void *) & 0x7f, - .h.cie.return_column = 15, /* o7 */ - - /* Total FDE size does not include the "len" member. */ - .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), - - .fde_def_cfa = { -#if SPARC64 - 12, 30, /* DW_CFA_def_cfa i6, 2047 */ - (2047 & 0x7f) | 0x80, (2047 >> 7) -#else - 13, 30 /* DW_CFA_def_cfa_register i6 */ -#endif - }, - .fde_win_save = 0x2d, /* DW_CFA_GNU_window_save */ - .fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */ -}; - -void tcg_register_jit(void *buf, size_t buf_size) -{ - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); -} - -void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) -{ - uint32_t *ptr = (uint32_t *)jmp_addr; - uintptr_t disp = addr - jmp_addr; - - /* We can reach the entire address space for 32-bit. For 64-bit - the code_gen_buffer can't be larger than 2GB. 
*/ - assert(disp == (int32_t)disp); - - *ptr = CALL | (uint32_t)disp >> 2; - flush_icache_range(jmp_addr, jmp_addr + 4); -} diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c new file mode 100644 index 0000000000..d3100ab557 --- /dev/null +++ b/tcg/sparc/tcg-target.inc.c @@ -0,0 +1,1653 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "tcg-be-null.h" + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "%g0", + "%g1", + "%g2", + "%g3", + "%g4", + "%g5", + "%g6", + "%g7", + "%o0", + "%o1", + "%o2", + "%o3", + "%o4", + "%o5", + "%o6", + "%o7", + "%l0", + "%l1", + "%l2", + "%l3", + "%l4", + "%l5", + "%l6", + "%l7", + "%i0", + "%i1", + "%i2", + "%i3", + "%i4", + "%i5", + "%i6", + "%i7", +}; +#endif + +#ifdef __arch64__ +# define SPARC64 1 +#else +# define SPARC64 0 +#endif + +/* Note that sparcv8plus can only hold 64 bit quantities in %g and %o + registers. These are saved manually by the kernel in full 64-bit + slots. The %i and %l registers are saved by the register window + mechanism, which only allocates space for 32 bits. Given that this + window spill/fill can happen on any signal, we must consider the + high bits of the %i and %l registers garbage at all times. */ +#if SPARC64 +# define ALL_64 0xffffffffu +#else +# define ALL_64 0xffffu +#endif + +/* Define some temporary registers. T2 is used for constant generation. 
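+    (T1 and T2 are mapped to %g1 and %o7 just below; both are call-clobbered, and tcg_target_init later marks them reserved for internal use, so the register allocator never hands them out to guest values.)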
*/ +#define TCG_REG_T1 TCG_REG_G1 +#define TCG_REG_T2 TCG_REG_O7 + +#ifndef CONFIG_SOFTMMU +# define TCG_GUEST_BASE_REG TCG_REG_I5 +#endif + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_L0, + TCG_REG_L1, + TCG_REG_L2, + TCG_REG_L3, + TCG_REG_L4, + TCG_REG_L5, + TCG_REG_L6, + TCG_REG_L7, + + TCG_REG_I0, + TCG_REG_I1, + TCG_REG_I2, + TCG_REG_I3, + TCG_REG_I4, + TCG_REG_I5, + + TCG_REG_G2, + TCG_REG_G3, + TCG_REG_G4, + TCG_REG_G5, + + TCG_REG_O0, + TCG_REG_O1, + TCG_REG_O2, + TCG_REG_O3, + TCG_REG_O4, + TCG_REG_O5, +}; + +static const int tcg_target_call_iarg_regs[6] = { + TCG_REG_O0, + TCG_REG_O1, + TCG_REG_O2, + TCG_REG_O3, + TCG_REG_O4, + TCG_REG_O5, +}; + +static const int tcg_target_call_oarg_regs[] = { + TCG_REG_O0, + TCG_REG_O1, + TCG_REG_O2, + TCG_REG_O3, +}; + +#define INSN_OP(x) ((x) << 30) +#define INSN_OP2(x) ((x) << 22) +#define INSN_OP3(x) ((x) << 19) +#define INSN_OPF(x) ((x) << 5) +#define INSN_RD(x) ((x) << 25) +#define INSN_RS1(x) ((x) << 14) +#define INSN_RS2(x) (x) +#define INSN_ASI(x) ((x) << 5) + +#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff)) +#define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff)) +#define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff)) +#define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20)) +#define INSN_OFF19(x) (((x) >> 2) & 0x07ffff) +#define INSN_COND(x) ((x) << 25) + +#define COND_N 0x0 +#define COND_E 0x1 +#define COND_LE 0x2 +#define COND_L 0x3 +#define COND_LEU 0x4 +#define COND_CS 0x5 +#define COND_NEG 0x6 +#define COND_VS 0x7 +#define COND_A 0x8 +#define COND_NE 0x9 +#define COND_G 0xa +#define COND_GE 0xb +#define COND_GU 0xc +#define COND_CC 0xd +#define COND_POS 0xe +#define COND_VC 0xf +#define BA (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2)) + +#define RCOND_Z 1 +#define RCOND_LEZ 2 +#define RCOND_LZ 3 +#define RCOND_NZ 5 +#define RCOND_GZ 6 +#define RCOND_GEZ 7 + +#define MOVCC_ICC (1 << 18) +#define MOVCC_XCC (1 << 18 | 1 << 12) + +#define BPCC_ICC 0 +#define BPCC_XCC (2 << 20) +#define BPCC_PT (1 << 19) +#define BPCC_PN 0 +#define BPCC_A (1 << 29) + +#define BPR_PT BPCC_PT + +#define ARITH_ADD (INSN_OP(2) | INSN_OP3(0x00)) +#define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10)) +#define ARITH_AND (INSN_OP(2) | INSN_OP3(0x01)) +#define ARITH_ANDN (INSN_OP(2) | INSN_OP3(0x05)) +#define ARITH_OR (INSN_OP(2) | INSN_OP3(0x02)) +#define ARITH_ORCC (INSN_OP(2) | INSN_OP3(0x12)) +#define ARITH_ORN (INSN_OP(2) | INSN_OP3(0x06)) +#define ARITH_XOR (INSN_OP(2) | INSN_OP3(0x03)) +#define ARITH_SUB (INSN_OP(2) | INSN_OP3(0x04)) +#define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14)) +#define ARITH_ADDC (INSN_OP(2) | INSN_OP3(0x08)) +#define ARITH_SUBC (INSN_OP(2) | INSN_OP3(0x0c)) +#define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a)) +#define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b)) +#define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e)) +#define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f)) +#define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09)) +#define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d)) +#define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d)) +#define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c)) +#define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f)) + +#define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11)) +#define ARITH_UMULXHI (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x16)) + +#define SHIFT_SLL (INSN_OP(2) | INSN_OP3(0x25)) +#define SHIFT_SRL (INSN_OP(2) | INSN_OP3(0x26)) +#define SHIFT_SRA (INSN_OP(2) | INSN_OP3(0x27)) + +#define SHIFT_SLLX (INSN_OP(2) | INSN_OP3(0x25) | (1 << 12)) +#define SHIFT_SRLX (INSN_OP(2) | INSN_OP3(0x26) | (1 << 
12)) +#define SHIFT_SRAX (INSN_OP(2) | INSN_OP3(0x27) | (1 << 12)) + +#define RDY (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0)) +#define WRY (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0)) +#define JMPL (INSN_OP(2) | INSN_OP3(0x38)) +#define RETURN (INSN_OP(2) | INSN_OP3(0x39)) +#define SAVE (INSN_OP(2) | INSN_OP3(0x3c)) +#define RESTORE (INSN_OP(2) | INSN_OP3(0x3d)) +#define SETHI (INSN_OP(0) | INSN_OP2(0x4)) +#define CALL INSN_OP(1) +#define LDUB (INSN_OP(3) | INSN_OP3(0x01)) +#define LDSB (INSN_OP(3) | INSN_OP3(0x09)) +#define LDUH (INSN_OP(3) | INSN_OP3(0x02)) +#define LDSH (INSN_OP(3) | INSN_OP3(0x0a)) +#define LDUW (INSN_OP(3) | INSN_OP3(0x00)) +#define LDSW (INSN_OP(3) | INSN_OP3(0x08)) +#define LDX (INSN_OP(3) | INSN_OP3(0x0b)) +#define STB (INSN_OP(3) | INSN_OP3(0x05)) +#define STH (INSN_OP(3) | INSN_OP3(0x06)) +#define STW (INSN_OP(3) | INSN_OP3(0x04)) +#define STX (INSN_OP(3) | INSN_OP3(0x0e)) +#define LDUBA (INSN_OP(3) | INSN_OP3(0x11)) +#define LDSBA (INSN_OP(3) | INSN_OP3(0x19)) +#define LDUHA (INSN_OP(3) | INSN_OP3(0x12)) +#define LDSHA (INSN_OP(3) | INSN_OP3(0x1a)) +#define LDUWA (INSN_OP(3) | INSN_OP3(0x10)) +#define LDSWA (INSN_OP(3) | INSN_OP3(0x18)) +#define LDXA (INSN_OP(3) | INSN_OP3(0x1b)) +#define STBA (INSN_OP(3) | INSN_OP3(0x15)) +#define STHA (INSN_OP(3) | INSN_OP3(0x16)) +#define STWA (INSN_OP(3) | INSN_OP3(0x14)) +#define STXA (INSN_OP(3) | INSN_OP3(0x1e)) + +#ifndef ASI_PRIMARY_LITTLE +#define ASI_PRIMARY_LITTLE 0x88 +#endif + +#define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDSW_LE (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE)) + +#define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE)) + +#ifndef use_vis3_instructions +bool use_vis3_instructions; +#endif + +static inline int check_fit_i64(int64_t val, unsigned int bits) +{ + return val == sextract64(val, 0, bits); +} + +static inline int check_fit_i32(int32_t val, unsigned int bits) +{ + return val == sextract32(val, 0, bits); +} + +#define check_fit_tl check_fit_i64 +#if SPARC64 +# define check_fit_ptr check_fit_i64 +#else +# define check_fit_ptr check_fit_i32 +#endif + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + uint32_t insn; + + assert(addend == 0); + value = tcg_ptr_byte_diff((tcg_insn_unit *)value, code_ptr); + + switch (type) { + case R_SPARC_WDISP16: + if (!check_fit_ptr(value >> 2, 16)) { + tcg_abort(); + } + insn = *code_ptr; + insn &= ~INSN_OFF16(-1); + insn |= INSN_OFF16(value); + *code_ptr = insn; + break; + case R_SPARC_WDISP19: + if (!check_fit_ptr(value >> 2, 19)) { + tcg_abort(); + } + insn = *code_ptr; + insn &= ~INSN_OFF19(-1); + insn |= INSN_OFF19(value); + *code_ptr = insn; + break; + default: + tcg_abort(); + } +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str; + + ct_str = *pct_str; + switch (ct_str[0]) { + case 'r': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + break; + case 'R': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, ALL_64); + break; + case 'A': /* qemu_ld/st address constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, + TARGET_LONG_BITS == 64 ? 
ALL_64 : 0xffffffff); + reserve_helpers: + tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2); + break; + case 's': /* qemu_st data 32-bit constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + goto reserve_helpers; + case 'S': /* qemu_st data 64-bit constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, ALL_64); + goto reserve_helpers; + case 'I': + ct->ct |= TCG_CT_CONST_S11; + break; + case 'J': + ct->ct |= TCG_CT_CONST_S13; + break; + case 'Z': + ct->ct |= TCG_CT_CONST_ZERO; + break; + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + return 0; +} + +/* test if a constant matches the constraint */ +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct = arg_ct->ct; + + if (ct & TCG_CT_CONST) { + return 1; + } + + if (type == TCG_TYPE_I32) { + val = (int32_t)val; + } + + if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return 1; + } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) { + return 1; + } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) { + return 1; + } else { + return 0; + } +} + +static inline void tcg_out_arith(TCGContext *s, TCGReg rd, TCGReg rs1, + TCGReg rs2, int op) +{ + tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_RS2(rs2)); +} + +static inline void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1, + int32_t offset, int op) +{ + tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_IMM13(offset)); +} + +static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1, + int32_t val2, int val2const, int op) +{ + tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) + | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2))); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, + TCGReg ret, TCGReg arg) +{ + if (ret != arg) { + tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR); + } +} + +static inline void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg) +{ + tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10)); +} + +static inline void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg) +{ + tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR); +} + +static void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long arg) +{ + tcg_target_long hi, lo = (int32_t)arg; + + /* Make sure we test 32-bit constants for imm13 properly. */ + if (type == TCG_TYPE_I32) { + arg = lo; + } + + /* A 13-bit constant sign-extended to 64-bits. */ + if (check_fit_tl(arg, 13)) { + tcg_out_movi_imm13(s, ret, arg); + return; + } + + /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */ + if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) { + tcg_out_sethi(s, ret, arg); + if (arg & 0x3ff) { + tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR); + } + return; + } + + /* A 32-bit constant sign-extended to 64-bits. */ + if (arg == lo) { + tcg_out_sethi(s, ret, ~arg); + tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR); + return; + } + + /* A 64-bit constant decomposed into 2 32-bit pieces. 
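+       For instance (illustrative values, not taken from the source): arg = 0x1234567800000005 gives lo = 5, which fits in 13 bits, so we emit a 32-bit movi of 0x12345678, a left shift by 32, then an add of 5; arg = 0x1234567887654321 instead takes the else branch and ORs together two independently built 32-bit halves.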
*/ + if (check_fit_i32(lo, 13)) { + hi = (arg - lo) >> 32; + tcg_out_movi(s, TCG_TYPE_I32, ret, hi); + tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); + tcg_out_arithi(s, ret, ret, lo, ARITH_ADD); + } else { + hi = arg >> 32; + tcg_out_movi(s, TCG_TYPE_I32, ret, hi); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo); + tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); + tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR); + } +} + +static inline void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1, + TCGReg a2, int op) +{ + tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2)); +} + +static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr, + intptr_t offset, int op) +{ + if (check_fit_ptr(offset, 13)) { + tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) | + INSN_IMM13(offset)); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset); + tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op); + } +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX)); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX)); +} + +static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg) +{ + tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff); + tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff); +} + +static inline void tcg_out_sety(TCGContext *s, TCGReg rs) +{ + tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); +} + +static inline void tcg_out_rdy(TCGContext *s, TCGReg rd) +{ + tcg_out32(s, RDY | INSN_RD(rd)); +} + +static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1, + int32_t val2, int val2const, int uns) +{ + /* Load Y with the sign/zero extension of RS1 to 64-bits. */ + if (uns) { + tcg_out_sety(s, TCG_REG_G0); + } else { + tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA); + tcg_out_sety(s, TCG_REG_T1); + } + + tcg_out_arithc(s, rd, rs1, val2, val2const, + uns ? ARITH_UDIV : ARITH_SDIV); +} + +static inline void tcg_out_nop(TCGContext *s) +{ + tcg_out_sethi(s, TCG_REG_G0, 0); +} + +static const uint8_t tcg_cond_to_bcond[] = { + [TCG_COND_EQ] = COND_E, + [TCG_COND_NE] = COND_NE, + [TCG_COND_LT] = COND_L, + [TCG_COND_GE] = COND_GE, + [TCG_COND_LE] = COND_LE, + [TCG_COND_GT] = COND_G, + [TCG_COND_LTU] = COND_CS, + [TCG_COND_GEU] = COND_CC, + [TCG_COND_LEU] = COND_LEU, + [TCG_COND_GTU] = COND_GU, +}; + +static const uint8_t tcg_cond_to_rcond[] = { + [TCG_COND_EQ] = RCOND_Z, + [TCG_COND_NE] = RCOND_NZ, + [TCG_COND_LT] = RCOND_LZ, + [TCG_COND_GT] = RCOND_GZ, + [TCG_COND_LE] = RCOND_LEZ, + [TCG_COND_GE] = RCOND_GEZ +}; + +static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19) +{ + tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19); +} + +static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l) +{ + int off19; + + if (l->has_value) { + off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr)); + } else { + /* Make sure to preserve destinations during retranslation. 
*/ + off19 = *s->code_ptr & INSN_OFF19(-1); + tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, l, 0); + } + tcg_out_bpcc0(s, scond, flags, off19); +} + +static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const) +{ + tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC); +} + +static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1, + int32_t arg2, int const_arg2, TCGLabel *l) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2); + tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, l); + tcg_out_nop(s); +} + +static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret, + int32_t v1, int v1const) +{ + tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret) + | INSN_RS1(tcg_cond_to_bcond[cond]) + | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1))); +} + +static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const, + int32_t v1, int v1const) +{ + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const); +} + +static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1, + int32_t arg2, int const_arg2, TCGLabel *l) +{ + /* For 64-bit signed comparisons vs zero, we can avoid the compare. */ + if (arg2 == 0 && !is_unsigned_cond(cond)) { + int off16; + + if (l->has_value) { + off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr)); + } else { + /* Make sure to preserve destinations during retranslation. */ + off16 = *s->code_ptr & INSN_OFF16(-1); + tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0); + } + tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1) + | INSN_COND(tcg_cond_to_rcond[cond]) | off16); + } else { + tcg_out_cmp(s, arg1, arg2, const_arg2); + tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, l); + } + tcg_out_nop(s); +} + +static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1, + int32_t v1, int v1const) +{ + tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1) + | (tcg_cond_to_rcond[cond] << 10) + | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1))); +} + +static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const, + int32_t v1, int v1const) +{ + /* For 64-bit signed comparisons vs zero, we can avoid the compare. + Note that the immediate range is one bit smaller, so we must check + for that as well. */ + if (c2 == 0 && !is_unsigned_cond(cond) + && (!v1const || check_fit_i32(v1, 10))) { + tcg_out_movr(s, cond, ret, c1, v1, v1const); + } else { + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const); + } +} + +static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const) +{ + /* For 32-bit comparisons, we can play games with ADDC/SUBC. */ + switch (cond) { + case TCG_COND_LTU: + case TCG_COND_GEU: + /* The result of the comparison is in the carry bit. */ + break; + + case TCG_COND_EQ: + case TCG_COND_NE: + /* For equality, we can transform to inequality vs zero. */ + if (c2 != 0) { + tcg_out_arithc(s, TCG_REG_T1, c1, c2, c2const, ARITH_XOR); + c2 = TCG_REG_T1; + } else { + c2 = c1; + } + c1 = TCG_REG_G0, c2const = 0; + cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU); + break; + + case TCG_COND_GTU: + case TCG_COND_LEU: + /* If we don't need to load a constant into a register, we can + swap the operands on GTU/LEU. There's no benefit to loading + the constant into a temporary register. 
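+       As an illustration with hypothetical operands: setcond_i32 GTU d, a, b with b already in a register becomes LTU with a and b exchanged, after which the carry path below emits "cmp b, a; addc %g0, 0, d" so d receives the carry bit directly.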
*/ + if (!c2const || c2 == 0) { + TCGReg t = c1; + c1 = c2; + c2 = t; + c2const = 0; + cond = tcg_swap_cond(cond); + break; + } + /* FALLTHRU */ + + default: + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_movi_imm13(s, ret, 0); + tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1); + return; + } + + tcg_out_cmp(s, c1, c2, c2const); + if (cond == TCG_COND_LTU) { + tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_ADDC); + } else { + tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_SUBC); + } +} + +static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const) +{ + if (use_vis3_instructions) { + switch (cond) { + case TCG_COND_NE: + if (c2 != 0) { + break; + } + c2 = c1, c2const = 0, c1 = TCG_REG_G0; + /* FALLTHRU */ + case TCG_COND_LTU: + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_arith(s, ret, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC); + return; + default: + break; + } + } + + /* For 64-bit signed comparisons vs zero, we can avoid the compare + if the input does not overlap the output. */ + if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) { + tcg_out_movi_imm13(s, ret, 0); + tcg_out_movr(s, cond, ret, c1, 1, 1); + } else { + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_movi_imm13(s, ret, 0); + tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1); + } +} + +static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh, + TCGReg al, TCGReg ah, int32_t bl, int blconst, + int32_t bh, int bhconst, int opl, int oph) +{ + TCGReg tmp = TCG_REG_T1; + + /* Note that the low parts are fully consumed before tmp is set. */ + if (rl != ah && (bhconst || rl != bh)) { + tmp = rl; + } + + tcg_out_arithc(s, tmp, al, bl, blconst, opl); + tcg_out_arithc(s, rh, ah, bh, bhconst, oph); + tcg_out_mov(s, TCG_TYPE_I32, rl, tmp); +} + +static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, + TCGReg al, TCGReg ah, int32_t bl, int blconst, + int32_t bh, int bhconst, bool is_sub) +{ + TCGReg tmp = TCG_REG_T1; + + /* Note that the low parts are fully consumed before tmp is set. */ + if (rl != ah && (bhconst || rl != bh)) { + tmp = rl; + } + + tcg_out_arithc(s, tmp, al, bl, blconst, is_sub ? ARITH_SUBCC : ARITH_ADDCC); + + if (use_vis3_instructions && !is_sub) { + /* Note that ADDXC doesn't accept immediates. */ + if (bhconst && bh != 0) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh); + bh = TCG_REG_T2; + } + tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC); + } else if (bh == TCG_REG_G0) { + /* If we have a zero, we can perform the operation in two insns, + with the arithmetic first, and a conditional move into place. */ + if (rh == ah) { + tcg_out_arithi(s, TCG_REG_T2, ah, 1, + is_sub ? ARITH_SUB : ARITH_ADD); + tcg_out_movcc(s, TCG_COND_LTU, MOVCC_XCC, rh, TCG_REG_T2, 0); + } else { + tcg_out_arithi(s, rh, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD); + tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0); + } + } else { + /* Otherwise adjust BH as if there is carry into T2 ... */ + if (bhconst) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh + (is_sub ? -1 : 1)); + } else { + tcg_out_arithi(s, TCG_REG_T2, bh, 1, + is_sub ? ARITH_SUB : ARITH_ADD); + } + /* ... smoosh T2 back to original BH if carry is clear ... */ + tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, TCG_REG_T2, bh, bhconst); + /* ... and finally perform the arithmetic with the new operand. */ + tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? 
ARITH_SUB : ARITH_ADD); + } + + tcg_out_mov(s, TCG_TYPE_I64, rl, tmp); +} + +static void tcg_out_call_nodelay(TCGContext *s, tcg_insn_unit *dest) +{ + ptrdiff_t disp = tcg_pcrel_diff(s, dest); + + if (disp == (int32_t)disp) { + tcg_out32(s, CALL | (uint32_t)disp >> 2); + } else { + uintptr_t desti = (uintptr_t)dest; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, desti & ~0xfff); + tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL); + } +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) +{ + tcg_out_call_nodelay(s, dest); + tcg_out_nop(s); +} + +#ifdef CONFIG_SOFTMMU +static tcg_insn_unit *qemu_ld_trampoline[16]; +static tcg_insn_unit *qemu_st_trampoline[16]; + +static void build_trampolines(TCGContext *s) +{ + static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, + }; + static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, + }; + + int i; + TCGReg ra; + + for (i = 0; i < 16; ++i) { + if (qemu_ld_helpers[i] == NULL) { + continue; + } + + /* May as well align the trampoline. */ + while ((uintptr_t)s->code_ptr & 15) { + tcg_out_nop(s); + } + qemu_ld_trampoline[i] = s->code_ptr; + + if (SPARC64 || TARGET_LONG_BITS == 32) { + ra = TCG_REG_O3; + } else { + /* Install the high part of the address. */ + tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX); + ra = TCG_REG_O4; + } + + /* Set the retaddr operand. */ + tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); + /* Set the env operand. */ + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); + /* Tail call. */ + tcg_out_call_nodelay(s, qemu_ld_helpers[i]); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); + } + + for (i = 0; i < 16; ++i) { + if (qemu_st_helpers[i] == NULL) { + continue; + } + + /* May as well align the trampoline. */ + while ((uintptr_t)s->code_ptr & 15) { + tcg_out_nop(s); + } + qemu_st_trampoline[i] = s->code_ptr; + + if (SPARC64) { + ra = TCG_REG_O4; + } else { + ra = TCG_REG_O1; + if (TARGET_LONG_BITS == 64) { + /* Install the high part of the address. */ + tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); + ra += 2; + } else { + ra += 1; + } + if ((i & MO_SIZE) == MO_64) { + /* Install the high part of the data. */ + tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); + ra += 2; + } else { + ra += 1; + } + /* Skip the oi argument. */ + ra += 1; + } + + /* Set the retaddr operand. */ + if (ra >= TCG_REG_O6) { + tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK, + TCG_TARGET_CALL_STACK_OFFSET); + ra = TCG_REG_G1; + } + tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); + /* Set the env operand. */ + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); + /* Tail call. */ + tcg_out_call_nodelay(s, qemu_st_helpers[i]); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); + } +} +#endif + +/* Generate global QEMU prologue and epilogue code */ +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int tmp_buf_size, frame_size; + + /* The TCG temp buffer is at the top of the frame, immediately + below the frame pointer. 
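+       As a rough worked example (the constants come from tcg.h and tcg-target.h and may differ): with CPU_TEMP_BUF_NLONGS = 128 and 8-byte longs, tmp_buf_size is 1024 bytes, and the SAVE below grows the register window frame by that plus the fixed call-argument area, rounded up to TCG_TARGET_STACK_ALIGN.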
*/ + tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long); + tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size, + tmp_buf_size); + + /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is + otherwise the minimal frame usable by callees. */ + frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS; + frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size; + frame_size += TCG_TARGET_STACK_ALIGN - 1; + frame_size &= -TCG_TARGET_STACK_ALIGN; + tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) | + INSN_IMM13(-frame_size)); + +#ifndef CONFIG_SOFTMMU + if (guest_base != 0) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); + } +#endif + + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL); + /* delay slot */ + tcg_out_nop(s); + + /* No epilogue required. We issue ret + restore directly in the TB. */ + +#ifdef CONFIG_SOFTMMU + build_trampolines(s); +#endif +} + +#if defined(CONFIG_SOFTMMU) +/* Perform the TLB load and compare. + + Inputs: + ADDRLO and ADDRHI contain the possible two parts of the address. + + MEM_INDEX and S_BITS are the memory context and log2 size of the load. + + WHICH is the offset into the CPUTLBEntry structure of the slot to read. + This should be offsetof addr_read or addr_write. + + The result of the TLB comparison is in %[ix]cc. The sanitized address + is in the returned register, maybe %o0. The TLB addend is in %o1. */ + +static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, + TCGMemOp s_bits, int which) +{ + const TCGReg r0 = TCG_REG_O0; + const TCGReg r1 = TCG_REG_O1; + const TCGReg r2 = TCG_REG_O2; + int tlb_ofs; + + /* Shift the page number down. */ + tcg_out_arithi(s, r1, addr, TARGET_PAGE_BITS, SHIFT_SRL); + + /* Mask out the page offset, except for the required alignment. */ + tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_T1, + TARGET_PAGE_MASK | ((1 << s_bits) - 1)); + + /* Mask the tlb index. */ + tcg_out_arithi(s, r1, r1, CPU_TLB_SIZE - 1, ARITH_AND); + + /* Mask page, part 2. */ + tcg_out_arith(s, r0, addr, TCG_REG_T1, ARITH_AND); + + /* Shift the tlb index into place. */ + tcg_out_arithi(s, r1, r1, CPU_TLB_ENTRY_BITS, SHIFT_SLL); + + /* Relative to the current ENV. */ + tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD); + + /* Find a base address that can load both tlb comparator and addend. */ + tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]); + if (!check_fit_ptr(tlb_ofs + sizeof(CPUTLBEntry), 13)) { + if (tlb_ofs & ~0x3ff) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, tlb_ofs & ~0x3ff); + tcg_out_arith(s, r1, r1, TCG_REG_T1, ARITH_ADD); + } + tlb_ofs &= 0x3ff; + } + + /* Load the tlb comparator and the addend. */ + tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which); + tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend)); + + /* subcc arg0, arg2, %g0 */ + tcg_out_cmp(s, r0, r2, 0); + + /* If the guest address must be zero-extended, do so now. 
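+       (This is the 64-bit host / 32-bit guest case: a v9 "srl" by 0 copies the low 32 bits of the address and zero-fills the upper half, so the %o0 we return is a cleanly zero-extended address.)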
*/ + if (SPARC64 && TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL); + return r0; + } + return addr; +} +#endif /* CONFIG_SOFTMMU */ + +static const int qemu_ld_opc[16] = { + [MO_UB] = LDUB, + [MO_SB] = LDSB, + + [MO_BEUW] = LDUH, + [MO_BESW] = LDSH, + [MO_BEUL] = LDUW, + [MO_BESL] = LDSW, + [MO_BEQ] = LDX, + + [MO_LEUW] = LDUH_LE, + [MO_LESW] = LDSH_LE, + [MO_LEUL] = LDUW_LE, + [MO_LESL] = LDSW_LE, + [MO_LEQ] = LDX_LE, +}; + +static const int qemu_st_opc[16] = { + [MO_UB] = STB, + + [MO_BEUW] = STH, + [MO_BEUL] = STW, + [MO_BEQ] = STX, + + [MO_LEUW] = STH_LE, + [MO_LEUL] = STW_LE, + [MO_LEQ] = STX_LE, +}; + +static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, + TCGMemOpIdx oi, bool is_64) +{ + TCGMemOp memop = get_memop(oi); +#ifdef CONFIG_SOFTMMU + unsigned memi = get_mmuidx(oi); + TCGReg addrz, param; + tcg_insn_unit *func; + tcg_insn_unit *label_ptr; + + addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE, + offsetof(CPUTLBEntry, addr_read)); + + /* The fast path is exactly one insn. Thus we can perform the + entire TLB Hit in the (annulled) delay slot of the branch + over the TLB Miss case. */ + + /* beq,a,pt %[xi]cc, label0 */ + label_ptr = s->code_ptr; + tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT + | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); + /* delay slot */ + tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, + qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); + + /* TLB Miss. */ + + param = TCG_REG_O1; + if (!SPARC64 && TARGET_LONG_BITS == 64) { + /* Skip the high-part; we'll perform the extract in the trampoline. */ + param++; + } + tcg_out_mov(s, TCG_TYPE_REG, param++, addr); + + /* We use the helpers to extend SB and SW data, leaving the case + of SL needing explicit extending below. */ + if ((memop & MO_SSIZE) == MO_SL) { + func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)]; + } else { + func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)]; + } + assert(func != NULL); + tcg_out_call_nodelay(s, func); + /* delay slot */ + tcg_out_movi(s, TCG_TYPE_I32, param, oi); + + /* Recall that all of the helpers return 64-bit results. + Which complicates things for sparcv8plus. */ + if (SPARC64) { + /* We let the helper sign-extend SB and SW, but leave SL for here. */ + if (is_64 && (memop & MO_SSIZE) == MO_SL) { + tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); + } else { + tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); + } + } else { + if ((memop & MO_SIZE) == MO_64) { + tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX); + tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL); + tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR); + } else if (is_64) { + /* Re-extend from 32-bit rather than reassembling when we + know the high register must be an extension. */ + tcg_out_arithi(s, data, TCG_REG_O1, 0, + memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL); + } else { + tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1); + } + } + + *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); +#else + if (SPARC64 && TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); + addr = TCG_REG_T1; + } + tcg_out_ldst_rr(s, data, addr, + (guest_base ? 
TCG_GUEST_BASE_REG : TCG_REG_G0), + qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); +#endif /* CONFIG_SOFTMMU */ +} + +static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, + TCGMemOpIdx oi) +{ + TCGMemOp memop = get_memop(oi); +#ifdef CONFIG_SOFTMMU + unsigned memi = get_mmuidx(oi); + TCGReg addrz, param; + tcg_insn_unit *func; + tcg_insn_unit *label_ptr; + + addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE, + offsetof(CPUTLBEntry, addr_write)); + + /* The fast path is exactly one insn. Thus we can perform the entire + TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */ + /* beq,a,pt %[xi]cc, label0 */ + label_ptr = s->code_ptr; + tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT + | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); + /* delay slot */ + tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, + qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); + + /* TLB Miss. */ + + param = TCG_REG_O1; + if (!SPARC64 && TARGET_LONG_BITS == 64) { + /* Skip the high-part; we'll perform the extract in the trampoline. */ + param++; + } + tcg_out_mov(s, TCG_TYPE_REG, param++, addr); + if (!SPARC64 && (memop & MO_SIZE) == MO_64) { + /* Skip the high-part; we'll perform the extract in the trampoline. */ + param++; + } + tcg_out_mov(s, TCG_TYPE_REG, param++, data); + + func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)]; + assert(func != NULL); + tcg_out_call_nodelay(s, func); + /* delay slot */ + tcg_out_movi(s, TCG_TYPE_I32, param, oi); + + *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); +#else + if (SPARC64 && TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); + addr = TCG_REG_T1; + } + tcg_out_ldst_rr(s, data, addr, + (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0), + qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); +#endif /* CONFIG_SOFTMMU */ +} + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg args[TCG_MAX_OP_ARGS], + const int const_args[TCG_MAX_OP_ARGS]) +{ + TCGArg a0, a1, a2; + int c, c2; + + /* Hoist the loads of the most common arguments. */ + a0 = args[0]; + a1 = args[1]; + a2 = args[2]; + c2 = const_args[2]; + + switch (opc) { + case INDEX_op_exit_tb: + if (check_fit_ptr(a0, 13)) { + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + tcg_out_movi_imm13(s, TCG_REG_O0, a0); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff); + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR); + } + break; + case INDEX_op_goto_tb: + if (s->tb_jmp_offset) { + /* direct jump method */ + s->tb_jmp_offset[a0] = tcg_current_code_size(s); + /* Make sure to preserve links during retranslation. 
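+            (We rewrite only the opcode bits: the 30-bit displacement already present at code_ptr is kept, so a destination previously patched in by tb_set_jmp_target1 survives retranslation of this TB.)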
*/ + tcg_out32(s, CALL | (*s->code_ptr & ~INSN_OP(-1))); + } else { + /* indirect jump method */ + tcg_out_ld_ptr(s, TCG_REG_T1, (uintptr_t)(s->tb_next + a0)); + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_T1, 0, JMPL); + } + tcg_out_nop(s); + s->tb_next_offset[a0] = tcg_current_code_size(s); + break; + case INDEX_op_br: + tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0)); + tcg_out_nop(s); + break; + +#define OP_32_64(x) \ + glue(glue(case INDEX_op_, x), _i32): \ + glue(glue(case INDEX_op_, x), _i64) + + OP_32_64(ld8u): + tcg_out_ldst(s, a0, a1, a2, LDUB); + break; + OP_32_64(ld8s): + tcg_out_ldst(s, a0, a1, a2, LDSB); + break; + OP_32_64(ld16u): + tcg_out_ldst(s, a0, a1, a2, LDUH); + break; + OP_32_64(ld16s): + tcg_out_ldst(s, a0, a1, a2, LDSH); + break; + case INDEX_op_ld_i32: + case INDEX_op_ld32u_i64: + tcg_out_ldst(s, a0, a1, a2, LDUW); + break; + OP_32_64(st8): + tcg_out_ldst(s, a0, a1, a2, STB); + break; + OP_32_64(st16): + tcg_out_ldst(s, a0, a1, a2, STH); + break; + case INDEX_op_st_i32: + case INDEX_op_st32_i64: + tcg_out_ldst(s, a0, a1, a2, STW); + break; + OP_32_64(add): + c = ARITH_ADD; + goto gen_arith; + OP_32_64(sub): + c = ARITH_SUB; + goto gen_arith; + OP_32_64(and): + c = ARITH_AND; + goto gen_arith; + OP_32_64(andc): + c = ARITH_ANDN; + goto gen_arith; + OP_32_64(or): + c = ARITH_OR; + goto gen_arith; + OP_32_64(orc): + c = ARITH_ORN; + goto gen_arith; + OP_32_64(xor): + c = ARITH_XOR; + goto gen_arith; + case INDEX_op_shl_i32: + c = SHIFT_SLL; + do_shift32: + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out_arithc(s, a0, a1, a2 & 31, c2, c); + break; + case INDEX_op_shr_i32: + c = SHIFT_SRL; + goto do_shift32; + case INDEX_op_sar_i32: + c = SHIFT_SRA; + goto do_shift32; + case INDEX_op_mul_i32: + c = ARITH_UMUL; + goto gen_arith; + + OP_32_64(neg): + c = ARITH_SUB; + goto gen_arith1; + OP_32_64(not): + c = ARITH_ORN; + goto gen_arith1; + + case INDEX_op_div_i32: + tcg_out_div32(s, a0, a1, a2, c2, 0); + break; + case INDEX_op_divu_i32: + tcg_out_div32(s, a0, a1, a2, c2, 1); + break; + + case INDEX_op_brcond_i32: + tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3])); + break; + case INDEX_op_setcond_i32: + tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2); + break; + case INDEX_op_movcond_i32: + tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); + break; + + case INDEX_op_add2_i32: + tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], + args[4], const_args[4], args[5], const_args[5], + ARITH_ADDCC, ARITH_ADDC); + break; + case INDEX_op_sub2_i32: + tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], + args[4], const_args[4], args[5], const_args[5], + ARITH_SUBCC, ARITH_SUBC); + break; + case INDEX_op_mulu2_i32: + c = ARITH_UMUL; + goto do_mul2; + case INDEX_op_muls2_i32: + c = ARITH_SMUL; + do_mul2: + /* The 32-bit multiply insns produce a full 64-bit result. If the + destination register can hold it, we can avoid the slower RDY. 
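+          (On sparcv8plus only the %g and %o registers reliably hold 64 bits, hence the a0 <= TCG_REG_O7 test: when it holds, the high half is extracted with a 64-bit srlx; otherwise it must be read back from %y with rdy.)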
*/ + tcg_out_arithc(s, a0, a2, args[3], const_args[3], c); + if (SPARC64 || a0 <= TCG_REG_O7) { + tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); + } else { + tcg_out_rdy(s, a1); + } + break; + + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, a0, a1, a2, false); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, a0, a1, a2, true); + break; + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, a0, a1, a2); + break; + + case INDEX_op_ld32s_i64: + tcg_out_ldst(s, a0, a1, a2, LDSW); + break; + case INDEX_op_ld_i64: + tcg_out_ldst(s, a0, a1, a2, LDX); + break; + case INDEX_op_st_i64: + tcg_out_ldst(s, a0, a1, a2, STX); + break; + case INDEX_op_shl_i64: + c = SHIFT_SLLX; + do_shift64: + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out_arithc(s, a0, a1, a2 & 63, c2, c); + break; + case INDEX_op_shr_i64: + c = SHIFT_SRLX; + goto do_shift64; + case INDEX_op_sar_i64: + c = SHIFT_SRAX; + goto do_shift64; + case INDEX_op_mul_i64: + c = ARITH_MULX; + goto gen_arith; + case INDEX_op_div_i64: + c = ARITH_SDIVX; + goto gen_arith; + case INDEX_op_divu_i64: + c = ARITH_UDIVX; + goto gen_arith; + case INDEX_op_ext_i32_i64: + case INDEX_op_ext32s_i64: + tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA); + break; + case INDEX_op_extu_i32_i64: + case INDEX_op_ext32u_i64: + tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL); + break; + case INDEX_op_extrl_i64_i32: + tcg_out_mov(s, TCG_TYPE_I32, a0, a1); + break; + case INDEX_op_extrh_i64_i32: + tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX); + break; + + case INDEX_op_brcond_i64: + tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3])); + break; + case INDEX_op_setcond_i64: + tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2); + break; + case INDEX_op_movcond_i64: + tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); + break; + case INDEX_op_add2_i64: + tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], + const_args[4], args[5], const_args[5], false); + break; + case INDEX_op_sub2_i64: + tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], + const_args[4], args[5], const_args[5], true); + break; + case INDEX_op_muluh_i64: + tcg_out_arith(s, args[0], args[1], args[2], ARITH_UMULXHI); + break; + + gen_arith: + tcg_out_arithc(s, a0, a1, a2, c2, c); + break; + + gen_arith1: + tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c); + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. 
*/ + default: + tcg_abort(); + } +} + +static const TCGTargetOpDef sparc_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_br, { } }, + + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "rZ", "r" } }, + { INDEX_op_st16_i32, { "rZ", "r" } }, + { INDEX_op_st_i32, { "rZ", "r" } }, + + { INDEX_op_add_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_mul_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_div_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_divu_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_sub_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_and_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_andc_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_or_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_orc_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_xor_i32, { "r", "rZ", "rJ" } }, + + { INDEX_op_shl_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_shr_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_sar_i32, { "r", "rZ", "rJ" } }, + + { INDEX_op_neg_i32, { "r", "rJ" } }, + { INDEX_op_not_i32, { "r", "rJ" } }, + + { INDEX_op_brcond_i32, { "rZ", "rJ" } }, + { INDEX_op_setcond_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_movcond_i32, { "r", "rZ", "rJ", "rI", "0" } }, + + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, + { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, + { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } }, + { INDEX_op_muls2_i32, { "r", "r", "rZ", "rJ" } }, + + { INDEX_op_ld8u_i64, { "R", "r" } }, + { INDEX_op_ld8s_i64, { "R", "r" } }, + { INDEX_op_ld16u_i64, { "R", "r" } }, + { INDEX_op_ld16s_i64, { "R", "r" } }, + { INDEX_op_ld32u_i64, { "R", "r" } }, + { INDEX_op_ld32s_i64, { "R", "r" } }, + { INDEX_op_ld_i64, { "R", "r" } }, + { INDEX_op_st8_i64, { "RZ", "r" } }, + { INDEX_op_st16_i64, { "RZ", "r" } }, + { INDEX_op_st32_i64, { "RZ", "r" } }, + { INDEX_op_st_i64, { "RZ", "r" } }, + + { INDEX_op_add_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_mul_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_div_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_divu_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_sub_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_and_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_andc_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_or_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_orc_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_xor_i64, { "R", "RZ", "RJ" } }, + + { INDEX_op_shl_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_shr_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_sar_i64, { "R", "RZ", "RJ" } }, + + { INDEX_op_neg_i64, { "R", "RJ" } }, + { INDEX_op_not_i64, { "R", "RJ" } }, + + { INDEX_op_ext32s_i64, { "R", "R" } }, + { INDEX_op_ext32u_i64, { "R", "R" } }, + { INDEX_op_ext_i32_i64, { "R", "r" } }, + { INDEX_op_extu_i32_i64, { "R", "r" } }, + { INDEX_op_extrl_i64_i32, { "r", "R" } }, + { INDEX_op_extrh_i64_i32, { "r", "R" } }, + + { INDEX_op_brcond_i64, { "RZ", "RJ" } }, + { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_movcond_i64, { "R", "RZ", "RJ", "RI", "0" } }, + + { INDEX_op_add2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } }, + { INDEX_op_sub2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } }, + { INDEX_op_muluh_i64, { "R", "RZ", "RZ" } }, + + { INDEX_op_qemu_ld_i32, { "r", "A" } }, + { INDEX_op_qemu_ld_i64, { "R", "A" } }, + { INDEX_op_qemu_st_i32, { "sZ", "A" } }, + { INDEX_op_qemu_st_i64, { "SZ", "A" } }, + + { -1 }, +}; + +static void tcg_target_init(TCGContext *s) +{ + /* Only probe for the platform and capabilities if we havn't already + determined 
maximum values at compile time. */ +#ifndef use_vis3_instructions + { + unsigned long hwcap = qemu_getauxval(AT_HWCAP); + use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0; + } +#endif + + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, ALL_64); + + tcg_regset_set32(tcg_target_call_clobber_regs, 0, + (1 << TCG_REG_G1) | + (1 << TCG_REG_G2) | + (1 << TCG_REG_G3) | + (1 << TCG_REG_G4) | + (1 << TCG_REG_G5) | + (1 << TCG_REG_G6) | + (1 << TCG_REG_G7) | + (1 << TCG_REG_O0) | + (1 << TCG_REG_O1) | + (1 << TCG_REG_O2) | + (1 << TCG_REG_O3) | + (1 << TCG_REG_O4) | + (1 << TCG_REG_O5) | + (1 << TCG_REG_O7)); + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */ + + tcg_add_target_add_op_defs(sparc_op_defs); +} + +#if SPARC64 +# define ELF_HOST_MACHINE EM_SPARCV9 +#else +# define ELF_HOST_MACHINE EM_SPARC32PLUS +# define ELF_HOST_FLAGS EF_SPARC_32PLUS +#endif + +typedef struct { + DebugFrameHeader h; + uint8_t fde_def_cfa[SPARC64 ? 4 : 2]; + uint8_t fde_win_save; + uint8_t fde_ret_save[3]; +} DebugFrame; + +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = -sizeof(void *) & 0x7f, + .h.cie.return_column = 15, /* o7 */ + + /* Total FDE size does not include the "len" member. */ + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + + .fde_def_cfa = { +#if SPARC64 + 12, 30, /* DW_CFA_def_cfa i6, 2047 */ + (2047 & 0x7f) | 0x80, (2047 >> 7) +#else + 13, 30 /* DW_CFA_def_cfa_register i6 */ +#endif + }, + .fde_win_save = 0x2d, /* DW_CFA_GNU_window_save */ + .fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */ +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} + +void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) +{ + uint32_t *ptr = (uint32_t *)jmp_addr; + uintptr_t disp = addr - jmp_addr; + + /* We can reach the entire address space for 32-bit. For 64-bit + the code_gen_buffer can't be larger than 2GB. */ + assert(disp == (int32_t)disp); + + *ptr = CALL | (uint32_t)disp >> 2; + flush_icache_range(jmp_addr, jmp_addr + 4); +} diff --git a/tcg/tcg.c b/tcg/tcg.c index 86208a83ee..550671b94d 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -62,7 +62,8 @@ #include "elf.h" #include "exec/log.h" -/* Forward declarations for functions declared in tcg-target.c and used here. */ +/* Forward declarations for functions declared in tcg-target.inc.c and + used here. 
*/ static void tcg_target_init(TCGContext *s); static void tcg_target_qemu_prologue(TCGContext *s); static void patch_reloc(tcg_insn_unit *code_ptr, int type, @@ -96,7 +97,7 @@ static void tcg_register_jit_int(void *buf, size_t size, size_t debug_frame_size) __attribute__((unused)); -/* Forward declarations for functions declared and used in tcg-target.c. */ +/* Forward declarations for functions declared and used in tcg-target.inc.c. */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str); static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, intptr_t arg2); @@ -250,7 +251,7 @@ TCGLabel *gen_new_label(void) return l; } -#include "tcg-target.c" +#include "tcg-target.inc.c" /* pool based memory allocation */ void *tcg_malloc_internal(TCGContext *s, int size) diff --git a/tcg/tcg.h b/tcg/tcg.h index d181694596..c45329a7cb 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -568,7 +568,7 @@ struct TCGContext { TBContext tb_ctx; - /* The TCGBackendData structure is private to tcg-target.c. */ + /* The TCGBackendData structure is private to tcg-target.inc.c. */ struct TCGBackendData *be; TCGTempSet free_temps[TCG_TYPE_COUNT * 2]; diff --git a/tcg/tci/README b/tcg/tci/README index dc57f076b5..3786b0915b 100644 --- a/tcg/tci/README +++ b/tcg/tci/README @@ -21,7 +21,7 @@ This is what TCI (Tiny Code Interpreter) does. 2) Implementation Like each TCG host frontend, TCI implements the code generator in -tcg-target.c, tcg-target.h. Both files are in directory tcg/tci. +tcg-target.inc.c, tcg-target.h. Both files are in directory tcg/tci. The additional file tcg/tci.c adds the interpreter. @@ -123,7 +123,7 @@ u1 = linux-user-test works would also improve speed for hosts which support byte alignment). * A better disassembler for the pseudo code would be nice (a very primitive - disassembler is included in tcg-target.c). + disassembler is included in tcg-target.inc.c). * It might be useful to have a runtime option which selects the native TCG or TCI, so QEMU would have to include two TCGs. Today, selecting TCI diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c deleted file mode 100644 index 16ce048361..0000000000 --- a/tcg/tci/tcg-target.c +++ /dev/null @@ -1,880 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2009, 2011 Stefan Weil - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "tcg-be-null.h" - -/* TODO list: - * - See TODO comments in code. 
- */ - -/* Marker for missing code. */ -#define TODO() \ - do { \ - fprintf(stderr, "TODO %s:%u: %s()\n", \ - __FILE__, __LINE__, __func__); \ - tcg_abort(); \ - } while (0) - -/* Bitfield n...m (in 32 bit value). */ -#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m) - -/* Macros used in tcg_target_op_defs. */ -#define R "r" -#define RI "ri" -#if TCG_TARGET_REG_BITS == 32 -# define R64 "r", "r" -#else -# define R64 "r" -#endif -#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS -# define L "L", "L" -# define S "S", "S" -#else -# define L "L" -# define S "S" -#endif - -/* TODO: documentation. */ -static const TCGTargetOpDef tcg_target_op_defs[] = { - { INDEX_op_exit_tb, { NULL } }, - { INDEX_op_goto_tb, { NULL } }, - { INDEX_op_br, { NULL } }, - - { INDEX_op_ld8u_i32, { R, R } }, - { INDEX_op_ld8s_i32, { R, R } }, - { INDEX_op_ld16u_i32, { R, R } }, - { INDEX_op_ld16s_i32, { R, R } }, - { INDEX_op_ld_i32, { R, R } }, - { INDEX_op_st8_i32, { R, R } }, - { INDEX_op_st16_i32, { R, R } }, - { INDEX_op_st_i32, { R, R } }, - - { INDEX_op_add_i32, { R, RI, RI } }, - { INDEX_op_sub_i32, { R, RI, RI } }, - { INDEX_op_mul_i32, { R, RI, RI } }, -#if TCG_TARGET_HAS_div_i32 - { INDEX_op_div_i32, { R, R, R } }, - { INDEX_op_divu_i32, { R, R, R } }, - { INDEX_op_rem_i32, { R, R, R } }, - { INDEX_op_remu_i32, { R, R, R } }, -#elif TCG_TARGET_HAS_div2_i32 - { INDEX_op_div2_i32, { R, R, "0", "1", R } }, - { INDEX_op_divu2_i32, { R, R, "0", "1", R } }, -#endif - /* TODO: Does R, RI, RI result in faster code than R, R, RI? - If both operands are constants, we can optimize. */ - { INDEX_op_and_i32, { R, RI, RI } }, -#if TCG_TARGET_HAS_andc_i32 - { INDEX_op_andc_i32, { R, RI, RI } }, -#endif -#if TCG_TARGET_HAS_eqv_i32 - { INDEX_op_eqv_i32, { R, RI, RI } }, -#endif -#if TCG_TARGET_HAS_nand_i32 - { INDEX_op_nand_i32, { R, RI, RI } }, -#endif -#if TCG_TARGET_HAS_nor_i32 - { INDEX_op_nor_i32, { R, RI, RI } }, -#endif - { INDEX_op_or_i32, { R, RI, RI } }, -#if TCG_TARGET_HAS_orc_i32 - { INDEX_op_orc_i32, { R, RI, RI } }, -#endif - { INDEX_op_xor_i32, { R, RI, RI } }, - { INDEX_op_shl_i32, { R, RI, RI } }, - { INDEX_op_shr_i32, { R, RI, RI } }, - { INDEX_op_sar_i32, { R, RI, RI } }, -#if TCG_TARGET_HAS_rot_i32 - { INDEX_op_rotl_i32, { R, RI, RI } }, - { INDEX_op_rotr_i32, { R, RI, RI } }, -#endif -#if TCG_TARGET_HAS_deposit_i32 - { INDEX_op_deposit_i32, { R, "0", R } }, -#endif - - { INDEX_op_brcond_i32, { R, RI } }, - - { INDEX_op_setcond_i32, { R, R, RI } }, -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_setcond_i64, { R, R, RI } }, -#endif /* TCG_TARGET_REG_BITS == 64 */ - -#if TCG_TARGET_REG_BITS == 32 - /* TODO: Support R, R, R, R, RI, RI? Will it be faster? 
*/ - { INDEX_op_add2_i32, { R, R, R, R, R, R } }, - { INDEX_op_sub2_i32, { R, R, R, R, R, R } }, - { INDEX_op_brcond2_i32, { R, R, RI, RI } }, - { INDEX_op_mulu2_i32, { R, R, R, R } }, - { INDEX_op_setcond2_i32, { R, R, R, RI, RI } }, -#endif - -#if TCG_TARGET_HAS_not_i32 - { INDEX_op_not_i32, { R, R } }, -#endif -#if TCG_TARGET_HAS_neg_i32 - { INDEX_op_neg_i32, { R, R } }, -#endif - -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_ld8u_i64, { R, R } }, - { INDEX_op_ld8s_i64, { R, R } }, - { INDEX_op_ld16u_i64, { R, R } }, - { INDEX_op_ld16s_i64, { R, R } }, - { INDEX_op_ld32u_i64, { R, R } }, - { INDEX_op_ld32s_i64, { R, R } }, - { INDEX_op_ld_i64, { R, R } }, - - { INDEX_op_st8_i64, { R, R } }, - { INDEX_op_st16_i64, { R, R } }, - { INDEX_op_st32_i64, { R, R } }, - { INDEX_op_st_i64, { R, R } }, - - { INDEX_op_add_i64, { R, RI, RI } }, - { INDEX_op_sub_i64, { R, RI, RI } }, - { INDEX_op_mul_i64, { R, RI, RI } }, -#if TCG_TARGET_HAS_div_i64 - { INDEX_op_div_i64, { R, R, R } }, - { INDEX_op_divu_i64, { R, R, R } }, - { INDEX_op_rem_i64, { R, R, R } }, - { INDEX_op_remu_i64, { R, R, R } }, -#elif TCG_TARGET_HAS_div2_i64 - { INDEX_op_div2_i64, { R, R, "0", "1", R } }, - { INDEX_op_divu2_i64, { R, R, "0", "1", R } }, -#endif - { INDEX_op_and_i64, { R, RI, RI } }, -#if TCG_TARGET_HAS_andc_i64 - { INDEX_op_andc_i64, { R, RI, RI } }, -#endif -#if TCG_TARGET_HAS_eqv_i64 - { INDEX_op_eqv_i64, { R, RI, RI } }, -#endif -#if TCG_TARGET_HAS_nand_i64 - { INDEX_op_nand_i64, { R, RI, RI } }, -#endif -#if TCG_TARGET_HAS_nor_i64 - { INDEX_op_nor_i64, { R, RI, RI } }, -#endif - { INDEX_op_or_i64, { R, RI, RI } }, -#if TCG_TARGET_HAS_orc_i64 - { INDEX_op_orc_i64, { R, RI, RI } }, -#endif - { INDEX_op_xor_i64, { R, RI, RI } }, - { INDEX_op_shl_i64, { R, RI, RI } }, - { INDEX_op_shr_i64, { R, RI, RI } }, - { INDEX_op_sar_i64, { R, RI, RI } }, -#if TCG_TARGET_HAS_rot_i64 - { INDEX_op_rotl_i64, { R, RI, RI } }, - { INDEX_op_rotr_i64, { R, RI, RI } }, -#endif -#if TCG_TARGET_HAS_deposit_i64 - { INDEX_op_deposit_i64, { R, "0", R } }, -#endif - { INDEX_op_brcond_i64, { R, RI } }, - -#if TCG_TARGET_HAS_ext8s_i64 - { INDEX_op_ext8s_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_ext16s_i64 - { INDEX_op_ext16s_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_ext32s_i64 - { INDEX_op_ext32s_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_ext8u_i64 - { INDEX_op_ext8u_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_ext16u_i64 - { INDEX_op_ext16u_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_ext32u_i64 - { INDEX_op_ext32u_i64, { R, R } }, -#endif - { INDEX_op_ext_i32_i64, { R, R } }, - { INDEX_op_extu_i32_i64, { R, R } }, -#if TCG_TARGET_HAS_bswap16_i64 - { INDEX_op_bswap16_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_bswap32_i64 - { INDEX_op_bswap32_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_bswap64_i64 - { INDEX_op_bswap64_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_not_i64 - { INDEX_op_not_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_neg_i64 - { INDEX_op_neg_i64, { R, R } }, -#endif -#endif /* TCG_TARGET_REG_BITS == 64 */ - - { INDEX_op_qemu_ld_i32, { R, L } }, - { INDEX_op_qemu_ld_i64, { R64, L } }, - - { INDEX_op_qemu_st_i32, { R, S } }, - { INDEX_op_qemu_st_i64, { R64, S } }, - -#if TCG_TARGET_HAS_ext8s_i32 - { INDEX_op_ext8s_i32, { R, R } }, -#endif -#if TCG_TARGET_HAS_ext16s_i32 - { INDEX_op_ext16s_i32, { R, R } }, -#endif -#if TCG_TARGET_HAS_ext8u_i32 - { INDEX_op_ext8u_i32, { R, R } }, -#endif -#if TCG_TARGET_HAS_ext16u_i32 - { INDEX_op_ext16u_i32, { R, R } }, -#endif - -#if TCG_TARGET_HAS_bswap16_i32 - { INDEX_op_bswap16_i32, { 
R, R } }, -#endif -#if TCG_TARGET_HAS_bswap32_i32 - { INDEX_op_bswap32_i32, { R, R } }, -#endif - - { -1 }, -}; - -static const int tcg_target_reg_alloc_order[] = { - TCG_REG_R0, - TCG_REG_R1, - TCG_REG_R2, - TCG_REG_R3, -#if 0 /* used for TCG_REG_CALL_STACK */ - TCG_REG_R4, -#endif - TCG_REG_R5, - TCG_REG_R6, - TCG_REG_R7, -#if TCG_TARGET_NB_REGS >= 16 - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10, - TCG_REG_R11, - TCG_REG_R12, - TCG_REG_R13, - TCG_REG_R14, - TCG_REG_R15, -#endif -}; - -#if MAX_OPC_PARAM_IARGS != 5 -# error Fix needed, number of supported input arguments changed! -#endif - -static const int tcg_target_call_iarg_regs[] = { - TCG_REG_R0, - TCG_REG_R1, - TCG_REG_R2, - TCG_REG_R3, -#if 0 /* used for TCG_REG_CALL_STACK */ - TCG_REG_R4, -#endif - TCG_REG_R5, -#if TCG_TARGET_REG_BITS == 32 - /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */ - TCG_REG_R6, - TCG_REG_R7, -#if TCG_TARGET_NB_REGS >= 16 - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10, -#else -# error Too few input registers available -#endif -#endif -}; - -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_R0, -#if TCG_TARGET_REG_BITS == 32 - TCG_REG_R1 -#endif -}; - -#ifndef NDEBUG -static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "r00", - "r01", - "r02", - "r03", - "r04", - "r05", - "r06", - "r07", -#if TCG_TARGET_NB_REGS >= 16 - "r08", - "r09", - "r10", - "r11", - "r12", - "r13", - "r14", - "r15", -#if TCG_TARGET_NB_REGS >= 32 - "r16", - "r17", - "r18", - "r19", - "r20", - "r21", - "r22", - "r23", - "r24", - "r25", - "r26", - "r27", - "r28", - "r29", - "r30", - "r31" -#endif -#endif -}; -#endif - -static void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - /* tcg_out_reloc always uses the same type, addend. */ - assert(type == sizeof(tcg_target_long)); - assert(addend == 0); - assert(value != 0); - if (TCG_TARGET_REG_BITS == 32) { - tcg_patch32(code_ptr, value); - } else { - tcg_patch64(code_ptr, value); - } -} - -/* Parse target specific constraints. */ -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str = *pct_str; - switch (ct_str[0]) { - case 'r': - case 'L': /* qemu_ld constraint */ - case 'S': /* qemu_st constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, BIT(TCG_TARGET_NB_REGS) - 1); - break; - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - return 0; -} - -#if defined(CONFIG_DEBUG_TCG_INTERPRETER) -/* Show current bytecode. Used by tcg interpreter. */ -void tci_disas(uint8_t opc) -{ - const TCGOpDef *def = &tcg_op_defs[opc]; - fprintf(stderr, "TCG %s %u, %u, %u\n", - def->name, def->nb_oargs, def->nb_iargs, def->nb_cargs); -} -#endif - -/* Write value (native size). */ -static void tcg_out_i(TCGContext *s, tcg_target_ulong v) -{ - if (TCG_TARGET_REG_BITS == 32) { - tcg_out32(s, v); - } else { - tcg_out64(s, v); - } -} - -/* Write opcode. */ -static void tcg_out_op_t(TCGContext *s, TCGOpcode op) -{ - tcg_out8(s, op); - tcg_out8(s, 0); -} - -/* Write register. */ -static void tcg_out_r(TCGContext *s, TCGArg t0) -{ - assert(t0 < TCG_TARGET_NB_REGS); - tcg_out8(s, t0); -} - -/* Write register or constant (native size). */ -static void tcg_out_ri(TCGContext *s, int const_arg, TCGArg arg) -{ - if (const_arg) { - assert(const_arg == 1); - tcg_out8(s, TCG_CONST); - tcg_out_i(s, arg); - } else { - tcg_out_r(s, arg); - } -} - -/* Write register or constant (32 bit). 
*/ -static void tcg_out_ri32(TCGContext *s, int const_arg, TCGArg arg) -{ - if (const_arg) { - assert(const_arg == 1); - tcg_out8(s, TCG_CONST); - tcg_out32(s, arg); - } else { - tcg_out_r(s, arg); - } -} - -#if TCG_TARGET_REG_BITS == 64 -/* Write register or constant (64 bit). */ -static void tcg_out_ri64(TCGContext *s, int const_arg, TCGArg arg) -{ - if (const_arg) { - assert(const_arg == 1); - tcg_out8(s, TCG_CONST); - tcg_out64(s, arg); - } else { - tcg_out_r(s, arg); - } -} -#endif - -/* Write label. */ -static void tci_out_label(TCGContext *s, TCGLabel *label) -{ - if (label->has_value) { - tcg_out_i(s, label->u.value); - assert(label->u.value); - } else { - tcg_out_reloc(s, s->code_ptr, sizeof(tcg_target_ulong), label, 0); - s->code_ptr += sizeof(tcg_target_ulong); - } -} - -static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, - intptr_t arg2) -{ - uint8_t *old_code_ptr = s->code_ptr; - if (type == TCG_TYPE_I32) { - tcg_out_op_t(s, INDEX_op_ld_i32); - tcg_out_r(s, ret); - tcg_out_r(s, arg1); - tcg_out32(s, arg2); - } else { - assert(type == TCG_TYPE_I64); -#if TCG_TARGET_REG_BITS == 64 - tcg_out_op_t(s, INDEX_op_ld_i64); - tcg_out_r(s, ret); - tcg_out_r(s, arg1); - assert(arg2 == (int32_t)arg2); - tcg_out32(s, arg2); -#else - TODO(); -#endif - } - old_code_ptr[1] = s->code_ptr - old_code_ptr; -} - -static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) -{ - uint8_t *old_code_ptr = s->code_ptr; - assert(ret != arg); -#if TCG_TARGET_REG_BITS == 32 - tcg_out_op_t(s, INDEX_op_mov_i32); -#else - tcg_out_op_t(s, INDEX_op_mov_i64); -#endif - tcg_out_r(s, ret); - tcg_out_r(s, arg); - old_code_ptr[1] = s->code_ptr - old_code_ptr; -} - -static void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg t0, tcg_target_long arg) -{ - uint8_t *old_code_ptr = s->code_ptr; - uint32_t arg32 = arg; - if (type == TCG_TYPE_I32 || arg == arg32) { - tcg_out_op_t(s, INDEX_op_movi_i32); - tcg_out_r(s, t0); - tcg_out32(s, arg32); - } else { - assert(type == TCG_TYPE_I64); -#if TCG_TARGET_REG_BITS == 64 - tcg_out_op_t(s, INDEX_op_movi_i64); - tcg_out_r(s, t0); - tcg_out64(s, arg); -#else - TODO(); -#endif - } - old_code_ptr[1] = s->code_ptr - old_code_ptr; -} - -static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) -{ - uint8_t *old_code_ptr = s->code_ptr; - tcg_out_op_t(s, INDEX_op_call); - tcg_out_ri(s, 1, (uintptr_t)arg); - old_code_ptr[1] = s->code_ptr - old_code_ptr; -} - -static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, - const int *const_args) -{ - uint8_t *old_code_ptr = s->code_ptr; - - tcg_out_op_t(s, opc); - - switch (opc) { - case INDEX_op_exit_tb: - tcg_out64(s, args[0]); - break; - case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { - /* Direct jump method. */ - assert(args[0] < ARRAY_SIZE(s->tb_jmp_offset)); - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); - tcg_out32(s, 0); - } else { - /* Indirect jump method. 
*/ - TODO(); - } - assert(args[0] < ARRAY_SIZE(s->tb_next_offset)); - s->tb_next_offset[args[0]] = tcg_current_code_size(s); - break; - case INDEX_op_br: - tci_out_label(s, arg_label(args[0])); - break; - case INDEX_op_setcond_i32: - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_ri32(s, const_args[2], args[2]); - tcg_out8(s, args[3]); /* condition */ - break; -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_setcond2_i32: - /* setcond2_i32 cond, t0, t1_low, t1_high, t2_low, t2_high */ - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_r(s, args[2]); - tcg_out_ri32(s, const_args[3], args[3]); - tcg_out_ri32(s, const_args[4], args[4]); - tcg_out8(s, args[5]); /* condition */ - break; -#elif TCG_TARGET_REG_BITS == 64 - case INDEX_op_setcond_i64: - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_ri64(s, const_args[2], args[2]); - tcg_out8(s, args[3]); /* condition */ - break; -#endif - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - assert(args[2] == (int32_t)args[2]); - tcg_out32(s, args[2]); - break; - case INDEX_op_add_i32: - case INDEX_op_sub_i32: - case INDEX_op_mul_i32: - case INDEX_op_and_i32: - case INDEX_op_andc_i32: /* Optional (TCG_TARGET_HAS_andc_i32). */ - case INDEX_op_eqv_i32: /* Optional (TCG_TARGET_HAS_eqv_i32). */ - case INDEX_op_nand_i32: /* Optional (TCG_TARGET_HAS_nand_i32). */ - case INDEX_op_nor_i32: /* Optional (TCG_TARGET_HAS_nor_i32). */ - case INDEX_op_or_i32: - case INDEX_op_orc_i32: /* Optional (TCG_TARGET_HAS_orc_i32). */ - case INDEX_op_xor_i32: - case INDEX_op_shl_i32: - case INDEX_op_shr_i32: - case INDEX_op_sar_i32: - case INDEX_op_rotl_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */ - case INDEX_op_rotr_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */ - tcg_out_r(s, args[0]); - tcg_out_ri32(s, const_args[1], args[1]); - tcg_out_ri32(s, const_args[2], args[2]); - break; - case INDEX_op_deposit_i32: /* Optional (TCG_TARGET_HAS_deposit_i32). */ - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_r(s, args[2]); - assert(args[3] <= UINT8_MAX); - tcg_out8(s, args[3]); - assert(args[4] <= UINT8_MAX); - tcg_out8(s, args[4]); - break; - -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_add_i64: - case INDEX_op_sub_i64: - case INDEX_op_mul_i64: - case INDEX_op_and_i64: - case INDEX_op_andc_i64: /* Optional (TCG_TARGET_HAS_andc_i64). */ - case INDEX_op_eqv_i64: /* Optional (TCG_TARGET_HAS_eqv_i64). */ - case INDEX_op_nand_i64: /* Optional (TCG_TARGET_HAS_nand_i64). */ - case INDEX_op_nor_i64: /* Optional (TCG_TARGET_HAS_nor_i64). */ - case INDEX_op_or_i64: - case INDEX_op_orc_i64: /* Optional (TCG_TARGET_HAS_orc_i64). */ - case INDEX_op_xor_i64: - case INDEX_op_shl_i64: - case INDEX_op_shr_i64: - case INDEX_op_sar_i64: - case INDEX_op_rotl_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */ - case INDEX_op_rotr_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */ - tcg_out_r(s, args[0]); - tcg_out_ri64(s, const_args[1], args[1]); - tcg_out_ri64(s, const_args[2], args[2]); - break; - case INDEX_op_deposit_i64: /* Optional (TCG_TARGET_HAS_deposit_i64). 
*/ - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_r(s, args[2]); - assert(args[3] <= UINT8_MAX); - tcg_out8(s, args[3]); - assert(args[4] <= UINT8_MAX); - tcg_out8(s, args[4]); - break; - case INDEX_op_div_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ - case INDEX_op_divu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ - case INDEX_op_rem_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ - case INDEX_op_remu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ - TODO(); - break; - case INDEX_op_div2_i64: /* Optional (TCG_TARGET_HAS_div2_i64). */ - case INDEX_op_divu2_i64: /* Optional (TCG_TARGET_HAS_div2_i64). */ - TODO(); - break; - case INDEX_op_brcond_i64: - tcg_out_r(s, args[0]); - tcg_out_ri64(s, const_args[1], args[1]); - tcg_out8(s, args[2]); /* condition */ - tci_out_label(s, arg_label(args[3])); - break; - case INDEX_op_bswap16_i64: /* Optional (TCG_TARGET_HAS_bswap16_i64). */ - case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */ - case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */ - case INDEX_op_not_i64: /* Optional (TCG_TARGET_HAS_not_i64). */ - case INDEX_op_neg_i64: /* Optional (TCG_TARGET_HAS_neg_i64). */ - case INDEX_op_ext8s_i64: /* Optional (TCG_TARGET_HAS_ext8s_i64). */ - case INDEX_op_ext8u_i64: /* Optional (TCG_TARGET_HAS_ext8u_i64). */ - case INDEX_op_ext16s_i64: /* Optional (TCG_TARGET_HAS_ext16s_i64). */ - case INDEX_op_ext16u_i64: /* Optional (TCG_TARGET_HAS_ext16u_i64). */ - case INDEX_op_ext32s_i64: /* Optional (TCG_TARGET_HAS_ext32s_i64). */ - case INDEX_op_ext32u_i64: /* Optional (TCG_TARGET_HAS_ext32u_i64). */ - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: -#endif /* TCG_TARGET_REG_BITS == 64 */ - case INDEX_op_neg_i32: /* Optional (TCG_TARGET_HAS_neg_i32). */ - case INDEX_op_not_i32: /* Optional (TCG_TARGET_HAS_not_i32). */ - case INDEX_op_ext8s_i32: /* Optional (TCG_TARGET_HAS_ext8s_i32). */ - case INDEX_op_ext16s_i32: /* Optional (TCG_TARGET_HAS_ext16s_i32). */ - case INDEX_op_ext8u_i32: /* Optional (TCG_TARGET_HAS_ext8u_i32). */ - case INDEX_op_ext16u_i32: /* Optional (TCG_TARGET_HAS_ext16u_i32). */ - case INDEX_op_bswap16_i32: /* Optional (TCG_TARGET_HAS_bswap16_i32). */ - case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */ - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - break; - case INDEX_op_div_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ - case INDEX_op_divu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ - case INDEX_op_rem_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ - case INDEX_op_remu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ - tcg_out_r(s, args[0]); - tcg_out_ri32(s, const_args[1], args[1]); - tcg_out_ri32(s, const_args[2], args[2]); - break; - case INDEX_op_div2_i32: /* Optional (TCG_TARGET_HAS_div2_i32). */ - case INDEX_op_divu2_i32: /* Optional (TCG_TARGET_HAS_div2_i32). 
*/ - TODO(); - break; -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_add2_i32: - case INDEX_op_sub2_i32: - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_r(s, args[2]); - tcg_out_r(s, args[3]); - tcg_out_r(s, args[4]); - tcg_out_r(s, args[5]); - break; - case INDEX_op_brcond2_i32: - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_ri32(s, const_args[2], args[2]); - tcg_out_ri32(s, const_args[3], args[3]); - tcg_out8(s, args[4]); /* condition */ - tci_out_label(s, arg_label(args[5])); - break; - case INDEX_op_mulu2_i32: - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_r(s, args[2]); - tcg_out_r(s, args[3]); - break; -#endif - case INDEX_op_brcond_i32: - tcg_out_r(s, args[0]); - tcg_out_ri32(s, const_args[1], args[1]); - tcg_out8(s, args[2]); /* condition */ - tci_out_label(s, arg_label(args[3])); - break; - case INDEX_op_qemu_ld_i32: - tcg_out_r(s, *args++); - tcg_out_r(s, *args++); - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_out_r(s, *args++); - } - tcg_out_i(s, *args++); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_r(s, *args++); - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_r(s, *args++); - } - tcg_out_r(s, *args++); - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_out_r(s, *args++); - } - tcg_out_i(s, *args++); - break; - case INDEX_op_qemu_st_i32: - tcg_out_r(s, *args++); - tcg_out_r(s, *args++); - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_out_r(s, *args++); - } - tcg_out_i(s, *args++); - break; - case INDEX_op_qemu_st_i64: - tcg_out_r(s, *args++); - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_r(s, *args++); - } - tcg_out_r(s, *args++); - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_out_r(s, *args++); - } - tcg_out_i(s, *args++); - break; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ - case INDEX_op_movi_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - default: - tcg_abort(); - } - old_code_ptr[1] = s->code_ptr - old_code_ptr; -} - -static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, - intptr_t arg2) -{ - uint8_t *old_code_ptr = s->code_ptr; - if (type == TCG_TYPE_I32) { - tcg_out_op_t(s, INDEX_op_st_i32); - tcg_out_r(s, arg); - tcg_out_r(s, arg1); - tcg_out32(s, arg2); - } else { - assert(type == TCG_TYPE_I64); -#if TCG_TARGET_REG_BITS == 64 - tcg_out_op_t(s, INDEX_op_st_i64); - tcg_out_r(s, arg); - tcg_out_r(s, arg1); - tcg_out32(s, arg2); -#else - TODO(); -#endif - } - old_code_ptr[1] = s->code_ptr - old_code_ptr; -} - -/* Test if a constant matches the constraint. */ -static int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - /* No need to return 0 or 1, 0 or != 0 is good enough. */ - return arg_ct->ct & TCG_CT_CONST; -} - -static void tcg_target_init(TCGContext *s) -{ -#if defined(CONFIG_DEBUG_TCG_INTERPRETER) - const char *envval = getenv("DEBUG_TCG"); - if (envval) { - qemu_set_log(strtol(envval, NULL, 0)); - } -#endif - - /* The current code uses uint8_t for tcg operations. */ - assert(tcg_op_defs_max <= UINT8_MAX); - - /* Registers available for 32 bit operations. */ - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, - BIT(TCG_TARGET_NB_REGS) - 1); - /* Registers available for 64 bit operations. */ - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, - BIT(TCG_TARGET_NB_REGS) - 1); - /* TODO: Which registers should be set here? 
*/ - tcg_regset_set32(tcg_target_call_clobber_regs, 0, - BIT(TCG_TARGET_NB_REGS) - 1); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); - tcg_add_target_add_op_defs(tcg_target_op_defs); - - /* We use negative offsets from "sp" so that we can distinguish - stores that might pretend to be call arguments. */ - tcg_set_frame(s, TCG_REG_CALL_STACK, - -CPU_TEMP_BUF_NLONGS * sizeof(long), - CPU_TEMP_BUF_NLONGS * sizeof(long)); -} - -/* Generate global QEMU prologue and epilogue code. */ -static inline void tcg_target_qemu_prologue(TCGContext *s) -{ -} diff --git a/tcg/tci/tcg-target.inc.c b/tcg/tci/tcg-target.inc.c new file mode 100644 index 0000000000..16ce048361 --- /dev/null +++ b/tcg/tci/tcg-target.inc.c @@ -0,0 +1,880 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2009, 2011 Stefan Weil + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "tcg-be-null.h" + +/* TODO list: + * - See TODO comments in code. + */ + +/* Marker for missing code. */ +#define TODO() \ + do { \ + fprintf(stderr, "TODO %s:%u: %s()\n", \ + __FILE__, __LINE__, __func__); \ + tcg_abort(); \ + } while (0) + +/* Bitfield n...m (in 32 bit value). */ +#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m) + +/* Macros used in tcg_target_op_defs. */ +#define R "r" +#define RI "ri" +#if TCG_TARGET_REG_BITS == 32 +# define R64 "r", "r" +#else +# define R64 "r" +#endif +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS +# define L "L", "L" +# define S "S", "S" +#else +# define L "L" +# define S "S" +#endif + +/* TODO: documentation. 
*/ +static const TCGTargetOpDef tcg_target_op_defs[] = { + { INDEX_op_exit_tb, { NULL } }, + { INDEX_op_goto_tb, { NULL } }, + { INDEX_op_br, { NULL } }, + + { INDEX_op_ld8u_i32, { R, R } }, + { INDEX_op_ld8s_i32, { R, R } }, + { INDEX_op_ld16u_i32, { R, R } }, + { INDEX_op_ld16s_i32, { R, R } }, + { INDEX_op_ld_i32, { R, R } }, + { INDEX_op_st8_i32, { R, R } }, + { INDEX_op_st16_i32, { R, R } }, + { INDEX_op_st_i32, { R, R } }, + + { INDEX_op_add_i32, { R, RI, RI } }, + { INDEX_op_sub_i32, { R, RI, RI } }, + { INDEX_op_mul_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_div_i32 + { INDEX_op_div_i32, { R, R, R } }, + { INDEX_op_divu_i32, { R, R, R } }, + { INDEX_op_rem_i32, { R, R, R } }, + { INDEX_op_remu_i32, { R, R, R } }, +#elif TCG_TARGET_HAS_div2_i32 + { INDEX_op_div2_i32, { R, R, "0", "1", R } }, + { INDEX_op_divu2_i32, { R, R, "0", "1", R } }, +#endif + /* TODO: Does R, RI, RI result in faster code than R, R, RI? + If both operands are constants, we can optimize. */ + { INDEX_op_and_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_andc_i32 + { INDEX_op_andc_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_eqv_i32 + { INDEX_op_eqv_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nand_i32 + { INDEX_op_nand_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nor_i32 + { INDEX_op_nor_i32, { R, RI, RI } }, +#endif + { INDEX_op_or_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_orc_i32 + { INDEX_op_orc_i32, { R, RI, RI } }, +#endif + { INDEX_op_xor_i32, { R, RI, RI } }, + { INDEX_op_shl_i32, { R, RI, RI } }, + { INDEX_op_shr_i32, { R, RI, RI } }, + { INDEX_op_sar_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_rot_i32 + { INDEX_op_rotl_i32, { R, RI, RI } }, + { INDEX_op_rotr_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_deposit_i32 + { INDEX_op_deposit_i32, { R, "0", R } }, +#endif + + { INDEX_op_brcond_i32, { R, RI } }, + + { INDEX_op_setcond_i32, { R, R, RI } }, +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_setcond_i64, { R, R, RI } }, +#endif /* TCG_TARGET_REG_BITS == 64 */ + +#if TCG_TARGET_REG_BITS == 32 + /* TODO: Support R, R, R, R, RI, RI? Will it be faster? 
*/ + { INDEX_op_add2_i32, { R, R, R, R, R, R } }, + { INDEX_op_sub2_i32, { R, R, R, R, R, R } }, + { INDEX_op_brcond2_i32, { R, R, RI, RI } }, + { INDEX_op_mulu2_i32, { R, R, R, R } }, + { INDEX_op_setcond2_i32, { R, R, R, RI, RI } }, +#endif + +#if TCG_TARGET_HAS_not_i32 + { INDEX_op_not_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_neg_i32 + { INDEX_op_neg_i32, { R, R } }, +#endif + +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_ld8u_i64, { R, R } }, + { INDEX_op_ld8s_i64, { R, R } }, + { INDEX_op_ld16u_i64, { R, R } }, + { INDEX_op_ld16s_i64, { R, R } }, + { INDEX_op_ld32u_i64, { R, R } }, + { INDEX_op_ld32s_i64, { R, R } }, + { INDEX_op_ld_i64, { R, R } }, + + { INDEX_op_st8_i64, { R, R } }, + { INDEX_op_st16_i64, { R, R } }, + { INDEX_op_st32_i64, { R, R } }, + { INDEX_op_st_i64, { R, R } }, + + { INDEX_op_add_i64, { R, RI, RI } }, + { INDEX_op_sub_i64, { R, RI, RI } }, + { INDEX_op_mul_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_div_i64 + { INDEX_op_div_i64, { R, R, R } }, + { INDEX_op_divu_i64, { R, R, R } }, + { INDEX_op_rem_i64, { R, R, R } }, + { INDEX_op_remu_i64, { R, R, R } }, +#elif TCG_TARGET_HAS_div2_i64 + { INDEX_op_div2_i64, { R, R, "0", "1", R } }, + { INDEX_op_divu2_i64, { R, R, "0", "1", R } }, +#endif + { INDEX_op_and_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_andc_i64 + { INDEX_op_andc_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_eqv_i64 + { INDEX_op_eqv_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nand_i64 + { INDEX_op_nand_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nor_i64 + { INDEX_op_nor_i64, { R, RI, RI } }, +#endif + { INDEX_op_or_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_orc_i64 + { INDEX_op_orc_i64, { R, RI, RI } }, +#endif + { INDEX_op_xor_i64, { R, RI, RI } }, + { INDEX_op_shl_i64, { R, RI, RI } }, + { INDEX_op_shr_i64, { R, RI, RI } }, + { INDEX_op_sar_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_rot_i64 + { INDEX_op_rotl_i64, { R, RI, RI } }, + { INDEX_op_rotr_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_deposit_i64 + { INDEX_op_deposit_i64, { R, "0", R } }, +#endif + { INDEX_op_brcond_i64, { R, RI } }, + +#if TCG_TARGET_HAS_ext8s_i64 + { INDEX_op_ext8s_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16s_i64 + { INDEX_op_ext16s_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext32s_i64 + { INDEX_op_ext32s_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext8u_i64 + { INDEX_op_ext8u_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16u_i64 + { INDEX_op_ext16u_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext32u_i64 + { INDEX_op_ext32u_i64, { R, R } }, +#endif + { INDEX_op_ext_i32_i64, { R, R } }, + { INDEX_op_extu_i32_i64, { R, R } }, +#if TCG_TARGET_HAS_bswap16_i64 + { INDEX_op_bswap16_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_bswap32_i64 + { INDEX_op_bswap32_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_bswap64_i64 + { INDEX_op_bswap64_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_not_i64 + { INDEX_op_not_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_neg_i64 + { INDEX_op_neg_i64, { R, R } }, +#endif +#endif /* TCG_TARGET_REG_BITS == 64 */ + + { INDEX_op_qemu_ld_i32, { R, L } }, + { INDEX_op_qemu_ld_i64, { R64, L } }, + + { INDEX_op_qemu_st_i32, { R, S } }, + { INDEX_op_qemu_st_i64, { R64, S } }, + +#if TCG_TARGET_HAS_ext8s_i32 + { INDEX_op_ext8s_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16s_i32 + { INDEX_op_ext16s_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext8u_i32 + { INDEX_op_ext8u_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16u_i32 + { INDEX_op_ext16u_i32, { R, R } }, +#endif + +#if TCG_TARGET_HAS_bswap16_i32 + { INDEX_op_bswap16_i32, { 
R, R } }, +#endif +#if TCG_TARGET_HAS_bswap32_i32 + { INDEX_op_bswap32_i32, { R, R } }, +#endif + + { -1 }, +}; + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_R0, + TCG_REG_R1, + TCG_REG_R2, + TCG_REG_R3, +#if 0 /* used for TCG_REG_CALL_STACK */ + TCG_REG_R4, +#endif + TCG_REG_R5, + TCG_REG_R6, + TCG_REG_R7, +#if TCG_TARGET_NB_REGS >= 16 + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R15, +#endif +}; + +#if MAX_OPC_PARAM_IARGS != 5 +# error Fix needed, number of supported input arguments changed! +#endif + +static const int tcg_target_call_iarg_regs[] = { + TCG_REG_R0, + TCG_REG_R1, + TCG_REG_R2, + TCG_REG_R3, +#if 0 /* used for TCG_REG_CALL_STACK */ + TCG_REG_R4, +#endif + TCG_REG_R5, +#if TCG_TARGET_REG_BITS == 32 + /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */ + TCG_REG_R6, + TCG_REG_R7, +#if TCG_TARGET_NB_REGS >= 16 + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, +#else +# error Too few input registers available +#endif +#endif +}; + +static const int tcg_target_call_oarg_regs[] = { + TCG_REG_R0, +#if TCG_TARGET_REG_BITS == 32 + TCG_REG_R1 +#endif +}; + +#ifndef NDEBUG +static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "r00", + "r01", + "r02", + "r03", + "r04", + "r05", + "r06", + "r07", +#if TCG_TARGET_NB_REGS >= 16 + "r08", + "r09", + "r10", + "r11", + "r12", + "r13", + "r14", + "r15", +#if TCG_TARGET_NB_REGS >= 32 + "r16", + "r17", + "r18", + "r19", + "r20", + "r21", + "r22", + "r23", + "r24", + "r25", + "r26", + "r27", + "r28", + "r29", + "r30", + "r31" +#endif +#endif +}; +#endif + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + /* tcg_out_reloc always uses the same type, addend. */ + assert(type == sizeof(tcg_target_long)); + assert(addend == 0); + assert(value != 0); + if (TCG_TARGET_REG_BITS == 32) { + tcg_patch32(code_ptr, value); + } else { + tcg_patch64(code_ptr, value); + } +} + +/* Parse target specific constraints. */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str = *pct_str; + switch (ct_str[0]) { + case 'r': + case 'L': /* qemu_ld constraint */ + case 'S': /* qemu_st constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, BIT(TCG_TARGET_NB_REGS) - 1); + break; + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + return 0; +} + +#if defined(CONFIG_DEBUG_TCG_INTERPRETER) +/* Show current bytecode. Used by tcg interpreter. */ +void tci_disas(uint8_t opc) +{ + const TCGOpDef *def = &tcg_op_defs[opc]; + fprintf(stderr, "TCG %s %u, %u, %u\n", + def->name, def->nb_oargs, def->nb_iargs, def->nb_cargs); +} +#endif + +/* Write value (native size). */ +static void tcg_out_i(TCGContext *s, tcg_target_ulong v) +{ + if (TCG_TARGET_REG_BITS == 32) { + tcg_out32(s, v); + } else { + tcg_out64(s, v); + } +} + +/* Write opcode. */ +static void tcg_out_op_t(TCGContext *s, TCGOpcode op) +{ + tcg_out8(s, op); + tcg_out8(s, 0); +} + +/* Write register. */ +static void tcg_out_r(TCGContext *s, TCGArg t0) +{ + assert(t0 < TCG_TARGET_NB_REGS); + tcg_out8(s, t0); +} + +/* Write register or constant (native size). */ +static void tcg_out_ri(TCGContext *s, int const_arg, TCGArg arg) +{ + if (const_arg) { + assert(const_arg == 1); + tcg_out8(s, TCG_CONST); + tcg_out_i(s, arg); + } else { + tcg_out_r(s, arg); + } +} + +/* Write register or constant (32 bit). 
*/ +static void tcg_out_ri32(TCGContext *s, int const_arg, TCGArg arg) +{ + if (const_arg) { + assert(const_arg == 1); + tcg_out8(s, TCG_CONST); + tcg_out32(s, arg); + } else { + tcg_out_r(s, arg); + } +} + +#if TCG_TARGET_REG_BITS == 64 +/* Write register or constant (64 bit). */ +static void tcg_out_ri64(TCGContext *s, int const_arg, TCGArg arg) +{ + if (const_arg) { + assert(const_arg == 1); + tcg_out8(s, TCG_CONST); + tcg_out64(s, arg); + } else { + tcg_out_r(s, arg); + } +} +#endif + +/* Write label. */ +static void tci_out_label(TCGContext *s, TCGLabel *label) +{ + if (label->has_value) { + tcg_out_i(s, label->u.value); + assert(label->u.value); + } else { + tcg_out_reloc(s, s->code_ptr, sizeof(tcg_target_ulong), label, 0); + s->code_ptr += sizeof(tcg_target_ulong); + } +} + +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, + intptr_t arg2) +{ + uint8_t *old_code_ptr = s->code_ptr; + if (type == TCG_TYPE_I32) { + tcg_out_op_t(s, INDEX_op_ld_i32); + tcg_out_r(s, ret); + tcg_out_r(s, arg1); + tcg_out32(s, arg2); + } else { + assert(type == TCG_TYPE_I64); +#if TCG_TARGET_REG_BITS == 64 + tcg_out_op_t(s, INDEX_op_ld_i64); + tcg_out_r(s, ret); + tcg_out_r(s, arg1); + assert(arg2 == (int32_t)arg2); + tcg_out32(s, arg2); +#else + TODO(); +#endif + } + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + uint8_t *old_code_ptr = s->code_ptr; + assert(ret != arg); +#if TCG_TARGET_REG_BITS == 32 + tcg_out_op_t(s, INDEX_op_mov_i32); +#else + tcg_out_op_t(s, INDEX_op_mov_i64); +#endif + tcg_out_r(s, ret); + tcg_out_r(s, arg); + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg t0, tcg_target_long arg) +{ + uint8_t *old_code_ptr = s->code_ptr; + uint32_t arg32 = arg; + if (type == TCG_TYPE_I32 || arg == arg32) { + tcg_out_op_t(s, INDEX_op_movi_i32); + tcg_out_r(s, t0); + tcg_out32(s, arg32); + } else { + assert(type == TCG_TYPE_I64); +#if TCG_TARGET_REG_BITS == 64 + tcg_out_op_t(s, INDEX_op_movi_i64); + tcg_out_r(s, t0); + tcg_out64(s, arg); +#else + TODO(); +#endif + } + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +{ + uint8_t *old_code_ptr = s->code_ptr; + tcg_out_op_t(s, INDEX_op_call); + tcg_out_ri(s, 1, (uintptr_t)arg); + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, + const int *const_args) +{ + uint8_t *old_code_ptr = s->code_ptr; + + tcg_out_op_t(s, opc); + + switch (opc) { + case INDEX_op_exit_tb: + tcg_out64(s, args[0]); + break; + case INDEX_op_goto_tb: + if (s->tb_jmp_offset) { + /* Direct jump method. */ + assert(args[0] < ARRAY_SIZE(s->tb_jmp_offset)); + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + tcg_out32(s, 0); + } else { + /* Indirect jump method. 
*/ + TODO(); + } + assert(args[0] < ARRAY_SIZE(s->tb_next_offset)); + s->tb_next_offset[args[0]] = tcg_current_code_size(s); + break; + case INDEX_op_br: + tci_out_label(s, arg_label(args[0])); + break; + case INDEX_op_setcond_i32: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_ri32(s, const_args[2], args[2]); + tcg_out8(s, args[3]); /* condition */ + break; +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_setcond2_i32: + /* setcond2_i32 cond, t0, t1_low, t1_high, t2_low, t2_high */ + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + tcg_out_ri32(s, const_args[3], args[3]); + tcg_out_ri32(s, const_args[4], args[4]); + tcg_out8(s, args[5]); /* condition */ + break; +#elif TCG_TARGET_REG_BITS == 64 + case INDEX_op_setcond_i64: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_ri64(s, const_args[2], args[2]); + tcg_out8(s, args[3]); /* condition */ + break; +#endif + case INDEX_op_ld8u_i32: + case INDEX_op_ld8s_i32: + case INDEX_op_ld16u_i32: + case INDEX_op_ld16s_i32: + case INDEX_op_ld_i32: + case INDEX_op_st8_i32: + case INDEX_op_st16_i32: + case INDEX_op_st_i32: + case INDEX_op_ld8u_i64: + case INDEX_op_ld8s_i64: + case INDEX_op_ld16u_i64: + case INDEX_op_ld16s_i64: + case INDEX_op_ld32u_i64: + case INDEX_op_ld32s_i64: + case INDEX_op_ld_i64: + case INDEX_op_st8_i64: + case INDEX_op_st16_i64: + case INDEX_op_st32_i64: + case INDEX_op_st_i64: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + assert(args[2] == (int32_t)args[2]); + tcg_out32(s, args[2]); + break; + case INDEX_op_add_i32: + case INDEX_op_sub_i32: + case INDEX_op_mul_i32: + case INDEX_op_and_i32: + case INDEX_op_andc_i32: /* Optional (TCG_TARGET_HAS_andc_i32). */ + case INDEX_op_eqv_i32: /* Optional (TCG_TARGET_HAS_eqv_i32). */ + case INDEX_op_nand_i32: /* Optional (TCG_TARGET_HAS_nand_i32). */ + case INDEX_op_nor_i32: /* Optional (TCG_TARGET_HAS_nor_i32). */ + case INDEX_op_or_i32: + case INDEX_op_orc_i32: /* Optional (TCG_TARGET_HAS_orc_i32). */ + case INDEX_op_xor_i32: + case INDEX_op_shl_i32: + case INDEX_op_shr_i32: + case INDEX_op_sar_i32: + case INDEX_op_rotl_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */ + case INDEX_op_rotr_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */ + tcg_out_r(s, args[0]); + tcg_out_ri32(s, const_args[1], args[1]); + tcg_out_ri32(s, const_args[2], args[2]); + break; + case INDEX_op_deposit_i32: /* Optional (TCG_TARGET_HAS_deposit_i32). */ + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + assert(args[3] <= UINT8_MAX); + tcg_out8(s, args[3]); + assert(args[4] <= UINT8_MAX); + tcg_out8(s, args[4]); + break; + +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_add_i64: + case INDEX_op_sub_i64: + case INDEX_op_mul_i64: + case INDEX_op_and_i64: + case INDEX_op_andc_i64: /* Optional (TCG_TARGET_HAS_andc_i64). */ + case INDEX_op_eqv_i64: /* Optional (TCG_TARGET_HAS_eqv_i64). */ + case INDEX_op_nand_i64: /* Optional (TCG_TARGET_HAS_nand_i64). */ + case INDEX_op_nor_i64: /* Optional (TCG_TARGET_HAS_nor_i64). */ + case INDEX_op_or_i64: + case INDEX_op_orc_i64: /* Optional (TCG_TARGET_HAS_orc_i64). */ + case INDEX_op_xor_i64: + case INDEX_op_shl_i64: + case INDEX_op_shr_i64: + case INDEX_op_sar_i64: + case INDEX_op_rotl_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */ + case INDEX_op_rotr_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */ + tcg_out_r(s, args[0]); + tcg_out_ri64(s, const_args[1], args[1]); + tcg_out_ri64(s, const_args[2], args[2]); + break; + case INDEX_op_deposit_i64: /* Optional (TCG_TARGET_HAS_deposit_i64). 
*/ + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + assert(args[3] <= UINT8_MAX); + tcg_out8(s, args[3]); + assert(args[4] <= UINT8_MAX); + tcg_out8(s, args[4]); + break; + case INDEX_op_div_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ + case INDEX_op_divu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ + case INDEX_op_rem_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ + case INDEX_op_remu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ + TODO(); + break; + case INDEX_op_div2_i64: /* Optional (TCG_TARGET_HAS_div2_i64). */ + case INDEX_op_divu2_i64: /* Optional (TCG_TARGET_HAS_div2_i64). */ + TODO(); + break; + case INDEX_op_brcond_i64: + tcg_out_r(s, args[0]); + tcg_out_ri64(s, const_args[1], args[1]); + tcg_out8(s, args[2]); /* condition */ + tci_out_label(s, arg_label(args[3])); + break; + case INDEX_op_bswap16_i64: /* Optional (TCG_TARGET_HAS_bswap16_i64). */ + case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */ + case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */ + case INDEX_op_not_i64: /* Optional (TCG_TARGET_HAS_not_i64). */ + case INDEX_op_neg_i64: /* Optional (TCG_TARGET_HAS_neg_i64). */ + case INDEX_op_ext8s_i64: /* Optional (TCG_TARGET_HAS_ext8s_i64). */ + case INDEX_op_ext8u_i64: /* Optional (TCG_TARGET_HAS_ext8u_i64). */ + case INDEX_op_ext16s_i64: /* Optional (TCG_TARGET_HAS_ext16s_i64). */ + case INDEX_op_ext16u_i64: /* Optional (TCG_TARGET_HAS_ext16u_i64). */ + case INDEX_op_ext32s_i64: /* Optional (TCG_TARGET_HAS_ext32s_i64). */ + case INDEX_op_ext32u_i64: /* Optional (TCG_TARGET_HAS_ext32u_i64). */ + case INDEX_op_ext_i32_i64: + case INDEX_op_extu_i32_i64: +#endif /* TCG_TARGET_REG_BITS == 64 */ + case INDEX_op_neg_i32: /* Optional (TCG_TARGET_HAS_neg_i32). */ + case INDEX_op_not_i32: /* Optional (TCG_TARGET_HAS_not_i32). */ + case INDEX_op_ext8s_i32: /* Optional (TCG_TARGET_HAS_ext8s_i32). */ + case INDEX_op_ext16s_i32: /* Optional (TCG_TARGET_HAS_ext16s_i32). */ + case INDEX_op_ext8u_i32: /* Optional (TCG_TARGET_HAS_ext8u_i32). */ + case INDEX_op_ext16u_i32: /* Optional (TCG_TARGET_HAS_ext16u_i32). */ + case INDEX_op_bswap16_i32: /* Optional (TCG_TARGET_HAS_bswap16_i32). */ + case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */ + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + break; + case INDEX_op_div_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ + case INDEX_op_divu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ + case INDEX_op_rem_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ + case INDEX_op_remu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ + tcg_out_r(s, args[0]); + tcg_out_ri32(s, const_args[1], args[1]); + tcg_out_ri32(s, const_args[2], args[2]); + break; + case INDEX_op_div2_i32: /* Optional (TCG_TARGET_HAS_div2_i32). */ + case INDEX_op_divu2_i32: /* Optional (TCG_TARGET_HAS_div2_i32). 
*/ + TODO(); + break; +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_add2_i32: + case INDEX_op_sub2_i32: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + tcg_out_r(s, args[3]); + tcg_out_r(s, args[4]); + tcg_out_r(s, args[5]); + break; + case INDEX_op_brcond2_i32: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_ri32(s, const_args[2], args[2]); + tcg_out_ri32(s, const_args[3], args[3]); + tcg_out8(s, args[4]); /* condition */ + tci_out_label(s, arg_label(args[5])); + break; + case INDEX_op_mulu2_i32: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + tcg_out_r(s, args[3]); + break; +#endif + case INDEX_op_brcond_i32: + tcg_out_r(s, args[0]); + tcg_out_ri32(s, const_args[1], args[1]); + tcg_out8(s, args[2]); /* condition */ + tci_out_label(s, arg_label(args[3])); + break; + case INDEX_op_qemu_ld_i32: + tcg_out_r(s, *args++); + tcg_out_r(s, *args++); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_r(s, *args++); + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_r(s, *args++); + } + tcg_out_r(s, *args++); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); + break; + case INDEX_op_qemu_st_i32: + tcg_out_r(s, *args++); + tcg_out_r(s, *args++); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); + break; + case INDEX_op_qemu_st_i64: + tcg_out_r(s, *args++); + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_r(s, *args++); + } + tcg_out_r(s, *args++); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); + break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + default: + tcg_abort(); + } + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, + intptr_t arg2) +{ + uint8_t *old_code_ptr = s->code_ptr; + if (type == TCG_TYPE_I32) { + tcg_out_op_t(s, INDEX_op_st_i32); + tcg_out_r(s, arg); + tcg_out_r(s, arg1); + tcg_out32(s, arg2); + } else { + assert(type == TCG_TYPE_I64); +#if TCG_TARGET_REG_BITS == 64 + tcg_out_op_t(s, INDEX_op_st_i64); + tcg_out_r(s, arg); + tcg_out_r(s, arg1); + tcg_out32(s, arg2); +#else + TODO(); +#endif + } + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +/* Test if a constant matches the constraint. */ +static int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + /* No need to return 0 or 1, 0 or != 0 is good enough. */ + return arg_ct->ct & TCG_CT_CONST; +} + +static void tcg_target_init(TCGContext *s) +{ +#if defined(CONFIG_DEBUG_TCG_INTERPRETER) + const char *envval = getenv("DEBUG_TCG"); + if (envval) { + qemu_set_log(strtol(envval, NULL, 0)); + } +#endif + + /* The current code uses uint8_t for tcg operations. */ + assert(tcg_op_defs_max <= UINT8_MAX); + + /* Registers available for 32 bit operations. */ + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, + BIT(TCG_TARGET_NB_REGS) - 1); + /* Registers available for 64 bit operations. */ + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, + BIT(TCG_TARGET_NB_REGS) - 1); + /* TODO: Which registers should be set here? 
*/
+    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
+                     BIT(TCG_TARGET_NB_REGS) - 1);
+
+    tcg_regset_clear(s->reserved_regs);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
+    tcg_add_target_add_op_defs(tcg_target_op_defs);
+
+    /* We use negative offsets from "sp" so that we can distinguish
+       stores that might pretend to be call arguments. */
+    tcg_set_frame(s, TCG_REG_CALL_STACK,
+                  -CPU_TEMP_BUF_NLONGS * sizeof(long),
+                  CPU_TEMP_BUF_NLONGS * sizeof(long));
+}
+
+/* Generate global QEMU prologue and epilogue code. */
+static inline void tcg_target_qemu_prologue(TCGContext *s)
+{
+}