Diffstat (limited to 'tcg')
-rw-r--r--  tcg/README                   |   41
-rw-r--r--  tcg/aarch64/tcg-target.h     |   10
-rw-r--r--  tcg/aarch64/tcg-target.inc.c |  157
-rw-r--r--  tcg/arm/tcg-target.h         |   41
-rw-r--r--  tcg/arm/tcg-target.inc.c     |  121
-rw-r--r--  tcg/i386/tcg-target.h        |   17
-rw-r--r--  tcg/i386/tcg-target.inc.c    |  727
-rw-r--r--  tcg/ia64/tcg-target.h        |   10
-rw-r--r--  tcg/ia64/tcg-target.inc.c    |   28
-rw-r--r--  tcg/mips/tcg-target.h        |   70
-rw-r--r--  tcg/mips/tcg-target.inc.c    | 1252
-rw-r--r--  tcg/optimize.c               |   94
-rw-r--r--  tcg/ppc/tcg-target.h         |   13
-rw-r--r--  tcg/ppc/tcg-target.inc.c     |  117
-rw-r--r--  tcg/s390/tcg-target.h        |  128
-rw-r--r--  tcg/s390/tcg-target.inc.c    |  248
-rw-r--r--  tcg/sparc/tcg-target.h       |   10
-rw-r--r--  tcg/sparc/tcg-target.inc.c   |   28
-rw-r--r--  tcg/tcg-op.c                 |  692
-rw-r--r--  tcg/tcg-op.h                 |   42
-rw-r--r--  tcg/tcg-opc.h                |   10
-rw-r--r--  tcg/tcg-runtime.h            |    9
-rw-r--r--  tcg/tcg.c                    |  173
-rw-r--r--  tcg/tcg.h                    |   14
-rw-r--r--  tcg/tci/tcg-target.h         |   10
-rw-r--r--  tcg/tci/tcg-target.inc.c     |   25
26 files changed, 3175 insertions(+), 912 deletions(-)
diff --git a/tcg/README b/tcg/README
index ae31388c59..a9858c2f74 100644
--- a/tcg/README
+++ b/tcg/README
@@ -246,6 +246,14 @@ t0=~(t1|t2)
t0=t1|~t2
+* clz_i32/i64 t0, t1, t2
+
+t0 = t1 ? clz(t1) : t2
+
+* ctz_i32/i64 t0, t1, t2
+
+t0 = t1 ? ctz(t1) : t2
+
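
A minimal C model of these semantics (a sketch using GCC builtins;
the builtins are undefined for a zero input, hence the explicit t2
fallback):

  t0 = t1 ? __builtin_clz(t1) : t2;   /* clz_i32 */
  t0 = t1 ? __builtin_ctz(t1) : t2;   /* ctz_i32 */
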
********* Shifts/Rotates
* shl_i32/i64 t0, t1, t2
@@ -314,11 +322,27 @@ The bitfield is described by POS/LEN, which are immediate values:
LEN - the length of the bitfield
POS - the position of the first bit, counting from the LSB
-For example, pos=8, len=4 indicates a 4-bit field at bit 8.
-This operation would be equivalent to
+For example, "deposit_i32 dest, t1, t2, 8, 4" indicates a 4-bit field
+at bit 8. This operation would be equivalent to
dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00)
+* extract_i32/i64 dest, t1, pos, len
+* sextract_i32/i64 dest, t1, pos, len
+
+Extract a bitfield from T1, placing the result in DEST.
+The bitfield is described by POS/LEN, which are immediate values,
+as above for deposit. For extract_*, the result will be extended
+to the left with zeros; for sextract_*, the result will be extended
+to the left with copies of the bitfield sign bit at pos + len - 1.
+
+For example, "sextract_i32 dest, t1, 8, 4" indicates a 4-bit field
+at bit 8. This operation would be equivalent to
+
+ dest = (t1 << 20) >> 28
+
+(using an arithmetic right shift).
+
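
A minimal C model of both operations for the 32-bit case (a sketch,
assuming 0 < len and pos + len <= 32):

  /* extract: zero-extend the field */
  dest = (t1 >> pos) & (~0u >> (32 - len));

  /* sextract: shift the field to the top, then arithmetic-shift back */
  dest = (int32_t)(t1 << (32 - pos - len)) >> (32 - len);
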
* extrl_i64_i32 t0, t1
For 64-bit hosts only, extract the low 32-bits of input T1 and place it
@@ -523,24 +547,29 @@ version. Aliases are specified in the input operands as for GCC.
The same register may be used for both an input and an output, even when
they are not explicitly aliased. If an op expands to multiple target
instructions then care must be taken to avoid clobbering input values.
-GCC style "early clobber" outputs are not currently supported.
+GCC style "early clobber" outputs are supported, with '&'.
A target can define specific register or constant constraints. If an
operation uses a constant input constraint which does not allow all
constants, it must also accept registers in order to have a fallback.
+The constraint 'i' is defined generically to accept any constant.
+The constraint 'r' is not defined generically, but is consistently
+used by each backend to indicate all registers.
The movi_i32 and movi_i64 operations must accept any constants.
The mov_i32 and mov_i64 operations must accept any registers of the
same type.
-The ld/st instructions must accept signed 32 bit constant offsets. It
-can be implemented by reserving a specific register to compute the
-address if the offset is too big.
+The ld/st/sti instructions must accept signed 32 bit constant offsets.
+This can be implemented by reserving a specific register in which to
+compute the address if the offset is too big.
The ld/st instructions must accept any destination (ld) or source (st)
register.
+The sti instruction may fail if it cannot store the given constant.
+
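For example, the i386 backend later in this patch marks the clz/ctz
outputs with '&', since their expansion may read the third input after
the output register has already been written:

  { INDEX_op_ctz_i32, { "&r", "r", "rW" } }
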
4.3) Function call assumptions
- The only supported types for parameters and return value are: 32 and
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index a1d101f891..1a5ea23844 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -62,7 +62,12 @@ typedef enum {
#define TCG_TARGET_HAS_eqv_i32 1
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 1
+#define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_movcond_i32 1
#define TCG_TARGET_HAS_add2_i32 1
#define TCG_TARGET_HAS_sub2_i32 1
@@ -92,7 +97,12 @@ typedef enum {
#define TCG_TARGET_HAS_eqv_i64 1
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 1
+#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_movcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
#define TCG_TARGET_HAS_sub2_i64 1
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 1939d3528f..6d227a5a6a 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -115,12 +115,10 @@ static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
#define TCG_CT_CONST_MONE 0x800
/* parse target specific constraints */
-static int target_parse_constraint(TCGArgConstraint *ct,
- const char **pct_str)
+static const char *target_parse_constraint(TCGArgConstraint *ct,
+ const char *ct_str, TCGType type)
{
- const char *ct_str = *pct_str;
-
- switch (ct_str[0]) {
+ switch (*ct_str++) {
case 'r':
ct->ct |= TCG_CT_REG;
tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
@@ -150,12 +148,9 @@ static int target_parse_constraint(TCGArgConstraint *ct,
ct->ct |= TCG_CT_CONST_ZERO;
break;
default:
- return -1;
+ return NULL;
}
-
- ct_str++;
- *pct_str = ct_str;
- return 0;
+ return ct_str;
}
static inline bool is_aimm(uint64_t val)
@@ -344,8 +339,12 @@ typedef enum {
/* Conditional select instructions. */
I3506_CSEL = 0x1a800000,
I3506_CSINC = 0x1a800400,
+ I3506_CSINV = 0x5a800000,
+ I3506_CSNEG = 0x5a800400,
/* Data-processing (1 source) instructions. */
+ I3507_CLZ = 0x5ac01000,
+ I3507_RBIT = 0x5ac00000,
I3507_REV16 = 0x5ac00400,
I3507_REV32 = 0x5ac00800,
I3507_REV64 = 0x5ac00c00,
@@ -581,11 +580,9 @@ static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
tcg_target_long value)
{
- AArch64Insn insn;
int i, wantinv, shift;
tcg_target_long svalue = value;
tcg_target_long ivalue = ~value;
- tcg_target_long imask;
/* For 32-bit values, discard potential garbage in value. For 64-bit
values within [2**31, 2**32-1], we can create smaller sequences by
@@ -631,42 +628,35 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
/* Would it take fewer insns to begin with MOVN? For the value and its
inverse, count the number of 16-bit lanes that are 0. */
- for (i = wantinv = imask = 0; i < 64; i += 16) {
+ for (i = wantinv = 0; i < 64; i += 16) {
tcg_target_long mask = 0xffffull << i;
- if ((value & mask) == 0) {
- wantinv -= 1;
- }
- if ((ivalue & mask) == 0) {
- wantinv += 1;
- imask |= mask;
- }
- }
-
- /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */
- insn = I3405_MOVZ;
- if (wantinv > 0) {
- value = ivalue;
- insn = I3405_MOVN;
+ wantinv -= ((value & mask) == 0);
+ wantinv += ((ivalue & mask) == 0);
}
- /* Find the lowest lane that is not 0x0000. */
- shift = ctz64(value) & (63 & -16);
- tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift);
-
- if (wantinv > 0) {
- /* Re-invert the value, so MOVK sees non-inverted bits. */
- value = ~value;
- /* Clear out all the 0xffff lanes. */
- value ^= imask;
- }
- /* Clear out the lane that we just set. */
- value &= ~(0xffffUL << shift);
-
- /* Iterate until all lanes have been set, and thus cleared from VALUE. */
- while (value) {
+ if (wantinv <= 0) {
+ /* Find the lowest lane that is not 0x0000. */
shift = ctz64(value) & (63 & -16);
- tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
+ tcg_out_insn(s, 3405, MOVZ, type, rd, value >> shift, shift);
+ /* Clear out the lane that we just set. */
value &= ~(0xffffUL << shift);
+ /* Iterate until all non-zero lanes have been processed. */
+ while (value) {
+ shift = ctz64(value) & (63 & -16);
+ tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
+ value &= ~(0xffffUL << shift);
+ }
+ } else {
+ /* Like above, but with the inverted value and MOVN to start. */
+ shift = ctz64(ivalue) & (63 & -16);
+ tcg_out_insn(s, 3405, MOVN, type, rd, ivalue >> shift, shift);
+ ivalue &= ~(0xffffUL << shift);
+ while (ivalue) {
+ shift = ctz64(ivalue) & (63 & -16);
+ /* Provide MOVK with the non-inverted value. */
+ tcg_out_insn(s, 3405, MOVK, type, rd, ~(ivalue >> shift), shift);
+ ivalue &= ~(0xffffUL << shift);
+ }
}
}
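
/* A sketch of the heuristic used above: each non-zero 16-bit lane of
   VALUE costs one MOVZ/MOVK, and each non-0xffff lane costs one
   MOVN/MOVK on the inverted path, so count the zero lanes of both
   forms and start with MOVN only when that path is strictly cheaper. */
static inline bool movi_prefer_movn(uint64_t value)
{
    int wantinv = 0, i;

    for (i = 0; i < 64; i += 16) {
        uint64_t mask = 0xffffull << i;
        wantinv -= ((value & mask) == 0);   /* zero lane: cheap for MOVZ */
        wantinv += ((~value & mask) == 0);  /* 0xffff lane: cheap for MOVN */
    }
    return wantinv > 0;
}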
@@ -965,6 +955,15 @@ static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
insn = I3401_SUBSI;
bl = -bl;
}
+ if (unlikely(al == TCG_REG_XZR)) {
+ /* ??? We want to allow al to be zero for the benefit of
+ negation via subtraction. However, that leaves open the
+ possibility of adding 0+const in the low part, and the
+ immediate add instructions encode XSP not XZR. Don't try
+ anything more elaborate here than loading another zero. */
+ al = TCG_REG_TMP;
+ tcg_out_movi(s, ext, al, 0);
+ }
tcg_out_insn_3401(s, insn, ext, rl, al, bl);
} else {
tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
@@ -998,6 +997,37 @@ static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
+static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
+ TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
+{
+ TCGReg a1 = a0;
+ if (is_ctz) {
+ a1 = TCG_REG_TMP;
+ tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
+ }
+ if (const_b && b == (ext ? 64 : 32)) {
+ tcg_out_insn(s, 3507, CLZ, ext, d, a1);
+ } else {
+ AArch64Insn sel = I3506_CSEL;
+
+ tcg_out_cmp(s, ext, a0, 0, 1);
+ tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
+
+ if (const_b) {
+ if (b == -1) {
+ b = TCG_REG_XZR;
+ sel = I3506_CSINV;
+ } else if (b == 0) {
+ b = TCG_REG_XZR;
+ } else {
+ tcg_out_movi(s, ext, d, b);
+ b = d;
+ }
+ }
+ tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
+ }
+}
+
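/* A C model of tcg_out_cltz above (a sketch, reusing the clz64() and
   revbit64() helpers from qemu/host-utils.h): ctz is clz of the
   bit-reversed input, and the zero-input fallback B is folded into the
   conditional select; CSINV against XZR yields -1, CSEL against XZR
   yields 0, and any other constant is materialized into D first. */
static inline uint64_t model_cltz64(uint64_t a0, uint64_t b, bool is_ctz)
{
    uint64_t n = clz64(is_ctz ? revbit64(a0) : a0);  /* RBIT; CLZ */
    return a0 ? n : b;                               /* CSEL/CSINV */
}
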
#ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
* TCGMemOpIdx oi, uintptr_t ra)
@@ -1564,6 +1594,15 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
break;
+ case INDEX_op_clz_i64:
+ case INDEX_op_clz_i32:
+ tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
+ break;
+ case INDEX_op_ctz_i64:
+ case INDEX_op_ctz_i32:
+ tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
+ break;
+
case INDEX_op_brcond_i32:
a1 = (int32_t)a1;
/* FALLTHRU */
@@ -1640,6 +1679,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
break;
+ case INDEX_op_extract_i64:
+ case INDEX_op_extract_i32:
+ tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
+ break;
+
+ case INDEX_op_sextract_i64:
+ case INDEX_op_sextract_i32:
+ tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
+ break;
+
case INDEX_op_add2_i32:
tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
(int32_t)args[4], args[5], const_args[4],
@@ -1745,11 +1794,15 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
{ INDEX_op_sar_i32, { "r", "r", "ri" } },
{ INDEX_op_rotl_i32, { "r", "r", "ri" } },
{ INDEX_op_rotr_i32, { "r", "r", "ri" } },
+ { INDEX_op_clz_i32, { "r", "r", "rAL" } },
+ { INDEX_op_ctz_i32, { "r", "r", "rAL" } },
{ INDEX_op_shl_i64, { "r", "r", "ri" } },
{ INDEX_op_shr_i64, { "r", "r", "ri" } },
{ INDEX_op_sar_i64, { "r", "r", "ri" } },
{ INDEX_op_rotl_i64, { "r", "r", "ri" } },
{ INDEX_op_rotr_i64, { "r", "r", "ri" } },
+ { INDEX_op_clz_i64, { "r", "r", "rAL" } },
+ { INDEX_op_ctz_i64, { "r", "r", "rAL" } },
{ INDEX_op_brcond_i32, { "r", "rA" } },
{ INDEX_op_brcond_i64, { "r", "rA" } },
@@ -1785,6 +1838,10 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
{ INDEX_op_deposit_i32, { "r", "0", "rZ" } },
{ INDEX_op_deposit_i64, { "r", "0", "rZ" } },
+ { INDEX_op_extract_i32, { "r", "r" } },
+ { INDEX_op_extract_i64, { "r", "r" } },
+ { INDEX_op_sextract_i32, { "r", "r" } },
+ { INDEX_op_sextract_i64, { "r", "r" } },
{ INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
{ INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
@@ -1798,6 +1855,18 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
{ -1 },
};
+static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+{
+ int i, n = ARRAY_SIZE(aarch64_op_defs);
+
+ for (i = 0; i < n; ++i) {
+ if (aarch64_op_defs[i].op == op) {
+ return &aarch64_op_defs[i];
+ }
+ }
+ return NULL;
+}
+
static void tcg_target_init(TCGContext *s)
{
tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
@@ -1820,8 +1889,6 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
-
- tcg_add_target_add_op_defs(aarch64_op_defs);
}
/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index a0e1acfa77..09a19c6f35 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -26,6 +26,37 @@
#ifndef ARM_TCG_TARGET_H
#define ARM_TCG_TARGET_H
+/* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. */
+#ifndef __ARM_ARCH
+# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+ || defined(__ARM_ARCH_7EM__)
+# define __ARM_ARCH 7
+# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
+ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
+# define __ARM_ARCH 6
+# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \
+ || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
+ || defined(__ARM_ARCH_5TEJ__)
+# define __ARM_ARCH 5
+# else
+# define __ARM_ARCH 4
+# endif
+#endif
+
+extern int arm_arch;
+
+#if defined(__ARM_ARCH_5T__) \
+ || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
+# define use_armv5t_instructions 1
+#else
+# define use_armv5t_instructions use_armv6_instructions
+#endif
+
+#define use_armv6_instructions (__ARM_ARCH >= 6 || arm_arch >= 6)
+#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)
+
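/* Usage sketch: these macros fold together the compile-time
   architecture (__ARM_ARCH) and the runtime value (arm_arch, filled in
   by the probe in tcg_target_init), so a test such as
       if (use_armv7_instructions) { ... }
   is constant-folded away entirely when building natively for ARMv7+. */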
#undef TCG_TARGET_STACK_GROWSUP
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
@@ -79,7 +110,12 @@ extern bool use_idiv_instructions;
#define TCG_TARGET_HAS_eqv_i32 0
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
-#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_clz_i32 use_armv5t_instructions
+#define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions
+#define TCG_TARGET_HAS_extract_i32 use_armv7_instructions
+#define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions
#define TCG_TARGET_HAS_movcond_i32 1
#define TCG_TARGET_HAS_mulu2_i32 1
#define TCG_TARGET_HAS_muls2_i32 1
@@ -88,9 +124,6 @@ extern bool use_idiv_instructions;
#define TCG_TARGET_HAS_div_i32 use_idiv_instructions
#define TCG_TARGET_HAS_rem_i32 0
-extern bool tcg_target_deposit_valid(int ofs, int len);
-#define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid
-
enum {
TCG_AREG0 = TCG_REG_R6,
};
diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index ffa0d40660..e75a6d4943 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -25,36 +25,7 @@
#include "elf.h"
#include "tcg-be-ldst.h"
-/* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. */
-#ifndef __ARM_ARCH
-# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
- || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
- || defined(__ARM_ARCH_7EM__)
-# define __ARM_ARCH 7
-# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
- || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
- || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
-# define __ARM_ARCH 6
-# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \
- || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
- || defined(__ARM_ARCH_5TEJ__)
-# define __ARM_ARCH 5
-# else
-# define __ARM_ARCH 4
-# endif
-#endif
-
-static int arm_arch = __ARM_ARCH;
-
-#if defined(__ARM_ARCH_5T__) \
- || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
-# define use_armv5t_instructions 1
-#else
-# define use_armv5t_instructions use_armv6_instructions
-#endif
-
-#define use_armv6_instructions (__ARM_ARCH >= 6 || arm_arch >= 6)
-#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)
+int arm_arch = __ARM_ARCH;
#ifndef use_idiv_instructions
bool use_idiv_instructions;
@@ -143,12 +114,10 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
#define TCG_CT_CONST_ZERO 0x800
/* parse target specific constraints */
-static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+static const char *target_parse_constraint(TCGArgConstraint *ct,
+ const char *ct_str, TCGType type)
{
- const char *ct_str;
-
- ct_str = *pct_str;
- switch (ct_str[0]) {
+ switch (*ct_str++) {
case 'I':
ct->ct |= TCG_CT_CONST_ARM;
break;
@@ -201,12 +170,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
break;
default:
- return -1;
+ return NULL;
}
- ct_str++;
- *pct_str = ct_str;
-
- return 0;
+ return ct_str;
}
static inline uint32_t rotl(uint32_t val, int n)
@@ -290,6 +256,9 @@ typedef enum {
ARITH_BIC = 0xe << 21,
ARITH_MVN = 0xf << 21,
+ INSN_CLZ = 0x016f0f10,
+ INSN_RBIT = 0x06ff0f30,
+
INSN_LDR_IMM = 0x04100000,
INSN_LDR_REG = 0x06100000,
INSN_STR_IMM = 0x04000000,
@@ -730,16 +699,6 @@ static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
}
}
-bool tcg_target_deposit_valid(int ofs, int len)
-{
- /* ??? Without bfi, we could improve over generic code by combining
- the right-shift from a non-zero ofs with the orr. We do run into
- problems when rd == rs, and the mask generated from ofs+len doesn't
- fit into an immediate. We would have to be careful not to pessimize
- wrt the optimizations performed on the expanded code. */
- return use_armv7_instructions;
-}
-
static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
TCGArg a1, int ofs, int len, bool const_a1)
{
@@ -752,6 +711,22 @@ static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
| (ofs << 7) | ((ofs + len - 1) << 16));
}
+static inline void tcg_out_extract(TCGContext *s, int cond, TCGReg rd,
+ TCGArg a1, int ofs, int len)
+{
+ /* ubfx */
+ tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | a1
+ | (ofs << 7) | ((len - 1) << 16));
+}
+
+static inline void tcg_out_sextract(TCGContext *s, int cond, TCGReg rd,
+ TCGArg a1, int ofs, int len)
+{
+ /* sbfx */
+ tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | a1
+ | (ofs << 7) | ((len - 1) << 16));
+}
+
/* Note that this routine is used for both LDR and LDRH formats, so we do
not wish to include an immediate shift at this point. */
static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
@@ -1857,6 +1832,28 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
break;
+ case INDEX_op_ctz_i32:
+ tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, args[1], 0);
+ a1 = TCG_REG_TMP;
+ goto do_clz;
+
+ case INDEX_op_clz_i32:
+ a1 = args[1];
+ do_clz:
+ a0 = args[0];
+ a2 = args[2];
+ c = const_args[2];
+ if (c && a2 == 32) {
+ tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0);
+ break;
+ }
+ tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, a1, 0);
+ tcg_out_dat_reg(s, COND_NE, INSN_CLZ, a0, 0, a1, 0);
+ if (c || a0 != a2) {
+ tcg_out_dat_rIK(s, COND_EQ, ARITH_MOV, ARITH_MVN, a0, 0, a2, c);
+ }
+ break;
+
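    /* The sequence emitted above, sketched (ctz first adds RBIT tmp, a1):
     *     cmp   a1, #0
     *     clzne a0, a1
     *     mov/mvn(eq) a0, a2     @ skipped when a0 already holds a2
     * A constant a2 of 32 needs no compare at all, since ARM's CLZ
     * already returns 32 for a zero input. */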
case INDEX_op_brcond_i32:
tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
args[0], args[1], const_args[1]);
@@ -1933,6 +1930,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_deposit(s, COND_AL, args[0], args[2],
args[3], args[4], const_args[2]);
break;
+ case INDEX_op_extract_i32:
+ tcg_out_extract(s, COND_AL, args[0], args[1], args[2], args[3]);
+ break;
+ case INDEX_op_sextract_i32:
+ tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]);
+ break;
case INDEX_op_div_i32:
tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
@@ -1985,6 +1988,8 @@ static const TCGTargetOpDef arm_op_defs[] = {
{ INDEX_op_sar_i32, { "r", "r", "ri" } },
{ INDEX_op_rotl_i32, { "r", "r", "ri" } },
{ INDEX_op_rotr_i32, { "r", "r", "ri" } },
+ { INDEX_op_clz_i32, { "r", "r", "rIK" } },
+ { INDEX_op_ctz_i32, { "r", "r", "rIK" } },
{ INDEX_op_brcond_i32, { "r", "rIN" } },
{ INDEX_op_setcond_i32, { "r", "r", "rIN" } },
@@ -2015,6 +2020,8 @@ static const TCGTargetOpDef arm_op_defs[] = {
{ INDEX_op_ext16u_i32, { "r", "r" } },
{ INDEX_op_deposit_i32, { "r", "0", "rZ" } },
+ { INDEX_op_extract_i32, { "r", "r" } },
+ { INDEX_op_sextract_i32, { "r", "r" } },
{ INDEX_op_div_i32, { "r", "r", "r" } },
{ INDEX_op_divu_i32, { "r", "r", "r" } },
@@ -2023,6 +2030,18 @@ static const TCGTargetOpDef arm_op_defs[] = {
{ -1 },
};
+static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+{
+ int i, n = ARRAY_SIZE(arm_op_defs);
+
+ for (i = 0; i < n; ++i) {
+ if (arm_op_defs[i].op == op) {
+ return &arm_op_defs[i];
+ }
+ }
+ return NULL;
+}
+
static void tcg_target_init(TCGContext *s)
{
/* Only probe for the platform and capabilities if we haven't already
@@ -2053,8 +2072,6 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
-
- tcg_add_target_add_op_defs(arm_op_defs);
}
static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 524cfc61fd..21d96ec35c 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -76,6 +76,7 @@ typedef enum {
#endif
extern bool have_bmi1;
+extern bool have_popcnt;
/* optional instructions */
#define TCG_TARGET_HAS_div2_i32 1
@@ -93,7 +94,12 @@ extern bool have_bmi1;
#define TCG_TARGET_HAS_eqv_i32 0
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 1
+#define TCG_TARGET_HAS_ctpop_i32 have_popcnt
#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_movcond_i32 1
#define TCG_TARGET_HAS_add2_i32 1
#define TCG_TARGET_HAS_sub2_i32 1
@@ -123,7 +129,12 @@ extern bool have_bmi1;
#define TCG_TARGET_HAS_eqv_i64 0
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 1
+#define TCG_TARGET_HAS_ctpop_i64 have_popcnt
#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 0
#define TCG_TARGET_HAS_movcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
#define TCG_TARGET_HAS_sub2_i64 1
@@ -138,6 +149,12 @@ extern bool have_bmi1;
((ofs) == 0 && (len) == 16))
#define TCG_TARGET_deposit_i64_valid TCG_TARGET_deposit_i32_valid
+/* Check for the possibility of high-byte extraction and, for 64-bit,
+ zero-extending 32-bit right-shift. */
+#define TCG_TARGET_extract_i32_valid(ofs, len) ((ofs) == 8 && (len) == 8)
+#define TCG_TARGET_extract_i64_valid(ofs, len) \
+ (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)
+
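/* A sketch of the extract_i64 case accepted above: with ofs + len == 32
   the field is the top of a 32-bit value, so it reduces to a 32-bit
   logical right shift, which on x86-64 also zero-extends into bits
   63..32:
       dest = (uint32_t)t1 >> ofs;
*/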
#if TCG_TARGET_REG_BITS == 64
# define TCG_AREG0 TCG_REG_R14
#else
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index eeb1777bbb..5918008296 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -92,6 +92,7 @@ static const int tcg_target_call_oarg_regs[] = {
#define TCG_CT_CONST_S32 0x100
#define TCG_CT_CONST_U32 0x200
#define TCG_CT_CONST_I32 0x400
+#define TCG_CT_CONST_WSZ 0x800
/* Registers used with L constraint, which are the first argument
registers on x86_64, and two random call clobbered registers on
@@ -129,15 +130,21 @@ static bool have_movbe;
# define have_movbe 0
#endif
-/* We need this symbol in tcg-target.h, and we can't properly conditionalize
+/* We need these symbols in tcg-target.h, and we can't properly conditionalize
them there. Therefore we always define the variables. */
bool have_bmi1;
+bool have_popcnt;
#if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
static bool have_bmi2;
#else
# define have_bmi2 0
#endif
+#if defined(CONFIG_CPUID_H) && defined(bit_LZCNT)
+static bool have_lzcnt;
+#else
+# define have_lzcnt 0
+#endif
static tcg_insn_unit *tb_ret_addr;
@@ -166,12 +173,10 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
}
/* parse target specific constraints */
-static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+static const char *target_parse_constraint(TCGArgConstraint *ct,
+ const char *ct_str, TCGType type)
{
- const char *ct_str;
-
- ct_str = *pct_str;
- switch(ct_str[0]) {
+ switch(*ct_str++) {
case 'a':
ct->ct |= TCG_CT_REG;
tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
@@ -181,7 +186,6 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
break;
case 'c':
- case_c:
ct->ct |= TCG_CT_REG;
tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
break;
@@ -210,7 +214,6 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_set32(ct->u.regs, 0, 0xf);
break;
case 'r':
- case_r:
ct->ct |= TCG_CT_REG;
if (TCG_TARGET_REG_BITS == 64) {
tcg_regset_set32(ct->u.regs, 0, 0xffff);
@@ -218,13 +221,10 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_set32(ct->u.regs, 0, 0xff);
}
break;
- case 'C':
- /* With SHRX et al, we need not use ECX as shift count register. */
- if (have_bmi2) {
- goto case_r;
- } else {
- goto case_c;
- }
+ case 'W':
+ /* With TZCNT/LZCNT, we can have operand-size as an input. */
+ ct->ct |= TCG_CT_CONST_WSZ;
+ break;
/* qemu_ld/st address constraint */
case 'L':
@@ -239,21 +239,19 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
break;
case 'e':
- ct->ct |= TCG_CT_CONST_S32;
+ ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_S32);
break;
case 'Z':
- ct->ct |= TCG_CT_CONST_U32;
+ ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_U32);
break;
case 'I':
- ct->ct |= TCG_CT_CONST_I32;
+ ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_I32);
break;
default:
- return -1;
+ return NULL;
}
- ct_str++;
- *pct_str = ct_str;
- return 0;
+ return ct_str;
}
/* test if a constant matches the constraint */
@@ -273,6 +271,9 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
return 1;
}
+ if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
+ return 1;
+ }
return 0;
}
@@ -306,6 +307,8 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
#define OPC_ANDN (0xf2 | P_EXT38)
#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
+#define OPC_BSF (0xbc | P_EXT)
+#define OPC_BSR (0xbd | P_EXT)
#define OPC_BSWAP (0xc8 | P_EXT)
#define OPC_CALL_Jz (0xe8)
#define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
@@ -320,6 +323,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
#define OPC_JMP_long (0xe9)
#define OPC_JMP_short (0xeb)
#define OPC_LEA (0x8d)
+#define OPC_LZCNT (0xbd | P_EXT | P_SIMDF3)
#define OPC_MOVB_EvGv (0x88) /* stores, more or less */
#define OPC_MOVL_EvGv (0x89) /* stores, more or less */
#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
@@ -334,6 +338,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
#define OPC_MOVZBL (0xb6 | P_EXT)
#define OPC_MOVZWL (0xb7 | P_EXT)
#define OPC_POP_r32 (0x58)
+#define OPC_POPCNT (0xb8 | P_EXT | P_SIMDF3)
#define OPC_PUSH_r32 (0x50)
#define OPC_PUSH_Iv (0x68)
#define OPC_PUSH_Ib (0x6a)
@@ -346,6 +351,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_TESTL (0x85)
+#define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3)
#define OPC_XCHG_ax_r32 (0x90)
#define OPC_GRP3_Ev (0xf7)
@@ -431,6 +437,11 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
if (opc & P_ADDR32) {
tcg_out8(s, 0x67);
}
+ if (opc & P_SIMDF3) {
+ tcg_out8(s, 0xf3);
+ } else if (opc & P_SIMDF2) {
+ tcg_out8(s, 0xf2);
+ }
rex = 0;
rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */
@@ -465,6 +476,11 @@ static void tcg_out_opc(TCGContext *s, int opc)
if (opc & P_DATA16) {
tcg_out8(s, 0x66);
}
+ if (opc & P_SIMDF3) {
+ tcg_out8(s, 0xf3);
+ } else if (opc & P_SIMDF2) {
+ tcg_out8(s, 0xf2);
+ }
if (opc & (P_EXT | P_EXT38)) {
tcg_out8(s, 0x0f);
if (opc & P_EXT38) {
@@ -1093,13 +1109,11 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
}
#endif
-static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
- TCGArg c1, TCGArg c2, int const_c2,
- TCGArg v1)
+static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw,
+ TCGReg dest, TCGReg v1)
{
- tcg_out_cmp(s, c1, c2, const_c2, 0);
if (have_cmov) {
- tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
+ tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | rexw, dest, v1);
} else {
TCGLabel *over = gen_new_label();
tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
@@ -1108,16 +1122,68 @@ static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
}
}
+static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGReg dest,
+ TCGReg c1, TCGArg c2, int const_c2,
+ TCGReg v1)
+{
+ tcg_out_cmp(s, c1, c2, const_c2, 0);
+ tcg_out_cmov(s, cond, 0, dest, v1);
+}
+
#if TCG_TARGET_REG_BITS == 64
-static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
- TCGArg c1, TCGArg c2, int const_c2,
- TCGArg v1)
+static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGReg dest,
+ TCGReg c1, TCGArg c2, int const_c2,
+ TCGReg v1)
{
tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
- tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
+ tcg_out_cmov(s, cond, P_REXW, dest, v1);
}
#endif
+static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
+ TCGArg arg2, bool const_a2)
+{
+ if (have_bmi1) {
+ tcg_out_modrm(s, OPC_TZCNT + rexw, dest, arg1);
+ if (const_a2) {
+ tcg_debug_assert(arg2 == (rexw ? 64 : 32));
+ } else {
+ tcg_debug_assert(dest != arg2);
+ tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
+ }
+ } else {
+ tcg_debug_assert(dest != arg2);
+ tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1);
+ tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
+ }
+}
+
+static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
+ TCGArg arg2, bool const_a2)
+{
+ if (have_lzcnt) {
+ tcg_out_modrm(s, OPC_LZCNT + rexw, dest, arg1);
+ if (const_a2) {
+ tcg_debug_assert(arg2 == (rexw ? 64 : 32));
+ } else {
+ tcg_debug_assert(dest != arg2);
+ tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
+ }
+ } else {
+ tcg_debug_assert(!const_a2);
+ tcg_debug_assert(dest != arg1);
+ tcg_debug_assert(dest != arg2);
+
+ /* Recall that the output of BSR is the index not the count. */
+ tcg_out_modrm(s, OPC_BSR + rexw, dest, arg1);
+ tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0);
+
+ /* Since we have destroyed the flags from BSR, we have to re-test. */
+ tcg_out_cmp(s, arg1, 0, 1, rexw);
+ tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
+ }
+}
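
/* A C model of the BSR fallback above (a sketch): BSR yields the
   *index* of the highest set bit, so for non-zero X we have
   clz(X) == (bits - 1) - bsr(X), and since bsr(X) <= bits - 1 that
   subtraction is exactly XOR with 31 (or 63).  The XOR clobbers the
   flags BSR produced, hence the fresh compare before the CMOV. */
static inline unsigned model_clz32(uint32_t arg1, unsigned arg2)
{
    unsigned dest;

    if (arg1 == 0) {
        return arg2;                   /* cmp $0,arg1; cmove arg2,dest */
    }
    dest = 31 - __builtin_clz(arg1);   /* bsr arg1,dest */
    return dest ^ 31;                  /* xor $31,dest */
}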
+
static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
{
intptr_t disp = tcg_pcrel_diff(s, dest) - 5;
@@ -1795,7 +1861,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg *args, const int *const_args)
{
- int c, vexop, rexw = 0;
+ TCGArg a0, a1, a2;
+ int c, const_a2, vexop, rexw = 0;
#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
@@ -1807,9 +1874,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case glue(glue(INDEX_op_, x), _i32)
#endif
- switch(opc) {
+ /* Hoist the loads of the most common arguments. */
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+ const_a2 = const_args[2];
+
+ switch (opc) {
case INDEX_op_exit_tb:
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0);
tcg_out_jmp(s, tb_ret_addr);
break;
case INDEX_op_goto_tb:
@@ -1824,57 +1897,53 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_nopn(s, gap - 1);
}
tcg_out8(s, OPC_JMP_long); /* jmp im */
- s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
+ s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
tcg_out32(s, 0);
} else {
/* indirect jump method */
tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
- (intptr_t)(s->tb_jmp_target_addr + args[0]));
+ (intptr_t)(s->tb_jmp_target_addr + a0));
}
- s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
+ s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
break;
case INDEX_op_br:
- tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0);
+ tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0);
break;
OP_32_64(ld8u):
/* Note that we can ignore REXW for the zero-extend to 64-bit. */
- tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
+ tcg_out_modrm_offset(s, OPC_MOVZBL, a0, a1, a2);
break;
OP_32_64(ld8s):
- tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
+ tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, a0, a1, a2);
break;
OP_32_64(ld16u):
/* Note that we can ignore REXW for the zero-extend to 64-bit. */
- tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
+ tcg_out_modrm_offset(s, OPC_MOVZWL, a0, a1, a2);
break;
OP_32_64(ld16s):
- tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
+ tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, a0, a1, a2);
break;
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_ld32u_i64:
#endif
case INDEX_op_ld_i32:
- tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ tcg_out_ld(s, TCG_TYPE_I32, a0, a1, a2);
break;
OP_32_64(st8):
if (const_args[0]) {
- tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
- 0, args[1], args[2]);
- tcg_out8(s, args[0]);
+ tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, a1, a2);
+ tcg_out8(s, a0);
} else {
- tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
- args[0], args[1], args[2]);
+ tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, a0, a1, a2);
}
break;
OP_32_64(st16):
if (const_args[0]) {
- tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
- 0, args[1], args[2]);
- tcg_out16(s, args[0]);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, a1, a2);
+ tcg_out16(s, a0);
} else {
- tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
- args[0], args[1], args[2]);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, a0, a1, a2);
}
break;
#if TCG_TARGET_REG_BITS == 64
@@ -1882,19 +1951,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
#endif
case INDEX_op_st_i32:
if (const_args[0]) {
- tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
- tcg_out32(s, args[0]);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, a1, a2);
+ tcg_out32(s, a0);
} else {
- tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ tcg_out_st(s, TCG_TYPE_I32, a0, a1, a2);
}
break;
OP_32_64(add):
/* For 3-operand addition, use LEA. */
- if (args[0] != args[1]) {
- TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
-
- if (const_args[2]) {
+ if (a0 != a1) {
+ TCGArg c3 = 0;
+ if (const_a2) {
c3 = a2, a2 = -1;
} else if (a0 == a2) {
/* Watch out for dest = src + dest, since we've removed
@@ -1921,36 +1989,35 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
c = ARITH_XOR;
goto gen_arith;
gen_arith:
- if (const_args[2]) {
- tgen_arithi(s, c + rexw, args[0], args[2], 0);
+ if (const_a2) {
+ tgen_arithi(s, c + rexw, a0, a2, 0);
} else {
- tgen_arithr(s, c + rexw, args[0], args[2]);
+ tgen_arithr(s, c + rexw, a0, a2);
}
break;
OP_32_64(andc):
- if (const_args[2]) {
- tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
- args[0], args[1]);
- tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
+ if (const_a2) {
+ tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
+ tgen_arithi(s, ARITH_AND + rexw, a0, ~a2, 0);
} else {
- tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
+ tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1);
}
break;
OP_32_64(mul):
- if (const_args[2]) {
+ if (const_a2) {
int32_t val;
- val = args[2];
+ val = a2;
if (val == (int8_t)val) {
- tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
+ tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0);
tcg_out8(s, val);
} else {
- tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
+ tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0);
tcg_out32(s, val);
}
} else {
- tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
+ tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2);
}
break;
@@ -1962,6 +2029,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
OP_32_64(shl):
+ /* For small constant 3-operand shift, use LEA. */
+ if (const_a2 && a0 != a1 && (a2 - 1) < 3) {
+ if (a2 - 1 == 0) {
+ /* shl $1,a1,a0 -> lea (a1,a1),a0 */
+ tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a1, 0, 0);
+ } else {
+ /* shl $n,a1,a0 -> lea 0(,a1,n),a0 */
+ tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0);
+ }
+ break;
+ }
c = SHIFT_SHL;
vexop = OPC_SHLX;
goto gen_shift_maybe_vex;
@@ -1980,57 +2058,67 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
c = SHIFT_ROR;
goto gen_shift;
gen_shift_maybe_vex:
- if (have_bmi2 && !const_args[2]) {
- tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
- break;
+ if (have_bmi2) {
+ if (!const_a2) {
+ tcg_out_vex_modrm(s, vexop + rexw, a0, a2, a1);
+ break;
+ }
+ tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
}
/* FALLTHRU */
gen_shift:
- if (const_args[2]) {
- tcg_out_shifti(s, c + rexw, args[0], args[2]);
+ if (const_a2) {
+ tcg_out_shifti(s, c + rexw, a0, a2);
} else {
- tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
+ tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, a0);
}
break;
+ OP_32_64(ctz):
+ tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]);
+ break;
+ OP_32_64(clz):
+ tcg_out_clz(s, rexw, args[0], args[1], args[2], const_args[2]);
+ break;
+ OP_32_64(ctpop):
+ tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1);
+ break;
+
case INDEX_op_brcond_i32:
- tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
- arg_label(args[3]), 0);
+ tcg_out_brcond32(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
break;
case INDEX_op_setcond_i32:
- tcg_out_setcond32(s, args[3], args[0], args[1],
- args[2], const_args[2]);
+ tcg_out_setcond32(s, args[3], a0, a1, a2, const_a2);
break;
case INDEX_op_movcond_i32:
- tcg_out_movcond32(s, args[5], args[0], args[1],
- args[2], const_args[2], args[3]);
+ tcg_out_movcond32(s, args[5], a0, a1, a2, const_a2, args[3]);
break;
OP_32_64(bswap16):
- tcg_out_rolw_8(s, args[0]);
+ tcg_out_rolw_8(s, a0);
break;
OP_32_64(bswap32):
- tcg_out_bswap32(s, args[0]);
+ tcg_out_bswap32(s, a0);
break;
OP_32_64(neg):
- tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0);
break;
OP_32_64(not):
- tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0);
break;
OP_32_64(ext8s):
- tcg_out_ext8s(s, args[0], args[1], rexw);
+ tcg_out_ext8s(s, a0, a1, rexw);
break;
OP_32_64(ext16s):
- tcg_out_ext16s(s, args[0], args[1], rexw);
+ tcg_out_ext16s(s, a0, a1, rexw);
break;
OP_32_64(ext8u):
- tcg_out_ext8u(s, args[0], args[1]);
+ tcg_out_ext8u(s, a0, a1);
break;
OP_32_64(ext16u):
- tcg_out_ext16u(s, args[0], args[1]);
+ tcg_out_ext16u(s, a0, a1);
break;
case INDEX_op_qemu_ld_i32:
@@ -2054,26 +2142,26 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
OP_32_64(add2):
if (const_args[4]) {
- tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
+ tgen_arithi(s, ARITH_ADD + rexw, a0, args[4], 1);
} else {
- tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
+ tgen_arithr(s, ARITH_ADD + rexw, a0, args[4]);
}
if (const_args[5]) {
- tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
+ tgen_arithi(s, ARITH_ADC + rexw, a1, args[5], 1);
} else {
- tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
+ tgen_arithr(s, ARITH_ADC + rexw, a1, args[5]);
}
break;
OP_32_64(sub2):
if (const_args[4]) {
- tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
+ tgen_arithi(s, ARITH_SUB + rexw, a0, args[4], 1);
} else {
- tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
+ tgen_arithr(s, ARITH_SUB + rexw, a0, args[4]);
}
if (const_args[5]) {
- tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
+ tgen_arithi(s, ARITH_SBB + rexw, a1, args[5], 1);
} else {
- tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
+ tgen_arithr(s, ARITH_SBB + rexw, a1, args[5]);
}
break;
@@ -2086,65 +2174,94 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
#else /* TCG_TARGET_REG_BITS == 64 */
case INDEX_op_ld32s_i64:
- tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
+ tcg_out_modrm_offset(s, OPC_MOVSLQ, a0, a1, a2);
break;
case INDEX_op_ld_i64:
- tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+ tcg_out_ld(s, TCG_TYPE_I64, a0, a1, a2);
break;
case INDEX_op_st_i64:
if (const_args[0]) {
- tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
- 0, args[1], args[2]);
- tcg_out32(s, args[0]);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, 0, a1, a2);
+ tcg_out32(s, a0);
} else {
- tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+ tcg_out_st(s, TCG_TYPE_I64, a0, a1, a2);
}
break;
case INDEX_op_brcond_i64:
- tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
- arg_label(args[3]), 0);
+ tcg_out_brcond64(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
break;
case INDEX_op_setcond_i64:
- tcg_out_setcond64(s, args[3], args[0], args[1],
- args[2], const_args[2]);
+ tcg_out_setcond64(s, args[3], a0, a1, a2, const_a2);
break;
case INDEX_op_movcond_i64:
- tcg_out_movcond64(s, args[5], args[0], args[1],
- args[2], const_args[2], args[3]);
+ tcg_out_movcond64(s, args[5], a0, a1, a2, const_a2, args[3]);
break;
case INDEX_op_bswap64_i64:
- tcg_out_bswap64(s, args[0]);
+ tcg_out_bswap64(s, a0);
break;
case INDEX_op_extu_i32_i64:
case INDEX_op_ext32u_i64:
- tcg_out_ext32u(s, args[0], args[1]);
+ tcg_out_ext32u(s, a0, a1);
break;
case INDEX_op_ext_i32_i64:
case INDEX_op_ext32s_i64:
- tcg_out_ext32s(s, args[0], args[1]);
+ tcg_out_ext32s(s, a0, a1);
break;
#endif
OP_32_64(deposit):
if (args[3] == 0 && args[4] == 8) {
/* load bits 0..7 */
- tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
- args[2], args[0]);
+ tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0);
} else if (args[3] == 8 && args[4] == 8) {
/* load bits 8..15 */
- tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
+ tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4);
} else if (args[3] == 0 && args[4] == 16) {
/* load bits 0..15 */
- tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
+ tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0);
} else {
tcg_abort();
}
break;
+ case INDEX_op_extract_i64:
+ if (a2 + args[3] == 32) {
+ /* This is a 32-bit zero-extending right shift. */
+ tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
+ tcg_out_shifti(s, SHIFT_SHR, a0, a2);
+ break;
+ }
+ /* FALLTHRU */
+ case INDEX_op_extract_i32:
+ /* On the off-chance that we can use the high-byte registers.
+ Otherwise we emit the same ext16 + shift pattern that we
+ would have gotten from the normal tcg-op.c expansion. */
+ tcg_debug_assert(a2 == 8 && args[3] == 8);
+ if (a1 < 4 && a0 < 8) {
+ tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
+ } else {
+ tcg_out_ext16u(s, a0, a1);
+ tcg_out_shifti(s, SHIFT_SHR, a0, 8);
+ }
+ break;
+
+ case INDEX_op_sextract_i32:
+ /* We don't implement sextract_i64, as we cannot sign-extend to
+ 64-bits without using the REX prefix that explicitly excludes
+ access to the high-byte registers. */
+ tcg_debug_assert(a2 == 8 && args[3] == 8);
+ if (a1 < 4 && a0 < 8) {
+ tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
+ } else {
+ tcg_out_ext16s(s, a0, a1, 0);
+ tcg_out_shifti(s, SHIFT_SAR, a0, 8);
+ }
+ break;
+
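    /* Equivalent C for the 8,8 sextract fallback above (a sketch): when
       the movsbl-from-%ah form is unusable, sign-extend the low 16 bits
       and arithmetic-shift the field down:
           dest = (int32_t)(int16_t)t1 >> 8;
       which matches the ext16s + SAR pair emitted above. */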
case INDEX_op_mb:
- tcg_out_mb(s, args[0]);
+ tcg_out_mb(s, a0);
break;
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
case INDEX_op_mov_i64:
@@ -2158,139 +2275,231 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
#undef OP_32_64
}
-static const TCGTargetOpDef x86_op_defs[] = {
- { INDEX_op_exit_tb, { } },
- { INDEX_op_goto_tb, { } },
- { INDEX_op_br, { } },
- { INDEX_op_ld8u_i32, { "r", "r" } },
- { INDEX_op_ld8s_i32, { "r", "r" } },
- { INDEX_op_ld16u_i32, { "r", "r" } },
- { INDEX_op_ld16s_i32, { "r", "r" } },
- { INDEX_op_ld_i32, { "r", "r" } },
- { INDEX_op_st8_i32, { "qi", "r" } },
- { INDEX_op_st16_i32, { "ri", "r" } },
- { INDEX_op_st_i32, { "ri", "r" } },
-
- { INDEX_op_add_i32, { "r", "r", "ri" } },
- { INDEX_op_sub_i32, { "r", "0", "ri" } },
- { INDEX_op_mul_i32, { "r", "0", "ri" } },
- { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
- { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
- { INDEX_op_and_i32, { "r", "0", "ri" } },
- { INDEX_op_or_i32, { "r", "0", "ri" } },
- { INDEX_op_xor_i32, { "r", "0", "ri" } },
- { INDEX_op_andc_i32, { "r", "r", "ri" } },
-
- { INDEX_op_shl_i32, { "r", "0", "Ci" } },
- { INDEX_op_shr_i32, { "r", "0", "Ci" } },
- { INDEX_op_sar_i32, { "r", "0", "Ci" } },
- { INDEX_op_rotl_i32, { "r", "0", "ci" } },
- { INDEX_op_rotr_i32, { "r", "0", "ci" } },
-
- { INDEX_op_brcond_i32, { "r", "ri" } },
-
- { INDEX_op_bswap16_i32, { "r", "0" } },
- { INDEX_op_bswap32_i32, { "r", "0" } },
-
- { INDEX_op_neg_i32, { "r", "0" } },
-
- { INDEX_op_not_i32, { "r", "0" } },
-
- { INDEX_op_ext8s_i32, { "r", "q" } },
- { INDEX_op_ext16s_i32, { "r", "r" } },
- { INDEX_op_ext8u_i32, { "r", "q" } },
- { INDEX_op_ext16u_i32, { "r", "r" } },
-
- { INDEX_op_setcond_i32, { "q", "r", "ri" } },
-
- { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
- { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
-
- { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
- { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
- { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
- { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
-
- { INDEX_op_mb, { } },
+static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+{
+ static const TCGTargetOpDef ri_r = { .args_ct_str = { "ri", "r" } };
+ static const TCGTargetOpDef re_r = { .args_ct_str = { "re", "r" } };
+ static const TCGTargetOpDef qi_r = { .args_ct_str = { "qi", "r" } };
+ static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
+ static const TCGTargetOpDef r_q = { .args_ct_str = { "r", "q" } };
+ static const TCGTargetOpDef r_re = { .args_ct_str = { "r", "re" } };
+ static const TCGTargetOpDef r_0 = { .args_ct_str = { "r", "0" } };
+ static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
+ static const TCGTargetOpDef r_r_re = { .args_ct_str = { "r", "r", "re" } };
+ static const TCGTargetOpDef r_0_re = { .args_ct_str = { "r", "0", "re" } };
+ static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } };
+ static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
+ static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } };
+ static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
+ static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } };
+ static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
+ static const TCGTargetOpDef r_r_L_L
+ = { .args_ct_str = { "r", "r", "L", "L" } };
+ static const TCGTargetOpDef L_L_L_L
+ = { .args_ct_str = { "L", "L", "L", "L" } };
+
+ switch (op) {
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8u_i64:
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld8s_i64:
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16u_i64:
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld16s_i64:
+ case INDEX_op_ld_i32:
+ case INDEX_op_ld32u_i64:
+ case INDEX_op_ld32s_i64:
+ case INDEX_op_ld_i64:
+ return &r_r;
-#if TCG_TARGET_REG_BITS == 32
- { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
- { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
-#else
- { INDEX_op_ld8u_i64, { "r", "r" } },
- { INDEX_op_ld8s_i64, { "r", "r" } },
- { INDEX_op_ld16u_i64, { "r", "r" } },
- { INDEX_op_ld16s_i64, { "r", "r" } },
- { INDEX_op_ld32u_i64, { "r", "r" } },
- { INDEX_op_ld32s_i64, { "r", "r" } },
- { INDEX_op_ld_i64, { "r", "r" } },
- { INDEX_op_st8_i64, { "ri", "r" } },
- { INDEX_op_st16_i64, { "ri", "r" } },
- { INDEX_op_st32_i64, { "ri", "r" } },
- { INDEX_op_st_i64, { "re", "r" } },
-
- { INDEX_op_add_i64, { "r", "r", "re" } },
- { INDEX_op_mul_i64, { "r", "0", "re" } },
- { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
- { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
- { INDEX_op_sub_i64, { "r", "0", "re" } },
- { INDEX_op_and_i64, { "r", "0", "reZ" } },
- { INDEX_op_or_i64, { "r", "0", "re" } },
- { INDEX_op_xor_i64, { "r", "0", "re" } },
- { INDEX_op_andc_i64, { "r", "r", "rI" } },
-
- { INDEX_op_shl_i64, { "r", "0", "Ci" } },
- { INDEX_op_shr_i64, { "r", "0", "Ci" } },
- { INDEX_op_sar_i64, { "r", "0", "Ci" } },
- { INDEX_op_rotl_i64, { "r", "0", "ci" } },
- { INDEX_op_rotr_i64, { "r", "0", "ci" } },
-
- { INDEX_op_brcond_i64, { "r", "re" } },
- { INDEX_op_setcond_i64, { "r", "r", "re" } },
-
- { INDEX_op_bswap16_i64, { "r", "0" } },
- { INDEX_op_bswap32_i64, { "r", "0" } },
- { INDEX_op_bswap64_i64, { "r", "0" } },
- { INDEX_op_neg_i64, { "r", "0" } },
- { INDEX_op_not_i64, { "r", "0" } },
-
- { INDEX_op_ext8s_i64, { "r", "r" } },
- { INDEX_op_ext16s_i64, { "r", "r" } },
- { INDEX_op_ext32s_i64, { "r", "r" } },
- { INDEX_op_ext8u_i64, { "r", "r" } },
- { INDEX_op_ext16u_i64, { "r", "r" } },
- { INDEX_op_ext32u_i64, { "r", "r" } },
-
- { INDEX_op_ext_i32_i64, { "r", "r" } },
- { INDEX_op_extu_i32_i64, { "r", "r" } },
-
- { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
- { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
-
- { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
- { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
- { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
- { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
-#endif
+ case INDEX_op_st8_i32:
+ case INDEX_op_st8_i64:
+ return &qi_r;
+ case INDEX_op_st16_i32:
+ case INDEX_op_st16_i64:
+ case INDEX_op_st_i32:
+ case INDEX_op_st32_i64:
+ return &ri_r;
+ case INDEX_op_st_i64:
+ return &re_r;
+
+ case INDEX_op_add_i32:
+ case INDEX_op_add_i64:
+ return &r_r_re;
+ case INDEX_op_sub_i32:
+ case INDEX_op_sub_i64:
+ case INDEX_op_mul_i32:
+ case INDEX_op_mul_i64:
+ case INDEX_op_or_i32:
+ case INDEX_op_or_i64:
+ case INDEX_op_xor_i32:
+ case INDEX_op_xor_i64:
+ return &r_0_re;
+
+ case INDEX_op_and_i32:
+ case INDEX_op_and_i64:
+ {
+ static const TCGTargetOpDef and
+ = { .args_ct_str = { "r", "0", "reZ" } };
+ return &and;
+ }
+ break;
+ case INDEX_op_andc_i32:
+ case INDEX_op_andc_i64:
+ {
+ static const TCGTargetOpDef andc
+ = { .args_ct_str = { "r", "r", "rI" } };
+ return &andc;
+ }
+ break;
-#if TCG_TARGET_REG_BITS == 64
- { INDEX_op_qemu_ld_i32, { "r", "L" } },
- { INDEX_op_qemu_st_i32, { "L", "L" } },
- { INDEX_op_qemu_ld_i64, { "r", "L" } },
- { INDEX_op_qemu_st_i64, { "L", "L" } },
-#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
- { INDEX_op_qemu_ld_i32, { "r", "L" } },
- { INDEX_op_qemu_st_i32, { "L", "L" } },
- { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
- { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
-#else
- { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
- { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
- { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
- { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
-#endif
- { -1 },
-};
+ case INDEX_op_shl_i32:
+ case INDEX_op_shl_i64:
+ case INDEX_op_shr_i32:
+ case INDEX_op_shr_i64:
+ case INDEX_op_sar_i32:
+ case INDEX_op_sar_i64:
+ return have_bmi2 ? &r_r_ri : &r_0_ci;
+ case INDEX_op_rotl_i32:
+ case INDEX_op_rotl_i64:
+ case INDEX_op_rotr_i32:
+ case INDEX_op_rotr_i64:
+ return &r_0_ci;
+
+ case INDEX_op_brcond_i32:
+ case INDEX_op_brcond_i64:
+ return &r_re;
+
+ case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap16_i64:
+ case INDEX_op_bswap32_i32:
+ case INDEX_op_bswap32_i64:
+ case INDEX_op_bswap64_i64:
+ case INDEX_op_neg_i32:
+ case INDEX_op_neg_i64:
+ case INDEX_op_not_i32:
+ case INDEX_op_not_i64:
+ return &r_0;
+
+ case INDEX_op_ext8s_i32:
+ case INDEX_op_ext8s_i64:
+ case INDEX_op_ext8u_i32:
+ case INDEX_op_ext8u_i64:
+ return &r_q;
+ case INDEX_op_ext16s_i32:
+ case INDEX_op_ext16s_i64:
+ case INDEX_op_ext16u_i32:
+ case INDEX_op_ext16u_i64:
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext32u_i64:
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_extu_i32_i64:
+ case INDEX_op_extract_i32:
+ case INDEX_op_extract_i64:
+ case INDEX_op_sextract_i32:
+ case INDEX_op_ctpop_i32:
+ case INDEX_op_ctpop_i64:
+ return &r_r;
+
+ case INDEX_op_deposit_i32:
+ case INDEX_op_deposit_i64:
+ {
+ static const TCGTargetOpDef dep
+ = { .args_ct_str = { "Q", "0", "Q" } };
+ return &dep;
+ }
+ case INDEX_op_setcond_i32:
+ case INDEX_op_setcond_i64:
+ {
+ static const TCGTargetOpDef setc
+ = { .args_ct_str = { "q", "r", "re" } };
+ return &setc;
+ }
+ case INDEX_op_movcond_i32:
+ case INDEX_op_movcond_i64:
+ {
+ static const TCGTargetOpDef movc
+ = { .args_ct_str = { "r", "r", "re", "r", "0" } };
+ return &movc;
+ }
+ case INDEX_op_div2_i32:
+ case INDEX_op_div2_i64:
+ case INDEX_op_divu2_i32:
+ case INDEX_op_divu2_i64:
+ {
+ static const TCGTargetOpDef div2
+ = { .args_ct_str = { "a", "d", "0", "1", "r" } };
+ return &div2;
+ }
+ case INDEX_op_mulu2_i32:
+ case INDEX_op_mulu2_i64:
+ case INDEX_op_muls2_i32:
+ case INDEX_op_muls2_i64:
+ {
+ static const TCGTargetOpDef mul2
+ = { .args_ct_str = { "a", "d", "a", "r" } };
+ return &mul2;
+ }
+ case INDEX_op_add2_i32:
+ case INDEX_op_add2_i64:
+ case INDEX_op_sub2_i32:
+ case INDEX_op_sub2_i64:
+ {
+ static const TCGTargetOpDef arith2
+ = { .args_ct_str = { "r", "r", "0", "1", "re", "re" } };
+ return &arith2;
+ }
+ case INDEX_op_ctz_i32:
+ case INDEX_op_ctz_i64:
+ {
+ static const TCGTargetOpDef ctz[2] = {
+ { .args_ct_str = { "&r", "r", "r" } },
+ { .args_ct_str = { "&r", "r", "rW" } },
+ };
+ return &ctz[have_bmi1];
+ }
+ case INDEX_op_clz_i32:
+ case INDEX_op_clz_i64:
+ {
+ static const TCGTargetOpDef clz[2] = {
+ { .args_ct_str = { "&r", "r", "r" } },
+ { .args_ct_str = { "&r", "r", "rW" } },
+ };
+ return &clz[have_lzcnt];
+ }
+
+ case INDEX_op_qemu_ld_i32:
+ return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
+ case INDEX_op_qemu_st_i32:
+ return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L : &L_L_L;
+ case INDEX_op_qemu_ld_i64:
+ return (TCG_TARGET_REG_BITS == 64 ? &r_L
+ : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L
+ : &r_r_L_L);
+ case INDEX_op_qemu_st_i64:
+ return (TCG_TARGET_REG_BITS == 64 ? &L_L
+ : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L_L
+ : &L_L_L_L);
+
+ case INDEX_op_brcond2_i32:
+ {
+ static const TCGTargetOpDef b2
+ = { .args_ct_str = { "r", "r", "ri", "ri" } };
+ return &b2;
+ }
+ case INDEX_op_setcond2_i32:
+ {
+ static const TCGTargetOpDef s2
+ = { .args_ct_str = { "r", "r", "r", "ri", "ri" } };
+ return &s2;
+ }
+
+ default:
+ break;
+ }
+ return NULL;
+}
static int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
@@ -2395,6 +2604,9 @@ static void tcg_target_init(TCGContext *s)
need to probe for it. */
have_movbe = (c & bit_MOVBE) != 0;
#endif
+#ifdef bit_POPCNT
+ have_popcnt = (c & bit_POPCNT) != 0;
+#endif
}
if (max >= 7) {
@@ -2409,6 +2621,15 @@ static void tcg_target_init(TCGContext *s)
}
#endif
+#ifndef have_lzcnt
+ max = __get_cpuid_max(0x80000000, 0);
+ if (max >= 1) {
+ __cpuid(0x80000001, a, b, c, d);
+ /* LZCNT was introduced with AMD Barcelona and Intel Haswell CPUs. */
+ have_lzcnt = (c & bit_LZCNT) != 0;
+ }
+#endif
+
if (TCG_TARGET_REG_BITS == 64) {
tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
@@ -2433,8 +2654,6 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_clear(s->reserved_regs);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
-
- tcg_add_target_add_op_defs(x86_op_defs);
}
typedef struct {
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 6dddb7f772..42aea03a8b 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -140,6 +140,12 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i32 1
#define TCG_TARGET_HAS_nand_i64 1
#define TCG_TARGET_HAS_nor_i32 1
+#define TCG_TARGET_HAS_clz_i32 0
+#define TCG_TARGET_HAS_clz_i64 0
+#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_nor_i64 1
#define TCG_TARGET_HAS_orc_i32 1
#define TCG_TARGET_HAS_orc_i64 1
@@ -149,6 +155,10 @@ typedef enum {
#define TCG_TARGET_HAS_movcond_i64 1
#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i32 0
+#define TCG_TARGET_HAS_extract_i64 0
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_sextract_i64 0
#define TCG_TARGET_HAS_add2_i32 0
#define TCG_TARGET_HAS_add2_i64 0
#define TCG_TARGET_HAS_sub2_i32 0
diff --git a/tcg/ia64/tcg-target.inc.c b/tcg/ia64/tcg-target.inc.c
index b04d716c3d..bf9a97d75c 100644
--- a/tcg/ia64/tcg-target.inc.c
+++ b/tcg/ia64/tcg-target.inc.c
@@ -721,12 +721,10 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
*/
/* parse target specific constraints */
-static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+static const char *target_parse_constraint(TCGArgConstraint *ct,
+ const char *ct_str, TCGType type)
{
- const char *ct_str;
-
- ct_str = *pct_str;
- switch(ct_str[0]) {
+ switch(*ct_str++) {
case 'r':
ct->ct |= TCG_CT_REG;
tcg_regset_set(ct->u.regs, 0xffffffffffffffffull);
@@ -750,11 +748,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
ct->ct |= TCG_CT_CONST_ZERO;
break;
default:
- return -1;
+ return NULL;
}
- ct_str++;
- *pct_str = ct_str;
- return 0;
+ return ct_str;
}
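The reworked parser threads its cursor through the call: consume one constraint letter, then return the advanced pointer, or NULL for an unknown letter. A minimal model of the contract (letters illustrative):

    #include <stdio.h>

    static const char *parse_one(const char *s)
    {
        switch (*s++) {
        case 'r':           /* any register */
        case 'I':           /* some immediate class */
        case 'Z':           /* the zero register */
            return s;       /* recognised: hand back the rest of the string */
        default:
            return NULL;    /* unknown constraint letter */
        }
    }

    int main(void)
    {
        const char *p = "rZ";
        while (p && *p) {
            p = parse_one(p);
        }
        printf(p ? "ok\n" : "parse error\n");
        return 0;
    }
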
/* test if a constant matches the constraint */
@@ -2352,6 +2348,18 @@ static const TCGTargetOpDef ia64_op_defs[] = {
{ -1 },
};
+static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+{
+ int i, n = ARRAY_SIZE(ia64_op_defs);
+
+ for (i = 0; i < n; ++i) {
+ if (ia64_op_defs[i].op == op) {
+ return &ia64_op_defs[i];
+ }
+ }
+ return NULL;
+}
+
/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
@@ -2471,6 +2479,4 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R5);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R7);
-
- tcg_add_target_add_op_defs(ia64_op_defs);
}
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 3aeac87614..f46d64a3a7 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -27,6 +27,14 @@
#ifndef MIPS_TCG_TARGET_H
#define MIPS_TCG_TARGET_H
+#if _MIPS_SIM == _ABIO32
+# define TCG_TARGET_REG_BITS 32
+#elif _MIPS_SIM == _ABIN32 || _MIPS_SIM == _ABI64
+# define TCG_TARGET_REG_BITS 64
+#else
+# error "Unknown ABI"
+#endif
+
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
#define TCG_TARGET_NB_REGS 32
@@ -70,9 +78,13 @@ typedef enum {
} TCGReg;
/* used for function call generation */
-#define TCG_TARGET_STACK_ALIGN 8
-#define TCG_TARGET_CALL_STACK_OFFSET 16
-#define TCG_TARGET_CALL_ALIGN_ARGS 1
+#define TCG_TARGET_STACK_ALIGN 16
+#if _MIPS_SIM == _ABIO32
+# define TCG_TARGET_CALL_STACK_OFFSET 16
+#else
+# define TCG_TARGET_CALL_STACK_OFFSET 0
+#endif
+#define TCG_TARGET_CALL_ALIGN_ARGS 1
/* MOVN/MOVZ instructions detection */
#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
@@ -117,21 +129,71 @@ extern bool use_mips32r2_instructions;
#define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions)
#define TCG_TARGET_HAS_muluh_i32 1
#define TCG_TARGET_HAS_mulsh_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_extrl_i64_i32 1
+#define TCG_TARGET_HAS_extrh_i64_i32 1
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_nor_i64 1
+#define TCG_TARGET_HAS_andc_i64 0
+#define TCG_TARGET_HAS_orc_i64 0
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_add2_i64 0
+#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_mulu2_i64 (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muls2_i64 (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#endif
/* optional instructions detected at runtime */
#define TCG_TARGET_HAS_movcond_i32 use_movnz_instructions
#define TCG_TARGET_HAS_bswap16_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_bswap32_i32 use_mips32r2_instructions
#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_extract_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_sextract_i32 0
#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions
#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions
#define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_clz_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 0
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_movcond_i64 use_movnz_instructions
+#define TCG_TARGET_HAS_bswap16_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_bswap32_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_bswap64_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_extract_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_rot_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_clz_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 0
+#endif
/* optional instructions automatically implemented */
#define TCG_TARGET_HAS_neg_i32 0 /* sub rd, zero, rt */
#define TCG_TARGET_HAS_ext8u_i32 0 /* andi rt, rs, 0xff */
#define TCG_TARGET_HAS_ext16u_i32 0 /* andi rt, rs, 0xffff */
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_neg_i64 0 /* sub rd, zero, rt */
+#define TCG_TARGET_HAS_ext8u_i64 0 /* andi rt, rs, 0xff */
+#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
+#endif
+
#ifdef __OpenBSD__
#include <machine/sysarch.h>
#else
diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c
index abce6026f8..01ac7b2c81 100644
--- a/tcg/mips/tcg-target.inc.c
+++ b/tcg/mips/tcg-target.inc.c
@@ -32,8 +32,16 @@
# define MIPS_BE 0
#endif
-#define LO_OFF (MIPS_BE * 4)
-#define HI_OFF (4 - LO_OFF)
+#if TCG_TARGET_REG_BITS == 32
+# define LO_OFF (MIPS_BE * 4)
+# define HI_OFF (4 - LO_OFF)
+#else
+/* Assert at link time that these values are never used for
+   TCG_TARGET_REG_BITS == 64: any surviving call to the undefined
+   function below fails to link. */
+/* extern */ int link_error(void);
+# define LO_OFF link_error()
+# define HI_OFF link_error()
+#endif
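The idiom relies on dead-code elimination: under TCG_TARGET_REG_BITS == 64, every use of LO_OFF/HI_OFF sits behind a condition that folds to false, so no reference to the undefined function survives; if one ever does, the build breaks at link time rather than silently using a bogus offset. A minimal model:

    int link_error(void);                /* declared, defined nowhere */
    #define HI_OFF_DEMO link_error()

    int main(void)
    {
        /* The constant condition folds in the front end, so the dead
           arm -- and the only reference to link_error() -- never
           reaches the linker.  Flip the 1 to 0 and linking fails. */
        return 1 ? 0 : HI_OFF_DEMO;
    }
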
#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
@@ -74,6 +82,8 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#define TCG_TMP0 TCG_REG_AT
#define TCG_TMP1 TCG_REG_T9
+#define TCG_TMP2 TCG_REG_T8
+#define TCG_TMP3 TCG_REG_T7
/* check if we really need so many registers :P */
static const int tcg_target_reg_alloc_order[] = {
@@ -89,10 +99,6 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_S8,
/* Call clobbered registers. */
- TCG_REG_T0,
- TCG_REG_T1,
- TCG_REG_T2,
- TCG_REG_T3,
TCG_REG_T4,
TCG_REG_T5,
TCG_REG_T6,
@@ -103,17 +109,27 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_V0,
/* Argument registers, opposite order of allocation. */
+ TCG_REG_T3,
+ TCG_REG_T2,
+ TCG_REG_T1,
+ TCG_REG_T0,
TCG_REG_A3,
TCG_REG_A2,
TCG_REG_A1,
TCG_REG_A0,
};
-static const TCGReg tcg_target_call_iarg_regs[4] = {
+static const TCGReg tcg_target_call_iarg_regs[] = {
TCG_REG_A0,
TCG_REG_A1,
TCG_REG_A2,
- TCG_REG_A3
+ TCG_REG_A3,
+#if _MIPS_SIM == _ABIN32 || _MIPS_SIM == _ABI64
+ TCG_REG_T0,
+ TCG_REG_T1,
+ TCG_REG_T2,
+ TCG_REG_T3,
+#endif
};
static const TCGReg tcg_target_call_oarg_regs[2] = {
@@ -122,6 +138,9 @@ static const TCGReg tcg_target_call_oarg_regs[2] = {
};
static tcg_insn_unit *tb_ret_addr;
+static tcg_insn_unit *bswap32_addr;
+static tcg_insn_unit *bswap32u_addr;
+static tcg_insn_unit *bswap64_addr;
static inline uint32_t reloc_pc16_val(tcg_insn_unit *pc, tcg_insn_unit *target)
{
@@ -160,6 +179,7 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
#define TCG_CT_CONST_S16 0x400 /* Signed 16-bit: -32768 - 32767 */
#define TCG_CT_CONST_P2M1 0x800 /* Power of 2 minus 1. */
#define TCG_CT_CONST_N16 0x1000 /* "Negatable" 16-bit: -32767 - 32767 */
+#define TCG_CT_CONST_WSZ 0x2000 /* word size */
static inline bool is_p2m1(tcg_target_long val)
{
@@ -167,27 +187,20 @@ static inline bool is_p2m1(tcg_target_long val)
}
/* parse target specific constraints */
-static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+static const char *target_parse_constraint(TCGArgConstraint *ct,
+ const char *ct_str, TCGType type)
{
- const char *ct_str;
-
- ct_str = *pct_str;
- switch(ct_str[0]) {
+ switch(*ct_str++) {
case 'r':
ct->ct |= TCG_CT_REG;
tcg_regset_set(ct->u.regs, 0xffffffff);
break;
- case 'L': /* qemu_ld output arg constraint */
- ct->ct |= TCG_CT_REG;
- tcg_regset_set(ct->u.regs, 0xffffffff);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_V0);
- break;
- case 'l': /* qemu_ld input arg constraint */
+ case 'L': /* qemu_ld input arg constraint */
ct->ct |= TCG_CT_REG;
tcg_regset_set(ct->u.regs, 0xffffffff);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
#if defined(CONFIG_SOFTMMU)
- if (TARGET_LONG_BITS == 64) {
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
}
#endif
@@ -197,11 +210,11 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_set(ct->u.regs, 0xffffffff);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
#if defined(CONFIG_SOFTMMU)
- if (TARGET_LONG_BITS == 32) {
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1);
- } else {
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3);
+ } else {
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1);
}
#endif
break;
@@ -217,6 +230,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
case 'N':
ct->ct |= TCG_CT_CONST_N16;
break;
+ case 'W':
+ ct->ct |= TCG_CT_CONST_WSZ;
+ break;
case 'Z':
/* We are cheating a bit here, using the fact that the register
ZERO is also the register number 0. Hence there is no need
@@ -224,11 +240,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
ct->ct |= TCG_CT_CONST_ZERO;
break;
default:
- return -1;
+ return NULL;
}
- ct_str++;
- *pct_str = ct_str;
- return 0;
+ return ct_str;
}
/* test if a constant matches the constraint */
@@ -250,87 +264,131 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
} else if ((ct & TCG_CT_CONST_P2M1)
&& use_mips32r2_instructions && is_p2m1(val)) {
return 1;
+ } else if ((ct & TCG_CT_CONST_WSZ)
+ && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
+ return 1;
}
return 0;
}
/* instruction opcodes */
typedef enum {
- OPC_J = 0x02 << 26,
- OPC_JAL = 0x03 << 26,
- OPC_BEQ = 0x04 << 26,
- OPC_BNE = 0x05 << 26,
- OPC_BLEZ = 0x06 << 26,
- OPC_BGTZ = 0x07 << 26,
- OPC_ADDIU = 0x09 << 26,
- OPC_SLTI = 0x0A << 26,
- OPC_SLTIU = 0x0B << 26,
- OPC_ANDI = 0x0C << 26,
- OPC_ORI = 0x0D << 26,
- OPC_XORI = 0x0E << 26,
- OPC_LUI = 0x0F << 26,
- OPC_LB = 0x20 << 26,
- OPC_LH = 0x21 << 26,
- OPC_LW = 0x23 << 26,
- OPC_LBU = 0x24 << 26,
- OPC_LHU = 0x25 << 26,
- OPC_LWU = 0x27 << 26,
- OPC_SB = 0x28 << 26,
- OPC_SH = 0x29 << 26,
- OPC_SW = 0x2B << 26,
-
- OPC_SPECIAL = 0x00 << 26,
- OPC_SLL = OPC_SPECIAL | 0x00,
- OPC_SRL = OPC_SPECIAL | 0x02,
- OPC_ROTR = OPC_SPECIAL | (0x01 << 21) | 0x02,
- OPC_SRA = OPC_SPECIAL | 0x03,
- OPC_SLLV = OPC_SPECIAL | 0x04,
- OPC_SRLV = OPC_SPECIAL | 0x06,
- OPC_ROTRV = OPC_SPECIAL | (0x01 << 6) | 0x06,
- OPC_SRAV = OPC_SPECIAL | 0x07,
- OPC_JR_R5 = OPC_SPECIAL | 0x08,
- OPC_JALR = OPC_SPECIAL | 0x09,
- OPC_MOVZ = OPC_SPECIAL | 0x0A,
- OPC_MOVN = OPC_SPECIAL | 0x0B,
- OPC_SYNC = OPC_SPECIAL | 0x0F,
- OPC_MFHI = OPC_SPECIAL | 0x10,
- OPC_MFLO = OPC_SPECIAL | 0x12,
- OPC_MULT = OPC_SPECIAL | 0x18,
- OPC_MUL_R6 = OPC_SPECIAL | (0x02 << 6) | 0x18,
- OPC_MUH = OPC_SPECIAL | (0x03 << 6) | 0x18,
- OPC_MULTU = OPC_SPECIAL | 0x19,
- OPC_MULU = OPC_SPECIAL | (0x02 << 6) | 0x19,
- OPC_MUHU = OPC_SPECIAL | (0x03 << 6) | 0x19,
- OPC_DIV = OPC_SPECIAL | 0x1A,
- OPC_DIV_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1A,
- OPC_MOD = OPC_SPECIAL | (0x03 << 6) | 0x1A,
- OPC_DIVU = OPC_SPECIAL | 0x1B,
- OPC_DIVU_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1B,
- OPC_MODU = OPC_SPECIAL | (0x03 << 6) | 0x1B,
- OPC_ADDU = OPC_SPECIAL | 0x21,
- OPC_SUBU = OPC_SPECIAL | 0x23,
- OPC_AND = OPC_SPECIAL | 0x24,
- OPC_OR = OPC_SPECIAL | 0x25,
- OPC_XOR = OPC_SPECIAL | 0x26,
- OPC_NOR = OPC_SPECIAL | 0x27,
- OPC_SLT = OPC_SPECIAL | 0x2A,
- OPC_SLTU = OPC_SPECIAL | 0x2B,
- OPC_SELEQZ = OPC_SPECIAL | 0x35,
- OPC_SELNEZ = OPC_SPECIAL | 0x37,
-
- OPC_REGIMM = 0x01 << 26,
- OPC_BLTZ = OPC_REGIMM | (0x00 << 16),
- OPC_BGEZ = OPC_REGIMM | (0x01 << 16),
-
- OPC_SPECIAL2 = 0x1c << 26,
- OPC_MUL_R5 = OPC_SPECIAL2 | 0x002,
-
- OPC_SPECIAL3 = 0x1f << 26,
- OPC_EXT = OPC_SPECIAL3 | 0x000,
- OPC_INS = OPC_SPECIAL3 | 0x004,
- OPC_WSBH = OPC_SPECIAL3 | 0x0a0,
- OPC_SEB = OPC_SPECIAL3 | 0x420,
- OPC_SEH = OPC_SPECIAL3 | 0x620,
+ OPC_J = 002 << 26,
+ OPC_JAL = 003 << 26,
+ OPC_BEQ = 004 << 26,
+ OPC_BNE = 005 << 26,
+ OPC_BLEZ = 006 << 26,
+ OPC_BGTZ = 007 << 26,
+ OPC_ADDIU = 011 << 26,
+ OPC_SLTI = 012 << 26,
+ OPC_SLTIU = 013 << 26,
+ OPC_ANDI = 014 << 26,
+ OPC_ORI = 015 << 26,
+ OPC_XORI = 016 << 26,
+ OPC_LUI = 017 << 26,
+ OPC_DADDIU = 031 << 26,
+ OPC_LB = 040 << 26,
+ OPC_LH = 041 << 26,
+ OPC_LW = 043 << 26,
+ OPC_LBU = 044 << 26,
+ OPC_LHU = 045 << 26,
+ OPC_LWU = 047 << 26,
+ OPC_SB = 050 << 26,
+ OPC_SH = 051 << 26,
+ OPC_SW = 053 << 26,
+ OPC_LD = 067 << 26,
+ OPC_SD = 077 << 26,
+
+ OPC_SPECIAL = 000 << 26,
+ OPC_SLL = OPC_SPECIAL | 000,
+ OPC_SRL = OPC_SPECIAL | 002,
+ OPC_ROTR = OPC_SPECIAL | 002 | (1 << 21),
+ OPC_SRA = OPC_SPECIAL | 003,
+ OPC_SLLV = OPC_SPECIAL | 004,
+ OPC_SRLV = OPC_SPECIAL | 006,
+ OPC_ROTRV = OPC_SPECIAL | 006 | 0100,
+ OPC_SRAV = OPC_SPECIAL | 007,
+ OPC_JR_R5 = OPC_SPECIAL | 010,
+ OPC_JALR = OPC_SPECIAL | 011,
+ OPC_MOVZ = OPC_SPECIAL | 012,
+ OPC_MOVN = OPC_SPECIAL | 013,
+ OPC_SYNC = OPC_SPECIAL | 017,
+ OPC_MFHI = OPC_SPECIAL | 020,
+ OPC_MFLO = OPC_SPECIAL | 022,
+ OPC_DSLLV = OPC_SPECIAL | 024,
+ OPC_DSRLV = OPC_SPECIAL | 026,
+ OPC_DROTRV = OPC_SPECIAL | 026 | 0100,
+ OPC_DSRAV = OPC_SPECIAL | 027,
+ OPC_MULT = OPC_SPECIAL | 030,
+ OPC_MUL_R6 = OPC_SPECIAL | 030 | 0200,
+ OPC_MUH = OPC_SPECIAL | 030 | 0300,
+ OPC_MULTU = OPC_SPECIAL | 031,
+ OPC_MULU = OPC_SPECIAL | 031 | 0200,
+ OPC_MUHU = OPC_SPECIAL | 031 | 0300,
+ OPC_DIV = OPC_SPECIAL | 032,
+ OPC_DIV_R6 = OPC_SPECIAL | 032 | 0200,
+ OPC_MOD = OPC_SPECIAL | 032 | 0300,
+ OPC_DIVU = OPC_SPECIAL | 033,
+ OPC_DIVU_R6 = OPC_SPECIAL | 033 | 0200,
+ OPC_MODU = OPC_SPECIAL | 033 | 0300,
+ OPC_DMULT = OPC_SPECIAL | 034,
+ OPC_DMUL = OPC_SPECIAL | 034 | 0200,
+ OPC_DMUH = OPC_SPECIAL | 034 | 0300,
+ OPC_DMULTU = OPC_SPECIAL | 035,
+ OPC_DMULU = OPC_SPECIAL | 035 | 0200,
+ OPC_DMUHU = OPC_SPECIAL | 035 | 0300,
+ OPC_DDIV = OPC_SPECIAL | 036,
+ OPC_DDIV_R6 = OPC_SPECIAL | 036 | 0200,
+ OPC_DMOD = OPC_SPECIAL | 036 | 0300,
+ OPC_DDIVU = OPC_SPECIAL | 037,
+ OPC_DDIVU_R6 = OPC_SPECIAL | 037 | 0200,
+ OPC_DMODU = OPC_SPECIAL | 037 | 0300,
+ OPC_ADDU = OPC_SPECIAL | 041,
+ OPC_SUBU = OPC_SPECIAL | 043,
+ OPC_AND = OPC_SPECIAL | 044,
+ OPC_OR = OPC_SPECIAL | 045,
+ OPC_XOR = OPC_SPECIAL | 046,
+ OPC_NOR = OPC_SPECIAL | 047,
+ OPC_SLT = OPC_SPECIAL | 052,
+ OPC_SLTU = OPC_SPECIAL | 053,
+ OPC_DADDU = OPC_SPECIAL | 055,
+ OPC_DSUBU = OPC_SPECIAL | 057,
+ OPC_SELEQZ = OPC_SPECIAL | 065,
+ OPC_SELNEZ = OPC_SPECIAL | 067,
+ OPC_DSLL = OPC_SPECIAL | 070,
+ OPC_DSRL = OPC_SPECIAL | 072,
+ OPC_DROTR = OPC_SPECIAL | 072 | (1 << 21),
+ OPC_DSRA = OPC_SPECIAL | 073,
+ OPC_DSLL32 = OPC_SPECIAL | 074,
+ OPC_DSRL32 = OPC_SPECIAL | 076,
+ OPC_DROTR32 = OPC_SPECIAL | 076 | (1 << 21),
+ OPC_DSRA32 = OPC_SPECIAL | 077,
+ OPC_CLZ_R6 = OPC_SPECIAL | 0120,
+ OPC_DCLZ_R6 = OPC_SPECIAL | 0122,
+
+ OPC_REGIMM = 001 << 26,
+ OPC_BLTZ = OPC_REGIMM | (000 << 16),
+ OPC_BGEZ = OPC_REGIMM | (001 << 16),
+
+ OPC_SPECIAL2 = 034 << 26,
+ OPC_MUL_R5 = OPC_SPECIAL2 | 002,
+ OPC_CLZ = OPC_SPECIAL2 | 040,
+ OPC_DCLZ = OPC_SPECIAL2 | 044,
+
+ OPC_SPECIAL3 = 037 << 26,
+ OPC_EXT = OPC_SPECIAL3 | 000,
+ OPC_DEXTM = OPC_SPECIAL3 | 001,
+ OPC_DEXTU = OPC_SPECIAL3 | 002,
+ OPC_DEXT = OPC_SPECIAL3 | 003,
+ OPC_INS = OPC_SPECIAL3 | 004,
+ OPC_DINSM = OPC_SPECIAL3 | 005,
+ OPC_DINSU = OPC_SPECIAL3 | 006,
+ OPC_DINS = OPC_SPECIAL3 | 007,
+ OPC_WSBH = OPC_SPECIAL3 | 00240,
+ OPC_DSBH = OPC_SPECIAL3 | 00244,
+ OPC_DSHD = OPC_SPECIAL3 | 00544,
+ OPC_SEB = OPC_SPECIAL3 | 02040,
+ OPC_SEH = OPC_SPECIAL3 | 03040,
/* MIPS r6 doesn't have JR, JALR should be used instead */
OPC_JR = use_mips32r6_instructions ? OPC_JALR : OPC_JR_R5,
@@ -348,6 +406,12 @@ typedef enum {
OPC_SYNC_ACQUIRE = OPC_SYNC | 0x11 << 5,
OPC_SYNC_RELEASE = OPC_SYNC | 0x12 << 5,
OPC_SYNC_RMB = OPC_SYNC | 0x13 << 5,
+
+ /* Aliases for convenience. */
+ ALIAS_PADD = sizeof(void *) == 4 ? OPC_ADDU : OPC_DADDU,
+ ALIAS_PADDI = sizeof(void *) == 4 ? OPC_ADDIU : OPC_DADDIU,
+ ALIAS_TSRL = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
+ ? OPC_SRL : OPC_DSRL,
} MIPSInsn;
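The move to octal matches how the MIPS manuals tabulate major opcodes and function fields; an instruction word is just major<<26 with the register and function fields OR'd in. For example, assembling daddu by hand from the values above:

    #include <stdio.h>

    int main(void)
    {
        /* daddu $v0, $a0, $a1: SPECIAL (major 000) with function 055. */
        unsigned rs = 4, rt = 5, rd = 2;
        unsigned insn = (000u << 26) | (rs << 21) | (rt << 16) | (rd << 11) | 055;
        printf("daddu $2,$4,$5 = 0x%08x\n", insn);   /* 0x0085102d */
        return 0;
    }
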
/*
@@ -396,6 +460,21 @@ static inline void tcg_out_opc_bf(TCGContext *s, MIPSInsn opc, TCGReg rt,
tcg_out32(s, inst);
}
+static inline void tcg_out_opc_bf64(TCGContext *s, MIPSInsn opc, MIPSInsn opm,
+ MIPSInsn oph, TCGReg rt, TCGReg rs,
+ int msb, int lsb)
+{
+ if (lsb >= 32) {
+ opc = oph;
+ msb -= 32;
+ lsb -= 32;
+ } else if (msb >= 32) {
+ opc = opm;
+ msb -= 32;
+ }
+ tcg_out_opc_bf(s, opc, rt, rs, msb, lsb);
+}
+
/*
* Type branch
*/
@@ -426,6 +505,18 @@ static inline void tcg_out_opc_sa(TCGContext *s, MIPSInsn opc,
}
+static void tcg_out_opc_sa64(TCGContext *s, MIPSInsn opc1, MIPSInsn opc2,
+ TCGReg rd, TCGReg rt, TCGArg sa)
+{
+ int32_t inst;
+
+ inst = (sa & 32 ? opc2 : opc1);
+ inst |= (rt & 0x1F) << 16;
+ inst |= (rd & 0x1F) << 11;
+ inst |= (sa & 0x1F) << 6;
+ tcg_out32(s, inst);
+}
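Both helpers hide the same ISA quirk: the shift-amount and bitfield-position fields are only 5 bits wide, so values of 32 and up are expressed with the companion *32 (or DEXTM/DEXTU, DINSM/DINSU) opcodes and the field reduced by 32. A C model of the two selections:

    #include <stdio.h>

    /* Opcode choice for a 64-bit immediate shift (as in tcg_out_opc_sa64). */
    static const char *pick_shift(int sa)
    {
        return (sa & 32) ? "DSLL32, sa - 32" : "DSLL, sa";
    }

    /* Opcode choice for a 64-bit extract (as in tcg_out_opc_bf64). */
    static const char *pick_extract(int msb, int lsb)
    {
        if (lsb >= 32) {
            return "DEXTU";     /* field entirely within bits 32..63 */
        } else if (msb >= 32) {
            return "DEXTM";     /* field crosses bit 31 */
        }
        return "DEXT";          /* field entirely within bits 0..31 */
    }

    int main(void)
    {
        printf("sa 44       -> %s\n", pick_shift(44));
        printf("bits 8..47  -> %s\n", pick_extract(47, 8));
        printf("bits 40..63 -> %s\n", pick_extract(63, 40));
        return 0;
    }
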
+
/*
* Type jump.
* Returns true if the branch was in range and the insn was emitted.
@@ -454,28 +545,59 @@ static inline void tcg_out_nop(TCGContext *s)
tcg_out32(s, 0);
}
+static inline void tcg_out_dsll(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa)
+{
+ tcg_out_opc_sa64(s, OPC_DSLL, OPC_DSLL32, rd, rt, sa);
+}
+
+static inline void tcg_out_dsrl(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa)
+{
+ tcg_out_opc_sa64(s, OPC_DSRL, OPC_DSRL32, rd, rt, sa);
+}
+
+static inline void tcg_out_dsra(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa)
+{
+ tcg_out_opc_sa64(s, OPC_DSRA, OPC_DSRA32, rd, rt, sa);
+}
+
static inline void tcg_out_mov(TCGContext *s, TCGType type,
TCGReg ret, TCGReg arg)
{
/* Simple reg-reg move, optimising out the 'do nothing' case */
if (ret != arg) {
- tcg_out_opc_reg(s, OPC_ADDU, ret, arg, TCG_REG_ZERO);
+ tcg_out_opc_reg(s, OPC_OR, ret, arg, TCG_REG_ZERO);
}
}
-static inline void tcg_out_movi(TCGContext *s, TCGType type,
- TCGReg reg, tcg_target_long arg)
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long arg)
{
+ if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
+ arg = (int32_t)arg;
+ }
if (arg == (int16_t)arg) {
- tcg_out_opc_imm(s, OPC_ADDIU, reg, TCG_REG_ZERO, arg);
- } else if (arg == (uint16_t)arg) {
- tcg_out_opc_imm(s, OPC_ORI, reg, TCG_REG_ZERO, arg);
+ tcg_out_opc_imm(s, OPC_ADDIU, ret, TCG_REG_ZERO, arg);
+ return;
+ }
+ if (arg == (uint16_t)arg) {
+ tcg_out_opc_imm(s, OPC_ORI, ret, TCG_REG_ZERO, arg);
+ return;
+ }
+ if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
+ tcg_out_opc_imm(s, OPC_LUI, ret, TCG_REG_ZERO, arg >> 16);
} else {
- tcg_out_opc_imm(s, OPC_LUI, reg, TCG_REG_ZERO, arg >> 16);
- if (arg & 0xffff) {
- tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff);
+ tcg_out_movi(s, TCG_TYPE_I32, ret, arg >> 31 >> 1);
+ if (arg & 0xffff0000ull) {
+ tcg_out_dsll(s, ret, ret, 16);
+ tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg >> 16);
+ tcg_out_dsll(s, ret, ret, 16);
+ } else {
+ tcg_out_dsll(s, ret, ret, 32);
}
}
+ if (arg & 0xffff) {
+ tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg & 0xffff);
+ }
}
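For a constant needing all 64 bits, the new path builds the high 32 bits as an ordinary I32 immediate (LUI/ORI) and then shifts in up to two further 16-bit chunks. A C simulation of the emitted sequence for one sample value:

    #include <assert.h>
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t arg = 0x123456789abcdef0ull;    /* arbitrary test constant */
        uint64_t r;

        r = (uint64_t)(int32_t)((0x1234u << 16) | 0x5678u); /* LUI; ORI */
        r = (r << 16) | ((arg >> 16) & 0xffff);             /* DSLL 16; ORI */
        r = (r << 16) | (arg & 0xffff);                     /* DSLL 16; ORI */

        assert(r == arg);
        printf("0x%016" PRIx64 "\n", r);
        return 0;
    }
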
static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg)
@@ -513,29 +635,49 @@ static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg)
}
}
-static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
+static void tcg_out_bswap_subr(TCGContext *s, tcg_insn_unit *sub)
+{
+ bool ok = tcg_out_opc_jmp(s, OPC_JAL, sub);
+ tcg_debug_assert(ok);
+}
+
+static void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
{
if (use_mips32r2_instructions) {
tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16);
} else {
- /* ret and arg must be different and can't be register at */
- if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0) {
- tcg_abort();
- }
-
- tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
-
- tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 24);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
+ tcg_out_bswap_subr(s, bswap32_addr);
+        /* delay slot -- must contain a real insn; tcg_out_mov might emit none. */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO);
+ tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3);
+ }
+}
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00);
- tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
+static void tcg_out_bswap32u(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg);
+ tcg_out_opc_reg(s, OPC_DSHD, ret, 0, ret);
+ tcg_out_dsrl(s, ret, ret, 32);
+ } else {
+ tcg_out_bswap_subr(s, bswap32u_addr);
+        /* delay slot -- must contain a real insn; tcg_out_mov might emit none. */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO);
+ tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3);
+ }
+}
- tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8);
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0xff00);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
+static void tcg_out_bswap64(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg);
+ tcg_out_opc_reg(s, OPC_DSHD, ret, 0, ret);
+ } else {
+ tcg_out_bswap_subr(s, bswap64_addr);
+        /* delay slot -- must contain a real insn; tcg_out_mov might emit none. */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO);
+ tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3);
}
}
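The out-of-line subroutines (emitted after the epilogue, below) take their input in TCG_TMP0 and return the swapped word in TCG_TMP3, so every call site is a JAL whose delay slot copies the argument into place. For pre-r2 cores the subroutine body is the classic shift-and-mask swap; modelled in C:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t model_bswap32(uint32_t x)        /* x = aabbccdd */
    {
        uint32_t r = x << 24;                        /* dd000000 */
        r |= x >> 24;                                /* ......aa */
        r |= (x & 0xff00) << 8;                      /* ..cc.... */
        r |= (x >> 8) & 0xff00;                      /* ....bb.. */
        return r;
    }

    int main(void)
    {
        printf("%08x\n", model_bswap32(0xaabbccdd)); /* ddccbbaa */
        return 0;
    }
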
@@ -559,6 +701,16 @@ static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg)
}
}
+static inline void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_bf(s, OPC_DEXT, ret, arg, 31, 0);
+ } else {
+ tcg_out_dsll(s, ret, arg, 32);
+ tcg_out_dsrl(s, ret, ret, 32);
+ }
+}
+
static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data,
TCGReg addr, intptr_t ofs)
{
@@ -566,7 +718,7 @@ static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data,
if (ofs != lo) {
tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - lo);
if (addr != TCG_REG_ZERO) {
- tcg_out_opc_reg(s, OPC_ADDU, TCG_TMP0, TCG_TMP0, addr);
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP0, TCG_TMP0, addr);
}
addr = TCG_TMP0;
}
@@ -576,13 +728,21 @@ static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data,
static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
TCGReg arg1, intptr_t arg2)
{
- tcg_out_ldst(s, OPC_LW, arg, arg1, arg2);
+ MIPSInsn opc = OPC_LD;
+ if (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32) {
+ opc = OPC_LW;
+ }
+ tcg_out_ldst(s, opc, arg, arg1, arg2);
}
static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
TCGReg arg1, intptr_t arg2)
{
- tcg_out_ldst(s, OPC_SW, arg, arg1, arg2);
+ MIPSInsn opc = OPC_SD;
+ if (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32) {
+ opc = OPC_SW;
+ }
+ tcg_out_ldst(s, opc, arg, arg1, arg2);
}
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
@@ -595,16 +755,6 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
return false;
}
-static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val)
-{
- if (val == (int16_t)val) {
- tcg_out_opc_imm(s, OPC_ADDIU, reg, reg, val);
- } else {
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, val);
- tcg_out_opc_reg(s, OPC_ADDU, reg, reg, TCG_TMP0);
- }
-}
-
static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al,
TCGReg ah, TCGArg bl, TCGArg bh, bool cbl,
bool cbh, bool is_sub)
@@ -969,6 +1119,10 @@ static void * const qemu_ld_helpers[16] = {
[MO_BESW] = helper_be_ldsw_mmu,
[MO_BEUL] = helper_be_ldul_mmu,
[MO_BEQ] = helper_be_ldq_mmu,
+#if TCG_TARGET_REG_BITS == 64
+ [MO_LESL] = helper_le_ldsl_mmu,
+ [MO_BESL] = helper_be_ldsl_mmu,
+#endif
};
static void * const qemu_st_helpers[16] = {
@@ -996,6 +1150,9 @@ static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg)
if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg);
} else {
+        /* For N32 and N64 the initial offset is different, but those
+           ABIs also provide 8 argument registers, so we don't run out. */
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i);
}
return i + 1;
@@ -1037,6 +1194,7 @@ static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg)
static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
{
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
i = (i + 1) & ~1;
i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al));
i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah));
@@ -1044,7 +1202,7 @@ static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
}
/* Perform the tlb comparison operation. The complete host address is
- placed in BASE. Clobbers AT, T0, A0. */
+ placed in BASE. Clobbers TMP0, TMP1, TMP2, A0. */
static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
TCGReg addrh, TCGMemOpIdx oi,
tcg_insn_unit *label_ptr[2], bool is_load)
@@ -1052,6 +1210,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
TCGMemOp opc = get_memop(oi);
unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc);
+ target_ulong mask;
int mem_index = get_mmuidx(oi);
int cmp_off
= (is_load
@@ -1059,11 +1218,11 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
: offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
- tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addrl,
+ tcg_out_opc_sa(s, ALIAS_TSRL, TCG_REG_A0, addrl,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0,
(CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
- tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0);
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, TCG_REG_A0, TCG_AREG0);
/* Compensate for very large offsets. */
if (add_off >= 0x8000) {
@@ -1073,51 +1232,63 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
QEMU_BUILD_BUG_ON(offsetof(CPUArchState,
tlb_table[NB_MMU_MODES - 1][1])
> 0x7ff0 + 0x7fff);
- tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_A0, TCG_REG_A0, 0x7ff0);
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, TCG_REG_A0, 0x7ff0);
cmp_off -= 0x7ff0;
add_off -= 0x7ff0;
}
- /* Load the (low half) tlb comparator. */
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0,
- cmp_off + (TARGET_LONG_BITS == 64 ? LO_OFF : 0));
-
/* We don't currently support unaligned accesses.
We could do so with mips32r6. */
if (a_bits < s_bits) {
a_bits = s_bits;
}
- /* Mask the page bits, keeping the alignment bits to compare against.
- In between on 32-bit targets, load the tlb addend for the fast path. */
- tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1,
- TARGET_PAGE_MASK | ((1 << a_bits) - 1));
- if (TARGET_LONG_BITS == 32) {
- tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off);
+
+ mask = (target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
+
+ /* Load the (low half) tlb comparator. Mask the page bits, keeping the
+ alignment bits to compare against. */
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_REG_A0, cmp_off + LO_OFF);
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, mask);
+ } else {
+ tcg_out_ldst(s,
+ (TARGET_LONG_BITS == 64 ? OPC_LD
+ : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
+ TCG_TMP0, TCG_REG_A0, cmp_off);
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, mask);
+ /* No second compare is required here;
+ load the tlb addend for the fast path. */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_REG_A0, add_off);
}
tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
+ /* Zero extend a 32-bit guest address for a 64-bit host. */
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, base, addrl);
+ addrl = base;
+ }
+
label_ptr[0] = s->code_ptr;
tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
/* Load and test the high half tlb comparator. */
- if (TARGET_LONG_BITS == 64) {
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
/* delay slot */
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + HI_OFF);
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_REG_A0, cmp_off + HI_OFF);
- /* Load the tlb addend for the fast path. We can't do it earlier with
- 64-bit targets or we'll clobber a0 before reading the high half tlb
- comparator. */
- tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off);
+ /* Load the tlb addend for the fast path. */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_REG_A0, add_off);
label_ptr[1] = s->code_ptr;
tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0);
}
/* delay slot */
- tcg_out_opc_reg(s, OPC_ADDU, base, TCG_REG_A0, addrl);
+ tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrl);
}
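The shift at the top of tcg_out_tlb_load scales the virtual page number straight into a byte offset within tlb_table, folding the index and the entry-size multiply into one operation. A model with illustrative parameters (the real ones come from the target configuration):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    enum { PAGE_BITS = 12, TLB_BITS = 8, ENTRY_BITS = 5 };  /* illustrative */

    int main(void)
    {
        uint64_t vaddr = 0x7f001234;
        uint64_t ofs = (vaddr >> (PAGE_BITS - ENTRY_BITS))
                     & (uint64_t)(((1 << TLB_BITS) - 1) << ENTRY_BITS);
        printf("tlb entry byte offset = 0x%" PRIx64 "\n", ofs);
        return 0;
    }
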
static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
+ TCGType ext,
TCGReg datalo, TCGReg datahi,
TCGReg addrlo, TCGReg addrhi,
void *raddr, tcg_insn_unit *label_ptr[2])
@@ -1126,13 +1297,14 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
label->is_ld = is_ld;
label->oi = oi;
+ label->type = ext;
label->datalo_reg = datalo;
label->datahi_reg = datahi;
label->addrlo_reg = addrlo;
label->addrhi_reg = addrhi;
label->raddr = raddr;
label->label_ptr[0] = label_ptr[0];
- if (TARGET_LONG_BITS == 64) {
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
label->label_ptr[1] = label_ptr[1];
}
}
@@ -1146,12 +1318,12 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
/* resolve label address */
reloc_pc16(l->label_ptr[0], s->code_ptr);
- if (TARGET_LONG_BITS == 64) {
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
reloc_pc16(l->label_ptr[1], s->code_ptr);
}
i = 1;
- if (TARGET_LONG_BITS == 64) {
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
} else {
i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
@@ -1163,7 +1335,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
v0 = l->datalo_reg;
- if ((opc & MO_SIZE) == MO_64) {
+ if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
/* We eliminated V0 from the possible output registers, so it
cannot be clobbered here. So we must move V1 first. */
if (MIPS_BE) {
@@ -1177,7 +1349,12 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
reloc_pc16(s->code_ptr, l->raddr);
tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO);
/* delay slot */
- tcg_out_mov(s, TCG_TYPE_REG, v0, TCG_REG_V0);
+ if (TCG_TARGET_REG_BITS == 64 && l->type == TCG_TYPE_I32) {
+ /* we always sign-extend 32-bit loads */
+ tcg_out_opc_sa(s, OPC_SLL, v0, TCG_REG_V0, 0);
+ } else {
+ tcg_out_opc_reg(s, OPC_OR, v0, TCG_REG_V0, TCG_REG_ZERO);
+ }
}
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
@@ -1189,12 +1366,12 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
/* resolve label address */
reloc_pc16(l->label_ptr[0], s->code_ptr);
- if (TARGET_LONG_BITS == 64) {
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
reloc_pc16(l->label_ptr[1], s->code_ptr);
}
i = 1;
- if (TARGET_LONG_BITS == 64) {
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
} else {
i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
@@ -1210,7 +1387,11 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
break;
case MO_64:
- i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg);
+ if (TCG_TARGET_REG_BITS == 32) {
+ i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg);
+ } else {
+ i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
+ }
break;
default:
tcg_abort();
@@ -1227,46 +1408,104 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
}
#endif
-static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
- TCGReg base, TCGMemOp opc)
+static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
+ TCGReg base, TCGMemOp opc, bool is_64)
{
switch (opc & (MO_SSIZE | MO_BSWAP)) {
case MO_UB:
- tcg_out_opc_imm(s, OPC_LBU, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_LBU, lo, base, 0);
break;
case MO_SB:
- tcg_out_opc_imm(s, OPC_LB, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_LB, lo, base, 0);
break;
case MO_UW | MO_BSWAP:
tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
- tcg_out_bswap16(s, datalo, TCG_TMP1);
+ tcg_out_bswap16(s, lo, TCG_TMP1);
break;
case MO_UW:
- tcg_out_opc_imm(s, OPC_LHU, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_LHU, lo, base, 0);
break;
case MO_SW | MO_BSWAP:
tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
- tcg_out_bswap16s(s, datalo, TCG_TMP1);
+ tcg_out_bswap16s(s, lo, TCG_TMP1);
break;
case MO_SW:
- tcg_out_opc_imm(s, OPC_LH, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_LH, lo, base, 0);
break;
case MO_UL | MO_BSWAP:
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 0);
- tcg_out_bswap32(s, datalo, TCG_TMP1);
+ if (TCG_TARGET_REG_BITS == 64 && is_64) {
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
+ tcg_out_bswap32u(s, lo, lo);
+ } else {
+ tcg_out_bswap_subr(s, bswap32u_addr);
+ /* delay slot */
+ tcg_out_opc_imm(s, OPC_LWU, TCG_TMP0, base, 0);
+ tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
+ }
+ break;
+ }
+ /* FALLTHRU */
+ case MO_SL | MO_BSWAP:
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
+ tcg_out_bswap32(s, lo, lo);
+ } else {
+ tcg_out_bswap_subr(s, bswap32_addr);
+ /* delay slot */
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
+ tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_TMP3);
+ }
break;
case MO_UL:
- tcg_out_opc_imm(s, OPC_LW, datalo, base, 0);
+ if (TCG_TARGET_REG_BITS == 64 && is_64) {
+ tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
+ break;
+ }
+ /* FALLTHRU */
+ case MO_SL:
+ tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
break;
case MO_Q | MO_BSWAP:
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, HI_OFF);
- tcg_out_bswap32(s, datalo, TCG_TMP1);
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, LO_OFF);
- tcg_out_bswap32(s, datahi, TCG_TMP1);
+ if (TCG_TARGET_REG_BITS == 64) {
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
+ tcg_out_bswap64(s, lo, lo);
+ } else {
+ tcg_out_bswap_subr(s, bswap64_addr);
+ /* delay slot */
+ tcg_out_opc_imm(s, OPC_LD, TCG_TMP0, base, 0);
+ tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
+ }
+ } else if (use_mips32r2_instructions) {
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 4);
+ tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0);
+ tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1);
+ tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16);
+ tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16);
+ } else {
+ tcg_out_bswap_subr(s, bswap32_addr);
+ /* delay slot */
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 4);
+ tcg_out_bswap_subr(s, bswap32_addr);
+ /* delay slot */
+ tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3);
+ tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? hi : lo, TCG_TMP3);
+ }
break;
case MO_Q:
- tcg_out_opc_imm(s, OPC_LW, datalo, base, LO_OFF);
- tcg_out_opc_imm(s, OPC_LW, datahi, base, HI_OFF);
+ /* Prefer to load from offset 0 first, but allow for overlap. */
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
+ } else if (MIPS_BE ? hi != base : lo == base) {
+ tcg_out_opc_imm(s, OPC_LW, hi, base, HI_OFF);
+ tcg_out_opc_imm(s, OPC_LW, lo, base, LO_OFF);
+ } else {
+ tcg_out_opc_imm(s, OPC_LW, lo, base, LO_OFF);
+ tcg_out_opc_imm(s, OPC_LW, hi, base, HI_OFF);
+ }
break;
default:
tcg_abort();
@@ -1282,69 +1521,94 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
#if defined(CONFIG_SOFTMMU)
tcg_insn_unit *label_ptr[2];
#endif
- /* Note that we've eliminated V0 from the output registers,
- so we won't overwrite the base register during loading. */
- TCGReg base = TCG_REG_V0;
+ TCGReg base = TCG_REG_A0;
data_regl = *args++;
- data_regh = (is_64 ? *args++ : 0);
+ data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
addr_regl = *args++;
- addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0);
+ addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
oi = *args++;
opc = get_memop(oi);
#if defined(CONFIG_SOFTMMU)
tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 1);
- tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc);
- add_qemu_ldst_label(s, 1, oi, data_regl, data_regh, addr_regl, addr_regh,
+ tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
+ add_qemu_ldst_label(s, 1, oi,
+ (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+ data_regl, data_regh, addr_regl, addr_regh,
s->code_ptr, label_ptr);
#else
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, base, addr_regl);
+ addr_regl = base;
+ }
if (guest_base == 0 && data_regl != addr_regl) {
base = addr_regl;
} else if (guest_base == (int16_t)guest_base) {
- tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base);
+ tcg_out_opc_imm(s, ALIAS_PADDI, base, addr_regl, guest_base);
} else {
tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base);
- tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl);
+ tcg_out_opc_reg(s, ALIAS_PADD, base, base, addr_regl);
}
- tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc);
+ tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
#endif
}
-static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
TCGReg base, TCGMemOp opc)
{
+ /* Don't clutter the code below with checks to avoid bswapping ZERO. */
+ if ((lo | hi) == 0) {
+ opc &= ~MO_BSWAP;
+ }
+
switch (opc & (MO_SIZE | MO_BSWAP)) {
case MO_8:
- tcg_out_opc_imm(s, OPC_SB, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_SB, lo, base, 0);
break;
case MO_16 | MO_BSWAP:
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, datalo, 0xffff);
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, lo, 0xffff);
tcg_out_bswap16(s, TCG_TMP1, TCG_TMP1);
- datalo = TCG_TMP1;
+ lo = TCG_TMP1;
/* FALLTHRU */
case MO_16:
- tcg_out_opc_imm(s, OPC_SH, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_SH, lo, base, 0);
break;
case MO_32 | MO_BSWAP:
- tcg_out_bswap32(s, TCG_TMP1, datalo);
- datalo = TCG_TMP1;
+ tcg_out_bswap32(s, TCG_TMP3, lo);
+ lo = TCG_TMP3;
/* FALLTHRU */
case MO_32:
- tcg_out_opc_imm(s, OPC_SW, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_SW, lo, base, 0);
break;
case MO_64 | MO_BSWAP:
- tcg_out_bswap32(s, TCG_TMP1, datalo);
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, HI_OFF);
- tcg_out_bswap32(s, TCG_TMP1, datahi);
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, LO_OFF);
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_bswap64(s, TCG_TMP3, lo);
+ tcg_out_opc_imm(s, OPC_SD, TCG_TMP3, base, 0);
+ } else if (use_mips32r2_instructions) {
+ tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? lo : hi);
+ tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? hi : lo);
+ tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16);
+ tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16);
+ tcg_out_opc_imm(s, OPC_SW, TCG_TMP0, base, 0);
+ tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, 4);
+ } else {
+ tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi);
+ tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 0);
+ tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo);
+ tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 4);
+ }
break;
case MO_64:
- tcg_out_opc_imm(s, OPC_SW, datalo, base, LO_OFF);
- tcg_out_opc_imm(s, OPC_SW, datahi, base, HI_OFF);
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_opc_imm(s, OPC_SD, lo, base, 0);
+ } else {
+ tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? hi : lo, base, 0);
+ tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? lo : hi, base, 4);
+ }
break;
default:
@@ -1355,39 +1619,41 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
{
TCGReg addr_regl, addr_regh __attribute__((unused));
- TCGReg data_regl, data_regh, base;
+ TCGReg data_regl, data_regh;
TCGMemOpIdx oi;
TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
tcg_insn_unit *label_ptr[2];
#endif
+ TCGReg base = TCG_REG_A0;
data_regl = *args++;
- data_regh = (is_64 ? *args++ : 0);
+ data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
addr_regl = *args++;
- addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0);
+ addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
oi = *args++;
opc = get_memop(oi);
#if defined(CONFIG_SOFTMMU)
- /* Note that we eliminated the helper's address argument,
- so we can reuse that for the base. */
- base = (TARGET_LONG_BITS == 32 ? TCG_REG_A1 : TCG_REG_A2);
tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 0);
tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
- add_qemu_ldst_label(s, 0, oi, data_regl, data_regh, addr_regl, addr_regh,
+ add_qemu_ldst_label(s, 0, oi,
+ (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+ data_regl, data_regh, addr_regl, addr_regh,
s->code_ptr, label_ptr);
#else
+ base = TCG_REG_A0;
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, base, addr_regl);
+ addr_regl = base;
+ }
if (guest_base == 0) {
base = addr_regl;
+ } else if (guest_base == (int16_t)guest_base) {
+ tcg_out_opc_imm(s, ALIAS_PADDI, base, addr_regl, guest_base);
} else {
- base = TCG_REG_A0;
- if (guest_base == (int16_t)guest_base) {
- tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base);
- } else {
- tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base);
- tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl);
- }
+ tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base);
+ tcg_out_opc_reg(s, ALIAS_PADD, base, base, addr_regl);
}
tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
#endif
@@ -1409,6 +1675,33 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
+static void tcg_out_clz(TCGContext *s, MIPSInsn opcv2, MIPSInsn opcv6,
+ int width, TCGReg a0, TCGReg a1, TCGArg a2)
+{
+ if (use_mips32r6_instructions) {
+ if (a2 == width) {
+ tcg_out_opc_reg(s, opcv6, a0, a1, 0);
+ } else {
+ tcg_out_opc_reg(s, opcv6, TCG_TMP0, a1, 0);
+ tcg_out_movcond(s, TCG_COND_EQ, a0, a1, 0, a2, TCG_TMP0);
+ }
+ } else {
+ if (a2 == width) {
+ tcg_out_opc_reg(s, opcv2, a0, a1, a1);
+ } else if (a0 == a2) {
+ tcg_out_opc_reg(s, opcv2, TCG_TMP0, a1, a1);
+ tcg_out_opc_reg(s, OPC_MOVN, a0, TCG_TMP0, a1);
+ } else if (a0 != a1) {
+ tcg_out_opc_reg(s, opcv2, a0, a1, a1);
+ tcg_out_opc_reg(s, OPC_MOVZ, a0, a2, a1);
+ } else {
+ tcg_out_opc_reg(s, opcv2, TCG_TMP0, a1, a1);
+ tcg_out_opc_reg(s, OPC_MOVZ, TCG_TMP0, a2, a1);
+ tcg_out_mov(s, TCG_TYPE_REG, a0, TCG_TMP0);
+ }
+ }
+}
+
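TCG's clz carries a fallback: the result is a2 when the input is zero. MIPS CLZ happens to return the operand width for a zero input, so when a2 is exactly 32 or 64 (the 'W' constraint) a single insn suffices; any other fallback is patched in with MOVZ/MOVN (pre-r6) or a movcond (r6). The intended semantics, assuming a GCC-style builtin:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t clz32_with_default(uint32_t a1, uint32_t a2)
    {
        return a1 ? __builtin_clz(a1) : a2;    /* t0 = t1 ? clz(t1) : t2 */
    }

    int main(void)
    {
        printf("%u\n", clz32_with_default(0x00ffffff, 99));  /* 8 */
        printf("%u\n", clz32_with_default(0, 99));           /* 99 */
        return 0;
    }
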
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg *args, const int *const_args)
{
@@ -1426,6 +1719,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
{
TCGReg b0 = TCG_REG_ZERO;
+ a0 = (intptr_t)a0;
if (a0 & ~0xffff) {
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff);
b0 = TCG_REG_V0;
@@ -1459,28 +1753,45 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8u_i64:
i1 = OPC_LBU;
goto do_ldst;
case INDEX_op_ld8s_i32:
+ case INDEX_op_ld8s_i64:
i1 = OPC_LB;
goto do_ldst;
case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16u_i64:
i1 = OPC_LHU;
goto do_ldst;
case INDEX_op_ld16s_i32:
+ case INDEX_op_ld16s_i64:
i1 = OPC_LH;
goto do_ldst;
case INDEX_op_ld_i32:
+ case INDEX_op_ld32s_i64:
i1 = OPC_LW;
goto do_ldst;
+ case INDEX_op_ld32u_i64:
+ i1 = OPC_LWU;
+ goto do_ldst;
+ case INDEX_op_ld_i64:
+ i1 = OPC_LD;
+ goto do_ldst;
case INDEX_op_st8_i32:
+ case INDEX_op_st8_i64:
i1 = OPC_SB;
goto do_ldst;
case INDEX_op_st16_i32:
+ case INDEX_op_st16_i64:
i1 = OPC_SH;
goto do_ldst;
case INDEX_op_st_i32:
+ case INDEX_op_st32_i64:
i1 = OPC_SW;
+ goto do_ldst;
+ case INDEX_op_st_i64:
+ i1 = OPC_SD;
do_ldst:
tcg_out_ldst(s, i1, a0, a1, a2);
break;
@@ -1488,10 +1799,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_add_i32:
i1 = OPC_ADDU, i2 = OPC_ADDIU;
goto do_binary;
+ case INDEX_op_add_i64:
+ i1 = OPC_DADDU, i2 = OPC_DADDIU;
+ goto do_binary;
case INDEX_op_or_i32:
+ case INDEX_op_or_i64:
i1 = OPC_OR, i2 = OPC_ORI;
goto do_binary;
case INDEX_op_xor_i32:
+ case INDEX_op_xor_i64:
i1 = OPC_XOR, i2 = OPC_XORI;
do_binary:
if (c2) {
@@ -1503,12 +1819,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_sub_i32:
+ i1 = OPC_SUBU, i2 = OPC_ADDIU;
+ goto do_subtract;
+ case INDEX_op_sub_i64:
+ i1 = OPC_DSUBU, i2 = OPC_DADDIU;
+ do_subtract:
if (c2) {
- tcg_out_opc_imm(s, OPC_ADDIU, a0, a1, -a2);
+ tcg_out_opc_imm(s, i2, a0, a1, -a2);
break;
}
- i1 = OPC_SUBU;
- goto do_binary;
+ goto do_binaryv;
case INDEX_op_and_i32:
if (c2 && a2 != (uint16_t)a2) {
int msb = ctz32(~a2) - 1;
@@ -1519,7 +1839,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
i1 = OPC_AND, i2 = OPC_ANDI;
goto do_binary;
+ case INDEX_op_and_i64:
+ if (c2 && a2 != (uint16_t)a2) {
+ int msb = ctz64(~a2) - 1;
+ tcg_debug_assert(use_mips32r2_instructions);
+ tcg_debug_assert(is_p2m1(a2));
+ tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU, a0, a1, msb, 0);
+ break;
+ }
+ i1 = OPC_AND, i2 = OPC_ANDI;
+ goto do_binary;
case INDEX_op_nor_i32:
+ case INDEX_op_nor_i64:
i1 = OPC_NOR;
goto do_binaryv;
@@ -1571,6 +1902,55 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
}
i1 = OPC_DIVU, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_mul_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DMUL, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DMULT, i2 = OPC_MFLO;
+ goto do_hilo1;
+ case INDEX_op_mulsh_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DMUH, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DMULT, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_muluh_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DMUHU, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DMULTU, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_div_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DDIV_R6, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DDIV, i2 = OPC_MFLO;
+ goto do_hilo1;
+ case INDEX_op_divu_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DDIVU_R6, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DDIVU, i2 = OPC_MFLO;
+ goto do_hilo1;
+ case INDEX_op_rem_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DMOD, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DDIV, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_remu_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DMODU, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DDIVU, i2 = OPC_MFHI;
do_hilo1:
tcg_out_opc_reg(s, i1, 0, a1, a2);
tcg_out_opc_reg(s, i2, a0, 0, 0);
@@ -1581,6 +1961,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
goto do_hilo2;
case INDEX_op_mulu2_i32:
i1 = OPC_MULTU;
+ goto do_hilo2;
+ case INDEX_op_muls2_i64:
+ i1 = OPC_DMULT;
+ goto do_hilo2;
+ case INDEX_op_mulu2_i64:
+ i1 = OPC_DMULTU;
do_hilo2:
tcg_out_opc_reg(s, i1, 0, a2, args[3]);
tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0);
@@ -1588,20 +1974,46 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_not_i32:
+ case INDEX_op_not_i64:
i1 = OPC_NOR;
goto do_unary;
case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap16_i64:
i1 = OPC_WSBH;
goto do_unary;
case INDEX_op_ext8s_i32:
+ case INDEX_op_ext8s_i64:
i1 = OPC_SEB;
goto do_unary;
case INDEX_op_ext16s_i32:
+ case INDEX_op_ext16s_i64:
i1 = OPC_SEH;
do_unary:
tcg_out_opc_reg(s, i1, a0, TCG_REG_ZERO, a1);
break;
+ case INDEX_op_bswap32_i32:
+ tcg_out_bswap32(s, a0, a1);
+ break;
+ case INDEX_op_bswap32_i64:
+ tcg_out_bswap32u(s, a0, a1);
+ break;
+ case INDEX_op_bswap64_i64:
+ tcg_out_bswap64(s, a0, a1);
+ break;
+ case INDEX_op_extrh_i64_i32:
+ tcg_out_dsra(s, a0, a1, 32);
+ break;
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_extrl_i64_i32:
+ tcg_out_opc_sa(s, OPC_SLL, a0, a1, 0);
+ break;
+ case INDEX_op_ext32u_i64:
+ case INDEX_op_extu_i32_i64:
+ tcg_out_ext32u(s, a0, a1);
+ break;
+
case INDEX_op_sar_i32:
i1 = OPC_SRAV, i2 = OPC_SRA;
goto do_shift;
@@ -1616,9 +2028,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
do_shift:
if (c2) {
tcg_out_opc_sa(s, i2, a0, a1, a2);
- } else {
- tcg_out_opc_reg(s, i1, a0, a2, a1);
+ break;
}
+ do_shiftv:
+ tcg_out_opc_reg(s, i1, a0, a2, a1);
break;
case INDEX_op_rotl_i32:
if (c2) {
@@ -1628,17 +2041,67 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_opc_reg(s, OPC_ROTRV, a0, TCG_TMP0, a1);
}
break;
+ case INDEX_op_sar_i64:
+ if (c2) {
+ tcg_out_dsra(s, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DSRAV;
+ goto do_shiftv;
+ case INDEX_op_shl_i64:
+ if (c2) {
+ tcg_out_dsll(s, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DSLLV;
+ goto do_shiftv;
+ case INDEX_op_shr_i64:
+ if (c2) {
+ tcg_out_dsrl(s, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DSRLV;
+ goto do_shiftv;
+ case INDEX_op_rotr_i64:
+ if (c2) {
+ tcg_out_opc_sa64(s, OPC_DROTR, OPC_DROTR32, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DROTRV;
+ goto do_shiftv;
+ case INDEX_op_rotl_i64:
+ if (c2) {
+ tcg_out_opc_sa64(s, OPC_DROTR, OPC_DROTR32, a0, a1, 64 - a2);
+ } else {
+ tcg_out_opc_reg(s, OPC_DSUBU, TCG_TMP0, TCG_REG_ZERO, a2);
+ tcg_out_opc_reg(s, OPC_DROTRV, a0, TCG_TMP0, a1);
+ }
+ break;
- case INDEX_op_bswap32_i32:
- tcg_out_opc_reg(s, OPC_WSBH, a0, 0, a1);
- tcg_out_opc_sa(s, OPC_ROTR, a0, a0, 16);
+ case INDEX_op_clz_i32:
+ tcg_out_clz(s, OPC_CLZ, OPC_CLZ_R6, 32, a0, a1, a2);
+ break;
+ case INDEX_op_clz_i64:
+ tcg_out_clz(s, OPC_DCLZ, OPC_DCLZ_R6, 64, a0, a1, a2);
break;
case INDEX_op_deposit_i32:
tcg_out_opc_bf(s, OPC_INS, a0, a2, args[3] + args[4] - 1, args[3]);
break;
+ case INDEX_op_deposit_i64:
+ tcg_out_opc_bf64(s, OPC_DINS, OPC_DINSM, OPC_DINSU, a0, a2,
+ args[3] + args[4] - 1, args[3]);
+ break;
+ case INDEX_op_extract_i32:
+ tcg_out_opc_bf(s, OPC_EXT, a0, a1, a2 + args[3] - 1, a2);
+ break;
+ case INDEX_op_extract_i64:
+ tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU, a0, a1,
+ a2 + args[3] - 1, a2);
+ break;
case INDEX_op_brcond_i32:
+ case INDEX_op_brcond_i64:
tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
break;
case INDEX_op_brcond2_i32:
@@ -1646,10 +2109,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_movcond_i32:
+ case INDEX_op_movcond_i64:
tcg_out_movcond(s, args[5], a0, a1, a2, args[3], args[4]);
break;
case INDEX_op_setcond_i32:
+ case INDEX_op_setcond_i64:
tcg_out_setcond(s, args[3], a0, a1, a2);
break;
case INDEX_op_setcond2_i32:
@@ -1682,7 +2147,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_mb(s, a0);
break;
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_mov_i64:
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
+ case INDEX_op_movi_i64:
case INDEX_op_call: /* Always emitted via tcg_out_call. */
default:
tcg_abort();
@@ -1728,6 +2195,7 @@ static const TCGTargetOpDef mips_op_defs[] = {
{ INDEX_op_sar_i32, { "r", "rZ", "ri" } },
{ INDEX_op_rotr_i32, { "r", "rZ", "ri" } },
{ INDEX_op_rotl_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_clz_i32, { "r", "r", "rWZ" } },
{ INDEX_op_bswap16_i32, { "r", "r" } },
{ INDEX_op_bswap32_i32, { "r", "r" } },
@@ -1736,6 +2204,7 @@ static const TCGTargetOpDef mips_op_defs[] = {
{ INDEX_op_ext16s_i32, { "r", "rZ" } },
{ INDEX_op_deposit_i32, { "r", "0", "rZ" } },
+ { INDEX_op_extract_i32, { "r", "r" } },
{ INDEX_op_brcond_i32, { "rZ", "rZ" } },
#if use_mips32r6_instructions
@@ -1744,21 +2213,91 @@ static const TCGTargetOpDef mips_op_defs[] = {
{ INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } },
#endif
{ INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
- { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } },
+#if TCG_TARGET_REG_BITS == 32
{ INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } },
{ INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } },
+ { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } },
{ INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } },
+#endif
-#if TARGET_LONG_BITS == 32
- { INDEX_op_qemu_ld_i32, { "L", "lZ" } },
+#if TCG_TARGET_REG_BITS == 64
+ { INDEX_op_ld8u_i64, { "r", "r" } },
+ { INDEX_op_ld8s_i64, { "r", "r" } },
+ { INDEX_op_ld16u_i64, { "r", "r" } },
+ { INDEX_op_ld16s_i64, { "r", "r" } },
+ { INDEX_op_ld32s_i64, { "r", "r" } },
+ { INDEX_op_ld32u_i64, { "r", "r" } },
+ { INDEX_op_ld_i64, { "r", "r" } },
+ { INDEX_op_st8_i64, { "rZ", "r" } },
+ { INDEX_op_st16_i64, { "rZ", "r" } },
+ { INDEX_op_st32_i64, { "rZ", "r" } },
+ { INDEX_op_st_i64, { "rZ", "r" } },
+
+ { INDEX_op_add_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_mul_i64, { "r", "rZ", "rZ" } },
+#if !use_mips32r6_instructions
+ { INDEX_op_muls2_i64, { "r", "r", "rZ", "rZ" } },
+ { INDEX_op_mulu2_i64, { "r", "r", "rZ", "rZ" } },
+#endif
+ { INDEX_op_mulsh_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_muluh_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_div_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_divu_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_rem_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_remu_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_sub_i64, { "r", "rZ", "rN" } },
+
+ { INDEX_op_and_i64, { "r", "rZ", "rIK" } },
+ { INDEX_op_nor_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_not_i64, { "r", "rZ" } },
+ { INDEX_op_or_i64, { "r", "rZ", "rI" } },
+ { INDEX_op_xor_i64, { "r", "rZ", "rI" } },
+
+ { INDEX_op_shl_i64, { "r", "rZ", "ri" } },
+ { INDEX_op_shr_i64, { "r", "rZ", "ri" } },
+ { INDEX_op_sar_i64, { "r", "rZ", "ri" } },
+ { INDEX_op_rotr_i64, { "r", "rZ", "ri" } },
+ { INDEX_op_rotl_i64, { "r", "rZ", "ri" } },
+ { INDEX_op_clz_i64, { "r", "r", "rWZ" } },
+
+ { INDEX_op_bswap16_i64, { "r", "r" } },
+ { INDEX_op_bswap32_i64, { "r", "r" } },
+ { INDEX_op_bswap64_i64, { "r", "r" } },
+
+ { INDEX_op_ext8s_i64, { "r", "rZ" } },
+ { INDEX_op_ext16s_i64, { "r", "rZ" } },
+ { INDEX_op_ext32s_i64, { "r", "rZ" } },
+ { INDEX_op_ext32u_i64, { "r", "rZ" } },
+ { INDEX_op_ext_i32_i64, { "r", "rZ" } },
+ { INDEX_op_extu_i32_i64, { "r", "rZ" } },
+ { INDEX_op_extrl_i64_i32, { "r", "rZ" } },
+ { INDEX_op_extrh_i64_i32, { "r", "rZ" } },
+
+ { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
+ { INDEX_op_extract_i64, { "r", "r" } },
+
+ { INDEX_op_brcond_i64, { "rZ", "rZ" } },
+#if use_mips32r6_instructions
+ { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rZ", "rZ" } },
+#else
+ { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rZ", "0" } },
+#endif
+ { INDEX_op_setcond_i64, { "r", "rZ", "rZ" } },
+
+ { INDEX_op_qemu_ld_i32, { "r", "LZ" } },
{ INDEX_op_qemu_st_i32, { "SZ", "SZ" } },
- { INDEX_op_qemu_ld_i64, { "L", "L", "lZ" } },
+ { INDEX_op_qemu_ld_i64, { "r", "LZ" } },
+ { INDEX_op_qemu_st_i64, { "SZ", "SZ" } },
+#elif TARGET_LONG_BITS == 32
+ { INDEX_op_qemu_ld_i32, { "r", "LZ" } },
+ { INDEX_op_qemu_st_i32, { "SZ", "SZ" } },
+ { INDEX_op_qemu_ld_i64, { "r", "r", "LZ" } },
{ INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ" } },
#else
- { INDEX_op_qemu_ld_i32, { "L", "lZ", "lZ" } },
+ { INDEX_op_qemu_ld_i32, { "r", "LZ", "LZ" } },
{ INDEX_op_qemu_st_i32, { "SZ", "SZ", "SZ" } },
- { INDEX_op_qemu_ld_i64, { "L", "L", "lZ", "lZ" } },
+ { INDEX_op_qemu_ld_i64, { "r", "r", "LZ", "LZ" } },
{ INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ", "SZ" } },
#endif
@@ -1766,6 +2305,18 @@ static const TCGTargetOpDef mips_op_defs[] = {
{ -1 },
};
+static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+{
+ int i, n = ARRAY_SIZE(mips_op_defs);
+
+ for (i = 0; i < n; ++i) {
+ if (mips_op_defs[i].op == op) {
+ return &mips_op_defs[i];
+ }
+ }
+ return NULL;
+}
+
static int tcg_target_callee_save_regs[] = {
TCG_REG_S0, /* used for the global env (TCG_AREG0) */
TCG_REG_S1,
@@ -1858,47 +2409,186 @@ static void tcg_target_detect_isa(void)
sigaction(SIGILL, &sa_old, NULL);
}
+static tcg_insn_unit *align_code_ptr(TCGContext *s)
+{
+ uintptr_t p = (uintptr_t)s->code_ptr;
+ if (p & 15) {
+ p = (p + 15) & -16;
+ s->code_ptr = (void *)p;
+ }
+ return s->code_ptr;
+}
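
The (p + 15) & -16 idiom above rounds the code pointer up to the next
16-byte boundary: in two's complement, -16 is a mask with the low four
bits clear. A minimal stand-alone check of the idiom (plain C, nothing
QEMU-specific assumed):

    #include <assert.h>
    #include <stdint.h>

    /* Round x up to the next multiple of 16. */
    static uintptr_t round_up_16(uintptr_t x)
    {
        return (x + 15) & -(uintptr_t)16;
    }

    int main(void)
    {
        assert(round_up_16(0) == 0);
        assert(round_up_16(1) == 16);
        assert(round_up_16(16) == 16);
        assert(round_up_16(17) == 32);
        return 0;
    }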
+
+/* Stack frame parameters. */
+#define REG_SIZE (TCG_TARGET_REG_BITS / 8)
+#define SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE)
+#define TEMP_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
+
+#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \
+ + TCG_TARGET_STACK_ALIGN - 1) \
+ & -TCG_TARGET_STACK_ALIGN)
+#define SAVE_OFS (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE)
+
+/* We're expecting to be able to use an immediate for frame allocation. */
+QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7fff);
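
A worked instance of the frame arithmetic above, with illustrative
numbers only (the real constants depend on the host configuration):

    #include <assert.h>

    int main(void)
    {
        /* Illustrative values, not the real target constants. */
        int call_args = 128;              /* TCG_STATIC_CALL_ARGS_SIZE */
        int temp      = 1024;             /* TEMP_SIZE */
        int save      = 80;               /* SAVE_SIZE: 10 regs * 8 bytes */
        int align     = 16;               /* TCG_TARGET_STACK_ALIGN */

        int frame = (call_args + temp + save + align - 1) & -align;
        assert(frame == 1232);            /* sum is already 16-aligned */
        assert(call_args + temp == 1152); /* SAVE_OFS */
        return 0;
    }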
+
/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
- int i, frame_size;
+ int i;
- /* reserve some stack space, also for TCG temps. */
- frame_size = ARRAY_SIZE(tcg_target_callee_save_regs) * 4
- + TCG_STATIC_CALL_ARGS_SIZE
- + CPU_TEMP_BUF_NLONGS * sizeof(long);
- frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
- ~(TCG_TARGET_STACK_ALIGN - 1);
- tcg_set_frame(s, TCG_REG_SP, ARRAY_SIZE(tcg_target_callee_save_regs) * 4
- + TCG_STATIC_CALL_ARGS_SIZE,
- CPU_TEMP_BUF_NLONGS * sizeof(long));
+ tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);
/* TB prologue */
- tcg_out_addi(s, TCG_REG_SP, -frame_size);
- for(i = 0 ; i < ARRAY_SIZE(tcg_target_callee_save_regs) ; i++) {
- tcg_out_st(s, TCG_TYPE_I32, tcg_target_callee_save_regs[i],
- TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE + i * 4);
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+ tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+ TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
}
/* Call generated code */
tcg_out_opc_reg(s, OPC_JR, 0, tcg_target_call_iarg_regs[1], 0);
+ /* delay slot */
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
- tb_ret_addr = s->code_ptr;
/* TB epilogue */
- for(i = 0 ; i < ARRAY_SIZE(tcg_target_callee_save_regs) ; i++) {
- tcg_out_ld(s, TCG_TYPE_I32, tcg_target_callee_save_regs[i],
- TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE + i * 4);
+ tb_ret_addr = s->code_ptr;
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+ tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+ TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
}
tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
- tcg_out_addi(s, TCG_REG_SP, frame_size);
+ /* delay slot */
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
+
+ if (use_mips32r2_instructions) {
+ return;
+ }
+
+ /* Bswap subroutines: Input in TCG_TMP0, output in TCG_TMP3;
+ clobbers TCG_TMP1, TCG_TMP2. */
+
+ /*
+ * bswap32 -- 32-bit swap (signed result for mips64). a0 = abcd.
+ */
+ bswap32_addr = align_code_ptr(s);
+ /* t3 = (ssss)d000 */
+ tcg_out_opc_sa(s, OPC_SLL, TCG_TMP3, TCG_TMP0, 24);
+ /* t1 = 000a */
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_TMP0, 24);
+ /* t2 = 00c0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP2, TCG_TMP0, 0xff00);
+ /* t3 = d00a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+ /* t1 = 0abc */
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_TMP0, 8);
+ /* t2 = 0c00 */
+ tcg_out_opc_sa(s, OPC_SLL, TCG_TMP2, TCG_TMP2, 8);
+ /* t1 = 00b0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+ /* t3 = dc0a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP2);
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
+ /* t3 = dcba -- delay slot */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
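
A straight C transcription of the shuffle above, step for step (the
helper name is illustrative only):

    #include <assert.h>
    #include <stdint.h>

    static uint32_t bswap32_ref(uint32_t a0)    /* a0 = abcd */
    {
        uint32_t t3 = a0 << 24;                 /* d000 */
        uint32_t t1 = a0 >> 24;                 /* 000a */
        uint32_t t2 = a0 & 0xff00;              /* 00c0 */
        t3 |= t1;                               /* d00a */
        t1 = (a0 >> 8) & 0xff00;                /* 00b0 */
        t2 <<= 8;                               /* 0c00 */
        t3 |= t2;                               /* dc0a */
        return t3 | t1;                         /* dcba */
    }

    int main(void)
    {
        assert(bswap32_ref(0x12345678) == 0x78563412);
        return 0;
    }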
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ return;
+ }
+
+ /*
+ * bswap32u -- unsigned 32-bit swap. a0 = ....abcd.
+ */
+ bswap32u_addr = align_code_ptr(s);
+ /* t1 = (0000)000d */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP0, 0xff);
+ /* t3 = 000a */
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, TCG_TMP0, 24);
+ /* t1 = (0000)d000 */
+ tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 24);
+ /* t2 = 00c0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP2, TCG_TMP0, 0xff00);
+ /* t3 = d00a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+ /* t1 = 0abc */
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_TMP0, 8);
+ /* t2 = 0c00 */
+ tcg_out_opc_sa(s, OPC_SLL, TCG_TMP2, TCG_TMP2, 8);
+ /* t1 = 00b0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+ /* t3 = dc0a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP2);
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
+ /* t3 = dcba -- delay slot */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+
+ /*
+ * bswap64 -- 64-bit swap. a0 = abcdefgh.
+ */
+ bswap64_addr = align_code_ptr(s);
+ /* t3 = h0000000 */
+ tcg_out_dsll(s, TCG_TMP3, TCG_TMP0, 56);
+ /* t1 = 0000000a */
+ tcg_out_dsrl(s, TCG_TMP1, TCG_TMP0, 56);
+
+ /* t2 = 000000g0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP2, TCG_TMP0, 0xff00);
+ /* t3 = h000000a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+ /* t1 = 00000abc */
+ tcg_out_dsrl(s, TCG_TMP1, TCG_TMP0, 40);
+ /* t2 = 0g000000 */
+ tcg_out_dsll(s, TCG_TMP2, TCG_TMP2, 40);
+ /* t1 = 000000b0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+
+ /* t3 = hg00000a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP2);
+ /* t2 = 0000abcd */
+ tcg_out_dsrl(s, TCG_TMP2, TCG_TMP0, 32);
+ /* t3 = hg0000ba */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+
+ /* t1 = 000000c0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP2, 0xff00);
+ /* t2 = 0000000d */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP2, TCG_TMP2, 0x00ff);
+ /* t1 = 00000c00 */
+ tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 8);
+ /* t2 = 0000d000 */
+ tcg_out_dsll(s, TCG_TMP2, TCG_TMP2, 24);
+
+ /* t3 = hg000cba */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+ /* t1 = 00abcdef */
+ tcg_out_dsrl(s, TCG_TMP1, TCG_TMP0, 16);
+ /* t3 = hg00dcba */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP2);
+
+ /* t2 = 0000000f */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP2, TCG_TMP1, 0x00ff);
+ /* t1 = 000000e0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+ /* t2 = 00f00000 */
+ tcg_out_dsll(s, TCG_TMP2, TCG_TMP2, 40);
+ /* t1 = 000e0000 */
+ tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 24);
+
+ /* t3 = hgf0dcba */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP2);
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
+ /* t3 = hgfedcba -- delay slot */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
}
static void tcg_target_init(TCGContext *s)
{
tcg_target_detect_isa();
tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I32], 0xffffffff);
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I64], 0xffffffff);
+ }
tcg_regset_set(tcg_target_call_clobber_regs,
(1 << TCG_REG_V0) |
(1 << TCG_REG_V1) |
@@ -1923,11 +2613,11 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_K1); /* kernel use only */
tcg_regset_set_reg(s->reserved_regs, TCG_TMP0); /* internal use */
tcg_regset_set_reg(s->reserved_regs, TCG_TMP1); /* internal use */
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP2); /* internal use */
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP3); /* internal use */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return address */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); /* stack pointer */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */
-
- tcg_add_target_add_op_defs(mips_op_defs);
}
void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
@@ -1935,3 +2625,47 @@ void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
atomic_set((uint32_t *)jmp_addr, deposit32(OPC_J, 0, 26, addr >> 2));
flush_icache_range(jmp_addr, jmp_addr + 4);
}
+
+typedef struct {
+ DebugFrameHeader h;
+ uint8_t fde_def_cfa[4];
+ uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
+} DebugFrame;
+
+#define ELF_HOST_MACHINE EM_MIPS
+/* GDB doesn't appear to require proper setting of ELF_HOST_FLAGS,
+ which is good because they're really quite complicated for MIPS. */
+
+static const DebugFrame debug_frame = {
+ .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
+ .h.cie.id = -1,
+ .h.cie.version = 1,
+ .h.cie.code_align = 1,
+ .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
+ .h.cie.return_column = TCG_REG_RA,
+
+ /* Total FDE size does not include the "len" member. */
+ .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ 0x80 + 16, 9, /* DW_CFA_offset, s0, -72 */
+ 0x80 + 17, 8, /* DW_CFA_offset, s1, -64 */
+ 0x80 + 18, 7, /* DW_CFA_offset, s2, -56 */
+ 0x80 + 19, 6, /* DW_CFA_offset, s3, -48 */
+ 0x80 + 20, 5, /* DW_CFA_offset, s4, -40 */
+ 0x80 + 21, 4, /* DW_CFA_offset, s5, -32 */
+ 0x80 + 22, 3, /* DW_CFA_offset, s6, -24 */
+ 0x80 + 30, 2, /* DW_CFA_offset, s8, -16 */
+ 0x80 + 31, 1, /* DW_CFA_offset, ra, -8 */
+ }
+};
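
The two bytes after DW_CFA_def_cfa encode FRAME_SIZE as a uleb128: the
low seven bits with the continuation flag set, then the remaining bits.
A sketch of that encoding, assuming 0x80 <= FRAME_SIZE < 0x4000 so that
exactly two bytes are needed:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        unsigned frame_size = 0x1a0;             /* illustrative value */
        uint8_t b0 = (frame_size & 0x7f) | 0x80; /* low 7 bits, continue */
        uint8_t b1 = frame_size >> 7;            /* high bits, terminator */
        assert(b1 < 0x80);                       /* two bytes suffice */
        assert((unsigned)((b1 << 7) | (b0 & 0x7f)) == frame_size);
        return 0;
    }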
+
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 0f1349086b..adfc56ce62 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -296,6 +296,24 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
CASE_OP_32_64(nor):
return ~(x | y);
+ case INDEX_op_clz_i32:
+ return (uint32_t)x ? clz32(x) : y;
+
+ case INDEX_op_clz_i64:
+ return x ? clz64(x) : y;
+
+ case INDEX_op_ctz_i32:
+ return (uint32_t)x ? ctz32(x) : y;
+
+ case INDEX_op_ctz_i64:
+ return x ? ctz64(x) : y;
+
+ case INDEX_op_ctpop_i32:
+ return ctpop32(x);
+
+ case INDEX_op_ctpop_i64:
+ return ctpop64(x);
+
CASE_OP_32_64(ext8s):
return (int8_t)x;
@@ -878,11 +896,41 @@ void tcg_optimize(TCGContext *s)
temps[args[2]].mask);
break;
+ CASE_OP_32_64(extract):
+ mask = extract64(temps[args[1]].mask, args[2], args[3]);
+ if (args[2] == 0) {
+ affected = temps[args[1]].mask & ~mask;
+ }
+ break;
+ CASE_OP_32_64(sextract):
+ mask = sextract64(temps[args[1]].mask, args[2], args[3]);
+ if (args[2] == 0 && (tcg_target_long)mask >= 0) {
+ affected = temps[args[1]].mask & ~mask;
+ }
+ break;
+
CASE_OP_32_64(or):
CASE_OP_32_64(xor):
mask = temps[args[1]].mask | temps[args[2]].mask;
break;
+ case INDEX_op_clz_i32:
+ case INDEX_op_ctz_i32:
+ mask = temps[args[2]].mask | 31;
+ break;
+
+ case INDEX_op_clz_i64:
+ case INDEX_op_ctz_i64:
+ mask = temps[args[2]].mask | 63;
+ break;
+
+ case INDEX_op_ctpop_i32:
+ mask = 32 | 31;
+ break;
+ case INDEX_op_ctpop_i64:
+ mask = 64 | 63;
+ break;
+
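These masks bound which result bits can possibly be set: a nonzero
input folds clz/ctz to a count of at most 31 (or 63), a zero input
passes the second operand through, and ctpop can additionally return
the full word size. A quick bound check:

    #include <assert.h>

    int main(void)
    {
        for (int n = 0; n <= 63; n++) {
            assert((n & ~63) == 0);        /* clz/ctz counts fit in |63 */
        }
        for (int n = 0; n <= 64; n++) {
            assert((n & ~(64 | 63)) == 0); /* ctpop may also return 64 */
        }
        return 0;
    }
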
CASE_OP_32_64(setcond):
case INDEX_op_setcond2_i32:
mask = 1;
@@ -996,6 +1044,7 @@ void tcg_optimize(TCGContext *s)
CASE_OP_32_64(ext8u):
CASE_OP_32_64(ext16s):
CASE_OP_32_64(ext16u):
+ CASE_OP_32_64(ctpop):
case INDEX_op_ext32s_i64:
case INDEX_op_ext32u_i64:
case INDEX_op_ext_i32_i64:
@@ -1039,6 +1088,20 @@ void tcg_optimize(TCGContext *s)
}
goto do_default;
+ CASE_OP_32_64(clz):
+ CASE_OP_32_64(ctz):
+ if (temp_is_const(args[1])) {
+ TCGArg v = temps[args[1]].val;
+ if (v != 0) {
+ tmp = do_constant_folding(opc, v, 0);
+ tcg_opt_gen_movi(s, op, args, args[0], tmp);
+ } else {
+ tcg_opt_gen_mov(s, op, args, args[0], args[2]);
+ }
+ break;
+ }
+ goto do_default;
+
CASE_OP_32_64(deposit):
if (temp_is_const(args[1]) && temp_is_const(args[2])) {
tmp = deposit64(temps[args[1]].val, args[3], args[4],
@@ -1048,6 +1111,22 @@ void tcg_optimize(TCGContext *s)
}
goto do_default;
+ CASE_OP_32_64(extract):
+ if (temp_is_const(args[1])) {
+ tmp = extract64(temps[args[1]].val, args[2], args[3]);
+ tcg_opt_gen_movi(s, op, args, args[0], tmp);
+ break;
+ }
+ goto do_default;
+
+ CASE_OP_32_64(sextract):
+ if (temp_is_const(args[1])) {
+ tmp = sextract64(temps[args[1]].val, args[2], args[3]);
+ tcg_opt_gen_movi(s, op, args, args[0], tmp);
+ break;
+ }
+ goto do_default;
+
CASE_OP_32_64(setcond):
tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
if (tmp != 2) {
@@ -1076,6 +1155,21 @@ void tcg_optimize(TCGContext *s)
tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]);
break;
}
+ if (temp_is_const(args[3]) && temp_is_const(args[4])) {
+ tcg_target_ulong tv = temps[args[3]].val;
+ tcg_target_ulong fv = temps[args[4]].val;
+ TCGCond cond = args[5];
+ if (fv == 1 && tv == 0) {
+ cond = tcg_invert_cond(cond);
+ } else if (!(tv == 1 && fv == 0)) {
+ goto do_default;
+ }
+ args[3] = cond;
+ op->opc = opc = (opc == INDEX_op_movcond_i32
+ ? INDEX_op_setcond_i32
+ : INDEX_op_setcond_i64);
+ nb_iargs = 2;
+ }
goto do_default;
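
The rewrite above relies on a select between the constants 1 and 0
being the comparison result itself, and a 0/1 select being the
inverted comparison; any other constant pair falls through to the
default path:

    #include <assert.h>

    int main(void)
    {
        for (int cond = 0; cond <= 1; cond++) {
            assert((cond ? 1 : 0) == cond);  /* tv == 1, fv == 0 */
            assert((cond ? 0 : 1) == !cond); /* tv == 0, fv == 1 */
        }
        return 0;
    }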
case INDEX_op_add2_i32:
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index dd032f286b..abd8b3d6cd 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -49,6 +49,9 @@ typedef enum {
TCG_AREG0 = TCG_REG_R27
} TCGReg;
+extern bool have_isa_2_06;
+extern bool have_isa_3_00;
+
/* optional instructions automatically implemented */
#define TCG_TARGET_HAS_ext8u_i32 0 /* andi */
#define TCG_TARGET_HAS_ext16u_i32 0
@@ -68,7 +71,12 @@ typedef enum {
#define TCG_TARGET_HAS_eqv_i32 1
#define TCG_TARGET_HAS_nand_i32 1
#define TCG_TARGET_HAS_nor_i32 1
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 have_isa_3_00
+#define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06
#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 0
#define TCG_TARGET_HAS_movcond_i32 1
#define TCG_TARGET_HAS_mulu2_i32 0
#define TCG_TARGET_HAS_muls2_i32 0
@@ -99,7 +107,12 @@ typedef enum {
#define TCG_TARGET_HAS_eqv_i64 1
#define TCG_TARGET_HAS_nand_i64 1
#define TCG_TARGET_HAS_nor_i64 1
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 have_isa_3_00
+#define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06
#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 0
#define TCG_TARGET_HAS_movcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
#define TCG_TARGET_HAS_sub2_i64 1
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index a3262cfb0c..64f67d2c77 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -77,11 +77,15 @@
#define TCG_CT_CONST_U32 0x800
#define TCG_CT_CONST_ZERO 0x1000
#define TCG_CT_CONST_MONE 0x2000
+#define TCG_CT_CONST_WSZ 0x4000
static tcg_insn_unit *tb_ret_addr;
#include "elf.h"
-static bool have_isa_2_06;
+
+bool have_isa_2_06;
+bool have_isa_3_00;
+
#define HAVE_ISA_2_06 have_isa_2_06
#define HAVE_ISEL have_isa_2_06
@@ -259,12 +263,10 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
}
/* parse target specific constraints */
-static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+static const char *target_parse_constraint(TCGArgConstraint *ct,
+ const char *ct_str, TCGType type)
{
- const char *ct_str;
-
- ct_str = *pct_str;
- switch (ct_str[0]) {
+ switch (*ct_str++) {
case 'A': case 'B': case 'C': case 'D':
ct->ct |= TCG_CT_REG;
- tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A');
+ tcg_regset_set_reg(ct->u.regs, 3 + ct_str[-1] - 'A');
@@ -307,15 +309,16 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
case 'U':
ct->ct |= TCG_CT_CONST_U32;
break;
+ case 'W':
+ ct->ct |= TCG_CT_CONST_WSZ;
+ break;
case 'Z':
ct->ct |= TCG_CT_CONST_ZERO;
break;
default:
- return -1;
+ return NULL;
}
- ct_str++;
- *pct_str = ct_str;
- return 0;
+ return ct_str;
}
/* test if a constant matches the constraint */
@@ -345,6 +348,9 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
return 1;
} else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
return 1;
+ } else if ((ct & TCG_CT_CONST_WSZ)
+ && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
+ return 1;
}
return 0;
}
@@ -449,6 +455,10 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
#define NOR XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
+#define CNTTZW XO31(538)
+#define CNTTZD XO31(570)
+#define CNTPOPW XO31(378)
+#define CNTPOPD XO31(506)
#define ANDC XO31( 60)
#define ORC XO31(412)
#define EQV XO31(284)
@@ -1170,6 +1180,32 @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
}
}
+static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
+ TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
+{
+ if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
+ tcg_out32(s, opc | RA(a0) | RS(a1));
+ } else {
+ tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
+ /* Note that the only other valid constant for a2 is 0. */
+ if (HAVE_ISEL) {
+ tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
+ tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
+ } else if (!const_a2 && a0 == a2) {
+ tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
+ tcg_out32(s, opc | RA(a0) | RS(a1));
+ } else {
+ tcg_out32(s, opc | RA(a0) | RS(a1));
+ tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
+ if (const_a2) {
+ tcg_out_movi(s, type, a0, 0);
+ } else {
+ tcg_out_mov(s, type, a0, a2);
+ }
+ }
+ }
+}
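
A host-side C sketch of the isel shape emitted above, using a
GCC-style builtin in place of the instruction (the reference helpers
are illustrative, not QEMU or PPC intrinsics). Note that cntlzw/cntlzd
already yield the word size for a zero input, which is why a constant
a2 equal to 32/64 collapses to the bare count instruction:

    #include <assert.h>
    #include <stdint.h>

    static uint64_t cntlzd_ref(uint64_t x)   /* PPC cntlzd semantics */
    {
        return x ? (uint64_t)__builtin_clzll(x) : 64;
    }

    static uint64_t cntxz_isel(uint64_t a1, uint64_t a2)
    {
        uint64_t t = cntlzd_ref(a1);         /* count into a scratch */
        return a1 == 0 ? a2 : t;             /* isel on the compare */
    }

    int main(void)
    {
        assert(cntxz_isel(0x10, 99) == 59);
        assert(cntxz_isel(0, 99) == 99);
        assert(cntlzd_ref(0) == 64);         /* why a2 == 64 is free */
        return 0;
    }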
+
static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
const int *const_args)
{
@@ -2107,6 +2143,30 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
break;
+ case INDEX_op_clz_i32:
+ tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_ctz_i32:
+ tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_ctpop_i32:
+ tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
+ break;
+
+ case INDEX_op_clz_i64:
+ tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_ctz_i64:
+ tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_ctpop_i64:
+ tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
+ break;
+
case INDEX_op_mul_i32:
a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
@@ -2396,6 +2456,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
+ case INDEX_op_extract_i32:
+ tcg_out_rlw(s, RLWINM, args[0], args[1],
+ 32 - args[2], 32 - args[3], 31);
+ break;
+ case INDEX_op_extract_i64:
+ tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
+ break;
+
case INDEX_op_movcond_i32:
tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
args[3], args[4], const_args[2]);
@@ -2511,6 +2579,9 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_eqv_i32, { "r", "r", "ri" } },
{ INDEX_op_nand_i32, { "r", "r", "r" } },
{ INDEX_op_nor_i32, { "r", "r", "r" } },
+ { INDEX_op_clz_i32, { "r", "r", "rZW" } },
+ { INDEX_op_ctz_i32, { "r", "r", "rZW" } },
+ { INDEX_op_ctpop_i32, { "r", "r" } },
{ INDEX_op_shl_i32, { "r", "r", "ri" } },
{ INDEX_op_shr_i32, { "r", "r", "ri" } },
@@ -2530,6 +2601,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_movcond_i32, { "r", "r", "ri", "rZ", "rZ" } },
{ INDEX_op_deposit_i32, { "r", "0", "rZ" } },
+ { INDEX_op_extract_i32, { "r", "r" } },
{ INDEX_op_muluh_i32, { "r", "r", "r" } },
{ INDEX_op_mulsh_i32, { "r", "r", "r" } },
@@ -2558,6 +2630,9 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_eqv_i64, { "r", "r", "r" } },
{ INDEX_op_nand_i64, { "r", "r", "r" } },
{ INDEX_op_nor_i64, { "r", "r", "r" } },
+ { INDEX_op_clz_i64, { "r", "r", "rZW" } },
+ { INDEX_op_ctz_i64, { "r", "r", "rZW" } },
+ { INDEX_op_ctpop_i64, { "r", "r" } },
{ INDEX_op_shl_i64, { "r", "r", "ri" } },
{ INDEX_op_shr_i64, { "r", "r", "ri" } },
@@ -2585,6 +2660,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_movcond_i64, { "r", "r", "ri", "rZ", "rZ" } },
{ INDEX_op_deposit_i64, { "r", "0", "rZ" } },
+ { INDEX_op_extract_i64, { "r", "r" } },
{ INDEX_op_mulsh_i64, { "r", "r", "r" } },
{ INDEX_op_muluh_i64, { "r", "r", "r" } },
@@ -2624,12 +2700,31 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ -1 },
};
+static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+{
+ int i, n = ARRAY_SIZE(ppc_op_defs);
+
+ for (i = 0; i < n; ++i) {
+ if (ppc_op_defs[i].op == op) {
+ return &ppc_op_defs[i];
+ }
+ }
+ return NULL;
+}
+
static void tcg_target_init(TCGContext *s)
{
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
+ unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
+
if (hwcap & PPC_FEATURE_ARCH_2_06) {
have_isa_2_06 = true;
}
+#ifdef PPC_FEATURE2_ARCH_3_00
+ if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
+ have_isa_3_00 = true;
+ }
+#endif
tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
@@ -2660,8 +2755,6 @@ static void tcg_target_init(TCGContext *s)
if (USE_REG_RA) {
tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return addr */
}
-
- tcg_add_target_add_op_defs(ppc_op_defs);
}
#ifdef __ELF__
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 0c1af244f3..cbdd2a6275 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -49,63 +49,81 @@ typedef enum TCGReg {
#define TCG_TARGET_NB_REGS 16
-/* optional instructions */
-#define TCG_TARGET_HAS_div2_i32 1
-#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 1
-#define TCG_TARGET_HAS_ext16u_i32 1
-#define TCG_TARGET_HAS_bswap16_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_not_i32 0
-#define TCG_TARGET_HAS_neg_i32 1
-#define TCG_TARGET_HAS_andc_i32 0
-#define TCG_TARGET_HAS_orc_i32 0
-#define TCG_TARGET_HAS_eqv_i32 0
-#define TCG_TARGET_HAS_nand_i32 0
-#define TCG_TARGET_HAS_nor_i32 0
-#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_movcond_i32 1
-#define TCG_TARGET_HAS_add2_i32 1
-#define TCG_TARGET_HAS_sub2_i32 1
-#define TCG_TARGET_HAS_mulu2_i32 0
-#define TCG_TARGET_HAS_muls2_i32 0
-#define TCG_TARGET_HAS_muluh_i32 0
-#define TCG_TARGET_HAS_mulsh_i32 0
-#define TCG_TARGET_HAS_extrl_i64_i32 0
-#define TCG_TARGET_HAS_extrh_i64_i32 0
+/* A list of relevant facilities used by this translator. Some of these
+ are required for proper operation, and these are checked at startup. */
+
+#define FACILITY_ZARCH_ACTIVE (1ULL << (63 - 2))
+#define FACILITY_LONG_DISP (1ULL << (63 - 18))
+#define FACILITY_EXT_IMM (1ULL << (63 - 21))
+#define FACILITY_GEN_INST_EXT (1ULL << (63 - 34))
+#define FACILITY_LOAD_ON_COND (1ULL << (63 - 45))
+#define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND
-#define TCG_TARGET_HAS_div2_i64 1
-#define TCG_TARGET_HAS_rot_i64 1
-#define TCG_TARGET_HAS_ext8s_i64 1
-#define TCG_TARGET_HAS_ext16s_i64 1
-#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext8u_i64 1
-#define TCG_TARGET_HAS_ext16u_i64 1
-#define TCG_TARGET_HAS_ext32u_i64 1
-#define TCG_TARGET_HAS_bswap16_i64 1
-#define TCG_TARGET_HAS_bswap32_i64 1
-#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_not_i64 0
-#define TCG_TARGET_HAS_neg_i64 1
-#define TCG_TARGET_HAS_andc_i64 0
-#define TCG_TARGET_HAS_orc_i64 0
-#define TCG_TARGET_HAS_eqv_i64 0
-#define TCG_TARGET_HAS_nand_i64 0
-#define TCG_TARGET_HAS_nor_i64 0
-#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_movcond_i64 1
-#define TCG_TARGET_HAS_add2_i64 1
-#define TCG_TARGET_HAS_sub2_i64 1
-#define TCG_TARGET_HAS_mulu2_i64 1
-#define TCG_TARGET_HAS_muls2_i64 0
-#define TCG_TARGET_HAS_muluh_i64 0
-#define TCG_TARGET_HAS_mulsh_i64 0
+extern uint64_t s390_facilities;
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div2_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 0
+#define TCG_TARGET_HAS_neg_i32 1
+#define TCG_TARGET_HAS_andc_i32 0
+#define TCG_TARGET_HAS_orc_i32 0
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 0
+#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_deposit_i32 (s390_facilities & FACILITY_GEN_INST_EXT)
+#define TCG_TARGET_HAS_extract_i32 (s390_facilities & FACILITY_GEN_INST_EXT)
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_extrl_i64_i32 0
+#define TCG_TARGET_HAS_extrh_i64_i32 0
-extern bool tcg_target_deposit_valid(int ofs, int len);
-#define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid
-#define TCG_TARGET_deposit_i64_valid tcg_target_deposit_valid
+#define TCG_TARGET_HAS_div2_i64 1
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_not_i64 0
+#define TCG_TARGET_HAS_neg_i64 1
+#define TCG_TARGET_HAS_andc_i64 0
+#define TCG_TARGET_HAS_orc_i64 0
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 (s390_facilities & FACILITY_EXT_IMM)
+#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 0
+#define TCG_TARGET_HAS_deposit_i64 (s390_facilities & FACILITY_GEN_INST_EXT)
+#define TCG_TARGET_HAS_extract_i64 (s390_facilities & FACILITY_GEN_INST_EXT)
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 1
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
/* used for function call generation */
#define TCG_REG_CALL_STACK TCG_REG_R15
diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c
index 253d4a0a0b..a679280b92 100644
--- a/tcg/s390/tcg-target.inc.c
+++ b/tcg/s390/tcg-target.inc.c
@@ -43,13 +43,14 @@
#define TCG_CT_CONST_XORI 0x400
#define TCG_CT_CONST_CMPI 0x800
#define TCG_CT_CONST_ADLI 0x1000
+#define TCG_CT_CONST_ZERO 0x2000
/* Several places within the instruction set 0 means "no register"
rather than TCG_REG_R0. */
#define TCG_REG_NONE 0
/* A scratch register that may be used throughout the backend. */
-#define TCG_TMP0 TCG_REG_R14
+#define TCG_TMP0 TCG_REG_R1
#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG TCG_REG_R13
@@ -132,6 +133,7 @@ typedef enum S390Opcode {
RRE_DLR = 0xb997,
RRE_DSGFR = 0xb91d,
RRE_DSGR = 0xb90d,
+ RRE_FLOGR = 0xb983,
RRE_LGBR = 0xb906,
RRE_LCGR = 0xb903,
RRE_LGFR = 0xb914,
@@ -334,18 +336,7 @@ static void * const qemu_st_helpers[16] = {
#endif
static tcg_insn_unit *tb_ret_addr;
-
-/* A list of relevant facilities used by this translator. Some of these
- are required for proper operation, and these are checked at startup. */
-
-#define FACILITY_ZARCH_ACTIVE (1ULL << (63 - 2))
-#define FACILITY_LONG_DISP (1ULL << (63 - 18))
-#define FACILITY_EXT_IMM (1ULL << (63 - 21))
-#define FACILITY_GEN_INST_EXT (1ULL << (63 - 34))
-#define FACILITY_LOAD_ON_COND (1ULL << (63 - 45))
-#define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND
-
-static uint64_t facilities;
+uint64_t s390_facilities;
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
intptr_t value, intptr_t addend)
@@ -369,20 +360,14 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
}
/* parse target specific constraints */
-static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+static const char *target_parse_constraint(TCGArgConstraint *ct,
+ const char *ct_str, TCGType type)
{
- const char *ct_str = *pct_str;
-
- switch (ct_str[0]) {
+ switch (*ct_str++) {
case 'r': /* all registers */
ct->ct |= TCG_CT_REG;
tcg_regset_set32(ct->u.regs, 0, 0xffff);
break;
- case 'R': /* not R0 */
- ct->ct |= TCG_CT_REG;
- tcg_regset_set32(ct->u.regs, 0, 0xffff);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
- break;
case 'L': /* qemu_ld/st constraint */
ct->ct |= TCG_CT_REG;
tcg_regset_set32(ct->u.regs, 0, 0xffff);
@@ -415,13 +400,13 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
case 'C':
ct->ct |= TCG_CT_CONST_CMPI;
break;
+ case 'Z':
+ ct->ct |= TCG_CT_CONST_ZERO;
+ break;
default:
- return -1;
+ return NULL;
}
- ct_str++;
- *pct_str = ct_str;
-
- return 0;
+ return ct_str;
}
/* Immediates to be used with logical OR. This is an optimization only,
@@ -432,7 +417,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
static int tcg_match_ori(TCGType type, tcg_target_long val)
{
- if (facilities & FACILITY_EXT_IMM) {
+ if (s390_facilities & FACILITY_EXT_IMM) {
if (type == TCG_TYPE_I32) {
/* All 32-bit ORs can be performed with 1 48-bit insn. */
return 1;
@@ -444,7 +429,7 @@ static int tcg_match_ori(TCGType type, tcg_target_long val)
if (val == (int16_t)val) {
return 0;
}
- if (facilities & FACILITY_EXT_IMM) {
+ if (s390_facilities & FACILITY_EXT_IMM) {
if (val == (int32_t)val) {
return 0;
}
@@ -461,7 +446,7 @@ static int tcg_match_ori(TCGType type, tcg_target_long val)
static int tcg_match_xori(TCGType type, tcg_target_long val)
{
- if ((facilities & FACILITY_EXT_IMM) == 0) {
+ if ((s390_facilities & FACILITY_EXT_IMM) == 0) {
return 0;
}
@@ -482,7 +467,7 @@ static int tcg_match_xori(TCGType type, tcg_target_long val)
static int tcg_match_cmpi(TCGType type, tcg_target_long val)
{
- if (facilities & FACILITY_EXT_IMM) {
+ if (s390_facilities & FACILITY_EXT_IMM) {
/* The COMPARE IMMEDIATE instruction is available. */
if (type == TCG_TYPE_I32) {
/* We have a 32-bit immediate and can compare against anything. */
@@ -511,7 +496,7 @@ static int tcg_match_cmpi(TCGType type, tcg_target_long val)
static int tcg_match_add2i(TCGType type, tcg_target_long val)
{
- if (facilities & FACILITY_EXT_IMM) {
+ if (s390_facilities & FACILITY_EXT_IMM) {
if (type == TCG_TYPE_I32) {
return 1;
} else if (val >= -0xffffffffll && val <= 0xffffffffll) {
@@ -541,7 +526,7 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
general-instruction-extensions, then we have MULTIPLY SINGLE
IMMEDIATE with a signed 32-bit, otherwise we have only
MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit. */
- if (facilities & FACILITY_GEN_INST_EXT) {
+ if (s390_facilities & FACILITY_GEN_INST_EXT) {
return val == (int32_t)val;
} else {
return val == (int16_t)val;
@@ -554,6 +539,8 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
return tcg_match_xori(type, val);
} else if (ct & TCG_CT_CONST_CMPI) {
return tcg_match_cmpi(type, val);
+ } else if (ct & TCG_CT_CONST_ZERO) {
+ return val == 0;
}
return 0;
@@ -668,7 +655,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
}
/* Try all 48-bit insns that can load it in one go. */
- if (facilities & FACILITY_EXT_IMM) {
+ if (s390_facilities & FACILITY_EXT_IMM) {
if (sval == (int32_t)sval) {
tcg_out_insn(s, RIL, LGFI, ret, sval);
return;
@@ -694,7 +681,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
/* If extended immediates are not present, then we may have to issue
several instructions to load the low 32 bits. */
- if (!(facilities & FACILITY_EXT_IMM)) {
+ if (!(s390_facilities & FACILITY_EXT_IMM)) {
/* A 32-bit unsigned value can be loaded in 2 insns. And given
that the lli_insns loop above did not succeed, we know that
both insns are required. */
@@ -727,7 +714,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
/* Insert data into the high 32-bits. */
uval = uval >> 31 >> 1;
- if (facilities & FACILITY_EXT_IMM) {
+ if (s390_facilities & FACILITY_EXT_IMM) {
if (uval < 0x10000) {
tcg_out_insn(s, RI, IIHL, ret, uval);
} else if ((uval & 0xffff) == 0) {
@@ -810,7 +797,7 @@ static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs)
{
intptr_t addr = (intptr_t)abs;
- if ((facilities & FACILITY_GEN_INST_EXT) && !(addr & 1)) {
+ if ((s390_facilities & FACILITY_GEN_INST_EXT) && !(addr & 1)) {
ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1;
if (disp == (int32_t)disp) {
if (type == TCG_TYPE_I32) {
@@ -837,7 +824,7 @@ static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
- if (facilities & FACILITY_EXT_IMM) {
+ if (s390_facilities & FACILITY_EXT_IMM) {
tcg_out_insn(s, RRE, LGBR, dest, src);
return;
}
@@ -857,7 +844,7 @@ static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
- if (facilities & FACILITY_EXT_IMM) {
+ if (s390_facilities & FACILITY_EXT_IMM) {
tcg_out_insn(s, RRE, LLGCR, dest, src);
return;
}
@@ -877,7 +864,7 @@ static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
- if (facilities & FACILITY_EXT_IMM) {
+ if (s390_facilities & FACILITY_EXT_IMM) {
tcg_out_insn(s, RRE, LGHR, dest, src);
return;
}
@@ -897,7 +884,7 @@ static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
- if (facilities & FACILITY_EXT_IMM) {
+ if (s390_facilities & FACILITY_EXT_IMM) {
tcg_out_insn(s, RRE, LLGHR, dest, src);
return;
}
@@ -985,7 +972,7 @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
tgen_ext32u(s, dest, dest);
return;
}
- if (facilities & FACILITY_EXT_IMM) {
+ if (s390_facilities & FACILITY_EXT_IMM) {
if ((val & valid) == 0xff) {
tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
return;
@@ -1006,7 +993,7 @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
}
/* Try all 48-bit insns that can perform it in one go. */
- if (facilities & FACILITY_EXT_IMM) {
+ if (s390_facilities & FACILITY_EXT_IMM) {
for (i = 0; i < 2; i++) {
tcg_target_ulong mask = ~(0xffffffffull << i*32);
if (((val | ~valid) & mask) == mask) {
@@ -1015,7 +1002,7 @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
}
}
}
- if ((facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) {
+ if ((s390_facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) {
tgen_andi_risbg(s, dest, dest, val);
return;
}
@@ -1045,7 +1032,7 @@ static void tgen64_ori(TCGContext *s, TCGReg dest, tcg_target_ulong val)
return;
}
- if (facilities & FACILITY_EXT_IMM) {
+ if (s390_facilities & FACILITY_EXT_IMM) {
/* Try all 32-bit insns that can perform it in one go. */
for (i = 0; i < 4; i++) {
tcg_target_ulong mask = (0xffffull << i*16);
@@ -1093,33 +1080,43 @@ static void tgen64_xori(TCGContext *s, TCGReg dest, tcg_target_ulong val)
}
static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
- TCGArg c2, int c2const)
+ TCGArg c2, bool c2const, bool need_carry)
{
bool is_unsigned = is_unsigned_cond(c);
if (c2const) {
if (c2 == 0) {
+ if (!(is_unsigned && need_carry)) {
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, LTR, r1, r1);
+ } else {
+ tcg_out_insn(s, RRE, LTGR, r1, r1);
+ }
+ return tcg_cond_to_ltr_cond[c];
+ }
+ /* If we only got here because of load-and-test,
+ and we couldn't use that, then we need to load
+ the constant into a register. */
+ if (!(s390_facilities & FACILITY_EXT_IMM)) {
+ c2 = TCG_TMP0;
+ tcg_out_movi(s, type, c2, 0);
+ goto do_reg;
+ }
+ }
+ if (is_unsigned) {
if (type == TCG_TYPE_I32) {
- tcg_out_insn(s, RR, LTR, r1, r1);
+ tcg_out_insn(s, RIL, CLFI, r1, c2);
} else {
- tcg_out_insn(s, RRE, LTGR, r1, r1);
+ tcg_out_insn(s, RIL, CLGFI, r1, c2);
}
- return tcg_cond_to_ltr_cond[c];
} else {
- if (is_unsigned) {
- if (type == TCG_TYPE_I32) {
- tcg_out_insn(s, RIL, CLFI, r1, c2);
- } else {
- tcg_out_insn(s, RIL, CLGFI, r1, c2);
- }
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RIL, CFI, r1, c2);
} else {
- if (type == TCG_TYPE_I32) {
- tcg_out_insn(s, RIL, CFI, r1, c2);
- } else {
- tcg_out_insn(s, RIL, CGFI, r1, c2);
- }
+ tcg_out_insn(s, RIL, CGFI, r1, c2);
}
}
} else {
+ do_reg:
if (is_unsigned) {
if (type == TCG_TYPE_I32) {
tcg_out_insn(s, RR, CLR, r1, c2);
@@ -1148,7 +1145,7 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
do_greater:
/* The result of a compare has CC=2 for GT and CC=3 unused.
ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit. */
- tgen_cmp(s, type, cond, c1, c2, c2const);
+ tgen_cmp(s, type, cond, c1, c2, c2const, true);
tcg_out_movi(s, type, dest, 0);
tcg_out_insn(s, RRE, ALCGR, dest, dest);
return;
@@ -1219,8 +1216,8 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
break;
}
- cc = tgen_cmp(s, type, cond, c1, c2, c2const);
- if (facilities & FACILITY_LOAD_ON_COND) {
+ cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
+ if (s390_facilities & FACILITY_LOAD_ON_COND) {
/* Emit: d = 0, t = 1, d = (cc ? t : d). */
tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
@@ -1237,12 +1234,12 @@ static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
TCGReg c1, TCGArg c2, int c2const, TCGReg r3)
{
int cc;
- if (facilities & FACILITY_LOAD_ON_COND) {
- cc = tgen_cmp(s, type, c, c1, c2, c2const);
+ if (s390_facilities & FACILITY_LOAD_ON_COND) {
+ cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
tcg_out_insn(s, RRF, LOCGR, dest, r3, cc);
} else {
c = tcg_invert_cond(c);
- cc = tgen_cmp(s, type, c, c1, c2, c2const);
+ cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
/* Emit: if (cc) goto over; dest = r3; over: */
tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
@@ -1250,17 +1247,45 @@ static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
}
}
-bool tcg_target_deposit_valid(int ofs, int len)
+static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
+ TCGArg a2, int a2const)
{
- return (facilities & FACILITY_GEN_INST_EXT) != 0;
+ /* Since this sets both R and R+1, we have no choice but to store the
+ result into R0, allowing R1 == TCG_TMP0 to be clobbered as well. */
+ QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1);
+ tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1);
+
+ if (a2const && a2 == 64) {
+ tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
+ } else {
+ if (a2const) {
+ tcg_out_movi(s, TCG_TYPE_I64, dest, a2);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I64, dest, a2);
+ }
+ if (s390_facilities & FACILITY_LOAD_ON_COND) {
+ /* Emit: if (one bit found) dest = r0. */
+ tcg_out_insn(s, RRF, LOCGR, dest, TCG_REG_R0, 2);
+ } else {
+ /* Emit: if (no one bit found) goto over; dest = r0; over: */
+ tcg_out_insn(s, RI, BRC, 8, (4 + 4) >> 1);
+ tcg_out_insn(s, RRE, LGR, dest, TCG_REG_R0);
+ }
+ }
}
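
FLOGR leaves the count in R0 (64 when no bit is set) and sets
condition code 2 exactly when a one bit was found, which is what the
LOCGR and branch fallbacks above test. A host-side sketch of the
sequence, with a GCC builtin standing in for the instruction:

    #include <assert.h>
    #include <stdint.h>

    static uint64_t clz_via_flogr(uint64_t a1, uint64_t a2)
    {
        /* FLOGR: count into r0; cc == 2 iff a one bit was found. */
        uint64_t r0 = a1 ? (uint64_t)__builtin_clzll(a1) : 64;
        int cc_found = (a1 != 0);
        /* LOCGR dest, r0, 2: take r0 over the fallback when found. */
        return cc_found ? r0 : a2;
    }

    int main(void)
    {
        assert(clz_via_flogr(1, 42) == 63);
        assert(clz_via_flogr(0, 42) == 42);
        return 0;
    }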
static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
- int ofs, int len)
+ int ofs, int len, int z)
{
int lsb = (63 - ofs);
int msb = lsb - (len - 1);
- tcg_out_risbg(s, dest, src, msb, lsb, ofs, 0);
+ tcg_out_risbg(s, dest, src, msb, lsb, ofs, z);
+}
+
+static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
+ int ofs, int len)
+{
+ tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
}
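
The risbg parameters above implement a zero-extending bitfield
extract: rotate the source right by ofs (encoded as a left rotate of
64 - ofs), then insert only bits 64-len..63 into a zeroed destination.
In C terms:

    #include <assert.h>
    #include <stdint.h>

    static uint64_t rotl64(uint64_t x, unsigned n)
    {
        n &= 63;
        return n ? (x << n) | (x >> (64 - n)) : x;
    }

    int main(void)
    {
        uint64_t src = 0x1234567890abcdefull;
        unsigned ofs = 8, len = 16;
        uint64_t rot = rotl64(src, 64 - ofs);     /* rotate right by ofs */
        uint64_t res = rot & ((1ull << len) - 1); /* keep the low field */
        assert(res == 0xabcd);                    /* bits [23:8] of src */
        return 0;
    }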
static void tgen_gotoi(TCGContext *s, int cc, tcg_insn_unit *dest)
@@ -1332,7 +1357,7 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
{
int cc;
- if (facilities & FACILITY_GEN_INST_EXT) {
+ if (s390_facilities & FACILITY_GEN_INST_EXT) {
bool is_unsigned = is_unsigned_cond(c);
bool in_range;
S390Opcode opc;
@@ -1374,7 +1399,7 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
}
}
- cc = tgen_cmp(s, type, c, r1, c2, c2const);
+ cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
tgen_branch(s, cc, l);
}
@@ -1519,7 +1544,7 @@ static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc,
a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
- if (facilities & FACILITY_GEN_INST_EXT) {
+ if (s390_facilities & FACILITY_GEN_INST_EXT) {
tcg_out_risbg(s, TCG_REG_R2, addr_reg,
64 - CPU_TLB_BITS - CPU_TLB_ENTRY_BITS,
63 - CPU_TLB_ENTRY_BITS,
@@ -1790,7 +1815,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_insn(s, RI, AHI, a0, a2);
break;
}
- if (facilities & FACILITY_EXT_IMM) {
+ if (s390_facilities & FACILITY_EXT_IMM) {
tcg_out_insn(s, RIL, AFI, a0, a2);
break;
}
@@ -1986,7 +2011,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_insn(s, RI, AGHI, a0, a2);
break;
}
- if (facilities & FACILITY_EXT_IMM) {
+ if (s390_facilities & FACILITY_EXT_IMM) {
if (a2 == (int32_t)a2) {
tcg_out_insn(s, RIL, AGFI, a0, a2);
break;
@@ -2167,7 +2192,30 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
OP_32_64(deposit):
- tgen_deposit(s, args[0], args[2], args[3], args[4]);
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[1]) {
+ tgen_deposit(s, a0, a2, args[3], args[4], 1);
+ } else {
+ /* Since we can't support "0Z" as a constraint, we allow a1 in
+ any register. Fix things up as if we had a matching constraint. */
+ if (a0 != a1) {
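+ /* TCG_TYPE_I32 == 0 and TCG_TYPE_I64 == 1, so this selects the type. */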
+ TCGType type = (opc == INDEX_op_deposit_i64);
+ if (a0 == a2) {
+ tcg_out_mov(s, type, TCG_TMP0, a2);
+ a2 = TCG_TMP0;
+ }
+ tcg_out_mov(s, type, a0, a1);
+ }
+ tgen_deposit(s, a0, a2, args[3], args[4], 0);
+ }
+ break;
+
+ OP_32_64(extract):
+ tgen_extract(s, args[0], args[1], args[2], args[3]);
+ break;
+
+ case INDEX_op_clz_i64:
+ tgen_clz(s, args[0], args[1], args[2], const_args[2]);
break;
case INDEX_op_mb:
@@ -2175,7 +2223,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
serialize the instruction stream. */
if (args[0] & TCG_MO_ST_LD) {
tcg_out_insn(s, RR, BCR,
- facilities & FACILITY_FAST_BCR_SER ? 14 : 15, 0);
+ s390_facilities & FACILITY_FAST_BCR_SER ? 14 : 15, 0);
}
break;
@@ -2216,12 +2264,12 @@ static const TCGTargetOpDef s390_op_defs[] = {
{ INDEX_op_neg_i32, { "r", "r" } },
- { INDEX_op_shl_i32, { "r", "0", "Ri" } },
- { INDEX_op_shr_i32, { "r", "0", "Ri" } },
- { INDEX_op_sar_i32, { "r", "0", "Ri" } },
+ { INDEX_op_shl_i32, { "r", "0", "ri" } },
+ { INDEX_op_shr_i32, { "r", "0", "ri" } },
+ { INDEX_op_sar_i32, { "r", "0", "ri" } },
- { INDEX_op_rotl_i32, { "r", "r", "Ri" } },
- { INDEX_op_rotr_i32, { "r", "r", "Ri" } },
+ { INDEX_op_rotl_i32, { "r", "r", "ri" } },
+ { INDEX_op_rotr_i32, { "r", "r", "ri" } },
{ INDEX_op_ext8s_i32, { "r", "r" } },
{ INDEX_op_ext8u_i32, { "r", "r" } },
@@ -2237,7 +2285,8 @@ static const TCGTargetOpDef s390_op_defs[] = {
{ INDEX_op_brcond_i32, { "r", "rC" } },
{ INDEX_op_setcond_i32, { "r", "r", "rC" } },
{ INDEX_op_movcond_i32, { "r", "r", "rC", "r", "0" } },
- { INDEX_op_deposit_i32, { "r", "0", "r" } },
+ { INDEX_op_deposit_i32, { "r", "rZ", "r" } },
+ { INDEX_op_extract_i32, { "r", "r" } },
{ INDEX_op_qemu_ld_i32, { "r", "L" } },
{ INDEX_op_qemu_ld_i64, { "r", "L" } },
@@ -2271,12 +2320,12 @@ static const TCGTargetOpDef s390_op_defs[] = {
{ INDEX_op_neg_i64, { "r", "r" } },
- { INDEX_op_shl_i64, { "r", "r", "Ri" } },
- { INDEX_op_shr_i64, { "r", "r", "Ri" } },
- { INDEX_op_sar_i64, { "r", "r", "Ri" } },
+ { INDEX_op_shl_i64, { "r", "r", "ri" } },
+ { INDEX_op_shr_i64, { "r", "r", "ri" } },
+ { INDEX_op_sar_i64, { "r", "r", "ri" } },
- { INDEX_op_rotl_i64, { "r", "r", "Ri" } },
- { INDEX_op_rotr_i64, { "r", "r", "Ri" } },
+ { INDEX_op_rotl_i64, { "r", "r", "ri" } },
+ { INDEX_op_rotr_i64, { "r", "r", "ri" } },
{ INDEX_op_ext8s_i64, { "r", "r" } },
{ INDEX_op_ext8u_i64, { "r", "r" } },
@@ -2292,6 +2341,8 @@ static const TCGTargetOpDef s390_op_defs[] = {
{ INDEX_op_bswap32_i64, { "r", "r" } },
{ INDEX_op_bswap64_i64, { "r", "r" } },
+ { INDEX_op_clz_i64, { "r", "r", "ri" } },
+
{ INDEX_op_add2_i64, { "r", "r", "0", "1", "rA", "r" } },
{ INDEX_op_sub2_i64, { "r", "r", "0", "1", "rA", "r" } },
@@ -2299,12 +2350,25 @@ static const TCGTargetOpDef s390_op_defs[] = {
{ INDEX_op_setcond_i64, { "r", "r", "rC" } },
{ INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } },
{ INDEX_op_deposit_i64, { "r", "0", "r" } },
+ { INDEX_op_extract_i64, { "r", "r" } },
{ INDEX_op_mb, { } },
{ -1 },
};
-static void query_facilities(void)
+static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+{
+ int i, n = ARRAY_SIZE(s390_op_defs);
+
+ for (i = 0; i < n; ++i) {
+ if (s390_op_defs[i].op == op) {
+ return &s390_op_defs[i];
+ }
+ }
+ return NULL;
+}
+
+static void query_s390_facilities(void)
{
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
@@ -2315,7 +2379,7 @@ static void query_facilities(void)
register void *r1 __asm__("1");
/* stfle 0(%r1) */
- r1 = &facilities;
+ r1 = &s390_facilities;
asm volatile(".word 0xb2b0,0x1000"
: "=r"(r0) : "0"(0), "r"(r1) : "memory", "cc");
}
@@ -2323,7 +2387,7 @@ static void query_facilities(void)
static void tcg_target_init(TCGContext *s)
{
- query_facilities();
+ query_s390_facilities();
tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
@@ -2346,8 +2410,6 @@ static void tcg_target_init(TCGContext *s)
/* XXX many insns can't be used with R0, so we better avoid it for now */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
-
- tcg_add_target_add_op_defs(s390_op_defs);
}
#define FRAME_SIZE ((int)(TCG_TARGET_CALL_STACK_OFFSET \
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 88f9c90f5f..b8b74f96ff 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -110,7 +110,12 @@ extern bool use_vis3_instructions;
#define TCG_TARGET_HAS_eqv_i32 0
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 0
+#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_extract_i32 0
+#define TCG_TARGET_HAS_sextract_i32 0
#define TCG_TARGET_HAS_movcond_i32 1
#define TCG_TARGET_HAS_add2_i32 1
#define TCG_TARGET_HAS_sub2_i32 1
@@ -140,7 +145,12 @@ extern bool use_vis3_instructions;
#define TCG_TARGET_HAS_eqv_i64 0
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 0
+#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_extract_i64 0
+#define TCG_TARGET_HAS_sextract_i64 0
#define TCG_TARGET_HAS_movcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
#define TCG_TARGET_HAS_sub2_i64 1
diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c
index 700c43487f..d1f4c0dead 100644
--- a/tcg/sparc/tcg-target.inc.c
+++ b/tcg/sparc/tcg-target.inc.c
@@ -319,12 +319,10 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
}
/* parse target specific constraints */
-static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+static const char *target_parse_constraint(TCGArgConstraint *ct,
+ const char *ct_str, TCGType type)
{
- const char *ct_str;
-
- ct_str = *pct_str;
- switch (ct_str[0]) {
+ switch (*ct_str++) {
case 'r':
ct->ct |= TCG_CT_REG;
tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
@@ -360,11 +358,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
ct->ct |= TCG_CT_CONST_ZERO;
break;
default:
- return -1;
+ return NULL;
}
- ct_str++;
- *pct_str = ct_str;
- return 0;
+ return ct_str;
}
/* test if a constant matches the constraint */
@@ -1583,6 +1579,18 @@ static const TCGTargetOpDef sparc_op_defs[] = {
{ -1 },
};
+static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+{
+ int i, n = ARRAY_SIZE(sparc_op_defs);
+
+ for (i = 0; i < n; ++i) {
+ if (sparc_op_defs[i].op == op) {
+ return &sparc_op_defs[i];
+ }
+ }
+ return NULL;
+}
+
static void tcg_target_init(TCGContext *s)
{
/* Only probe for the platform and capabilities if we haven't already
@@ -1622,8 +1630,6 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
-
- tcg_add_target_add_op_defs(sparc_op_defs);
}
#if SPARC64
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 6e2fb3522f..95a39b7d8c 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -457,6 +457,117 @@ void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
}
}
+void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_clz_i32) {
+ tcg_gen_op3_i32(INDEX_op_clz_i32, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_clz_i64) {
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(t1, arg1);
+ tcg_gen_extu_i32_i64(t2, arg2);
+ tcg_gen_addi_i64(t2, t2, 32);
+ tcg_gen_clz_i64(t1, t1, t2);
+ tcg_gen_extrl_i64_i32(ret, t1);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+ tcg_gen_subi_i32(ret, ret, 32);
+ } else {
+ gen_helper_clz_i32(ret, arg1, arg2);
+ }
+}
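
When only the 64-bit op exists, the 32-bit input is zero-extended, so
its count comes out 32 too large; biasing the default value by +32 and
subtracting 32 at the end fixes both paths at once. A reference for
the trick (GCC builtin assumed):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t clz64_ref(uint64_t x, uint64_t def)
    {
        return x ? (uint64_t)__builtin_clzll(x) : def;
    }

    static uint32_t clz32_via_clz64(uint32_t arg1, uint32_t arg2)
    {
        /* Zero-extend, bias the default, then undo the extra 32. */
        return (uint32_t)clz64_ref(arg1, (uint64_t)arg2 + 32) - 32;
    }

    int main(void)
    {
        assert(clz32_via_clz64(1, 99) == 31);
        assert(clz32_via_clz64(0, 99) == 99);
        return 0;
    }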
+
+void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
+{
+ TCGv_i32 t = tcg_const_i32(arg2);
+ tcg_gen_clz_i32(ret, arg1, t);
+ tcg_temp_free_i32(t);
+}
+
+void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_ctz_i32) {
+ tcg_gen_op3_i32(INDEX_op_ctz_i32, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_ctz_i64) {
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(t1, arg1);
+ tcg_gen_extu_i32_i64(t2, arg2);
+ tcg_gen_ctz_i64(t1, t1, t2);
+ tcg_gen_extrl_i64_i32(ret, t1);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+ } else if (TCG_TARGET_HAS_ctpop_i32
+ || TCG_TARGET_HAS_ctpop_i64
+ || TCG_TARGET_HAS_clz_i32
+ || TCG_TARGET_HAS_clz_i64) {
+ TCGv_i32 z, t = tcg_temp_new_i32();
+
+ if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) {
+ tcg_gen_subi_i32(t, arg1, 1);
+ tcg_gen_andc_i32(t, t, arg1);
+ tcg_gen_ctpop_i32(t, t);
+ } else {
+ /* Since all non-x86 hosts have clz(0) == 32, don't fight it. */
+ tcg_gen_neg_i32(t, arg1);
+ tcg_gen_and_i32(t, t, arg1);
+ tcg_gen_clzi_i32(t, t, 32);
+ tcg_gen_xori_i32(t, t, 31);
+ }
+ z = tcg_const_i32(0);
+ tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t);
+ tcg_temp_free_i32(t);
+ tcg_temp_free_i32(z);
+ } else {
+ gen_helper_ctz_i32(ret, arg1, arg2);
+ }
+}
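
The ctpop fallback uses the identity ctz(x) == ctpop((x - 1) & ~x) for
nonzero x: subtracting one turns the trailing zeros into ones while
clearing the lowest set bit, and the andc keeps only those new ones.
For example (GCC builtins assumed):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t x = 0x58;            /* ...0101 1000 */
        uint32_t t = (x - 1) & ~x;    /* ...0000 0111 */
        assert(__builtin_popcount(t) == 3);
        assert(__builtin_ctz(x) == 3);
        return 0;
    }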
+
+void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
+{
+ if (!TCG_TARGET_HAS_ctz_i32 && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) {
+ /* This equivalence has the advantage of not requiring a fixup. */
+ TCGv_i32 t = tcg_temp_new_i32();
+ tcg_gen_subi_i32(t, arg1, 1);
+ tcg_gen_andc_i32(t, t, arg1);
+ tcg_gen_ctpop_i32(ret, t);
+ tcg_temp_free_i32(t);
+ } else {
+ TCGv_i32 t = tcg_const_i32(arg2);
+ tcg_gen_ctz_i32(ret, arg1, t);
+ tcg_temp_free_i32(t);
+ }
+}
+
+void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_HAS_clz_i32) {
+ TCGv_i32 t = tcg_temp_new_i32();
+ tcg_gen_sari_i32(t, arg, 31);
+ tcg_gen_xor_i32(t, t, arg);
+ tcg_gen_clzi_i32(t, t, 32);
+ tcg_gen_subi_i32(ret, t, 1);
+ tcg_temp_free_i32(t);
+ } else {
+ gen_helper_clrsb_i32(ret, arg);
+ }
+}
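
clrsb counts the sign bits beyond the first: xor-ing the value with
its own sign extension turns the redundant copies into leading zeros,
and the final subtraction discounts the sign bit itself. For example
(assuming arithmetic right shift and GCC builtins):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        int32_t x = (int32_t)0xffff0000;
        uint32_t t = (uint32_t)(x ^ (x >> 31));  /* 0x0000ffff */
        assert(__builtin_clz(t) - 1 == 15);
        assert(__builtin_clrsb(x) == 15);
        return 0;
    }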
+
+void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1)
+{
+ if (TCG_TARGET_HAS_ctpop_i32) {
+ tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1);
+ } else if (TCG_TARGET_HAS_ctpop_i64) {
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(t, arg1);
+ tcg_gen_ctpop_i64(t, t);
+ tcg_gen_extrl_i64_i32(ret, t);
+ tcg_temp_free_i64(t);
+ } else {
+ gen_helper_ctpop_i32(ret, arg1);
+ }
+}
+
void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
if (TCG_TARGET_HAS_rot_i32) {
@@ -533,10 +644,11 @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
TCGv_i32 t1;
tcg_debug_assert(ofs < 32);
+ tcg_debug_assert(len > 0);
tcg_debug_assert(len <= 32);
tcg_debug_assert(ofs + len <= 32);
- if (ofs == 0 && len == 32) {
+ if (len == 32) {
tcg_gen_mov_i32(ret, arg2);
return;
}
@@ -560,6 +672,189 @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
tcg_temp_free_i32(t1);
}
+void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
+ unsigned int ofs, unsigned int len)
+{
+ tcg_debug_assert(ofs < 32);
+ tcg_debug_assert(len > 0);
+ tcg_debug_assert(len <= 32);
+ tcg_debug_assert(ofs + len <= 32);
+
+ if (ofs + len == 32) {
+ tcg_gen_shli_i32(ret, arg, ofs);
+ } else if (ofs == 0) {
+ tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
+ } else if (TCG_TARGET_HAS_deposit_i32
+ && TCG_TARGET_deposit_i32_valid(ofs, len)) {
+ TCGv_i32 zero = tcg_const_i32(0);
+ tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len);
+ tcg_temp_free_i32(zero);
+ } else {
+ /* To help two-operand hosts we prefer to zero-extend first,
+ which allows ARG to stay live. */
+ switch (len) {
+ case 16:
+ if (TCG_TARGET_HAS_ext16u_i32) {
+ tcg_gen_ext16u_i32(ret, arg);
+ tcg_gen_shli_i32(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8u_i32) {
+ tcg_gen_ext8u_i32(ret, arg);
+ tcg_gen_shli_i32(ret, ret, ofs);
+ return;
+ }
+ break;
+ }
+ /* Otherwise prefer zero-extension over AND for code size. */
+ switch (ofs + len) {
+ case 16:
+ if (TCG_TARGET_HAS_ext16u_i32) {
+ tcg_gen_shli_i32(ret, arg, ofs);
+ tcg_gen_ext16u_i32(ret, ret);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8u_i32) {
+ tcg_gen_shli_i32(ret, arg, ofs);
+ tcg_gen_ext8u_i32(ret, ret);
+ return;
+ }
+ break;
+ }
+ tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
+ tcg_gen_shli_i32(ret, ret, ofs);
+ }
+}
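
All of the cases above compute the same thing: a deposit into a zero
background is just a masked shift. A reference to compare against:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t deposit_z_ref(uint32_t arg, unsigned ofs, unsigned len)
    {
        uint32_t mask = len == 32 ? ~0u : (1u << len) - 1;
        return (arg & mask) << ofs;
    }

    int main(void)
    {
        assert(deposit_z_ref(0xabcd, 8, 8) == 0xcd00);
        assert(deposit_z_ref(0xffffffffu, 0, 32) == 0xffffffffu);
        return 0;
    }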
+
+void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
+ unsigned int ofs, unsigned int len)
+{
+ tcg_debug_assert(ofs < 32);
+ tcg_debug_assert(len > 0);
+ tcg_debug_assert(len <= 32);
+ tcg_debug_assert(ofs + len <= 32);
+
+ /* Canonicalize certain special cases, even if extract is supported. */
+ if (ofs + len == 32) {
+ tcg_gen_shri_i32(ret, arg, 32 - len);
+ return;
+ }
+ if (ofs == 0) {
+ tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
+ return;
+ }
+
+ if (TCG_TARGET_HAS_extract_i32
+ && TCG_TARGET_extract_i32_valid(ofs, len)) {
+ tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len);
+ return;
+ }
+
+ /* Assume that zero-extension, if available, is cheaper than a shift. */
+ switch (ofs + len) {
+ case 16:
+ if (TCG_TARGET_HAS_ext16u_i32) {
+ tcg_gen_ext16u_i32(ret, arg);
+ tcg_gen_shri_i32(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8u_i32) {
+ tcg_gen_ext8u_i32(ret, arg);
+ tcg_gen_shri_i32(ret, ret, ofs);
+ return;
+ }
+ break;
+ }
+
+ /* ??? Ideally we'd know what values are available for immediate AND.
+ Assume that 8 bits are available, plus the special case of 16,
+ so that we get ext8u, ext16u. */
+ switch (len) {
+ case 1 ... 8: case 16:
+ tcg_gen_shri_i32(ret, arg, ofs);
+ tcg_gen_andi_i32(ret, ret, (1u << len) - 1);
+ break;
+ default:
+ tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
+ tcg_gen_shri_i32(ret, ret, 32 - len);
+ break;
+ }
+}
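
The double-shift default case pushes the field flush against the top
of the word and then brings it back down with a logical, zero-filling
right shift. For ofs = 4, len = 8:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t arg = 0x12345678;
        unsigned ofs = 4, len = 8;
        uint32_t hi  = arg << (32 - len - ofs); /* field at the top */
        uint32_t res = hi >> (32 - len);        /* logical shift down */
        assert(res == 0x67);                    /* bits [11:4] of arg */
        return 0;
    }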
+
+void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
+ unsigned int ofs, unsigned int len)
+{
+ tcg_debug_assert(ofs < 32);
+ tcg_debug_assert(len > 0);
+ tcg_debug_assert(len <= 32);
+ tcg_debug_assert(ofs + len <= 32);
+
+ /* Canonicalize certain special cases, even if extract is supported. */
+ if (ofs + len == 32) {
+ tcg_gen_sari_i32(ret, arg, 32 - len);
+ return;
+ }
+ if (ofs == 0) {
+ switch (len) {
+ case 16:
+ tcg_gen_ext16s_i32(ret, arg);
+ return;
+ case 8:
+ tcg_gen_ext8s_i32(ret, arg);
+ return;
+ }
+ }
+
+ if (TCG_TARGET_HAS_sextract_i32
+ && TCG_TARGET_extract_i32_valid(ofs, len)) {
+ tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len);
+ return;
+ }
+
+ /* Assume that sign-extension, if available, is cheaper than a shift. */
+ switch (ofs + len) {
+ case 16:
+ if (TCG_TARGET_HAS_ext16s_i32) {
+ tcg_gen_ext16s_i32(ret, arg);
+ tcg_gen_sari_i32(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8s_i32) {
+ tcg_gen_ext8s_i32(ret, arg);
+ tcg_gen_sari_i32(ret, ret, ofs);
+ return;
+ }
+ break;
+ }
+ switch (len) {
+ case 16:
+ if (TCG_TARGET_HAS_ext16s_i32) {
+ tcg_gen_shri_i32(ret, arg, ofs);
+ tcg_gen_ext16s_i32(ret, ret);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8s_i32) {
+ tcg_gen_shri_i32(ret, arg, ofs);
+ tcg_gen_ext8s_i32(ret, ret);
+ return;
+ }
+ break;
+ }
+
+ tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
+ tcg_gen_sari_i32(ret, ret, 32 - len);
+}
+
void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2)
{
@@ -1519,6 +1814,115 @@ void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
}
}
+void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_clz_i64) {
+ tcg_gen_op3_i64(INDEX_op_clz_i64, ret, arg1, arg2);
+ } else {
+ gen_helper_clz_i64(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32
+ && TCG_TARGET_HAS_clz_i32
+ && arg2 <= 0xffffffffu) {
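+ /* The low-word clz runs first with arg2 - 32 as its fallback;
+ adding 32 makes it the fallback for the high-word clz. A zero
+ high word thus yields 32 + clz32(low); all-zero input yields arg2. */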
+ TCGv_i32 t = tcg_const_i32((uint32_t)arg2 - 32);
+ tcg_gen_clz_i32(t, TCGV_LOW(arg1), t);
+ tcg_gen_addi_i32(t, t, 32);
+ tcg_gen_clz_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), t);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ tcg_temp_free_i32(t);
+ } else {
+ TCGv_i64 t = tcg_const_i64(arg2);
+ tcg_gen_clz_i64(ret, arg1, t);
+ tcg_temp_free_i64(t);
+ }
+}
+
+void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_ctz_i64) {
+ tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_ctpop_i64 || TCG_TARGET_HAS_clz_i64) {
+ TCGv_i64 z, t = tcg_temp_new_i64();
+
+ if (TCG_TARGET_HAS_ctpop_i64) {
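+ /* (x - 1) & ~x sets exactly the trailing zero bits of x, so its
+ population count equals ctz(x) for x != 0; x == 0 is fixed up
+ by the movcond below. */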
+ tcg_gen_subi_i64(t, arg1, 1);
+ tcg_gen_andc_i64(t, t, arg1);
+ tcg_gen_ctpop_i64(t, t);
+ } else {
+ /* Since all non-x86 hosts have clz(0) == 64, don't fight it. */
+ tcg_gen_neg_i64(t, arg1);
+ tcg_gen_and_i64(t, t, arg1);
+ tcg_gen_clzi_i64(t, t, 64);
+ tcg_gen_xori_i64(t, t, 63);
+ }
+ z = tcg_const_i64(0);
+ tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t);
+ tcg_temp_free_i64(t);
+ tcg_temp_free_i64(z);
+ } else {
+ gen_helper_ctz_i64(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32
+ && TCG_TARGET_HAS_ctz_i32
+ && arg2 <= 0xffffffffu) {
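+ /* Mirror of clzi_i64: the high-word ctz computes the fallback
+ for the low-word ctz, so a zero low word yields 32 + ctz32(high);
+ all-zero input yields arg2. */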
+ TCGv_i32 t32 = tcg_const_i32((uint32_t)arg2 - 32);
+ tcg_gen_ctz_i32(t32, TCGV_HIGH(arg1), t32);
+ tcg_gen_addi_i32(t32, t32, 32);
+ tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ tcg_temp_free_i32(t32);
+ } else if (!TCG_TARGET_HAS_ctz_i64
+ && TCG_TARGET_HAS_ctpop_i64
+ && arg2 == 64) {
+ /* This equivalence has the advantage of not requiring a fixup. */
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_subi_i64(t, arg1, 1);
+ tcg_gen_andc_i64(t, t, arg1);
+ tcg_gen_ctpop_i64(ret, t);
+ tcg_temp_free_i64(t);
+ } else {
+ TCGv_i64 t64 = tcg_const_i64(arg2);
+ tcg_gen_ctz_i64(ret, arg1, t64);
+ tcg_temp_free_i64(t64);
+ }
+}
+
+void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_HAS_clz_i64 || TCG_TARGET_HAS_clz_i32) {
+ TCGv_i64 t = tcg_temp_new_i64();
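+ /* XOR with the replicated sign bit turns the sign bit and its
+ redundant copies into leading zeros; clz counts one more than
+ clrsb, hence the final subtraction. */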
+ tcg_gen_sari_i64(t, arg, 63);
+ tcg_gen_xor_i64(t, t, arg);
+ tcg_gen_clzi_i64(t, t, 64);
+ tcg_gen_subi_i64(ret, t, 1);
+ tcg_temp_free_i64(t);
+ } else {
+ gen_helper_clrsb_i64(ret, arg);
+ }
+}
+
+void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1)
+{
+ if (TCG_TARGET_HAS_ctpop_i64) {
+ tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1);
+ } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) {
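+ /* ctpop64(x) == ctpop32(hi) + ctpop32(lo). */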
+ tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
+ tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
+ tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret));
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ } else {
+ gen_helper_ctpop_i64(ret, arg1);
+ }
+}
+
void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
if (TCG_TARGET_HAS_rot_i64) {
@@ -1593,10 +1997,11 @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
TCGv_i64 t1;
tcg_debug_assert(ofs < 64);
+ tcg_debug_assert(len > 0);
tcg_debug_assert(len <= 64);
tcg_debug_assert(ofs + len <= 64);
- if (ofs == 0 && len == 64) {
+ if (len == 64) {
tcg_gen_mov_i64(ret, arg2);
return;
}
@@ -1635,6 +2040,289 @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
tcg_temp_free_i64(t1);
}
+void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
+ unsigned int ofs, unsigned int len)
+{
+ tcg_debug_assert(ofs < 64);
+ tcg_debug_assert(len > 0);
+ tcg_debug_assert(len <= 64);
+ tcg_debug_assert(ofs + len <= 64);
+
+ if (ofs + len == 64) {
+ tcg_gen_shli_i64(ret, arg, ofs);
+ } else if (ofs == 0) {
+ tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
+ } else if (TCG_TARGET_HAS_deposit_i64
+ && TCG_TARGET_deposit_i64_valid(ofs, len)) {
+ TCGv_i64 zero = tcg_const_i64(0);
+ tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, zero, arg, ofs, len);
+ tcg_temp_free_i64(zero);
+ } else {
+ if (TCG_TARGET_REG_BITS == 32) {
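+ /* A field confined to one word reduces to a 32-bit deposit_z. */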
+ if (ofs >= 32) {
+ tcg_gen_deposit_z_i32(TCGV_HIGH(ret), TCGV_LOW(arg),
+ ofs - 32, len);
+ tcg_gen_movi_i32(TCGV_LOW(ret), 0);
+ return;
+ }
+ if (ofs + len <= 32) {
+ tcg_gen_deposit_z_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ return;
+ }
+ }
+ /* To help two-operand hosts we prefer to zero-extend first,
+ which allows ARG to stay live. */
+ switch (len) {
+ case 32:
+ if (TCG_TARGET_HAS_ext32u_i64) {
+ tcg_gen_ext32u_i64(ret, arg);
+ tcg_gen_shli_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 16:
+ if (TCG_TARGET_HAS_ext16u_i64) {
+ tcg_gen_ext16u_i64(ret, arg);
+ tcg_gen_shli_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8u_i64) {
+ tcg_gen_ext8u_i64(ret, arg);
+ tcg_gen_shli_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ }
+ /* Otherwise prefer zero-extension over AND for code size. */
+ switch (ofs + len) {
+ case 32:
+ if (TCG_TARGET_HAS_ext32u_i64) {
+ tcg_gen_shli_i64(ret, arg, ofs);
+ tcg_gen_ext32u_i64(ret, ret);
+ return;
+ }
+ break;
+ case 16:
+ if (TCG_TARGET_HAS_ext16u_i64) {
+ tcg_gen_shli_i64(ret, arg, ofs);
+ tcg_gen_ext16u_i64(ret, ret);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8u_i64) {
+ tcg_gen_shli_i64(ret, arg, ofs);
+ tcg_gen_ext8u_i64(ret, ret);
+ return;
+ }
+ break;
+ }
+ tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
+ tcg_gen_shli_i64(ret, ret, ofs);
+ }
+}
+
+void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
+ unsigned int ofs, unsigned int len)
+{
+ tcg_debug_assert(ofs < 64);
+ tcg_debug_assert(len > 0);
+ tcg_debug_assert(len <= 64);
+ tcg_debug_assert(ofs + len <= 64);
+
+ /* Canonicalize certain special cases, even if extract is supported. */
+ if (ofs + len == 64) {
+ tcg_gen_shri_i64(ret, arg, 64 - len);
+ return;
+ }
+ if (ofs == 0) {
+ tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
+ return;
+ }
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ /* Look for a 32-bit extract within one of the two words. */
+ if (ofs >= 32) {
+ tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ return;
+ }
+ if (ofs + len <= 32) {
+ tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ return;
+ }
+ /* The field is split across two words. One double-word
+ shift is better than two double-word shifts. */
+ goto do_shift_and;
+ }
+
+ if (TCG_TARGET_HAS_extract_i64
+ && TCG_TARGET_extract_i64_valid(ofs, len)) {
+ tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len);
+ return;
+ }
+
+ /* Assume that zero-extension, if available, is cheaper than a shift. */
+ switch (ofs + len) {
+ case 32:
+ if (TCG_TARGET_HAS_ext32u_i64) {
+ tcg_gen_ext32u_i64(ret, arg);
+ tcg_gen_shri_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 16:
+ if (TCG_TARGET_HAS_ext16u_i64) {
+ tcg_gen_ext16u_i64(ret, arg);
+ tcg_gen_shri_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8u_i64) {
+ tcg_gen_ext8u_i64(ret, arg);
+ tcg_gen_shri_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ }
+
+ /* ??? Ideally we'd know what values are available for immediate AND.
+ Assume that 8 bits are available, plus the special cases of 16 and 32,
+ so that we get ext8u, ext16u, and ext32u. */
+ switch (len) {
+ case 1 ... 8: case 16: case 32:
+ do_shift_and:
+ tcg_gen_shri_i64(ret, arg, ofs);
+ tcg_gen_andi_i64(ret, ret, (1ull << len) - 1);
+ break;
+ default:
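+ /* E.g. ofs=5, len=40: (arg << 19) >> 24 extracts bits [5,45). */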
+ tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
+ tcg_gen_shri_i64(ret, ret, 64 - len);
+ break;
+ }
+}
+
+void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
+ unsigned int ofs, unsigned int len)
+{
+ tcg_debug_assert(ofs < 64);
+ tcg_debug_assert(len > 0);
+ tcg_debug_assert(len <= 64);
+ tcg_debug_assert(ofs + len <= 64);
+
+ /* Canonicalize certain special cases, even if sextract is supported. */
+ if (ofs + len == 64) {
+ tcg_gen_sari_i64(ret, arg, 64 - len);
+ return;
+ }
+ if (ofs == 0) {
+ switch (len) {
+ case 32:
+ tcg_gen_ext32s_i64(ret, arg);
+ return;
+ case 16:
+ tcg_gen_ext16s_i64(ret, arg);
+ return;
+ case 8:
+ tcg_gen_ext8s_i64(ret, arg);
+ return;
+ }
+ }
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ /* Look for a 32-bit extract within one of the two words. */
+ if (ofs >= 32) {
+ tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
+ } else if (ofs + len <= 32) {
+ tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
+ } else if (ofs == 0) {
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_sextract_i32(TCGV_HIGH(ret), TCGV_HIGH(arg), 0, len - 32);
+ return;
+ } else if (len > 32) {
+ TCGv_i32 t = tcg_temp_new_i32();
+ /* Extract the bits for the high word normally; arg bit ofs+32
+ is bit ofs of the high word. */
+ tcg_gen_sextract_i32(t, TCGV_HIGH(arg), ofs, len - 32);
+ /* Shift the field down for the low part. */
+ tcg_gen_shri_i64(ret, arg, ofs);
+ /* Overwrite the shift into the high part. */
+ tcg_gen_mov_i32(TCGV_HIGH(ret), t);
+ tcg_temp_free_i32(t);
+ return;
+ } else {
+ /* Shift the field down for the low part, such that the
+ field sits at the MSB. */
+ tcg_gen_shri_i64(ret, arg, ofs + len - 32);
+ /* Shift the field down from the MSB, sign extending. */
+ tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_LOW(ret), 32 - len);
+ }
+ /* Sign-extend the field from 32 bits. */
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+ return;
+ }
+
+ if (TCG_TARGET_HAS_sextract_i64
+ && TCG_TARGET_extract_i64_valid(ofs, len)) {
+ tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len);
+ return;
+ }
+
+ /* Assume that sign-extension, if available, is cheaper than a shift. */
+ switch (ofs + len) {
+ case 32:
+ if (TCG_TARGET_HAS_ext32s_i64) {
+ tcg_gen_ext32s_i64(ret, arg);
+ tcg_gen_sari_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 16:
+ if (TCG_TARGET_HAS_ext16s_i64) {
+ tcg_gen_ext16s_i64(ret, arg);
+ tcg_gen_sari_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8s_i64) {
+ tcg_gen_ext8s_i64(ret, arg);
+ tcg_gen_sari_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ }
+ switch (len) {
+ case 32:
+ if (TCG_TARGET_HAS_ext32s_i64) {
+ tcg_gen_shri_i64(ret, arg, ofs);
+ tcg_gen_ext32s_i64(ret, ret);
+ return;
+ }
+ break;
+ case 16:
+ if (TCG_TARGET_HAS_ext16s_i64) {
+ tcg_gen_shri_i64(ret, arg, ofs);
+ tcg_gen_ext16s_i64(ret, ret);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8s_i64) {
+ tcg_gen_shri_i64(ret, arg, ofs);
+ tcg_gen_ext8s_i64(ret, ret);
+ return;
+ }
+ break;
+ }
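+ /* E.g. ofs=3, len=40: (arg << 21) >> 24, shifting arithmetically. */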
+ tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
+ tcg_gen_sari_i64(ret, ret, 64 - len);
+}
+
void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2)
{
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 6d044b7c5b..c68e300a68 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -286,12 +286,24 @@ void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2);
+void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2);
+void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_ctpop_i32(TCGv_i32 a1, TCGv_i32 a2);
void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
unsigned int ofs, unsigned int len);
+void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
+ unsigned int ofs, unsigned int len);
+void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
+ unsigned int ofs, unsigned int len);
+void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
+ unsigned int ofs, unsigned int len);
void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *);
void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *);
void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
@@ -463,12 +475,24 @@ void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2);
+void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2);
+void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ctpop_i64(TCGv_i64 a1, TCGv_i64 a2);
void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
unsigned int ofs, unsigned int len);
+void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
+ unsigned int ofs, unsigned int len);
+void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
+ unsigned int ofs, unsigned int len);
+void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
+ unsigned int ofs, unsigned int len);
void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *);
void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *);
void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
@@ -946,11 +970,20 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
#define tcg_gen_nand_tl tcg_gen_nand_i64
#define tcg_gen_nor_tl tcg_gen_nor_i64
#define tcg_gen_orc_tl tcg_gen_orc_i64
+#define tcg_gen_clz_tl tcg_gen_clz_i64
+#define tcg_gen_ctz_tl tcg_gen_ctz_i64
+#define tcg_gen_clzi_tl tcg_gen_clzi_i64
+#define tcg_gen_ctzi_tl tcg_gen_ctzi_i64
+#define tcg_gen_clrsb_tl tcg_gen_clrsb_i64
+#define tcg_gen_ctpop_tl tcg_gen_ctpop_i64
#define tcg_gen_rotl_tl tcg_gen_rotl_i64
#define tcg_gen_rotli_tl tcg_gen_rotli_i64
#define tcg_gen_rotr_tl tcg_gen_rotr_i64
#define tcg_gen_rotri_tl tcg_gen_rotri_i64
#define tcg_gen_deposit_tl tcg_gen_deposit_i64
+#define tcg_gen_deposit_z_tl tcg_gen_deposit_z_i64
+#define tcg_gen_extract_tl tcg_gen_extract_i64
+#define tcg_gen_sextract_tl tcg_gen_sextract_i64
#define tcg_const_tl tcg_const_i64
#define tcg_const_local_tl tcg_const_local_i64
#define tcg_gen_movcond_tl tcg_gen_movcond_i64
@@ -1034,11 +1067,20 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
#define tcg_gen_nand_tl tcg_gen_nand_i32
#define tcg_gen_nor_tl tcg_gen_nor_i32
#define tcg_gen_orc_tl tcg_gen_orc_i32
+#define tcg_gen_clz_tl tcg_gen_clz_i32
+#define tcg_gen_ctz_tl tcg_gen_ctz_i32
+#define tcg_gen_clzi_tl tcg_gen_clzi_i32
+#define tcg_gen_ctzi_tl tcg_gen_ctzi_i32
+#define tcg_gen_clrsb_tl tcg_gen_clrsb_i32
+#define tcg_gen_ctpop_tl tcg_gen_ctpop_i32
#define tcg_gen_rotl_tl tcg_gen_rotl_i32
#define tcg_gen_rotli_tl tcg_gen_rotli_i32
#define tcg_gen_rotr_tl tcg_gen_rotr_i32
#define tcg_gen_rotri_tl tcg_gen_rotri_i32
#define tcg_gen_deposit_tl tcg_gen_deposit_i32
+#define tcg_gen_deposit_z_tl tcg_gen_deposit_z_i32
+#define tcg_gen_extract_tl tcg_gen_extract_i32
+#define tcg_gen_sextract_tl tcg_gen_sextract_i32
#define tcg_const_tl tcg_const_i32
#define tcg_const_local_tl tcg_const_local_i32
#define tcg_gen_movcond_tl tcg_gen_movcond_i32
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 45528d2192..f06f89405e 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -77,6 +77,8 @@ DEF(sar_i32, 1, 2, 0, 0)
DEF(rotl_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
DEF(rotr_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32))
+DEF(extract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_extract_i32))
+DEF(sextract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_sextract_i32))
DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END)
@@ -102,6 +104,9 @@ DEF(orc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_orc_i32))
DEF(eqv_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_eqv_i32))
DEF(nand_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nand_i32))
DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32))
+DEF(clz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_clz_i32))
+DEF(ctz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_ctz_i32))
+DEF(ctpop_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ctpop_i32))
DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
DEF(movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
@@ -139,6 +144,8 @@ DEF(sar_i64, 1, 2, 0, IMPL64)
DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64))
+DEF(extract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_extract_i64))
+DEF(sextract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_sextract_i64))
/* size changing ops */
DEF(ext_i32_i64, 1, 1, 0, IMPL64)
@@ -167,6 +174,9 @@ DEF(orc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_orc_i64))
DEF(eqv_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_eqv_i64))
DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64))
DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64))
+DEF(clz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_clz_i64))
+DEF(ctz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctz_i64))
+DEF(ctpop_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctpop_i64))
DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64))
DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h
index 1deb86a099..114ea6fecf 100644
--- a/tcg/tcg-runtime.h
+++ b/tcg/tcg-runtime.h
@@ -15,6 +15,15 @@ DEF_HELPER_FLAGS_2(sar_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
DEF_HELPER_FLAGS_2(mulsh_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
DEF_HELPER_FLAGS_2(muluh_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(clz_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32)
+DEF_HELPER_FLAGS_2(ctz_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32)
+DEF_HELPER_FLAGS_2(clz_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(ctz_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_1(clrsb_i32, TCG_CALL_NO_RWG_SE, i32, i32)
+DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32)
+DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64)
+
DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
#ifdef CONFIG_SOFTMMU
diff --git a/tcg/tcg.c b/tcg/tcg.c
index aabf94f365..cb898f1636 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -62,6 +62,7 @@
/* Forward declarations for functions declared in tcg-target.inc.c and
used here. */
static void tcg_target_init(TCGContext *s);
+static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
intptr_t value, intptr_t addend);
@@ -95,7 +96,8 @@ static void tcg_register_jit_int(void *buf, size_t size,
__attribute__((unused));
/* Forward declarations for functions declared and used in tcg-target.inc.c. */
-static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str);
+static const char *target_parse_constraint(TCGArgConstraint *ct,
+ const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
intptr_t arg2);
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
@@ -319,6 +321,7 @@ static const TCGHelperInfo all_helpers[] = {
};
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
+static void process_op_defs(TCGContext *s);
void tcg_context_init(TCGContext *s)
{
@@ -362,6 +365,7 @@ void tcg_context_init(TCGContext *s)
}
tcg_target_init(s);
+ process_op_defs(s);
/* Reverse the order of the saved registers, assuming they're all at
the start of tcg_target_reg_alloc_order. */
@@ -1221,59 +1225,68 @@ static void sort_constraints(TCGOpDef *def, int start, int n)
}
}
-void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
+static void process_op_defs(TCGContext *s)
{
TCGOpcode op;
- TCGOpDef *def;
- const char *ct_str;
- int i, nb_args;
- for(;;) {
- if (tdefs->op == (TCGOpcode)-1)
- break;
- op = tdefs->op;
- tcg_debug_assert((unsigned)op < NB_OPS);
- def = &tcg_op_defs[op];
-#if defined(CONFIG_DEBUG_TCG)
- /* Duplicate entry in op definitions? */
- tcg_debug_assert(!def->used);
- def->used = 1;
-#endif
+ for (op = 0; op < NB_OPS; op++) {
+ TCGOpDef *def = &tcg_op_defs[op];
+ const TCGTargetOpDef *tdefs;
+ TCGType type;
+ int i, nb_args;
+
+ if (def->flags & TCG_OPF_NOT_PRESENT) {
+ continue;
+ }
+
nb_args = def->nb_iargs + def->nb_oargs;
- for(i = 0; i < nb_args; i++) {
- ct_str = tdefs->args_ct_str[i];
- /* Incomplete TCGTargetOpDef entry? */
+ if (nb_args == 0) {
+ continue;
+ }
+
+ tdefs = tcg_target_op_def(op);
+ /* Missing TCGTargetOpDef entry. */
+ tcg_debug_assert(tdefs != NULL);
+
+ type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
+ for (i = 0; i < nb_args; i++) {
+ const char *ct_str = tdefs->args_ct_str[i];
+ /* Incomplete TCGTargetOpDef entry. */
tcg_debug_assert(ct_str != NULL);
+
tcg_regset_clear(def->args_ct[i].u.regs);
def->args_ct[i].ct = 0;
- if (ct_str[0] >= '0' && ct_str[0] <= '9') {
- int oarg;
- oarg = ct_str[0] - '0';
- tcg_debug_assert(oarg < def->nb_oargs);
- tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
- /* TCG_CT_ALIAS is for the output arguments. The input
- argument is tagged with TCG_CT_IALIAS. */
- def->args_ct[i] = def->args_ct[oarg];
- def->args_ct[oarg].ct = TCG_CT_ALIAS;
- def->args_ct[oarg].alias_index = i;
- def->args_ct[i].ct |= TCG_CT_IALIAS;
- def->args_ct[i].alias_index = oarg;
- } else {
- for(;;) {
- if (*ct_str == '\0')
- break;
- switch(*ct_str) {
- case 'i':
- def->args_ct[i].ct |= TCG_CT_CONST;
- ct_str++;
- break;
- default:
- if (target_parse_constraint(&def->args_ct[i], &ct_str) < 0) {
- fprintf(stderr, "Invalid constraint '%s' for arg %d of operation '%s'\n",
- ct_str, i, def->name);
- exit(1);
- }
+ while (*ct_str != '\0') {
+ switch(*ct_str) {
+ case '0' ... '9':
+ {
+ int oarg = *ct_str - '0';
+ tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
+ tcg_debug_assert(oarg < def->nb_oargs);
+ tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
+ /* TCG_CT_ALIAS is for the output arguments.
+ The input is tagged with TCG_CT_IALIAS. */
+ def->args_ct[i] = def->args_ct[oarg];
+ def->args_ct[oarg].ct |= TCG_CT_ALIAS;
+ def->args_ct[oarg].alias_index = i;
+ def->args_ct[i].ct |= TCG_CT_IALIAS;
+ def->args_ct[i].alias_index = oarg;
}
+ ct_str++;
+ break;
+ case '&':
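+ /* '&' marks a GCC-style early-clobber output: it must be
+ allocated to a register not used by any input. */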
+ def->args_ct[i].ct |= TCG_CT_NEWREG;
+ ct_str++;
+ break;
+ case 'i':
+ def->args_ct[i].ct |= TCG_CT_CONST;
+ ct_str++;
+ break;
+ default:
+ ct_str = target_parse_constraint(&def->args_ct[i],
+ ct_str, type);
+ /* Typo in TCGTargetOpDef constraint. */
+ tcg_debug_assert(ct_str != NULL);
+ break;
}
}
}
@@ -1284,42 +1297,7 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
/* sort the constraints (XXX: this is just an heuristic) */
sort_constraints(def, 0, def->nb_oargs);
sort_constraints(def, def->nb_oargs, def->nb_iargs);
-
-#if 0
- {
- int i;
-
- printf("%s: sorted=", def->name);
- for(i = 0; i < def->nb_oargs + def->nb_iargs; i++)
- printf(" %d", def->sorted_args[i]);
- printf("\n");
- }
-#endif
- tdefs++;
}
-
-#if defined(CONFIG_DEBUG_TCG)
- i = 0;
- for (op = 0; op < tcg_op_defs_max; op++) {
- const TCGOpDef *def = &tcg_op_defs[op];
- if (def->flags & TCG_OPF_NOT_PRESENT) {
- /* Wrong entry in op definitions? */
- if (def->used) {
- fprintf(stderr, "Invalid op definition for %s\n", def->name);
- i = 1;
- }
- } else {
- /* Missing entry in op definitions? */
- if (!def->used) {
- fprintf(stderr, "Missing op definition for %s\n", def->name);
- i = 1;
- }
- }
- }
- if (i == 1) {
- tcg_abort();
- }
-#endif
}
void tcg_op_remove(TCGContext *s, TCGOp *op)
@@ -2208,7 +2186,8 @@ static void tcg_reg_alloc_op(TCGContext *s,
const TCGOpDef *def, TCGOpcode opc,
const TCGArg *args, TCGLifeData arg_life)
{
- TCGRegSet allocated_regs;
+ TCGRegSet i_allocated_regs;
+ TCGRegSet o_allocated_regs;
int i, k, nb_iargs, nb_oargs;
TCGReg reg;
TCGArg arg;
@@ -2225,8 +2204,10 @@ static void tcg_reg_alloc_op(TCGContext *s,
args + nb_oargs + nb_iargs,
sizeof(TCGArg) * def->nb_cargs);
+ tcg_regset_set(i_allocated_regs, s->reserved_regs);
+ tcg_regset_set(o_allocated_regs, s->reserved_regs);
+
/* satisfy input constraints */
- tcg_regset_set(allocated_regs, s->reserved_regs);
for(k = 0; k < nb_iargs; k++) {
i = def->sorted_args[nb_oargs + k];
arg = args[i];
@@ -2241,7 +2222,7 @@ static void tcg_reg_alloc_op(TCGContext *s,
goto iarg_end;
}
- temp_load(s, ts, arg_ct->u.regs, allocated_regs);
+ temp_load(s, ts, arg_ct->u.regs, i_allocated_regs);
if (arg_ct->ct & TCG_CT_IALIAS) {
if (ts->fixed_reg) {
@@ -2275,13 +2256,13 @@ static void tcg_reg_alloc_op(TCGContext *s,
allocate_in_reg:
/* allocate a new register matching the constraint
and move the temporary register into it */
- reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs,
+ reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
ts->indirect_base);
tcg_out_mov(s, ts->type, reg, ts->reg);
}
new_args[i] = reg;
const_args[i] = 0;
- tcg_regset_set_reg(allocated_regs, reg);
+ tcg_regset_set_reg(i_allocated_regs, reg);
iarg_end: ;
}
@@ -2293,31 +2274,35 @@ static void tcg_reg_alloc_op(TCGContext *s,
}
if (def->flags & TCG_OPF_BB_END) {
- tcg_reg_alloc_bb_end(s, allocated_regs);
+ tcg_reg_alloc_bb_end(s, i_allocated_regs);
} else {
if (def->flags & TCG_OPF_CALL_CLOBBER) {
/* XXX: permit generic clobber register list ? */
for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
- tcg_reg_free(s, i, allocated_regs);
+ tcg_reg_free(s, i, i_allocated_regs);
}
}
}
if (def->flags & TCG_OPF_SIDE_EFFECTS) {
/* sync globals if the op has side effects and might trigger
an exception. */
- sync_globals(s, allocated_regs);
+ sync_globals(s, i_allocated_regs);
}
/* satisfy the output constraints */
- tcg_regset_set(allocated_regs, s->reserved_regs);
for(k = 0; k < nb_oargs; k++) {
i = def->sorted_args[k];
arg = args[i];
arg_ct = &def->args_ct[i];
ts = &s->temps[arg];
- if (arg_ct->ct & TCG_CT_ALIAS) {
+ if ((arg_ct->ct & TCG_CT_ALIAS)
+ && !const_args[arg_ct->alias_index]) {
reg = new_args[arg_ct->alias_index];
+ } else if (arg_ct->ct & TCG_CT_NEWREG) {
+ reg = tcg_reg_alloc(s, arg_ct->u.regs,
+ i_allocated_regs | o_allocated_regs,
+ ts->indirect_base);
} else {
/* if fixed register, we try to use it */
reg = ts->reg;
@@ -2325,10 +2310,10 @@ static void tcg_reg_alloc_op(TCGContext *s,
tcg_regset_test_reg(arg_ct->u.regs, reg)) {
goto oarg_end;
}
- reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs,
+ reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
ts->indirect_base);
}
- tcg_regset_set_reg(allocated_regs, reg);
+ tcg_regset_set_reg(o_allocated_regs, reg);
/* if a fixed register is used, then a move will be done afterwards */
if (!ts->fixed_reg) {
if (ts->val_type == TEMP_VAL_REG) {
@@ -2357,7 +2342,7 @@ static void tcg_reg_alloc_op(TCGContext *s,
tcg_out_mov(s, ts->type, ts->reg, reg);
}
if (NEED_SYNC_ARG(i)) {
- temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i));
+ temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i));
} else if (IS_DEAD_ARG(i)) {
temp_dead(s, ts);
}
diff --git a/tcg/tcg.h b/tcg/tcg.h
index a35e4c4fd4..631c6f69b1 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -111,7 +111,12 @@ typedef uint64_t TCGRegSet;
#define TCG_TARGET_HAS_eqv_i64 0
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 0
+#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_extract_i64 0
+#define TCG_TARGET_HAS_sextract_i64 0
#define TCG_TARGET_HAS_movcond_i64 0
#define TCG_TARGET_HAS_add2_i64 0
#define TCG_TARGET_HAS_sub2_i64 0
@@ -130,6 +135,12 @@ typedef uint64_t TCGRegSet;
#ifndef TCG_TARGET_deposit_i64_valid
#define TCG_TARGET_deposit_i64_valid(ofs, len) 1
#endif
+#ifndef TCG_TARGET_extract_i32_valid
+#define TCG_TARGET_extract_i32_valid(ofs, len) 1
+#endif
+#ifndef TCG_TARGET_extract_i64_valid
+#define TCG_TARGET_extract_i64_valid(ofs, len) 1
+#endif
/* Only one of DIV or DIV2 should be defined. */
#if defined(TCG_TARGET_HAS_div_i32)
@@ -843,6 +854,7 @@ void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf);
#define TCG_CT_ALIAS 0x80
#define TCG_CT_IALIAS 0x40
+#define TCG_CT_NEWREG 0x20 /* output requires a new register */
#define TCG_CT_REG 0x01
#define TCG_CT_CONST 0x02 /* any constant of register size */
@@ -897,8 +909,6 @@ do {\
abort();\
} while (0)
-void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs);
-
#if UINTPTR_MAX == UINT32_MAX
#define TCGV_NAT_TO_PTR(n) MAKE_TCGV_PTR(GET_TCGV_I32(n))
#define TCGV_PTR_TO_NAT(n) MAKE_TCGV_I32(GET_TCGV_PTR(n))
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 868228b2e7..838bf3a858 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -69,9 +69,14 @@
#define TCG_TARGET_HAS_ext16u_i32 1
#define TCG_TARGET_HAS_andc_i32 0
#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 0
+#define TCG_TARGET_HAS_sextract_i32 0
#define TCG_TARGET_HAS_eqv_i32 0
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 0
+#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_neg_i32 1
#define TCG_TARGET_HAS_not_i32 1
#define TCG_TARGET_HAS_orc_i32 0
@@ -88,6 +93,8 @@
#define TCG_TARGET_HAS_bswap32_i64 1
#define TCG_TARGET_HAS_bswap64_i64 1
#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 0
+#define TCG_TARGET_HAS_sextract_i64 0
#define TCG_TARGET_HAS_div_i64 0
#define TCG_TARGET_HAS_rem_i64 0
#define TCG_TARGET_HAS_ext8s_i64 1
@@ -100,6 +107,9 @@
#define TCG_TARGET_HAS_eqv_i64 0
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 0
+#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_neg_i64 1
#define TCG_TARGET_HAS_not_i64 1
#define TCG_TARGET_HAS_orc_i64 0
diff --git a/tcg/tci/tcg-target.inc.c b/tcg/tci/tcg-target.inc.c
index 9dbf4d5512..26ee9b1664 100644
--- a/tcg/tci/tcg-target.inc.c
+++ b/tcg/tci/tcg-target.inc.c
@@ -259,6 +259,18 @@ static const TCGTargetOpDef tcg_target_op_defs[] = {
{ -1 },
};
+static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+{
+ int i, n = ARRAY_SIZE(tcg_target_op_defs);
+
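+ /* Linear scan; tcg_target_op_def is called once per opcode
+ from process_op_defs at startup. */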
+ for (i = 0; i < n; ++i) {
+ if (tcg_target_op_defs[i].op == op) {
+ return &tcg_target_op_defs[i];
+ }
+ }
+ return NULL;
+}
+
static const int tcg_target_reg_alloc_order[] = {
TCG_REG_R0,
TCG_REG_R1,
@@ -372,10 +384,10 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
}
/* Parse target specific constraints. */
-static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+static const char *target_parse_constraint(TCGArgConstraint *ct,
+ const char *ct_str, TCGType type)
{
- const char *ct_str = *pct_str;
- switch (ct_str[0]) {
+ switch (*ct_str++) {
case 'r':
case 'L': /* qemu_ld constraint */
case 'S': /* qemu_st constraint */
@@ -383,11 +395,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_set32(ct->u.regs, 0, BIT(TCG_TARGET_NB_REGS) - 1);
break;
default:
- return -1;
+ return NULL;
}
- ct_str++;
- *pct_str = ct_str;
- return 0;
+ return ct_str;
}
#if defined(CONFIG_DEBUG_TCG_INTERPRETER)
@@ -875,7 +885,6 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_clear(s->reserved_regs);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
- tcg_add_target_add_op_defs(tcg_target_op_defs);
/* We use negative offsets from "sp" so that we can distinguish
stores that might pretend to be call arguments. */