aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peter Maydell <peter.maydell@linaro.org> 2021-06-05 11:25:52 +0100
committer Peter Maydell <peter.maydell@linaro.org> 2021-06-05 11:25:52 +0100
commit 6f398e533f5e259b4f937f4aa9de970f7201d166 (patch)
tree da9af13a5592ec7ac6a82dcaddd7c6a9cb383e4b
parent 908a87706ec6214d4a72245c8a0c9d327baf436b (diff)
parent 0006039e29b9e6118beab300146f7c4931f7a217 (diff)
Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20210604' into staging
Host vector support for arm neon.

# gpg: Signature made Fri 04 Jun 2021 19:56:59 BST
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F

* remotes/rth-gitlab/tags/pull-tcg-20210604:
  tcg/arm: Implement TCG_TARGET_HAS_rotv_vec
  tcg/arm: Implement TCG_TARGET_HAS_roti_vec
  tcg/arm: Implement TCG_TARGET_HAS_shv_vec
  tcg/arm: Implement TCG_TARGET_HAS_bitsel_vec
  tcg/arm: Implement TCG_TARGET_HAS_minmax_vec
  tcg/arm: Implement TCG_TARGET_HAS_sat_vec
  tcg/arm: Implement TCG_TARGET_HAS_mul_vec
  tcg/arm: Implement TCG_TARGET_HAS_shi_vec
  tcg/arm: Implement andc, orc, abs, neg, not vector operations
  tcg/arm: Implement minimal vector operations
  tcg/arm: Implement tcg_out_dup*_vec
  tcg/arm: Implement tcg_out_mov for vector types
  tcg/arm: Implement tcg_out_ld/st for vector types
  tcg/arm: Add host vector framework
  tcg: Change parameters for tcg_target_const_match

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- tcg/aarch64/tcg-target.c.inc 5
-rw-r--r-- tcg/arm/tcg-target-con-set.h 10
-rw-r--r-- tcg/arm/tcg-target-con-str.h 3
-rw-r--r-- tcg/arm/tcg-target.c.inc 956
-rw-r--r-- tcg/arm/tcg-target.h 52
-rw-r--r-- tcg/arm/tcg-target.opc.h 16
-rw-r--r-- tcg/i386/tcg-target.c.inc 4
-rw-r--r-- tcg/mips/tcg-target.c.inc 5
-rw-r--r-- tcg/ppc/tcg-target.c.inc 4
-rw-r--r-- tcg/riscv/tcg-target.c.inc 4
-rw-r--r-- tcg/s390/tcg-target.c.inc 5
-rw-r--r-- tcg/sparc/tcg-target.c.inc 5
-rw-r--r-- tcg/tcg.c 5
-rw-r--r-- tcg/tci/tcg-target.c.inc 6
14 files changed, 1001 insertions, 79 deletions
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 5bd366f2d4..27cde314a9 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -277,11 +277,8 @@ static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
}
}
-static int tcg_target_const_match(tcg_target_long val, TCGType type,
- const TCGArgConstraint *arg_ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
- int ct = arg_ct->ct;
-
if (ct & TCG_CT_CONST) {
return 1;
}
diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h
index ab63e089c2..3685e1786a 100644
--- a/tcg/arm/tcg-target-con-set.h
+++ b/tcg/arm/tcg-target-con-set.h
@@ -13,11 +13,15 @@ C_O0_I1(r)
C_O0_I2(r, r)
C_O0_I2(r, rIN)
C_O0_I2(s, s)
+C_O0_I2(w, r)
C_O0_I3(s, s, s)
C_O0_I4(r, r, rI, rI)
C_O0_I4(s, s, s, s)
C_O1_I1(r, l)
C_O1_I1(r, r)
+C_O1_I1(w, r)
+C_O1_I1(w, w)
+C_O1_I1(w, wr)
C_O1_I2(r, 0, rZ)
C_O1_I2(r, l, l)
C_O1_I2(r, r, r)
@@ -26,6 +30,12 @@ C_O1_I2(r, r, rIK)
C_O1_I2(r, r, rIN)
C_O1_I2(r, r, ri)
C_O1_I2(r, rZ, rZ)
+C_O1_I2(w, 0, w)
+C_O1_I2(w, w, w)
+C_O1_I2(w, w, wO)
+C_O1_I2(w, w, wV)
+C_O1_I2(w, w, wZ)
+C_O1_I3(w, w, w, w)
C_O1_I4(r, r, r, rI, rI)
C_O1_I4(r, r, rIN, rIK, 0)
C_O2_I1(r, r, l)
diff --git a/tcg/arm/tcg-target-con-str.h b/tcg/arm/tcg-target-con-str.h
index a0ab7747db..8f501149e1 100644
--- a/tcg/arm/tcg-target-con-str.h
+++ b/tcg/arm/tcg-target-con-str.h
@@ -11,6 +11,7 @@
REGS('r', ALL_GENERAL_REGS)
REGS('l', ALL_QLOAD_REGS)
REGS('s', ALL_QSTORE_REGS)
+REGS('w', ALL_VECTOR_REGS)
/*
* Define constraint letters for constants:
@@ -19,4 +20,6 @@ REGS('s', ALL_QSTORE_REGS)
CONST('I', TCG_CT_CONST_ARM)
CONST('K', TCG_CT_CONST_INV)
CONST('N', TCG_CT_CONST_NEG)
+CONST('O', TCG_CT_CONST_ORRI)
+CONST('V', TCG_CT_CONST_ANDI)
CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 8457108a87..f4c9cb8f9f 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -30,6 +30,9 @@ int arm_arch = __ARM_ARCH;
#ifndef use_idiv_instructions
bool use_idiv_instructions;
#endif
+#ifndef use_neon_instructions
+bool use_neon_instructions;
+#endif
/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined. */
#ifdef CONFIG_SOFTMMU
@@ -40,22 +43,10 @@ bool use_idiv_instructions;
#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
- "%r0",
- "%r1",
- "%r2",
- "%r3",
- "%r4",
- "%r5",
- "%r6",
- "%r7",
- "%r8",
- "%r9",
- "%r10",
- "%r11",
- "%r12",
- "%r13",
- "%r14",
- "%pc",
+ "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%sp", "%r14", "%pc",
+ "%q0", "%q1", "%q2", "%q3", "%q4", "%q5", "%q6", "%q7",
+ "%q8", "%q9", "%q10", "%q11", "%q12", "%q13", "%q14", "%q15",
};
#endif
@@ -75,6 +66,20 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_R3,
TCG_REG_R12,
TCG_REG_R14,
+
+ TCG_REG_Q0,
+ TCG_REG_Q1,
+ TCG_REG_Q2,
+ TCG_REG_Q3,
+ /* Q4 - Q7 are call-saved, and skipped. */
+ TCG_REG_Q8,
+ TCG_REG_Q9,
+ TCG_REG_Q10,
+ TCG_REG_Q11,
+ TCG_REG_Q12,
+ TCG_REG_Q13,
+ TCG_REG_Q14,
+ TCG_REG_Q15,
};
static const int tcg_target_call_iarg_regs[4] = {
@@ -85,6 +90,7 @@ static const int tcg_target_call_oarg_regs[2] = {
};
#define TCG_REG_TMP TCG_REG_R12
+#define TCG_VEC_TMP TCG_REG_Q15
enum arm_cond_code_e {
COND_EQ = 0x0,
@@ -169,6 +175,60 @@ typedef enum {
INSN_NOP_v6k = 0xe320f000,
/* Otherwise the assembler uses mov r0,r0 */
INSN_NOP_v4 = (COND_AL << 28) | ARITH_MOV,
+
+ INSN_VADD = 0xf2000800,
+ INSN_VAND = 0xf2000110,
+ INSN_VBIC = 0xf2100110,
+ INSN_VEOR = 0xf3000110,
+ INSN_VORN = 0xf2300110,
+ INSN_VORR = 0xf2200110,
+ INSN_VSUB = 0xf3000800,
+ INSN_VMUL = 0xf2000910,
+ INSN_VQADD = 0xf2000010,
+ INSN_VQADD_U = 0xf3000010,
+ INSN_VQSUB = 0xf2000210,
+ INSN_VQSUB_U = 0xf3000210,
+ INSN_VMAX = 0xf2000600,
+ INSN_VMAX_U = 0xf3000600,
+ INSN_VMIN = 0xf2000610,
+ INSN_VMIN_U = 0xf3000610,
+
+ INSN_VABS = 0xf3b10300,
+ INSN_VMVN = 0xf3b00580,
+ INSN_VNEG = 0xf3b10380,
+
+ INSN_VCEQ0 = 0xf3b10100,
+ INSN_VCGT0 = 0xf3b10000,
+ INSN_VCGE0 = 0xf3b10080,
+ INSN_VCLE0 = 0xf3b10180,
+ INSN_VCLT0 = 0xf3b10200,
+
+ INSN_VCEQ = 0xf3000810,
+ INSN_VCGE = 0xf2000310,
+ INSN_VCGT = 0xf2000300,
+ INSN_VCGE_U = 0xf3000310,
+ INSN_VCGT_U = 0xf3000300,
+
+ INSN_VSHLI = 0xf2800510, /* VSHL (immediate) */
+ INSN_VSARI = 0xf2800010, /* VSHR.S */
+ INSN_VSHRI = 0xf3800010, /* VSHR.U */
+ INSN_VSLI = 0xf3800510,
+ INSN_VSHL_S = 0xf2000400, /* VSHL.S (register) */
+ INSN_VSHL_U = 0xf3000400, /* VSHL.U (register) */
+
+ INSN_VBSL = 0xf3100110,
+ INSN_VBIT = 0xf3200110,
+ INSN_VBIF = 0xf3300110,
+
+ INSN_VTST = 0xf2000810,
+
+ INSN_VDUP_G = 0xee800b10, /* VDUP (ARM core register) */
+ INSN_VDUP_S = 0xf3b00c00, /* VDUP (scalar) */
+ INSN_VLDR_D = 0xed100b00, /* VLDR.64 */
+ INSN_VLD1 = 0xf4200000, /* VLD1 (multiple single elements) */
+ INSN_VLD1R = 0xf4a00c00, /* VLD1 (single element to all lanes) */
+ INSN_VST1 = 0xf4000000, /* VST1 (multiple single elements) */
+ INSN_VMOVI = 0xf2800010, /* VMOV (immediate) */
} ARMInsn;
#define INSN_NOP (use_armv7_instructions ? INSN_NOP_v6k : INSN_NOP_v4)
@@ -187,6 +247,14 @@ static const uint8_t tcg_cond_to_arm_cond[] = {
[TCG_COND_GTU] = COND_HI,
};
+static int encode_imm(uint32_t imm);
+
+/* TCG private relocation type: add with pc+imm8 */
+#define R_ARM_PC8 11
+
+/* TCG private relocation type: vldr with imm8 << 2 */
+#define R_ARM_PC11 12
+
static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
@@ -218,16 +286,52 @@ static bool reloc_pc13(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
return false;
}
+static bool reloc_pc11(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t offset = (tcg_ptr_byte_diff(target, src_rx) - 8) / 4;
+
+ if (offset >= -0xff && offset <= 0xff) {
+ tcg_insn_unit insn = *src_rw;
+ bool u = (offset >= 0);
+ if (!u) {
+ offset = -offset;
+ }
+ insn = deposit32(insn, 23, 1, u);
+ insn = deposit32(insn, 0, 8, offset);
+ *src_rw = insn;
+ return true;
+ }
+ return false;
+}
+
+static bool reloc_pc8(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t offset = tcg_ptr_byte_diff(target, src_rx) - 8;
+ int rot = encode_imm(offset);
+
+ if (rot >= 0) {
+ *src_rw = deposit32(*src_rw, 0, 12, rol32(offset, rot) | (rot << 7));
+ return true;
+ }
+ return false;
+}
+
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
intptr_t value, intptr_t addend)
{
tcg_debug_assert(addend == 0);
-
- if (type == R_ARM_PC24) {
+ switch (type) {
+ case R_ARM_PC24:
return reloc_pc24(code_ptr, (const tcg_insn_unit *)value);
- } else if (type == R_ARM_PC13) {
+ case R_ARM_PC13:
return reloc_pc13(code_ptr, (const tcg_insn_unit *)value);
- } else {
+ case R_ARM_PC11:
+ return reloc_pc11(code_ptr, (const tcg_insn_unit *)value);
+ case R_ARM_PC8:
+ return reloc_pc8(code_ptr, (const tcg_insn_unit *)value);
+ default:
g_assert_not_reached();
}
}
@@ -236,8 +340,11 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
#define TCG_CT_CONST_INV 0x200
#define TCG_CT_CONST_NEG 0x400
#define TCG_CT_CONST_ZERO 0x800
+#define TCG_CT_CONST_ORRI 0x1000
+#define TCG_CT_CONST_ANDI 0x2000
#define ALL_GENERAL_REGS 0xffffu
+#define ALL_VECTOR_REGS 0xffff0000u
/*
* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
@@ -266,7 +373,7 @@ static inline uint32_t rotl(uint32_t val, int n)
/* ARM immediates for ALU instructions are made of an unsigned 8-bit
right-rotated by an even amount between 0 and 30. */
-static inline int encode_imm(uint32_t imm)
+static int encode_imm(uint32_t imm)
{
int shift;
@@ -293,6 +400,89 @@ static inline int check_fit_imm(uint32_t imm)
return encode_imm(imm) >= 0;
}
+/* Return true if v16 is a valid 16-bit shifted immediate. */
+static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
+{
+ if (v16 == (v16 & 0xff)) {
+ *cmode = 0x8;
+ *imm8 = v16 & 0xff;
+ return true;
+ } else if (v16 == (v16 & 0xff00)) {
+ *cmode = 0xa;
+ *imm8 = v16 >> 8;
+ return true;
+ }
+ return false;
+}
+
+/* Return true if v32 is a valid 32-bit shifted immediate. */
+static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
+{
+ if (v32 == (v32 & 0xff)) {
+ *cmode = 0x0;
+ *imm8 = v32 & 0xff;
+ return true;
+ } else if (v32 == (v32 & 0xff00)) {
+ *cmode = 0x2;
+ *imm8 = (v32 >> 8) & 0xff;
+ return true;
+ } else if (v32 == (v32 & 0xff0000)) {
+ *cmode = 0x4;
+ *imm8 = (v32 >> 16) & 0xff;
+ return true;
+ } else if (v32 == (v32 & 0xff000000)) {
+ *cmode = 0x6;
+ *imm8 = v32 >> 24;
+ return true;
+ }
+ return false;
+}
+
+/* Return true if v32 is a valid 32-bit shifting ones immediate. */
+static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
+{
+ if ((v32 & 0xffff00ff) == 0xff) {
+ *cmode = 0xc;
+ *imm8 = (v32 >> 8) & 0xff;
+ return true;
+ } else if ((v32 & 0xff00ffff) == 0xffff) {
+ *cmode = 0xd;
+ *imm8 = (v32 >> 16) & 0xff;
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Return non-zero if v32 can be formed by MOVI+ORR.
+ * Place the parameters for MOVI in (cmode, imm8).
+ * Return the cmode for ORR; the imm8 can be had via extraction from v32.
+ */
+static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
+{
+ int i;
+
+ for (i = 6; i > 0; i -= 2) {
+ /* Mask out one byte we can add with ORR. */
+ uint32_t tmp = v32 & ~(0xffu << (i * 4));
+ if (is_shimm32(tmp, cmode, imm8) ||
+ is_soimm32(tmp, cmode, imm8)) {
+ break;
+ }
+ }
+ return i;
+}
+
+/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */
+static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
+{
+ if (v32 == deposit32(v32, 16, 16, v32)) {
+ return is_shimm16(v32, cmode, imm8);
+ } else {
+ return is_shimm32(v32, cmode, imm8);
+ }
+}
+
/* Test if a constant matches the constraint.
* TODO: define constraints for:
*
@@ -301,11 +491,8 @@ static inline int check_fit_imm(uint32_t imm)
* mov operand2: values represented with x << (2 * y), x < 0x100
* add, sub, eor...: ditto
*/
-static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
- const TCGArgConstraint *arg_ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
- int ct;
- ct = arg_ct->ct;
if (ct & TCG_CT_CONST) {
return 1;
} else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
@@ -316,9 +503,26 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
return 1;
} else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
return 1;
- } else {
- return 0;
}
+
+ switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
+ case 0:
+ break;
+ case TCG_CT_CONST_ANDI:
+ val = ~val;
+ /* fallthru */
+ case TCG_CT_CONST_ORRI:
+ if (val == deposit64(val, 32, 32, val)) {
+ int cmode, imm8;
+ return is_shimm1632(val, &cmode, &imm8);
+ }
+ break;
+ default:
+ /* Both bits should not be set for the same insn. */
+ g_assert_not_reached();
+ }
+
+ return 0;
}
static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
@@ -1092,6 +1296,76 @@ static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
}
}
+/*
+ * Note that TCGReg references Q-registers.
+ * D-regno = 2 * Q-regno, so shift left by 1 while inserting.
+ */
+static uint32_t encode_vd(TCGReg rd)
+{
+ tcg_debug_assert(rd >= TCG_REG_Q0);
+ return (extract32(rd, 3, 1) << 22) | (extract32(rd, 0, 3) << 13);
+}
+
+static uint32_t encode_vn(TCGReg rn)
+{
+ tcg_debug_assert(rn >= TCG_REG_Q0);
+ return (extract32(rn, 3, 1) << 7) | (extract32(rn, 0, 3) << 17);
+}
+
+static uint32_t encode_vm(TCGReg rm)
+{
+ tcg_debug_assert(rm >= TCG_REG_Q0);
+ return (extract32(rm, 3, 1) << 5) | (extract32(rm, 0, 3) << 1);
+}
+
+static void tcg_out_vreg2(TCGContext *s, ARMInsn insn, int q, int vece,
+ TCGReg d, TCGReg m)
+{
+ tcg_out32(s, insn | (vece << 18) | (q << 6) |
+ encode_vd(d) | encode_vm(m));
+}
+
+static void tcg_out_vreg3(TCGContext *s, ARMInsn insn, int q, int vece,
+ TCGReg d, TCGReg n, TCGReg m)
+{
+ tcg_out32(s, insn | (vece << 20) | (q << 6) |
+ encode_vd(d) | encode_vn(n) | encode_vm(m));
+}
+
+static void tcg_out_vmovi(TCGContext *s, TCGReg rd,
+ int q, int op, int cmode, uint8_t imm8)
+{
+ tcg_out32(s, INSN_VMOVI | encode_vd(rd) | (q << 6) | (op << 5)
+ | (cmode << 8) | extract32(imm8, 0, 4)
+ | (extract32(imm8, 4, 3) << 16)
+ | (extract32(imm8, 7, 1) << 24));
+}
+
+static void tcg_out_vshifti(TCGContext *s, ARMInsn insn, int q,
+ TCGReg rd, TCGReg rm, int l_imm6)
+{
+ tcg_out32(s, insn | (q << 6) | encode_vd(rd) | encode_vm(rm) |
+ (extract32(l_imm6, 6, 1) << 7) |
+ (extract32(l_imm6, 0, 6) << 16));
+}
+
+static void tcg_out_vldst(TCGContext *s, ARMInsn insn,
+ TCGReg rd, TCGReg rn, int offset)
+{
+ if (offset != 0) {
+ if (check_fit_imm(offset) || check_fit_imm(-offset)) {
+ tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
+ TCG_REG_TMP, rn, offset, true);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, offset);
+ tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
+ TCG_REG_TMP, TCG_REG_TMP, rn, 0);
+ }
+ rn = TCG_REG_TMP;
+ }
+ tcg_out32(s, insn | (rn << 16) | encode_vd(rd) | 0xf);
+}
+
#ifdef CONFIG_SOFTMMU
#include "../tcg-ldst.c.inc"
@@ -2120,6 +2394,48 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_st_i64:
return TARGET_LONG_BITS == 32 ? C_O0_I3(s, s, s) : C_O0_I4(s, s, s, s);
+ case INDEX_op_st_vec:
+ return C_O0_I2(w, r);
+ case INDEX_op_ld_vec:
+ case INDEX_op_dupm_vec:
+ return C_O1_I1(w, r);
+ case INDEX_op_dup_vec:
+ return C_O1_I1(w, wr);
+ case INDEX_op_abs_vec:
+ case INDEX_op_neg_vec:
+ case INDEX_op_not_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_sari_vec:
+ return C_O1_I1(w, w);
+ case INDEX_op_dup2_vec:
+ case INDEX_op_add_vec:
+ case INDEX_op_mul_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_sssub_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
+ case INDEX_op_usadd_vec:
+ case INDEX_op_ussub_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_arm_sshl_vec:
+ case INDEX_op_arm_ushl_vec:
+ return C_O1_I2(w, w, w);
+ case INDEX_op_arm_sli_vec:
+ return C_O1_I2(w, 0, w);
+ case INDEX_op_or_vec:
+ case INDEX_op_andc_vec:
+ return C_O1_I2(w, w, wO);
+ case INDEX_op_and_vec:
+ case INDEX_op_orc_vec:
+ return C_O1_I2(w, w, wV);
+ case INDEX_op_cmp_vec:
+ return C_O1_I2(w, w, wZ);
+ case INDEX_op_bitsel_vec:
+ return C_O1_I3(w, w, w, w);
default:
g_assert_not_reached();
}
@@ -2129,12 +2445,18 @@ static void tcg_target_init(TCGContext *s)
{
/* Only probe for the platform and capabilities if we haven't already
determined maximum values at compile time. */
-#ifndef use_idiv_instructions
+#if !defined(use_idiv_instructions) || !defined(use_neon_instructions)
{
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
+#ifndef use_idiv_instructions
use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
+#endif
+#ifndef use_neon_instructions
+ use_neon_instructions = (hwcap & HWCAP_ARM_NEON) != 0;
+#endif
}
#endif
+
if (__ARM_ARCH < 7) {
const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
@@ -2142,7 +2464,7 @@ static void tcg_target_init(TCGContext *s)
}
}
- tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
+ tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
tcg_target_call_clobber_regs = 0;
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
@@ -2152,22 +2474,69 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
+ if (use_neon_instructions) {
+ tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
+ tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
+
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q0);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q1);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q2);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q3);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q8);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q9);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q10);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q11);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q12);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q13);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q14);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q15);
+ }
+
s->reserved_regs = 0;
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
}
-static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
- TCGReg arg1, intptr_t arg2)
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
{
- tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
+ switch (type) {
+ case TCG_TYPE_I32:
+ tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
+ return;
+ case TCG_TYPE_V64:
+ /* regs 1; size 8; align 8 */
+ tcg_out_vldst(s, INSN_VLD1 | 0x7d0, arg, arg1, arg2);
+ return;
+ case TCG_TYPE_V128:
+ /* regs 2; size 8; align 16 */
+ tcg_out_vldst(s, INSN_VLD1 | 0xae0, arg, arg1, arg2);
+ return;
+ default:
+ g_assert_not_reached();
+ }
}
-static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
- TCGReg arg1, intptr_t arg2)
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
{
- tcg_out_st32(s, COND_AL, arg, arg1, arg2);
+ switch (type) {
+ case TCG_TYPE_I32:
+ tcg_out_st32(s, COND_AL, arg, arg1, arg2);
+ return;
+ case TCG_TYPE_V64:
+ /* regs 1; size 8; align 8 */
+ tcg_out_vldst(s, INSN_VST1 | 0x7d0, arg, arg1, arg2);
+ return;
+ case TCG_TYPE_V128:
+ /* regs 2; size 8; align 16 */
+ tcg_out_vldst(s, INSN_VST1 | 0xae0, arg, arg1, arg2);
+ return;
+ default:
+ g_assert_not_reached();
+ }
}
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
@@ -2176,19 +2545,524 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
return false;
}
-static inline bool tcg_out_mov(TCGContext *s, TCGType type,
- TCGReg ret, TCGReg arg)
+static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
- tcg_out_mov_reg(s, COND_AL, ret, arg);
- return true;
+ if (ret == arg) {
+ return true;
+ }
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (ret < TCG_REG_Q0 && arg < TCG_REG_Q0) {
+ tcg_out_mov_reg(s, COND_AL, ret, arg);
+ return true;
+ }
+ return false;
+
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ /* "VMOV D,N" is an alias for "VORR D,N,N". */
+ tcg_out_vreg3(s, INSN_VORR, type - TCG_TYPE_V64, 0, ret, arg, arg);
+ return true;
+
+ default:
+ g_assert_not_reached();
+ }
}
-static inline void tcg_out_movi(TCGContext *s, TCGType type,
- TCGReg ret, tcg_target_long arg)
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long arg)
{
+ tcg_debug_assert(type == TCG_TYPE_I32);
+ tcg_debug_assert(ret < TCG_REG_Q0);
tcg_out_movi32(s, COND_AL, ret, arg);
}
+/* Type is always V128, with I64 elements. */
+static void tcg_out_dup2_vec(TCGContext *s, TCGReg rd, TCGReg rl, TCGReg rh)
+{
+ /* Move high element into place first. */
+ /* VMOV Dd+1, Ds */
+ tcg_out_vreg3(s, INSN_VORR | (1 << 12), 0, 0, rd, rh, rh);
+ /* Move low element into place; tcg_out_mov will check for nop. */
+ tcg_out_mov(s, TCG_TYPE_V64, rd, rl);
+}
+
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg rd, TCGReg rs)
+{
+ int q = type - TCG_TYPE_V64;
+
+ if (vece == MO_64) {
+ if (type == TCG_TYPE_V128) {
+ tcg_out_dup2_vec(s, rd, rs, rs);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_V64, rd, rs);
+ }
+ } else if (rs < TCG_REG_Q0) {
+ int b = (vece == MO_8);
+ int e = (vece == MO_16);
+ tcg_out32(s, INSN_VDUP_G | (b << 22) | (q << 21) | (e << 5) |
+ encode_vn(rd) | (rs << 12));
+ } else {
+ int imm4 = 1 << vece;
+ tcg_out32(s, INSN_VDUP_S | (imm4 << 16) | (q << 6) |
+ encode_vd(rd) | encode_vm(rs));
+ }
+ return true;
+}
+
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg rd, TCGReg base, intptr_t offset)
+{
+ if (vece == MO_64) {
+ tcg_out_ld(s, TCG_TYPE_V64, rd, base, offset);
+ if (type == TCG_TYPE_V128) {
+ tcg_out_dup2_vec(s, rd, rd, rd);
+ }
+ } else {
+ int q = type - TCG_TYPE_V64;
+ tcg_out_vldst(s, INSN_VLD1R | (vece << 6) | (q << 5),
+ rd, base, offset);
+ }
+ return true;
+}
+
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg rd, int64_t v64)
+{
+ int q = type - TCG_TYPE_V64;
+ int cmode, imm8, i;
+
+ /* Test all bytes equal first. */
+ if (vece == MO_8) {
+ tcg_out_vmovi(s, rd, q, 0, 0xe, v64);
+ return;
+ }
+
+ /*
+ * Test all bytes 0x00 or 0xff second. This can match cases that
+ * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
+ */
+ for (i = imm8 = 0; i < 8; i++) {
+ uint8_t byte = v64 >> (i * 8);
+ if (byte == 0xff) {
+ imm8 |= 1 << i;
+ } else if (byte != 0) {
+ goto fail_bytes;
+ }
+ }
+ tcg_out_vmovi(s, rd, q, 1, 0xe, imm8);
+ return;
+ fail_bytes:
+
+ /*
+ * Tests for various replications. For each element width, if we
+ * cannot find an expansion there's no point checking a larger
+ * width because we already know by replication it cannot match.
+ */
+ if (vece == MO_16) {
+ uint16_t v16 = v64;
+
+ if (is_shimm16(v16, &cmode, &imm8)) {
+ tcg_out_vmovi(s, rd, q, 0, cmode, imm8);
+ return;
+ }
+ if (is_shimm16(~v16, &cmode, &imm8)) {
+ tcg_out_vmovi(s, rd, q, 1, cmode, imm8);
+ return;
+ }
+
+ /*
+ * Otherwise, all remaining constants can be loaded in two insns:
+ * rd = v16 & 0xff, rd |= v16 & 0xff00.
+ */
+ tcg_out_vmovi(s, rd, q, 0, 0x8, v16 & 0xff);
+ tcg_out_vmovi(s, rd, q, 0, 0xb, v16 >> 8); /* VORRI */
+ return;
+ }
+
+ if (vece == MO_32) {
+ uint32_t v32 = v64;
+
+ if (is_shimm32(v32, &cmode, &imm8) ||
+ is_soimm32(v32, &cmode, &imm8)) {
+ tcg_out_vmovi(s, rd, q, 0, cmode, imm8);
+ return;
+ }
+ if (is_shimm32(~v32, &cmode, &imm8) ||
+ is_soimm32(~v32, &cmode, &imm8)) {
+ tcg_out_vmovi(s, rd, q, 1, cmode, imm8);
+ return;
+ }
+
+ /*
+ * Restrict the set of constants to those we can load with
+ * two instructions. Others we load from the pool.
+ */
+ i = is_shimm32_pair(v32, &cmode, &imm8);
+ if (i) {
+ tcg_out_vmovi(s, rd, q, 0, cmode, imm8);
+ tcg_out_vmovi(s, rd, q, 0, i | 1, extract32(v32, i * 4, 8));
+ return;
+ }
+ i = is_shimm32_pair(~v32, &cmode, &imm8);
+ if (i) {
+ tcg_out_vmovi(s, rd, q, 1, cmode, imm8);
+ tcg_out_vmovi(s, rd, q, 1, i | 1, extract32(~v32, i * 4, 8));
+ return;
+ }
+ }
+
+ /*
+ * As a last resort, load from the constant pool.
+ */
+ if (!q || vece == MO_64) {
+ new_pool_l2(s, R_ARM_PC11, s->code_ptr, 0, v64, v64 >> 32);
+ /* VLDR Dd, [pc + offset] */
+ tcg_out32(s, INSN_VLDR_D | encode_vd(rd) | (0xf << 16));
+ if (q) {
+ tcg_out_dup2_vec(s, rd, rd, rd);
+ }
+ } else {
+ new_pool_label(s, (uint32_t)v64, R_ARM_PC8, s->code_ptr, 0);
+ /* add tmp, pc, offset */
+ tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_TMP, TCG_REG_PC, 0);
+ tcg_out_dupm_vec(s, type, MO_32, rd, TCG_REG_TMP, 0);
+ }
+}
+
+static const ARMInsn vec_cmp_insn[16] = {
+ [TCG_COND_EQ] = INSN_VCEQ,
+ [TCG_COND_GT] = INSN_VCGT,
+ [TCG_COND_GE] = INSN_VCGE,
+ [TCG_COND_GTU] = INSN_VCGT_U,
+ [TCG_COND_GEU] = INSN_VCGE_U,
+};
+
+static const ARMInsn vec_cmp0_insn[16] = {
+ [TCG_COND_EQ] = INSN_VCEQ0,
+ [TCG_COND_GT] = INSN_VCGT0,
+ [TCG_COND_GE] = INSN_VCGE0,
+ [TCG_COND_LT] = INSN_VCLT0,
+ [TCG_COND_LE] = INSN_VCLE0,
+};
+
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+ unsigned vecl, unsigned vece,
+ const TCGArg *args, const int *const_args)
+{
+ TCGType type = vecl + TCG_TYPE_V64;
+ unsigned q = vecl;
+ TCGArg a0, a1, a2, a3;
+ int cmode, imm8;
+
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+
+ switch (opc) {
+ case INDEX_op_ld_vec:
+ tcg_out_ld(s, type, a0, a1, a2);
+ return;
+ case INDEX_op_st_vec:
+ tcg_out_st(s, type, a0, a1, a2);
+ return;
+ case INDEX_op_dupm_vec:
+ tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
+ return;
+ case INDEX_op_dup2_vec:
+ tcg_out_dup2_vec(s, a0, a1, a2);
+ return;
+ case INDEX_op_abs_vec:
+ tcg_out_vreg2(s, INSN_VABS, q, vece, a0, a1);
+ return;
+ case INDEX_op_neg_vec:
+ tcg_out_vreg2(s, INSN_VNEG, q, vece, a0, a1);
+ return;
+ case INDEX_op_not_vec:
+ tcg_out_vreg2(s, INSN_VMVN, q, 0, a0, a1);
+ return;
+ case INDEX_op_add_vec:
+ tcg_out_vreg3(s, INSN_VADD, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_mul_vec:
+ tcg_out_vreg3(s, INSN_VMUL, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_smax_vec:
+ tcg_out_vreg3(s, INSN_VMAX, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_smin_vec:
+ tcg_out_vreg3(s, INSN_VMIN, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_sub_vec:
+ tcg_out_vreg3(s, INSN_VSUB, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_ssadd_vec:
+ tcg_out_vreg3(s, INSN_VQADD, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_sssub_vec:
+ tcg_out_vreg3(s, INSN_VQSUB, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_umax_vec:
+ tcg_out_vreg3(s, INSN_VMAX_U, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_umin_vec:
+ tcg_out_vreg3(s, INSN_VMIN_U, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_usadd_vec:
+ tcg_out_vreg3(s, INSN_VQADD_U, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_ussub_vec:
+ tcg_out_vreg3(s, INSN_VQSUB_U, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_xor_vec:
+ tcg_out_vreg3(s, INSN_VEOR, q, 0, a0, a1, a2);
+ return;
+ case INDEX_op_arm_sshl_vec:
+ /*
+ * Note that Vm is the data and Vn is the shift count,
+ * therefore the arguments appear reversed.
+ */
+ tcg_out_vreg3(s, INSN_VSHL_S, q, vece, a0, a2, a1);
+ return;
+ case INDEX_op_arm_ushl_vec:
+ /* See above. */
+ tcg_out_vreg3(s, INSN_VSHL_U, q, vece, a0, a2, a1);
+ return;
+ case INDEX_op_shli_vec:
+ tcg_out_vshifti(s, INSN_VSHLI, q, a0, a1, a2 + (8 << vece));
+ return;
+ case INDEX_op_shri_vec:
+ tcg_out_vshifti(s, INSN_VSHRI, q, a0, a1, (16 << vece) - a2);
+ return;
+ case INDEX_op_sari_vec:
+ tcg_out_vshifti(s, INSN_VSARI, q, a0, a1, (16 << vece) - a2);
+ return;
+ case INDEX_op_arm_sli_vec:
+ tcg_out_vshifti(s, INSN_VSLI, q, a0, a2, args[3] + (8 << vece));
+ return;
+
+ case INDEX_op_andc_vec:
+ if (!const_args[2]) {
+ tcg_out_vreg3(s, INSN_VBIC, q, 0, a0, a1, a2);
+ return;
+ }
+ a2 = ~a2;
+ /* fall through */
+ case INDEX_op_and_vec:
+ if (const_args[2]) {
+ is_shimm1632(~a2, &cmode, &imm8);
+ if (a0 == a1) {
+ tcg_out_vmovi(s, a0, q, 1, cmode | 1, imm8); /* VBICI */
+ return;
+ }
+ tcg_out_vmovi(s, a0, q, 1, cmode, imm8); /* VMVNI */
+ a2 = a0;
+ }
+ tcg_out_vreg3(s, INSN_VAND, q, 0, a0, a1, a2);
+ return;
+
+ case INDEX_op_orc_vec:
+ if (!const_args[2]) {
+ tcg_out_vreg3(s, INSN_VORN, q, 0, a0, a1, a2);
+ return;
+ }
+ a2 = ~a2;
+ /* fall through */
+ case INDEX_op_or_vec:
+ if (const_args[2]) {
+ is_shimm1632(a2, &cmode, &imm8);
+ if (a0 == a1) {
+ tcg_out_vmovi(s, a0, q, 0, cmode | 1, imm8); /* VORRI */
+ return;
+ }
+ tcg_out_vmovi(s, a0, q, 0, cmode, imm8); /* VMOVI */
+ a2 = a0;
+ }
+ tcg_out_vreg3(s, INSN_VORR, q, 0, a0, a1, a2);
+ return;
+
+ case INDEX_op_cmp_vec:
+ {
+ TCGCond cond = args[3];
+
+ if (cond == TCG_COND_NE) {
+ if (const_args[2]) {
+ tcg_out_vreg3(s, INSN_VTST, q, vece, a0, a1, a1);
+ } else {
+ tcg_out_vreg3(s, INSN_VCEQ, q, vece, a0, a1, a2);
+ tcg_out_vreg2(s, INSN_VMVN, q, 0, a0, a0);
+ }
+ } else {
+ ARMInsn insn;
+
+ if (const_args[2]) {
+ insn = vec_cmp0_insn[cond];
+ if (insn) {
+ tcg_out_vreg2(s, insn, q, vece, a0, a1);
+ return;
+ }
+ tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
+ a2 = TCG_VEC_TMP;
+ }
+ insn = vec_cmp_insn[cond];
+ if (insn == 0) {
+ TCGArg t;
+ t = a1, a1 = a2, a2 = t;
+ cond = tcg_swap_cond(cond);
+ insn = vec_cmp_insn[cond];
+ tcg_debug_assert(insn != 0);
+ }
+ tcg_out_vreg3(s, insn, q, vece, a0, a1, a2);
+ }
+ }
+ return;
+
+ case INDEX_op_bitsel_vec:
+ a3 = args[3];
+ if (a0 == a3) {
+ tcg_out_vreg3(s, INSN_VBIT, q, 0, a0, a2, a1);
+ } else if (a0 == a2) {
+ tcg_out_vreg3(s, INSN_VBIF, q, 0, a0, a3, a1);
+ } else {
+ tcg_out_mov(s, type, a0, a1);
+ tcg_out_vreg3(s, INSN_VBSL, q, 0, a0, a2, a3);
+ }
+ return;
+
+ case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
+ default:
+ g_assert_not_reached();
+ }
+}
+
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
+{
+ switch (opc) {
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_andc_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_orc_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_not_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_sari_vec:
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_sssub_vec:
+ case INDEX_op_usadd_vec:
+ case INDEX_op_ussub_vec:
+ case INDEX_op_bitsel_vec:
+ return 1;
+ case INDEX_op_abs_vec:
+ case INDEX_op_cmp_vec:
+ case INDEX_op_mul_vec:
+ case INDEX_op_neg_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
+ return vece < MO_64;
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ case INDEX_op_rotli_vec:
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
+ return -1;
+ default:
+ return 0;
+ }
+}
+
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
+ TCGArg a0, ...)
+{
+ va_list va;
+ TCGv_vec v0, v1, v2, t1, t2, c1;
+ TCGArg a2;
+
+ va_start(va, a0);
+ v0 = temp_tcgv_vec(arg_temp(a0));
+ v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ a2 = va_arg(va, TCGArg);
+ va_end(va);
+
+ switch (opc) {
+ case INDEX_op_shlv_vec:
+ /*
+ * Merely propagate shlv_vec to arm_ushl_vec.
+ * In this way we don't set TCG_TARGET_HAS_shv_vec
+ * because everything is done via expansion.
+ */
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
+ break;
+
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ /* Right shifts are negative left shifts for NEON. */
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ t1 = tcg_temp_new_vec(type);
+ tcg_gen_neg_vec(vece, t1, v2);
+ if (opc == INDEX_op_shrv_vec) {
+ opc = INDEX_op_arm_ushl_vec;
+ } else {
+ opc = INDEX_op_arm_sshl_vec;
+ }
+ vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
+ tcg_temp_free_vec(t1);
+ break;
+
+ case INDEX_op_rotli_vec:
+ t1 = tcg_temp_new_vec(type);
+ tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
+ vec_gen_4(INDEX_op_arm_sli_vec, type, vece,
+ tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
+ tcg_temp_free_vec(t1);
+ break;
+
+ case INDEX_op_rotlv_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ t1 = tcg_temp_new_vec(type);
+ c1 = tcg_constant_vec(type, vece, 8 << vece);
+ tcg_gen_sub_vec(vece, t1, v2, c1);
+ /* Right shifts are negative left shifts for NEON. */
+ vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(t1),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
+ vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
+ tcg_gen_or_vec(vece, v0, v0, t1);
+ tcg_temp_free_vec(t1);
+ break;
+
+ case INDEX_op_rotrv_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ t1 = tcg_temp_new_vec(type);
+ t2 = tcg_temp_new_vec(type);
+ c1 = tcg_constant_vec(type, vece, 8 << vece);
+ tcg_gen_neg_vec(vece, t1, v2);
+ tcg_gen_sub_vec(vece, t2, c1, v2);
+ /* Right shifts are negative left shifts for NEON. */
+ vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(t1),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
+ vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(t2),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t2));
+ tcg_gen_or_vec(vece, v0, t1, t2);
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
int i;
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 8d1fee6327..d6222ba2db 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -78,19 +78,42 @@ typedef enum {
TCG_REG_R13,
TCG_REG_R14,
TCG_REG_PC,
+
+ TCG_REG_Q0,
+ TCG_REG_Q1,
+ TCG_REG_Q2,
+ TCG_REG_Q3,
+ TCG_REG_Q4,
+ TCG_REG_Q5,
+ TCG_REG_Q6,
+ TCG_REG_Q7,
+ TCG_REG_Q8,
+ TCG_REG_Q9,
+ TCG_REG_Q10,
+ TCG_REG_Q11,
+ TCG_REG_Q12,
+ TCG_REG_Q13,
+ TCG_REG_Q14,
+ TCG_REG_Q15,
+
+ TCG_AREG0 = TCG_REG_R6,
+ TCG_REG_CALL_STACK = TCG_REG_R13,
} TCGReg;
-#define TCG_TARGET_NB_REGS 16
+#define TCG_TARGET_NB_REGS 32
#ifdef __ARM_ARCH_EXT_IDIV__
#define use_idiv_instructions 1
#else
extern bool use_idiv_instructions;
#endif
-
+#ifdef __ARM_NEON__
+#define use_neon_instructions 1
+#else
+extern bool use_neon_instructions;
+#endif
/* used for function call generation */
-#define TCG_REG_CALL_STACK TCG_REG_R13
#define TCG_TARGET_STACK_ALIGN 8
#define TCG_TARGET_CALL_ALIGN_ARGS 1
#define TCG_TARGET_CALL_STACK_OFFSET 0
@@ -128,9 +151,26 @@ extern bool use_idiv_instructions;
#define TCG_TARGET_HAS_direct_jump 0
#define TCG_TARGET_HAS_qemu_st8_i32 0
-enum {
- TCG_AREG0 = TCG_REG_R6,
-};
+#define TCG_TARGET_HAS_v64 use_neon_instructions
+#define TCG_TARGET_HAS_v128 use_neon_instructions
+#define TCG_TARGET_HAS_v256 0
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec 1
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec 1
+#define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 0
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 0
+#define TCG_TARGET_HAS_shv_vec 0
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec 1
+#define TCG_TARGET_HAS_cmpsel_vec 0
#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
diff --git a/tcg/arm/tcg-target.opc.h b/tcg/arm/tcg-target.opc.h
new file mode 100644
index 0000000000..d38af9a808
--- /dev/null
+++ b/tcg/arm/tcg-target.opc.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2019 Linaro
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ *
+ * Target-specific opcodes for host vector expansion. These will be
+ * emitted by tcg_expand_vec_op. For those familiar with GCC internals,
+ * consider these to be UNSPEC with names.
+ */
+
+DEF(arm_sli_vec, 1, 2, 1, IMPLVEC)
+DEF(arm_sshl_vec, 1, 2, 0, IMPLVEC)
+DEF(arm_ushl_vec, 1, 2, 0, IMPLVEC)
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 415c5c0796..34113388ef 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -210,10 +210,8 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
}
/* test if a constant matches the constraint */
-static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
- const TCGArgConstraint *arg_ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
- int ct = arg_ct->ct;
if (ct & TCG_CT_CONST) {
return 1;
}
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 8b16726242..5944448b2a 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -193,11 +193,8 @@ static inline bool is_p2m1(tcg_target_long val)
}
/* test if a constant matches the constraint */
-static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
- const TCGArgConstraint *arg_ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
- int ct;
- ct = arg_ct->ct;
if (ct & TCG_CT_CONST) {
return 1;
} else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 838ccfa42d..795701442b 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -238,10 +238,8 @@ static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
}
/* test if a constant matches the constraint */
-static int tcg_target_const_match(tcg_target_long val, TCGType type,
- const TCGArgConstraint *arg_ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
- int ct = arg_ct->ct;
if (ct & TCG_CT_CONST) {
return 1;
}
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index ef43147040..da7eecafc5 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -145,10 +145,8 @@ static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
}
/* test if a constant matches the constraint */
-static int tcg_target_const_match(tcg_target_long val, TCGType type,
- const TCGArgConstraint *arg_ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
- int ct = arg_ct->ct;
if (ct & TCG_CT_CONST) {
return 1;
}
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
index af8dfe81ac..5fe073f09a 100644
--- a/tcg/s390/tcg-target.c.inc
+++ b/tcg/s390/tcg-target.c.inc
@@ -417,11 +417,8 @@ static bool patch_reloc(tcg_insn_unit *src_rw, int type,
}
/* Test if a constant matches the constraint. */
-static int tcg_target_const_match(tcg_target_long val, TCGType type,
- const TCGArgConstraint *arg_ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
- int ct = arg_ct->ct;
-
if (ct & TCG_CT_CONST) {
return 1;
}
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
index 3d50f985c6..ce39ac2d86 100644
--- a/tcg/sparc/tcg-target.c.inc
+++ b/tcg/sparc/tcg-target.c.inc
@@ -341,11 +341,8 @@ static bool patch_reloc(tcg_insn_unit *src_rw, int type,
}
/* test if a constant matches the constraint */
-static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
- const TCGArgConstraint *arg_ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
- int ct = arg_ct->ct;
-
if (ct & TCG_CT_CONST) {
return 1;
}
diff --git a/tcg/tcg.c b/tcg/tcg.c
index db806a6658..0dc271aac9 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -148,8 +148,7 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
-static int tcg_target_const_match(tcg_target_long val, TCGType type,
- const TCGArgConstraint *arg_ct);
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif
@@ -4078,7 +4077,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
ts = arg_temp(arg);
if (ts->val_type == TEMP_VAL_CONST
- && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
+ && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
/* constant is OK for instruction */
const_args[i] = 1;
new_args[i] = ts->val;
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index ee6cdfec71..823ecd5d35 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -789,11 +789,9 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
}
/* Test if a constant matches the constraint. */
-static int tcg_target_const_match(tcg_target_long val, TCGType type,
- const TCGArgConstraint *arg_ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
- /* No need to return 0 or 1, 0 or != 0 is good enough. */
- return arg_ct->ct & TCG_CT_CONST;
+ return ct & TCG_CT_CONST;
}
static void tcg_target_init(TCGContext *s)