aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--disas/riscv.c2
-rw-r--r--disas/riscv.h2
-rw-r--r--host/include/riscv/host/cpuinfo.h2
-rw-r--r--include/exec/exec-all.h31
-rw-r--r--include/tcg/tcg.h6
-rw-r--r--linux-user/ppc/signal.c2
-rw-r--r--linux-user/strace.c84
-rw-r--r--linux-user/strace.list3
-rw-r--r--linux-user/syscall.c6
-rw-r--r--linux-user/syscall_defs.h6
-rw-r--r--target/i386/tcg/sysemu/excp_helper.c45
-rw-r--r--tcg/ppc/tcg-target.c.inc4
-rw-r--r--tcg/riscv/tcg-target-con-set.h9
-rw-r--r--tcg/riscv/tcg-target-con-str.h3
-rw-r--r--tcg/riscv/tcg-target.c.inc994
-rw-r--r--tcg/riscv/tcg-target.h78
-rw-r--r--tcg/riscv/tcg-target.opc.h12
-rw-r--r--tcg/tcg.c2
-rw-r--r--util/cpuinfo-riscv.c34
19 files changed, 1151 insertions, 174 deletions
diff --git a/disas/riscv.c b/disas/riscv.c
index 5965574d87..fc0331b90b 100644
--- a/disas/riscv.c
+++ b/disas/riscv.c
@@ -4808,7 +4808,7 @@ static void decode_inst_operands(rv_decode *dec, rv_isa isa)
break;
case rv_codec_vsetivli:
dec->rd = operand_rd(inst);
- dec->imm = operand_vimm(inst);
+ dec->imm = extract32(inst, 15, 5);
dec->vzimm = operand_vzimm10(inst);
break;
case rv_codec_zcb_lb:
diff --git a/disas/riscv.h b/disas/riscv.h
index 16a08e4895..0d1f89ce8a 100644
--- a/disas/riscv.h
+++ b/disas/riscv.h
@@ -290,7 +290,7 @@ enum {
#define rv_fmt_fd_vs2 "O\t3,F"
#define rv_fmt_vd_vm "O\tDm"
#define rv_fmt_vsetvli "O\t0,1,v"
-#define rv_fmt_vsetivli "O\t0,u,v"
+#define rv_fmt_vsetivli "O\t0,i,v"
#define rv_fmt_rs1_rs2_zce_ldst "O\t2,i(1)"
#define rv_fmt_push_rlist "O\tx,-i"
#define rv_fmt_pop_rlist "O\tx,i"
diff --git a/host/include/riscv/host/cpuinfo.h b/host/include/riscv/host/cpuinfo.h
index 2b00660e36..cdc784e7b6 100644
--- a/host/include/riscv/host/cpuinfo.h
+++ b/host/include/riscv/host/cpuinfo.h
@@ -10,9 +10,11 @@
#define CPUINFO_ZBA (1u << 1)
#define CPUINFO_ZBB (1u << 2)
#define CPUINFO_ZICOND (1u << 3)
+#define CPUINFO_ZVE64X (1u << 4)
/* Initialized with a constructor. */
extern unsigned cpuinfo;
+extern unsigned riscv_lg2_vlenb;
/*
* We cannot rely on constructor ordering, so other constructors must
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 72240ef426..2e4c4cc4b4 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -368,6 +368,13 @@ int probe_access_flags(CPUArchState *env, vaddr addr, int size,
* The CPUTLBEntryFull structure returned via @pfull is transient
* and must be consumed or copied immediately, before any further
* access or changes to TLB @mmu_idx.
+ *
+ * This function will not fault if @nonfault is set, but will
+ * return TLB_INVALID_MASK if the page is not mapped, or is not
+ * accessible with @access_type.
+ *
+ * This function will return TLB_MMIO in order to force the access
+ * to be handled out-of-line if plugins wish to instrument the access.
*/
int probe_access_full(CPUArchState *env, vaddr addr, int size,
MMUAccessType access_type, int mmu_idx,
@@ -375,22 +382,14 @@ int probe_access_full(CPUArchState *env, vaddr addr, int size,
CPUTLBEntryFull **pfull, uintptr_t retaddr);
/**
- * probe_access_mmu() - Like probe_access_full except cannot fault and
- * doesn't trigger instrumentation.
- *
- * @env: CPUArchState
- * @vaddr: virtual address to probe
- * @size: size of the probe
- * @access_type: read, write or execute permission
- * @mmu_idx: softmmu index
- * @phost: ptr to return value host address or NULL
- * @pfull: ptr to return value CPUTLBEntryFull structure or NULL
- *
- * The CPUTLBEntryFull structure returned via @pfull is transient
- * and must be consumed or copied immediately, before any further
- * access or changes to TLB @mmu_idx.
- *
- * Returns: TLB flags as per probe_access_flags()
+ * probe_access_full_mmu:
+ * Like probe_access_full, except:
+ *
+ * This function is intended to be used for page table accesses by
+ * the target mmu itself. Since such page walking happens while
+ * handling another potential mmu fault, this function never raises
+ * exceptions (akin to @nonfault true for probe_access_full).
+ * Likewise this function does not trigger plugin instrumentation.
*/
int probe_access_full_mmu(CPUArchState *env, vaddr addr, int size,
MMUAccessType access_type, int mmu_idx,
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 824fb3560d..a77ed12b9d 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -521,6 +521,12 @@ struct TCGContext {
struct qemu_plugin_insn *plugin_insn;
#endif
+ /* For host-specific values. */
+#ifdef __riscv
+ MemOp riscv_cur_vsew;
+ TCGType riscv_cur_type;
+#endif
+
GHashTable *const_table[TCG_TYPE_COUNT];
TCGTempSet free_temps[TCG_TYPE_COUNT];
TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
diff --git a/linux-user/ppc/signal.c b/linux-user/ppc/signal.c
index a1d8c0bccc..24e5a02a78 100644
--- a/linux-user/ppc/signal.c
+++ b/linux-user/ppc/signal.c
@@ -628,7 +628,7 @@ static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig)
if (!lock_user_struct(VERIFY_READ, mcp, mcp_addr, 1))
return 1;
- target_to_host_sigset_internal(&blocked, &set);
+ target_to_host_sigset(&blocked, &set);
set_sigmask(&blocked);
restore_user_regs(env, mcp, sig);
diff --git a/linux-user/strace.c b/linux-user/strace.c
index c3eb3a2706..b70eadc19e 100644
--- a/linux-user/strace.c
+++ b/linux-user/strace.c
@@ -161,19 +161,20 @@ static const char * const target_signal_name[] = {
};
static void
-print_signal(abi_ulong arg, int last)
+print_signal_1(abi_ulong arg)
{
- const char *signal_name = NULL;
-
if (arg < ARRAY_SIZE(target_signal_name)) {
- signal_name = target_signal_name[arg];
+ qemu_log("%s", target_signal_name[arg]);
+ } else {
+ qemu_log(TARGET_ABI_FMT_lu, arg);
}
+}
- if (signal_name == NULL) {
- print_raw_param("%ld", arg, last);
- return;
- }
- qemu_log("%s%s", signal_name, get_comma(last));
+static void
+print_signal(abi_ulong arg, int last)
+{
+ print_signal_1(arg);
+ qemu_log("%s", get_comma(last));
}
static void print_si_code(int arg)
@@ -718,6 +719,51 @@ print_ipc(CPUArchState *cpu_env, const struct syscallname *name,
}
#endif
+#ifdef TARGET_NR_rt_sigprocmask
+static void print_target_sigset_t_1(target_sigset_t *set, int last)
+{
+ bool first = true;
+ int i, sig = 1;
+
+ qemu_log("[");
+ for (i = 0; i < TARGET_NSIG_WORDS; i++) {
+ abi_ulong bits = 0;
+ int j;
+
+ __get_user(bits, &set->sig[i]);
+ for (j = 0; j < sizeof(bits) * 8; j++) {
+ if (bits & ((abi_ulong)1 << j)) {
+ if (first) {
+ first = false;
+ } else {
+ qemu_log(" ");
+ }
+ print_signal_1(sig);
+ }
+ sig++;
+ }
+ }
+ qemu_log("]%s", get_comma(last));
+}
+
+static void print_target_sigset_t(abi_ulong addr, abi_ulong size, int last)
+{
+ if (addr && size == sizeof(target_sigset_t)) {
+ target_sigset_t *set;
+
+ set = lock_user(VERIFY_READ, addr, sizeof(target_sigset_t), 1);
+ if (set) {
+ print_target_sigset_t_1(set, last);
+ unlock_user(set, addr, 0);
+ } else {
+ print_pointer(addr, last);
+ }
+ } else {
+ print_pointer(addr, last);
+ }
+}
+#endif
+
/*
* Variants for the return value output function
*/
@@ -3312,11 +3358,29 @@ print_rt_sigprocmask(CPUArchState *cpu_env, const struct syscallname *name,
case TARGET_SIG_SETMASK: how = "SIG_SETMASK"; break;
}
qemu_log("%s,", how);
- print_pointer(arg1, 0);
+ print_target_sigset_t(arg1, arg3, 0);
print_pointer(arg2, 0);
print_raw_param("%u", arg3, 1);
print_syscall_epilogue(name);
}
+
+static void
+print_rt_sigprocmask_ret(CPUArchState *cpu_env, const struct syscallname *name,
+ abi_long ret, abi_long arg0, abi_long arg1,
+ abi_long arg2, abi_long arg3, abi_long arg4,
+ abi_long arg5)
+{
+ if (!print_syscall_err(ret)) {
+ qemu_log(TARGET_ABI_FMT_ld, ret);
+ if (arg2) {
+ qemu_log(" (oldset=");
+ print_target_sigset_t(arg2, arg3, 1);
+ qemu_log(")");
+ }
+ }
+
+ qemu_log("\n");
+}
#endif
#ifdef TARGET_NR_rt_sigqueueinfo
diff --git a/linux-user/strace.list b/linux-user/strace.list
index 0d69fb3150..fdf94ef32a 100644
--- a/linux-user/strace.list
+++ b/linux-user/strace.list
@@ -1189,7 +1189,8 @@
{ TARGET_NR_rt_sigpending, "rt_sigpending" , NULL, NULL, NULL },
#endif
#ifdef TARGET_NR_rt_sigprocmask
-{ TARGET_NR_rt_sigprocmask, "rt_sigprocmask" , NULL, print_rt_sigprocmask, NULL },
+{ TARGET_NR_rt_sigprocmask, "rt_sigprocmask" , NULL, print_rt_sigprocmask,
+ print_rt_sigprocmask_ret },
#endif
#ifdef TARGET_NR_rt_sigqueueinfo
{ TARGET_NR_rt_sigqueueinfo, "rt_sigqueueinfo" , NULL, print_rt_sigqueueinfo, NULL },
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index d1b0f7c5bb..59b2080b98 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8150,17 +8150,19 @@ static int open_self_maps_1(CPUArchState *env, int fd, bool smaps)
{
struct open_self_maps_data d = {
.ts = get_task_state(env_cpu(env)),
- .host_maps = read_self_maps(),
.fd = fd,
.smaps = smaps
};
+ mmap_lock();
+ d.host_maps = read_self_maps();
if (d.host_maps) {
walk_memory_regions(&d, open_self_maps_2);
free_self_maps(d.host_maps);
} else {
walk_memory_regions(&d, open_self_maps_3);
}
+ mmap_unlock();
return 0;
}
@@ -8942,7 +8944,7 @@ static int do_getdents64(abi_long dirfd, abi_long arg2, abi_long count)
#define RISCV_HWPROBE_EXT_ZFHMIN (1 << 28)
#define RISCV_HWPROBE_EXT_ZIHINTNTL (1 << 29)
#define RISCV_HWPROBE_EXT_ZVFH (1 << 30)
-#define RISCV_HWPROBE_EXT_ZVFHMIN (1 << 31)
+#define RISCV_HWPROBE_EXT_ZVFHMIN (1ULL << 31)
#define RISCV_HWPROBE_EXT_ZFA (1ULL << 32)
#define RISCV_HWPROBE_EXT_ZTSO (1ULL << 33)
#define RISCV_HWPROBE_EXT_ZACAS (1ULL << 34)
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 0ade83745e..0e08dfae3e 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -2750,9 +2750,9 @@ struct target_sched_param {
/* from kernel's include/uapi/linux/openat2.h */
struct open_how_ver0 {
- __u64 flags;
- __u64 mode;
- __u64 resolve;
+ uint64_t flags;
+ uint64_t mode;
+ uint64_t resolve;
};
struct target_open_how_ver0 {
abi_ullong flags;
diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
index 8fb05b1f53..da187c8792 100644
--- a/target/i386/tcg/sysemu/excp_helper.c
+++ b/target/i386/tcg/sysemu/excp_helper.c
@@ -60,14 +60,13 @@ typedef struct PTETranslate {
hwaddr gaddr;
} PTETranslate;
-static bool ptw_translate(PTETranslate *inout, hwaddr addr, uint64_t ra)
+static bool ptw_translate(PTETranslate *inout, hwaddr addr)
{
- CPUTLBEntryFull *full;
int flags;
inout->gaddr = addr;
- flags = probe_access_full(inout->env, addr, 0, MMU_DATA_STORE,
- inout->ptw_idx, true, &inout->haddr, &full, ra);
+ flags = probe_access_full_mmu(inout->env, addr, 0, MMU_DATA_STORE,
+ inout->ptw_idx, &inout->haddr, NULL);
if (unlikely(flags & TLB_INVALID_MASK)) {
TranslateFault *err = inout->err;
@@ -150,6 +149,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
uint32_t pkr;
int page_size;
int error_code;
+ int prot;
restart_all:
rsvd_mask = ~MAKE_64BIT_MASK(0, env_archcpu(env)->phys_bits);
@@ -166,7 +166,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
* Page table level 5
*/
pte_addr = (in->cr3 & ~0xfff) + (((addr >> 48) & 0x1ff) << 3);
- if (!ptw_translate(&pte_trans, pte_addr, ra)) {
+ if (!ptw_translate(&pte_trans, pte_addr)) {
return false;
}
restart_5:
@@ -190,7 +190,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
* Page table level 4
*/
pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 39) & 0x1ff) << 3);
- if (!ptw_translate(&pte_trans, pte_addr, ra)) {
+ if (!ptw_translate(&pte_trans, pte_addr)) {
return false;
}
restart_4:
@@ -210,7 +210,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
* Page table level 3
*/
pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3);
- if (!ptw_translate(&pte_trans, pte_addr, ra)) {
+ if (!ptw_translate(&pte_trans, pte_addr)) {
return false;
}
restart_3_lma:
@@ -237,7 +237,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
* Page table level 3
*/
pte_addr = (in->cr3 & 0xffffffe0ULL) + ((addr >> 27) & 0x18);
- if (!ptw_translate(&pte_trans, pte_addr, ra)) {
+ if (!ptw_translate(&pte_trans, pte_addr)) {
return false;
}
rsvd_mask |= PG_HI_USER_MASK;
@@ -259,7 +259,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
* Page table level 2
*/
pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3);
- if (!ptw_translate(&pte_trans, pte_addr, ra)) {
+ if (!ptw_translate(&pte_trans, pte_addr)) {
return false;
}
restart_2_pae:
@@ -285,7 +285,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
* Page table level 1
*/
pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3);
- if (!ptw_translate(&pte_trans, pte_addr, ra)) {
+ if (!ptw_translate(&pte_trans, pte_addr)) {
return false;
}
pte = ptw_ldq(&pte_trans, ra);
@@ -298,12 +298,12 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
/* combine pde and pte nx, user and rw protections */
ptep &= pte ^ PG_NX_MASK;
page_size = 4096;
- } else {
+ } else if (pg_mode) {
/*
* Page table level 2
*/
pte_addr = (in->cr3 & 0xfffff000ULL) + ((addr >> 20) & 0xffc);
- if (!ptw_translate(&pte_trans, pte_addr, ra)) {
+ if (!ptw_translate(&pte_trans, pte_addr)) {
return false;
}
restart_2_nopae:
@@ -332,7 +332,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
* Page table level 1
*/
pte_addr = (pte & ~0xfffu) + ((addr >> 10) & 0xffc);
- if (!ptw_translate(&pte_trans, pte_addr, ra)) {
+ if (!ptw_translate(&pte_trans, pte_addr)) {
return false;
}
pte = ptw_ldl(&pte_trans, ra);
@@ -343,6 +343,15 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
ptep &= pte | PG_NX_MASK;
page_size = 4096;
rsvd_mask = 0;
+ } else {
+ /*
+ * No paging (real mode), let's tentatively resolve the address as 1:1
+ * here, but conditionally still perform an NPT walk on it later.
+ */
+ page_size = 0x40000000;
+ paddr = in->addr;
+ prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+ goto stage2;
}
do_check_protect:
@@ -358,7 +367,7 @@ do_check_protect_pse36:
goto do_fault_protect;
}
- int prot = 0;
+ prot = 0;
if (!is_mmu_index_smap(in->mmu_idx) || !(ptep & PG_USER_MASK)) {
prot |= PAGE_READ;
if ((ptep & PG_RW_MASK) || !(is_user || (pg_mode & PG_MODE_WP))) {
@@ -420,6 +429,7 @@ do_check_protect_pse36:
/* merge offset within page */
paddr = (pte & PG_ADDRESS_MASK & ~(page_size - 1)) | (addr & (page_size - 1));
+ stage2:
/*
* Note that NPT is walked (for both paging structures and final guest
@@ -429,9 +439,8 @@ do_check_protect_pse36:
CPUTLBEntryFull *full;
int flags, nested_page_size;
- flags = probe_access_full(env, paddr, 0, access_type,
- MMU_NESTED_IDX, true,
- &pte_trans.haddr, &full, 0);
+ flags = probe_access_full_mmu(env, paddr, 0, access_type,
+ MMU_NESTED_IDX, &pte_trans.haddr, &full);
if (unlikely(flags & TLB_INVALID_MASK)) {
*err = (TranslateFault){
.error_code = env->error_code,
@@ -562,7 +571,7 @@ static bool get_physical_address(CPUX86State *env, vaddr addr,
addr = (uint32_t)addr;
}
- if (likely(env->cr[0] & CR0_PG_MASK)) {
+ if (likely(env->cr[0] & CR0_PG_MASK || use_stage2)) {
in.cr3 = env->cr[3];
in.mmu_idx = mmu_idx;
in.ptw_idx = use_stage2 ? MMU_NESTED_IDX : MMU_PHYS_IDX;
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 223f079524..9a11c26fd3 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -911,7 +911,9 @@ static void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
static void tcg_out_rlw_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
int sh, int mb, int me, bool rc)
{
- tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me) | rc);
+ tcg_debug_assert((mb & 0x1f) == mb);
+ tcg_debug_assert((me & 0x1f) == me);
+ tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh & 0x1f) | MB(mb) | ME(me) | rc);
}
static void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h
index aac5ceee2b..3c4ef44eb0 100644
--- a/tcg/riscv/tcg-target-con-set.h
+++ b/tcg/riscv/tcg-target-con-set.h
@@ -21,3 +21,12 @@ C_O1_I2(r, rZ, rZ)
C_N1_I2(r, r, rM)
C_O1_I4(r, r, rI, rM, rM)
C_O2_I4(r, r, rZ, rZ, rM, rM)
+C_O0_I2(v, r)
+C_O1_I1(v, r)
+C_O1_I1(v, v)
+C_O1_I2(v, v, r)
+C_O1_I2(v, v, v)
+C_O1_I2(v, vK, v)
+C_O1_I2(v, v, vK)
+C_O1_I2(v, v, vL)
+C_O1_I4(v, v, vL, vK, vK)
diff --git a/tcg/riscv/tcg-target-con-str.h b/tcg/riscv/tcg-target-con-str.h
index d5c419dff1..089efe96ca 100644
--- a/tcg/riscv/tcg-target-con-str.h
+++ b/tcg/riscv/tcg-target-con-str.h
@@ -9,6 +9,7 @@
* REGS(letter, register_mask)
*/
REGS('r', ALL_GENERAL_REGS)
+REGS('v', ALL_VECTOR_REGS)
/*
* Define constraint letters for constants:
@@ -16,6 +17,8 @@ REGS('r', ALL_GENERAL_REGS)
*/
CONST('I', TCG_CT_CONST_S12)
CONST('J', TCG_CT_CONST_J12)
+CONST('K', TCG_CT_CONST_S5)
+CONST('L', TCG_CT_CONST_CMP_VI)
CONST('N', TCG_CT_CONST_N12)
CONST('M', TCG_CT_CONST_M12)
CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index d334857226..f8331e4688 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -32,38 +32,14 @@
#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
- "zero",
- "ra",
- "sp",
- "gp",
- "tp",
- "t0",
- "t1",
- "t2",
- "s0",
- "s1",
- "a0",
- "a1",
- "a2",
- "a3",
- "a4",
- "a5",
- "a6",
- "a7",
- "s2",
- "s3",
- "s4",
- "s5",
- "s6",
- "s7",
- "s8",
- "s9",
- "s10",
- "s11",
- "t3",
- "t4",
- "t5",
- "t6"
+ "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2",
+ "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5",
+ "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7",
+ "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6",
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif
@@ -100,6 +76,16 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_A5,
TCG_REG_A6,
TCG_REG_A7,
+
+ /* Vector registers and TCG_REG_V0 reserved for mask. */
+ TCG_REG_V1, TCG_REG_V2, TCG_REG_V3, TCG_REG_V4,
+ TCG_REG_V5, TCG_REG_V6, TCG_REG_V7, TCG_REG_V8,
+ TCG_REG_V9, TCG_REG_V10, TCG_REG_V11, TCG_REG_V12,
+ TCG_REG_V13, TCG_REG_V14, TCG_REG_V15, TCG_REG_V16,
+ TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, TCG_REG_V20,
+ TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, TCG_REG_V24,
+ TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, TCG_REG_V28,
+ TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};
static const int tcg_target_call_iarg_regs[] = {
@@ -120,62 +106,50 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
return TCG_REG_A0 + slot;
}
-#define TCG_CT_CONST_ZERO 0x100
-#define TCG_CT_CONST_S12 0x200
-#define TCG_CT_CONST_N12 0x400
-#define TCG_CT_CONST_M12 0x800
-#define TCG_CT_CONST_J12 0x1000
+#define TCG_CT_CONST_ZERO 0x100
+#define TCG_CT_CONST_S12 0x200
+#define TCG_CT_CONST_N12 0x400
+#define TCG_CT_CONST_M12 0x800
+#define TCG_CT_CONST_J12 0x1000
+#define TCG_CT_CONST_S5 0x2000
+#define TCG_CT_CONST_CMP_VI 0x4000
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
+#define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)
+#define ALL_DVECTOR_REG_GROUPS 0x5555555500000000
+#define ALL_QVECTOR_REG_GROUPS 0x1111111100000000
#define sextreg sextract64
-/* test if a constant matches the constraint */
-static bool tcg_target_const_match(int64_t val, int ct,
- TCGType type, TCGCond cond, int vece)
-{
- if (ct & TCG_CT_CONST) {
- return 1;
- }
- if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
- return 1;
- }
- /*
- * Sign extended from 12 bits: [-0x800, 0x7ff].
- * Used for most arithmetic, as this is the isa field.
- */
- if ((ct & TCG_CT_CONST_S12) && val >= -0x800 && val <= 0x7ff) {
- return 1;
- }
- /*
- * Sign extended from 12 bits, negated: [-0x7ff, 0x800].
- * Used for subtraction, where a constant must be handled by ADDI.
- */
- if ((ct & TCG_CT_CONST_N12) && val >= -0x7ff && val <= 0x800) {
- return 1;
- }
- /*
- * Sign extended from 12 bits, +/- matching: [-0x7ff, 0x7ff].
- * Used by addsub2 and movcond, which may need the negative value,
- * and requires the modified constant to be representable.
- */
- if ((ct & TCG_CT_CONST_M12) && val >= -0x7ff && val <= 0x7ff) {
- return 1;
- }
- /*
- * Inverse of sign extended from 12 bits: ~[-0x800, 0x7ff].
- * Used to map ANDN back to ANDI, etc.
- */
- if ((ct & TCG_CT_CONST_J12) && ~val >= -0x800 && ~val <= 0x7ff) {
- return 1;
- }
- return 0;
-}
-
/*
* RISC-V Base ISA opcodes (IM)
*/
+#define V_OPIVV (0x0 << 12)
+#define V_OPFVV (0x1 << 12)
+#define V_OPMVV (0x2 << 12)
+#define V_OPIVI (0x3 << 12)
+#define V_OPIVX (0x4 << 12)
+#define V_OPFVF (0x5 << 12)
+#define V_OPMVX (0x6 << 12)
+#define V_OPCFG (0x7 << 12)
+
+/* NF <= 7 && NF >= 0 */
+#define V_NF(x) (x << 29)
+#define V_UNIT_STRIDE (0x0 << 20)
+#define V_UNIT_STRIDE_WHOLE_REG (0x8 << 20)
+
+typedef enum {
+ VLMUL_M1 = 0, /* LMUL=1 */
+ VLMUL_M2, /* LMUL=2 */
+ VLMUL_M4, /* LMUL=4 */
+ VLMUL_M8, /* LMUL=8 */
+ VLMUL_RESERVED,
+ VLMUL_MF8, /* LMUL=1/8 */
+ VLMUL_MF4, /* LMUL=1/4 */
+ VLMUL_MF2, /* LMUL=1/2 */
+} RISCVVlmul;
+
typedef enum {
OPC_ADD = 0x33,
OPC_ADDI = 0x13,
@@ -271,8 +245,199 @@ typedef enum {
/* Zicond: integer conditional operations */
OPC_CZERO_EQZ = 0x0e005033,
OPC_CZERO_NEZ = 0x0e007033,
+
+ /* V: Vector extension 1.0 */
+ OPC_VSETVLI = 0x57 | V_OPCFG,
+ OPC_VSETIVLI = 0xc0000057 | V_OPCFG,
+ OPC_VSETVL = 0x80000057 | V_OPCFG,
+
+ OPC_VLE8_V = 0x7 | V_UNIT_STRIDE,
+ OPC_VLE16_V = 0x5007 | V_UNIT_STRIDE,
+ OPC_VLE32_V = 0x6007 | V_UNIT_STRIDE,
+ OPC_VLE64_V = 0x7007 | V_UNIT_STRIDE,
+ OPC_VSE8_V = 0x27 | V_UNIT_STRIDE,
+ OPC_VSE16_V = 0x5027 | V_UNIT_STRIDE,
+ OPC_VSE32_V = 0x6027 | V_UNIT_STRIDE,
+ OPC_VSE64_V = 0x7027 | V_UNIT_STRIDE,
+
+ OPC_VL1RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0),
+ OPC_VL2RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
+ OPC_VL4RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
+ OPC_VL8RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),
+
+ OPC_VS1R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0),
+ OPC_VS2R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
+ OPC_VS4R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
+ OPC_VS8R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),
+
+ OPC_VMERGE_VIM = 0x5c000057 | V_OPIVI,
+ OPC_VMERGE_VVM = 0x5c000057 | V_OPIVV,
+
+ OPC_VADD_VV = 0x57 | V_OPIVV,
+ OPC_VADD_VI = 0x57 | V_OPIVI,
+ OPC_VSUB_VV = 0x8000057 | V_OPIVV,
+ OPC_VRSUB_VI = 0xc000057 | V_OPIVI,
+ OPC_VAND_VV = 0x24000057 | V_OPIVV,
+ OPC_VAND_VI = 0x24000057 | V_OPIVI,
+ OPC_VOR_VV = 0x28000057 | V_OPIVV,
+ OPC_VOR_VI = 0x28000057 | V_OPIVI,
+ OPC_VXOR_VV = 0x2c000057 | V_OPIVV,
+ OPC_VXOR_VI = 0x2c000057 | V_OPIVI,
+
+ OPC_VMUL_VV = 0x94000057 | V_OPMVV,
+ OPC_VSADD_VV = 0x84000057 | V_OPIVV,
+ OPC_VSADD_VI = 0x84000057 | V_OPIVI,
+ OPC_VSSUB_VV = 0x8c000057 | V_OPIVV,
+ OPC_VSSUB_VI = 0x8c000057 | V_OPIVI,
+ OPC_VSADDU_VV = 0x80000057 | V_OPIVV,
+ OPC_VSADDU_VI = 0x80000057 | V_OPIVI,
+ OPC_VSSUBU_VV = 0x88000057 | V_OPIVV,
+ OPC_VSSUBU_VI = 0x88000057 | V_OPIVI,
+
+ OPC_VMAX_VV = 0x1c000057 | V_OPIVV,
+ OPC_VMAX_VI = 0x1c000057 | V_OPIVI,
+ OPC_VMAXU_VV = 0x18000057 | V_OPIVV,
+ OPC_VMAXU_VI = 0x18000057 | V_OPIVI,
+ OPC_VMIN_VV = 0x14000057 | V_OPIVV,
+ OPC_VMIN_VI = 0x14000057 | V_OPIVI,
+ OPC_VMINU_VV = 0x10000057 | V_OPIVV,
+ OPC_VMINU_VI = 0x10000057 | V_OPIVI,
+
+ OPC_VMSEQ_VV = 0x60000057 | V_OPIVV,
+ OPC_VMSEQ_VI = 0x60000057 | V_OPIVI,
+ OPC_VMSEQ_VX = 0x60000057 | V_OPIVX,
+ OPC_VMSNE_VV = 0x64000057 | V_OPIVV,
+ OPC_VMSNE_VI = 0x64000057 | V_OPIVI,
+ OPC_VMSNE_VX = 0x64000057 | V_OPIVX,
+
+ OPC_VMSLTU_VV = 0x68000057 | V_OPIVV,
+ OPC_VMSLTU_VX = 0x68000057 | V_OPIVX,
+ OPC_VMSLT_VV = 0x6c000057 | V_OPIVV,
+ OPC_VMSLT_VX = 0x6c000057 | V_OPIVX,
+ OPC_VMSLEU_VV = 0x70000057 | V_OPIVV,
+ OPC_VMSLEU_VX = 0x70000057 | V_OPIVX,
+ OPC_VMSLE_VV = 0x74000057 | V_OPIVV,
+ OPC_VMSLE_VX = 0x74000057 | V_OPIVX,
+
+ OPC_VMSLEU_VI = 0x70000057 | V_OPIVI,
+ OPC_VMSLE_VI = 0x74000057 | V_OPIVI,
+ OPC_VMSGTU_VI = 0x78000057 | V_OPIVI,
+ OPC_VMSGTU_VX = 0x78000057 | V_OPIVX,
+ OPC_VMSGT_VI = 0x7c000057 | V_OPIVI,
+ OPC_VMSGT_VX = 0x7c000057 | V_OPIVX,
+
+ OPC_VSLL_VV = 0x94000057 | V_OPIVV,
+ OPC_VSLL_VI = 0x94000057 | V_OPIVI,
+ OPC_VSLL_VX = 0x94000057 | V_OPIVX,
+ OPC_VSRL_VV = 0xa0000057 | V_OPIVV,
+ OPC_VSRL_VI = 0xa0000057 | V_OPIVI,
+ OPC_VSRL_VX = 0xa0000057 | V_OPIVX,
+ OPC_VSRA_VV = 0xa4000057 | V_OPIVV,
+ OPC_VSRA_VI = 0xa4000057 | V_OPIVI,
+ OPC_VSRA_VX = 0xa4000057 | V_OPIVX,
+
+ OPC_VMV_V_V = 0x5e000057 | V_OPIVV,
+ OPC_VMV_V_I = 0x5e000057 | V_OPIVI,
+ OPC_VMV_V_X = 0x5e000057 | V_OPIVX,
+
+ OPC_VMVNR_V = 0x9e000057 | V_OPIVI,
} RISCVInsn;
+static const struct {
+ RISCVInsn op;
+ bool swap;
+} tcg_cmpcond_to_rvv_vv[] = {
+ [TCG_COND_EQ] = { OPC_VMSEQ_VV, false },
+ [TCG_COND_NE] = { OPC_VMSNE_VV, false },
+ [TCG_COND_LT] = { OPC_VMSLT_VV, false },
+ [TCG_COND_GE] = { OPC_VMSLE_VV, true },
+ [TCG_COND_GT] = { OPC_VMSLT_VV, true },
+ [TCG_COND_LE] = { OPC_VMSLE_VV, false },
+ [TCG_COND_LTU] = { OPC_VMSLTU_VV, false },
+ [TCG_COND_GEU] = { OPC_VMSLEU_VV, true },
+ [TCG_COND_GTU] = { OPC_VMSLTU_VV, true },
+ [TCG_COND_LEU] = { OPC_VMSLEU_VV, false }
+};
+
+static const struct {
+ RISCVInsn op;
+ int min;
+ int max;
+ bool adjust;
+} tcg_cmpcond_to_rvv_vi[] = {
+ [TCG_COND_EQ] = { OPC_VMSEQ_VI, -16, 15, false },
+ [TCG_COND_NE] = { OPC_VMSNE_VI, -16, 15, false },
+ [TCG_COND_GT] = { OPC_VMSGT_VI, -16, 15, false },
+ [TCG_COND_LE] = { OPC_VMSLE_VI, -16, 15, false },
+ [TCG_COND_LT] = { OPC_VMSLE_VI, -15, 16, true },
+ [TCG_COND_GE] = { OPC_VMSGT_VI, -15, 16, true },
+ [TCG_COND_LEU] = { OPC_VMSLEU_VI, 0, 15, false },
+ [TCG_COND_GTU] = { OPC_VMSGTU_VI, 0, 15, false },
+ [TCG_COND_LTU] = { OPC_VMSLEU_VI, 1, 16, true },
+ [TCG_COND_GEU] = { OPC_VMSGTU_VI, 1, 16, true },
+};
+
+/* test if a constant matches the constraint */
+static bool tcg_target_const_match(int64_t val, int ct,
+ TCGType type, TCGCond cond, int vece)
+{
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+ return 1;
+ }
+ if (type >= TCG_TYPE_V64) {
+ /* Val is replicated by VECE; extract the highest element. */
+ val >>= (-8 << vece) & 63;
+ }
+ /*
+ * Sign extended from 12 bits: [-0x800, 0x7ff].
+ * Used for most arithmetic, as this is the isa field.
+ */
+ if ((ct & TCG_CT_CONST_S12) && val >= -0x800 && val <= 0x7ff) {
+ return 1;
+ }
+ /*
+ * Sign extended from 12 bits, negated: [-0x7ff, 0x800].
+ * Used for subtraction, where a constant must be handled by ADDI.
+ */
+ if ((ct & TCG_CT_CONST_N12) && val >= -0x7ff && val <= 0x800) {
+ return 1;
+ }
+ /*
+ * Sign extended from 12 bits, +/- matching: [-0x7ff, 0x7ff].
+ * Used by addsub2 and movcond, which may need the negative value,
+ * and requires the modified constant to be representable.
+ */
+ if ((ct & TCG_CT_CONST_M12) && val >= -0x7ff && val <= 0x7ff) {
+ return 1;
+ }
+ /*
+ * Inverse of sign extended from 12 bits: ~[-0x800, 0x7ff].
+ * Used to map ANDN back to ANDI, etc.
+ */
+ if ((ct & TCG_CT_CONST_J12) && ~val >= -0x800 && ~val <= 0x7ff) {
+ return 1;
+ }
+ /*
+ * Sign extended from 5 bits: [-0x10, 0x0f].
+ * Used for vector-immediate.
+ */
+ if ((ct & TCG_CT_CONST_S5) && val >= -0x10 && val <= 0x0f) {
+ return 1;
+ }
+ /*
+ * Used for vector compare OPIVI instructions.
+ */
+ if ((ct & TCG_CT_CONST_CMP_VI) &&
+ val >= tcg_cmpcond_to_rvv_vi[cond].min &&
+ val <= tcg_cmpcond_to_rvv_vi[cond].max) {
+ return true;
+ }
+ return 0;
+}
+
/*
* RISC-V immediate and instruction encoders (excludes 16-bit RVC)
*/
@@ -363,6 +528,45 @@ static int32_t encode_uj(RISCVInsn opc, TCGReg rd, uint32_t imm)
return opc | (rd & 0x1f) << 7 | encode_ujimm20(imm);
}
+
+/* Type-OPIVI */
+
+static int32_t encode_vi(RISCVInsn opc, TCGReg rd, int32_t imm,
+ TCGReg vs2, bool vm)
+{
+ return opc | (rd & 0x1f) << 7 | (imm & 0x1f) << 15 |
+ (vs2 & 0x1f) << 20 | (vm << 25);
+}
+
+/* Type-OPIVV/OPMVV/OPIVX/OPMVX, Vector load and store */
+
+static int32_t encode_v(RISCVInsn opc, TCGReg d, TCGReg s1,
+ TCGReg s2, bool vm)
+{
+ return opc | (d & 0x1f) << 7 | (s1 & 0x1f) << 15 |
+ (s2 & 0x1f) << 20 | (vm << 25);
+}
+
+/* Vector vtype */
+
+static uint32_t encode_vtype(bool vta, bool vma,
+ MemOp vsew, RISCVVlmul vlmul)
+{
+ return vma << 7 | vta << 6 | vsew << 3 | vlmul;
+}
+
+static int32_t encode_vset(RISCVInsn opc, TCGReg rd,
+ TCGArg rs1, uint32_t vtype)
+{
+ return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (vtype & 0x7ff) << 20;
+}
+
+static int32_t encode_vseti(RISCVInsn opc, TCGReg rd,
+ uint32_t uimm, uint32_t vtype)
+{
+ return opc | (rd & 0x1f) << 7 | (uimm & 0x1f) << 15 | (vtype & 0x3ff) << 20;
+}
+
/*
* RISC-V instruction emitters
*/
@@ -476,6 +680,91 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
}
/*
+ * RISC-V vector instruction emitters
+ */
+
+/*
+ * Vector registers uses the same 5 lower bits as GPR registers,
+ * and vm=0 (vm = false) means vector masking ENABLED.
+ * With RVV 1.0, vs2 is the first operand, while rs1/imm is the
+ * second operand.
+ */
+static void tcg_out_opc_vv(TCGContext *s, RISCVInsn opc,
+ TCGReg vd, TCGReg vs2, TCGReg vs1)
+{
+ tcg_out32(s, encode_v(opc, vd, vs1, vs2, true));
+}
+
+static void tcg_out_opc_vx(TCGContext *s, RISCVInsn opc,
+ TCGReg vd, TCGReg vs2, TCGReg rs1)
+{
+ tcg_out32(s, encode_v(opc, vd, rs1, vs2, true));
+}
+
+static void tcg_out_opc_vi(TCGContext *s, RISCVInsn opc,
+ TCGReg vd, TCGReg vs2, int32_t imm)
+{
+ tcg_out32(s, encode_vi(opc, vd, imm, vs2, true));
+}
+
+static void tcg_out_opc_vv_vi(TCGContext *s, RISCVInsn o_vv, RISCVInsn o_vi,
+ TCGReg vd, TCGReg vs2, TCGArg vi1, int c_vi1)
+{
+ if (c_vi1) {
+ tcg_out_opc_vi(s, o_vi, vd, vs2, vi1);
+ } else {
+ tcg_out_opc_vv(s, o_vv, vd, vs2, vi1);
+ }
+}
+
+static void tcg_out_opc_vim_mask(TCGContext *s, RISCVInsn opc, TCGReg vd,
+ TCGReg vs2, int32_t imm)
+{
+ tcg_out32(s, encode_vi(opc, vd, imm, vs2, false));
+}
+
+static void tcg_out_opc_vvm_mask(TCGContext *s, RISCVInsn opc, TCGReg vd,
+ TCGReg vs2, TCGReg vs1)
+{
+ tcg_out32(s, encode_v(opc, vd, vs1, vs2, false));
+}
+
+typedef struct VsetCache {
+ uint32_t movi_insn;
+ uint32_t vset_insn;
+} VsetCache;
+
+static VsetCache riscv_vset_cache[3][4];
+
+static void set_vtype(TCGContext *s, TCGType type, MemOp vsew)
+{
+ const VsetCache *p = &riscv_vset_cache[type - TCG_TYPE_V64][vsew];
+
+ s->riscv_cur_type = type;
+ s->riscv_cur_vsew = vsew;
+
+ if (p->movi_insn) {
+ tcg_out32(s, p->movi_insn);
+ }
+ tcg_out32(s, p->vset_insn);
+}
+
+static MemOp set_vtype_len(TCGContext *s, TCGType type)
+{
+ if (type != s->riscv_cur_type) {
+ set_vtype(s, type, MO_64);
+ }
+ return s->riscv_cur_vsew;
+}
+
+static void set_vtype_len_sew(TCGContext *s, TCGType type, MemOp vsew)
+{
+ if (type != s->riscv_cur_type || vsew != s->riscv_cur_vsew) {
+ set_vtype(s, type, vsew);
+ }
+}
+
+/*
* TCG intrinsics
*/
@@ -489,6 +778,15 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
case TCG_TYPE_I64:
tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
break;
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ case TCG_TYPE_V256:
+ {
+ int lmul = type - riscv_lg2_vlenb;
+ int nf = 1 << MAX(lmul, 0);
+ tcg_out_opc_vi(s, OPC_VMVNR_V, ret, arg, nf - 1);
+ }
+ break;
default:
g_assert_not_reached();
}
@@ -681,18 +979,101 @@ static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
}
}
+static void tcg_out_vec_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
+ TCGReg addr, intptr_t offset)
+{
+ tcg_debug_assert(data >= TCG_REG_V0);
+ tcg_debug_assert(addr < TCG_REG_V0);
+
+ if (offset) {
+ tcg_debug_assert(addr != TCG_REG_ZERO);
+ if (offset == sextreg(offset, 0, 12)) {
+ tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, addr, offset);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, addr);
+ }
+ addr = TCG_REG_TMP0;
+ }
+ tcg_out32(s, encode_v(opc, data, addr, 0, true));
+}
+
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
TCGReg arg1, intptr_t arg2)
{
- RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_LW : OPC_LD;
- tcg_out_ldst(s, insn, arg, arg1, arg2);
+ RISCVInsn insn;
+
+ switch (type) {
+ case TCG_TYPE_I32:
+ tcg_out_ldst(s, OPC_LW, arg, arg1, arg2);
+ break;
+ case TCG_TYPE_I64:
+ tcg_out_ldst(s, OPC_LD, arg, arg1, arg2);
+ break;
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ case TCG_TYPE_V256:
+ if (type >= riscv_lg2_vlenb) {
+ static const RISCVInsn whole_reg_ld[] = {
+ OPC_VL1RE64_V, OPC_VL2RE64_V, OPC_VL4RE64_V, OPC_VL8RE64_V
+ };
+ unsigned idx = type - riscv_lg2_vlenb;
+
+ tcg_debug_assert(idx < ARRAY_SIZE(whole_reg_ld));
+ insn = whole_reg_ld[idx];
+ } else {
+ static const RISCVInsn unit_stride_ld[] = {
+ OPC_VLE8_V, OPC_VLE16_V, OPC_VLE32_V, OPC_VLE64_V
+ };
+ MemOp prev_vsew = set_vtype_len(s, type);
+
+ tcg_debug_assert(prev_vsew < ARRAY_SIZE(unit_stride_ld));
+ insn = unit_stride_ld[prev_vsew];
+ }
+ tcg_out_vec_ldst(s, insn, arg, arg1, arg2);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
TCGReg arg1, intptr_t arg2)
{
- RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SW : OPC_SD;
- tcg_out_ldst(s, insn, arg, arg1, arg2);
+ RISCVInsn insn;
+
+ switch (type) {
+ case TCG_TYPE_I32:
+ tcg_out_ldst(s, OPC_SW, arg, arg1, arg2);
+ break;
+ case TCG_TYPE_I64:
+ tcg_out_ldst(s, OPC_SD, arg, arg1, arg2);
+ break;
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ case TCG_TYPE_V256:
+ if (type >= riscv_lg2_vlenb) {
+ static const RISCVInsn whole_reg_st[] = {
+ OPC_VS1R_V, OPC_VS2R_V, OPC_VS4R_V, OPC_VS8R_V
+ };
+ unsigned idx = type - riscv_lg2_vlenb;
+
+ tcg_debug_assert(idx < ARRAY_SIZE(whole_reg_st));
+ insn = whole_reg_st[idx];
+ } else {
+ static const RISCVInsn unit_stride_st[] = {
+ OPC_VSE8_V, OPC_VSE16_V, OPC_VSE32_V, OPC_VSE64_V
+ };
+ MemOp prev_vsew = set_vtype_len(s, type);
+
+ tcg_debug_assert(prev_vsew < ARRAY_SIZE(unit_stride_st));
+ insn = unit_stride_st[prev_vsew];
+ }
+ tcg_out_vec_ldst(s, insn, arg, arg1, arg2);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
@@ -766,6 +1147,40 @@ static void tcg_out_addsub2(TCGContext *s,
}
}
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, TCGReg src)
+{
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vx(s, OPC_VMV_V_X, dst, 0, src);
+ return true;
+}
+
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, TCGReg base, intptr_t offset)
+{
+ tcg_out_ld(s, TCG_TYPE_REG, TCG_REG_TMP0, base, offset);
+ return tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
+}
+
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, int64_t arg)
+{
+ /* Arg is replicated by VECE; extract the highest element. */
+ arg >>= (-8 << vece) & 63;
+
+ if (arg >= -16 && arg < 16) {
+ if (arg == 0 || arg == -1) {
+ set_vtype_len(s, type);
+ } else {
+ set_vtype_len_sew(s, type, vece);
+ }
+ tcg_out_opc_vi(s, OPC_VMV_V_I, dst, 0, arg);
+ return;
+ }
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, arg);
+ tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
+}
+
static const struct {
RISCVInsn op;
bool swap;
@@ -1104,12 +1519,72 @@ static void tcg_out_cltz(TCGContext *s, TCGType type, RISCVInsn insn,
}
}
+static void tcg_out_cmpsel(TCGContext *s, TCGType type, unsigned vece,
+ TCGCond cond, TCGReg ret,
+ TCGReg cmp1, TCGReg cmp2, bool c_cmp2,
+ TCGReg val1, bool c_val1,
+ TCGReg val2, bool c_val2)
+{
+ set_vtype_len_sew(s, type, vece);
+
+ /* Use only vmerge_vim if possible, by inverting the test. */
+ if (c_val2 && !c_val1) {
+ TCGArg temp = val1;
+ cond = tcg_invert_cond(cond);
+ val1 = val2;
+ val2 = temp;
+ c_val1 = true;
+ c_val2 = false;
+ }
+
+ /* Perform the comparison into V0 mask. */
+ if (c_cmp2) {
+ tcg_out_opc_vi(s, tcg_cmpcond_to_rvv_vi[cond].op, TCG_REG_V0, cmp1,
+ cmp2 - tcg_cmpcond_to_rvv_vi[cond].adjust);
+ } else if (tcg_cmpcond_to_rvv_vv[cond].swap) {
+ tcg_out_opc_vv(s, tcg_cmpcond_to_rvv_vv[cond].op,
+ TCG_REG_V0, cmp2, cmp1);
+ } else {
+ tcg_out_opc_vv(s, tcg_cmpcond_to_rvv_vv[cond].op,
+ TCG_REG_V0, cmp1, cmp2);
+ }
+ if (c_val1) {
+ if (c_val2) {
+ tcg_out_opc_vi(s, OPC_VMV_V_I, ret, 0, val2);
+ val2 = ret;
+ }
+ /* vd[i] == v0.mask[i] ? imm : vs2[i] */
+ tcg_out_opc_vim_mask(s, OPC_VMERGE_VIM, ret, val2, val1);
+ } else {
+ /* vd[i] == v0.mask[i] ? vs1[i] : vs2[i] */
+ tcg_out_opc_vvm_mask(s, OPC_VMERGE_VVM, ret, val2, val1);
+ }
+}
+
+static void tcg_out_vshifti(TCGContext *s, RISCVInsn opc_vi, RISCVInsn opc_vx,
+ TCGReg dst, TCGReg src, unsigned imm)
+{
+ if (imm < 32) {
+ tcg_out_opc_vi(s, opc_vi, dst, src, imm);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP0, imm);
+ tcg_out_opc_vx(s, opc_vx, dst, src, TCG_REG_TMP0);
+ }
+}
+
+static void init_setting_vtype(TCGContext *s)
+{
+ s->riscv_cur_type = TCG_TYPE_COUNT;
+}
+
static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
{
TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA;
ptrdiff_t offset = tcg_pcrel_diff(s, arg);
int ret;
+ init_setting_vtype(s);
+
tcg_debug_assert((offset & 1) == 0);
if (offset == sextreg(offset, 0, 20)) {
/* short jump: -2097150 to 2097152 */
@@ -1247,6 +1722,8 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;
+ init_setting_vtype(s);
+
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
@@ -1308,6 +1785,8 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;
+ init_setting_vtype(s);
+
/* We are expecting alignment max 7, so we can always use andi. */
tcg_debug_assert(a_mask == sextreg(a_mask, 0, 12));
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
@@ -1881,6 +2360,223 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
}
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+ unsigned vecl, unsigned vece,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ TCGType type = vecl + TCG_TYPE_V64;
+ TCGArg a0, a1, a2;
+ int c2;
+
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+ c2 = const_args[2];
+
+ switch (opc) {
+ case INDEX_op_dupm_vec:
+ tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
+ break;
+ case INDEX_op_ld_vec:
+ tcg_out_ld(s, type, a0, a1, a2);
+ break;
+ case INDEX_op_st_vec:
+ tcg_out_st(s, type, a0, a1, a2);
+ break;
+ case INDEX_op_add_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vv_vi(s, OPC_VADD_VV, OPC_VADD_VI, a0, a1, a2, c2);
+ break;
+ case INDEX_op_sub_vec:
+ set_vtype_len_sew(s, type, vece);
+ if (const_args[1]) {
+ tcg_out_opc_vi(s, OPC_VRSUB_VI, a0, a2, a1);
+ } else {
+ tcg_out_opc_vv(s, OPC_VSUB_VV, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_and_vec:
+ set_vtype_len(s, type);
+ tcg_out_opc_vv_vi(s, OPC_VAND_VV, OPC_VAND_VI, a0, a1, a2, c2);
+ break;
+ case INDEX_op_or_vec:
+ set_vtype_len(s, type);
+ tcg_out_opc_vv_vi(s, OPC_VOR_VV, OPC_VOR_VI, a0, a1, a2, c2);
+ break;
+ case INDEX_op_xor_vec:
+ set_vtype_len(s, type);
+ tcg_out_opc_vv_vi(s, OPC_VXOR_VV, OPC_VXOR_VI, a0, a1, a2, c2);
+ break;
+ case INDEX_op_not_vec:
+ set_vtype_len(s, type);
+ tcg_out_opc_vi(s, OPC_VXOR_VI, a0, a1, -1);
+ break;
+ case INDEX_op_neg_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vi(s, OPC_VRSUB_VI, a0, a1, 0);
+ break;
+ case INDEX_op_mul_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vv(s, OPC_VMUL_VV, a0, a1, a2);
+ break;
+ case INDEX_op_ssadd_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vv_vi(s, OPC_VSADD_VV, OPC_VSADD_VI, a0, a1, a2, c2);
+ break;
+ case INDEX_op_sssub_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vv_vi(s, OPC_VSSUB_VV, OPC_VSSUB_VI, a0, a1, a2, c2);
+ break;
+ case INDEX_op_usadd_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vv_vi(s, OPC_VSADDU_VV, OPC_VSADDU_VI, a0, a1, a2, c2);
+ break;
+ case INDEX_op_ussub_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vv_vi(s, OPC_VSSUBU_VV, OPC_VSSUBU_VI, a0, a1, a2, c2);
+ break;
+ case INDEX_op_smax_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vv_vi(s, OPC_VMAX_VV, OPC_VMAX_VI, a0, a1, a2, c2);
+ break;
+ case INDEX_op_smin_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vv_vi(s, OPC_VMIN_VV, OPC_VMIN_VI, a0, a1, a2, c2);
+ break;
+ case INDEX_op_umax_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vv_vi(s, OPC_VMAXU_VV, OPC_VMAXU_VI, a0, a1, a2, c2);
+ break;
+ case INDEX_op_umin_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vv_vi(s, OPC_VMINU_VV, OPC_VMINU_VI, a0, a1, a2, c2);
+ break;
+ case INDEX_op_shls_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vx(s, OPC_VSLL_VX, a0, a1, a2);
+ break;
+ case INDEX_op_shrs_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vx(s, OPC_VSRL_VX, a0, a1, a2);
+ break;
+ case INDEX_op_sars_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vx(s, OPC_VSRA_VX, a0, a1, a2);
+ break;
+ case INDEX_op_shlv_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vv(s, OPC_VSLL_VV, a0, a1, a2);
+ break;
+ case INDEX_op_shrv_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vv(s, OPC_VSRL_VV, a0, a1, a2);
+ break;
+ case INDEX_op_sarv_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vv(s, OPC_VSRA_VV, a0, a1, a2);
+ break;
+ case INDEX_op_shli_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_vshifti(s, OPC_VSLL_VI, OPC_VSLL_VX, a0, a1, a2);
+ break;
+ case INDEX_op_shri_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_vshifti(s, OPC_VSRL_VI, OPC_VSRL_VX, a0, a1, a2);
+ break;
+ case INDEX_op_sari_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_vshifti(s, OPC_VSRA_VI, OPC_VSRA_VX, a0, a1, a2);
+ break;
+ case INDEX_op_rotli_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_vshifti(s, OPC_VSLL_VI, OPC_VSLL_VX, TCG_REG_V0, a1, a2);
+ tcg_out_vshifti(s, OPC_VSRL_VI, OPC_VSRL_VX, a0, a1,
+ -a2 & ((8 << vece) - 1));
+ tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
+ break;
+ case INDEX_op_rotls_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vx(s, OPC_VSLL_VX, TCG_REG_V0, a1, a2);
+ tcg_out_opc_reg(s, OPC_SUBW, TCG_REG_TMP0, TCG_REG_ZERO, a2);
+ tcg_out_opc_vx(s, OPC_VSRL_VX, a0, a1, TCG_REG_TMP0);
+ tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
+ break;
+ case INDEX_op_rotlv_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vi(s, OPC_VRSUB_VI, TCG_REG_V0, a2, 0);
+ tcg_out_opc_vv(s, OPC_VSRL_VV, TCG_REG_V0, a1, TCG_REG_V0);
+ tcg_out_opc_vv(s, OPC_VSLL_VV, a0, a1, a2);
+ tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
+ break;
+ case INDEX_op_rotrv_vec:
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vi(s, OPC_VRSUB_VI, TCG_REG_V0, a2, 0);
+ tcg_out_opc_vv(s, OPC_VSLL_VV, TCG_REG_V0, a1, TCG_REG_V0);
+ tcg_out_opc_vv(s, OPC_VSRL_VV, a0, a1, a2);
+ tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
+ break;
+ case INDEX_op_cmp_vec:
+ tcg_out_cmpsel(s, type, vece, args[3], a0, a1, a2, c2,
+ -1, true, 0, true);
+ break;
+ case INDEX_op_cmpsel_vec:
+ tcg_out_cmpsel(s, type, vece, args[5], a0, a1, a2, c2,
+ args[3], const_args[3], args[4], const_args[4]);
+ break;
+ case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
+ default:
+ g_assert_not_reached();
+ }
+}
+
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
+ TCGArg a0, ...)
+{
+ g_assert_not_reached();
+}
+
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
+{
+ switch (opc) {
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_not_vec:
+ case INDEX_op_neg_vec:
+ case INDEX_op_mul_vec:
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_sssub_vec:
+ case INDEX_op_usadd_vec:
+ case INDEX_op_ussub_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
+ case INDEX_op_shls_vec:
+ case INDEX_op_shrs_vec:
+ case INDEX_op_sars_vec:
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_sari_vec:
+ case INDEX_op_rotls_vec:
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
+ case INDEX_op_rotli_vec:
+ case INDEX_op_cmp_vec:
+ case INDEX_op_cmpsel_vec:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
switch (op) {
@@ -2020,6 +2716,50 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_st_a64_i64:
return C_O0_I2(rZ, r);
+ case INDEX_op_st_vec:
+ return C_O0_I2(v, r);
+ case INDEX_op_dup_vec:
+ case INDEX_op_dupm_vec:
+ case INDEX_op_ld_vec:
+ return C_O1_I1(v, r);
+ case INDEX_op_neg_vec:
+ case INDEX_op_not_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_sari_vec:
+ case INDEX_op_rotli_vec:
+ return C_O1_I1(v, v);
+ case INDEX_op_add_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_sssub_vec:
+ case INDEX_op_usadd_vec:
+ case INDEX_op_ussub_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
+ return C_O1_I2(v, v, vK);
+ case INDEX_op_sub_vec:
+ return C_O1_I2(v, vK, v);
+ case INDEX_op_mul_vec:
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
+ return C_O1_I2(v, v, v);
+ case INDEX_op_shls_vec:
+ case INDEX_op_shrs_vec:
+ case INDEX_op_sars_vec:
+ case INDEX_op_rotls_vec:
+ return C_O1_I2(v, v, r);
+ case INDEX_op_cmp_vec:
+ return C_O1_I2(v, v, vL);
+ case INDEX_op_cmpsel_vec:
+ return C_O1_I4(v, v, vL, vK, vK);
default:
g_assert_not_reached();
}
@@ -2093,7 +2833,65 @@ static void tcg_target_qemu_prologue(TCGContext *s)
static void tcg_out_tb_start(TCGContext *s)
{
- /* nothing to do */
+ init_setting_vtype(s);
+}
+
+static bool vtype_check(unsigned vtype)
+{
+ unsigned long tmp;
+
+ /* vsetvl tmp, zero, vtype */
+ asm(".insn r 0x57, 7, 0x40, %0, zero, %1" : "=r"(tmp) : "r"(vtype));
+ return tmp != 0;
+}
+
+static void probe_frac_lmul_1(TCGType type, MemOp vsew)
+{
+ VsetCache *p = &riscv_vset_cache[type - TCG_TYPE_V64][vsew];
+ unsigned avl = tcg_type_size(type) >> vsew;
+ int lmul = type - riscv_lg2_vlenb;
+ unsigned vtype = encode_vtype(true, true, vsew, lmul & 7);
+ bool lmul_eq_avl = true;
+
+ /* Guaranteed by Zve64x. */
+ assert(lmul < 3);
+
+ /*
+ * For LMUL < -3, the host vector size is so large that TYPE
+ * is smaller than the minimum 1/8 fraction.
+ *
+ * For other fractional LMUL settings, implementations must
+ * support SEW settings between SEW_MIN and LMUL * ELEN, inclusive.
+ * So if ELEN = 64, LMUL = 1/2, then SEW will support e8, e16, e32,
+ * but e64 may not be supported. In other words, the hardware only
+ * guarantees SEW_MIN <= SEW <= LMUL * ELEN. Check.
+ */
+ if (lmul < 0 && (lmul < -3 || !vtype_check(vtype))) {
+ vtype = encode_vtype(true, true, vsew, VLMUL_M1);
+ lmul_eq_avl = false;
+ }
+
+ if (avl < 32) {
+ p->vset_insn = encode_vseti(OPC_VSETIVLI, TCG_REG_ZERO, avl, vtype);
+ } else if (lmul_eq_avl) {
+ /* rd != 0 and rs1 == 0 uses vlmax */
+ p->vset_insn = encode_vset(OPC_VSETVLI, TCG_REG_TMP0, TCG_REG_ZERO, vtype);
+ } else {
+ p->movi_insn = encode_i(OPC_ADDI, TCG_REG_TMP0, TCG_REG_ZERO, avl);
+ p->vset_insn = encode_vset(OPC_VSETVLI, TCG_REG_ZERO, TCG_REG_TMP0, vtype);
+ }
+}
+
+static void probe_frac_lmul(void)
+{
+ /* Match riscv_lg2_vlenb to TCG_TYPE_V64. */
+ QEMU_BUILD_BUG_ON(TCG_TYPE_V64 != 3);
+
+ for (TCGType t = TCG_TYPE_V64; t <= TCG_TYPE_V256; t++) {
+ for (MemOp e = MO_8; e <= MO_64; e++) {
+ probe_frac_lmul_1(t, e);
+ }
+ }
}
static void tcg_target_init(TCGContext *s)
@@ -2101,7 +2899,7 @@ static void tcg_target_init(TCGContext *s)
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
- tcg_target_call_clobber_regs = -1u;
+ tcg_target_call_clobber_regs = -1;
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2);
@@ -2123,6 +2921,32 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
+
+ if (cpuinfo & CPUINFO_ZVE64X) {
+ switch (riscv_lg2_vlenb) {
+ case TCG_TYPE_V64:
+ tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
+ tcg_target_available_regs[TCG_TYPE_V128] = ALL_DVECTOR_REG_GROUPS;
+ tcg_target_available_regs[TCG_TYPE_V256] = ALL_QVECTOR_REG_GROUPS;
+ s->reserved_regs |= (~ALL_QVECTOR_REG_GROUPS & ALL_VECTOR_REGS);
+ break;
+ case TCG_TYPE_V128:
+ tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
+ tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
+ tcg_target_available_regs[TCG_TYPE_V256] = ALL_DVECTOR_REG_GROUPS;
+ s->reserved_regs |= (~ALL_DVECTOR_REG_GROUPS & ALL_VECTOR_REGS);
+ break;
+ default:
+ /* Guaranteed by Zve64x. */
+ tcg_debug_assert(riscv_lg2_vlenb >= TCG_TYPE_V256);
+ tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
+ tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
+ tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS;
+ break;
+ }
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_V0);
+ probe_frac_lmul();
+ }
}
typedef struct {
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index 1a347eaf6e..334c37cbe6 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -28,42 +28,28 @@
#include "host/cpuinfo.h"
#define TCG_TARGET_INSN_UNIT_SIZE 4
-#define TCG_TARGET_NB_REGS 32
+#define TCG_TARGET_NB_REGS 64
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
typedef enum {
- TCG_REG_ZERO,
- TCG_REG_RA,
- TCG_REG_SP,
- TCG_REG_GP,
- TCG_REG_TP,
- TCG_REG_T0,
- TCG_REG_T1,
- TCG_REG_T2,
- TCG_REG_S0,
- TCG_REG_S1,
- TCG_REG_A0,
- TCG_REG_A1,
- TCG_REG_A2,
- TCG_REG_A3,
- TCG_REG_A4,
- TCG_REG_A5,
- TCG_REG_A6,
- TCG_REG_A7,
- TCG_REG_S2,
- TCG_REG_S3,
- TCG_REG_S4,
- TCG_REG_S5,
- TCG_REG_S6,
- TCG_REG_S7,
- TCG_REG_S8,
- TCG_REG_S9,
- TCG_REG_S10,
- TCG_REG_S11,
- TCG_REG_T3,
- TCG_REG_T4,
- TCG_REG_T5,
- TCG_REG_T6,
+ TCG_REG_ZERO, TCG_REG_RA, TCG_REG_SP, TCG_REG_GP,
+ TCG_REG_TP, TCG_REG_T0, TCG_REG_T1, TCG_REG_T2,
+ TCG_REG_S0, TCG_REG_S1, TCG_REG_A0, TCG_REG_A1,
+ TCG_REG_A2, TCG_REG_A3, TCG_REG_A4, TCG_REG_A5,
+ TCG_REG_A6, TCG_REG_A7, TCG_REG_S2, TCG_REG_S3,
+ TCG_REG_S4, TCG_REG_S5, TCG_REG_S6, TCG_REG_S7,
+ TCG_REG_S8, TCG_REG_S9, TCG_REG_S10, TCG_REG_S11,
+ TCG_REG_T3, TCG_REG_T4, TCG_REG_T5, TCG_REG_T6,
+
+ /* RISC-V V Extension registers */
+ TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
+ TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
+ TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
+ TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
+ TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+ TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+ TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
+ TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
/* aliases */
TCG_AREG0 = TCG_REG_S0,
@@ -156,6 +142,32 @@ typedef enum {
#define TCG_TARGET_HAS_tst 0
+/* vector instructions */
+#define TCG_TARGET_HAS_v64 (cpuinfo & CPUINFO_ZVE64X)
+#define TCG_TARGET_HAS_v128 (cpuinfo & CPUINFO_ZVE64X)
+#define TCG_TARGET_HAS_v256 (cpuinfo & CPUINFO_ZVE64X)
+#define TCG_TARGET_HAS_andc_vec 0
+#define TCG_TARGET_HAS_orc_vec 0
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec 0
+#define TCG_TARGET_HAS_eqv_vec 0
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec 1
+#define TCG_TARGET_HAS_abs_vec 0
+#define TCG_TARGET_HAS_roti_vec 1
+#define TCG_TARGET_HAS_rots_vec 1
+#define TCG_TARGET_HAS_rotv_vec 1
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 1
+#define TCG_TARGET_HAS_shv_vec 1
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec 0
+#define TCG_TARGET_HAS_cmpsel_vec 1
+
+#define TCG_TARGET_HAS_tst_vec 0
+
#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_NEED_LDST_LABELS
diff --git a/tcg/riscv/tcg-target.opc.h b/tcg/riscv/tcg-target.opc.h
new file mode 100644
index 0000000000..b80b39e1e5
--- /dev/null
+++ b/tcg/riscv/tcg-target.opc.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (c) C-SKY Microsystems Co., Ltd.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ *
+ * Target-specific opcodes for host vector expansion. These will be
+ * emitted by tcg_expand_vec_op. For those familiar with GCC internals,
+ * consider these to be UNSPEC with names.
+ */
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 5decd83cf4..0babae1b88 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1399,7 +1399,6 @@ TranslationBlock *tcg_tb_alloc(TCGContext *s)
goto retry;
}
qatomic_set(&s->code_gen_ptr, next);
- s->data_gen_ptr = NULL;
return tb;
}
@@ -6172,6 +6171,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
*/
s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
s->code_ptr = s->code_buf;
+ s->data_gen_ptr = NULL;
#ifdef TCG_TARGET_NEED_LDST_LABELS
QSIMPLEQ_INIT(&s->ldst_labels);
diff --git a/util/cpuinfo-riscv.c b/util/cpuinfo-riscv.c
index 8cacc67645..971c924012 100644
--- a/util/cpuinfo-riscv.c
+++ b/util/cpuinfo-riscv.c
@@ -4,6 +4,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
#include "host/cpuinfo.h"
#ifdef CONFIG_ASM_HWPROBE_H
@@ -13,6 +14,7 @@
#endif
unsigned cpuinfo;
+unsigned riscv_lg2_vlenb;
static volatile sig_atomic_t got_sigill;
static void sigill_handler(int signo, siginfo_t *si, void *data)
@@ -34,7 +36,7 @@ static void sigill_handler(int signo, siginfo_t *si, void *data)
/* Called both as constructor and (possibly) via other constructors. */
unsigned __attribute__((constructor)) cpuinfo_init(void)
{
- unsigned left = CPUINFO_ZBA | CPUINFO_ZBB | CPUINFO_ZICOND;
+ unsigned left = CPUINFO_ZBA | CPUINFO_ZBB | CPUINFO_ZICOND | CPUINFO_ZVE64X;
unsigned info = cpuinfo;
if (info) {
@@ -51,6 +53,10 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
#if defined(__riscv_arch_test) && defined(__riscv_zicond)
info |= CPUINFO_ZICOND;
#endif
+#if defined(__riscv_arch_test) && \
+ (defined(__riscv_vector) || defined(__riscv_zve64x))
+ info |= CPUINFO_ZVE64X;
+#endif
left &= ~info;
#ifdef CONFIG_ASM_HWPROBE_H
@@ -70,10 +76,21 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
info |= pair.value & RISCV_HWPROBE_EXT_ZICOND ? CPUINFO_ZICOND : 0;
left &= ~CPUINFO_ZICOND;
#endif
+ /* For rv64, V is Zve64d, a superset of Zve64x. */
+ info |= pair.value & RISCV_HWPROBE_IMA_V ? CPUINFO_ZVE64X : 0;
+#ifdef RISCV_HWPROBE_EXT_ZVE64X
+ info |= pair.value & RISCV_HWPROBE_EXT_ZVE64X ? CPUINFO_ZVE64X : 0;
+#endif
}
}
#endif /* CONFIG_ASM_HWPROBE_H */
+ /*
+ * We only detect support for vectors with hwprobe. All kernels with
+ * support for vectors in userspace also support the hwprobe syscall.
+ */
+ left &= ~CPUINFO_ZVE64X;
+
if (left) {
struct sigaction sa_old, sa_new;
@@ -113,6 +130,21 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
assert(left == 0);
}
+ if (info & CPUINFO_ZVE64X) {
+ /*
+ * We are guaranteed by RVV-1.0 that VLEN is a power of 2.
+ * We are guaranteed by Zve64x that VLEN >= 64, and that
+ * EEW of {8,16,32,64} are supported.
+ */
+ unsigned long vlenb;
+ /* csrr %0, vlenb */
+ asm volatile(".insn i 0x73, 0x2, %0, zero, -990" : "=r"(vlenb));
+ assert(vlenb >= 8);
+ assert(is_power_of_2(vlenb));
+ /* Cache VLEN in a convenient form. */
+ riscv_lg2_vlenb = ctz32(vlenb);
+ }
+
info |= CPUINFO_ALWAYS;
cpuinfo = info;
return info;