aboutsummaryrefslogtreecommitdiff
path: root/accel
diff options
context:
space:
mode:
Diffstat (limited to 'accel')
-rw-r--r--accel/tcg/cputlb.c626
-rw-r--r--accel/tcg/softmmu_template.h454
-rw-r--r--accel/tcg/tcg-runtime-gvec.c192
-rw-r--r--accel/tcg/tcg-runtime.h20
4 files changed, 758 insertions, 534 deletions
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 685e0f2ee4..cdcc377102 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -875,9 +875,8 @@ static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
}
static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
- int mmu_idx,
- target_ulong addr, uintptr_t retaddr,
- bool recheck, MMUAccessType access_type, int size)
+ int mmu_idx, target_ulong addr, uintptr_t retaddr,
+ MMUAccessType access_type, int size)
{
CPUState *cpu = ENV_GET_CPU(env);
hwaddr mr_offset;
@@ -887,30 +886,6 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
bool locked = false;
MemTxResult r;
- if (recheck) {
- /*
- * This is a TLB_RECHECK access, where the MMU protection
- * covers a smaller range than a target page, and we must
- * repeat the MMU check here. This tlb_fill() call might
- * longjump out if this access should cause a guest exception.
- */
- CPUTLBEntry *entry;
- target_ulong tlb_addr;
-
- tlb_fill(cpu, addr, size, access_type, mmu_idx, retaddr);
-
- entry = tlb_entry(env, mmu_idx, addr);
- tlb_addr = (access_type == MMU_DATA_LOAD ?
- entry->addr_read : entry->addr_code);
- if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
- /* RAM access */
- uintptr_t haddr = addr + entry->addend;
-
- return ldn_p((void *)haddr, size);
- }
- /* Fall through for handling IO accesses */
- }
-
section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
mr = section->mr;
mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
@@ -944,9 +919,8 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
}
static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
- int mmu_idx,
- uint64_t val, target_ulong addr,
- uintptr_t retaddr, bool recheck, int size)
+ int mmu_idx, uint64_t val, target_ulong addr,
+ uintptr_t retaddr, int size)
{
CPUState *cpu = ENV_GET_CPU(env);
hwaddr mr_offset;
@@ -955,30 +929,6 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
bool locked = false;
MemTxResult r;
- if (recheck) {
- /*
- * This is a TLB_RECHECK access, where the MMU protection
- * covers a smaller range than a target page, and we must
- * repeat the MMU check here. This tlb_fill() call might
- * longjump out if this access should cause a guest exception.
- */
- CPUTLBEntry *entry;
- target_ulong tlb_addr;
-
- tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
-
- entry = tlb_entry(env, mmu_idx, addr);
- tlb_addr = tlb_addr_write(entry);
- if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
- /* RAM access */
- uintptr_t haddr = addr + entry->addend;
-
- stn_p((void *)haddr, size, val);
- return;
- }
- /* Fall through for handling IO accesses */
- }
-
section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
mr = section->mr;
mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
@@ -1240,26 +1190,481 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
}
#ifdef TARGET_WORDS_BIGENDIAN
-# define TGT_BE(X) (X)
-# define TGT_LE(X) BSWAP(X)
+#define NEED_BE_BSWAP 0
+#define NEED_LE_BSWAP 1
#else
-# define TGT_BE(X) BSWAP(X)
-# define TGT_LE(X) (X)
+#define NEED_BE_BSWAP 1
+#define NEED_LE_BSWAP 0
#endif
-#define MMUSUFFIX _mmu
+/*
+ * Byte Swap Helper
+ *
+ * This should all dead code away depending on the build host and
+ * access type.
+ */
-#define DATA_SIZE 1
-#include "softmmu_template.h"
+static inline uint64_t handle_bswap(uint64_t val, int size, bool big_endian)
+{
+ if ((big_endian && NEED_BE_BSWAP) || (!big_endian && NEED_LE_BSWAP)) {
+ switch (size) {
+ case 1: return val;
+ case 2: return bswap16(val);
+ case 4: return bswap32(val);
+ case 8: return bswap64(val);
+ default:
+ g_assert_not_reached();
+ }
+ } else {
+ return val;
+ }
+}
-#define DATA_SIZE 2
-#include "softmmu_template.h"
+/*
+ * Load Helpers
+ *
+ * We support two different access types. SOFTMMU_CODE_ACCESS is
+ * specifically for reading instructions from system memory. It is
+ * called by the translation loop and in some helpers where the code
+ * is disassembled. It shouldn't be called directly by guest code.
+ */
-#define DATA_SIZE 4
-#include "softmmu_template.h"
+typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr);
-#define DATA_SIZE 8
-#include "softmmu_template.h"
+static inline uint64_t __attribute__((always_inline))
+load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
+ uintptr_t retaddr, size_t size, bool big_endian, bool code_read,
+ FullLoadHelper *full_load)
+{
+ uintptr_t mmu_idx = get_mmuidx(oi);
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
+ target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
+ const size_t tlb_off = code_read ?
+ offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
+ const MMUAccessType access_type =
+ code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
+ unsigned a_bits = get_alignment_bits(get_memop(oi));
+ void *haddr;
+ uint64_t res;
+
+ /* Handle CPU specific unaligned behaviour */
+ if (addr & ((1 << a_bits) - 1)) {
+ cpu_unaligned_access(ENV_GET_CPU(env), addr, access_type,
+ mmu_idx, retaddr);
+ }
+
+ /* If the TLB entry is for a different page, reload and try again. */
+ if (!tlb_hit(tlb_addr, addr)) {
+ if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
+ addr & TARGET_PAGE_MASK)) {
+ tlb_fill(ENV_GET_CPU(env), addr, size,
+ access_type, mmu_idx, retaddr);
+ index = tlb_index(env, mmu_idx, addr);
+ entry = tlb_entry(env, mmu_idx, addr);
+ }
+ tlb_addr = code_read ? entry->addr_code : entry->addr_read;
+ }
+
+ /* Handle an IO access. */
+ if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+ if ((addr & (size - 1)) != 0) {
+ goto do_unaligned_access;
+ }
+
+ if (tlb_addr & TLB_RECHECK) {
+ /*
+ * This is a TLB_RECHECK access, where the MMU protection
+ * covers a smaller range than a target page, and we must
+ * repeat the MMU check here. This tlb_fill() call might
+ * longjump out if this access should cause a guest exception.
+ */
+ tlb_fill(ENV_GET_CPU(env), addr, size,
+ access_type, mmu_idx, retaddr);
+ index = tlb_index(env, mmu_idx, addr);
+ entry = tlb_entry(env, mmu_idx, addr);
+
+ tlb_addr = code_read ? entry->addr_code : entry->addr_read;
+ tlb_addr &= ~TLB_RECHECK;
+ if (!(tlb_addr & ~TARGET_PAGE_MASK)) {
+ /* RAM access */
+ goto do_aligned_access;
+ }
+ }
+
+ res = io_readx(env, &env->iotlb[mmu_idx][index], mmu_idx, addr,
+ retaddr, access_type, size);
+ return handle_bswap(res, size, big_endian);
+ }
+
+ /* Handle slow unaligned access (it spans two pages or IO). */
+ if (size > 1
+ && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
+ >= TARGET_PAGE_SIZE)) {
+ target_ulong addr1, addr2;
+ tcg_target_ulong r1, r2;
+ unsigned shift;
+ do_unaligned_access:
+ addr1 = addr & ~(size - 1);
+ addr2 = addr1 + size;
+ r1 = full_load(env, addr1, oi, retaddr);
+ r2 = full_load(env, addr2, oi, retaddr);
+ shift = (addr & (size - 1)) * 8;
+
+ if (big_endian) {
+ /* Big-endian combine. */
+ res = (r1 << shift) | (r2 >> ((size * 8) - shift));
+ } else {
+ /* Little-endian combine. */
+ res = (r1 >> shift) | (r2 << ((size * 8) - shift));
+ }
+ return res & MAKE_64BIT_MASK(0, size * 8);
+ }
+
+ do_aligned_access:
+ haddr = (void *)((uintptr_t)addr + entry->addend);
+ switch (size) {
+ case 1:
+ res = ldub_p(haddr);
+ break;
+ case 2:
+ if (big_endian) {
+ res = lduw_be_p(haddr);
+ } else {
+ res = lduw_le_p(haddr);
+ }
+ break;
+ case 4:
+ if (big_endian) {
+ res = (uint32_t)ldl_be_p(haddr);
+ } else {
+ res = (uint32_t)ldl_le_p(haddr);
+ }
+ break;
+ case 8:
+ if (big_endian) {
+ res = ldq_be_p(haddr);
+ } else {
+ res = ldq_le_p(haddr);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ return res;
+}
+
+/*
+ * For the benefit of TCG generated code, we want to avoid the
+ * complication of ABI-specific return type promotion and always
+ * return a value extended to the register size of the host. This is
+ * tcg_target_long, except in the case of a 32-bit host and 64-bit
+ * data, and for that we always have uint64_t.
+ *
+ * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
+ */
+
+static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return load_helper(env, addr, oi, retaddr, 1, false, false,
+ full_ldub_mmu);
+}
+
+tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return full_ldub_mmu(env, addr, oi, retaddr);
+}
+
+static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return load_helper(env, addr, oi, retaddr, 2, false, false,
+ full_le_lduw_mmu);
+}
+
+tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return full_le_lduw_mmu(env, addr, oi, retaddr);
+}
+
+static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return load_helper(env, addr, oi, retaddr, 2, true, false,
+ full_be_lduw_mmu);
+}
+
+tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return full_be_lduw_mmu(env, addr, oi, retaddr);
+}
+
+static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return load_helper(env, addr, oi, retaddr, 4, false, false,
+ full_le_ldul_mmu);
+}
+
+tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return full_le_ldul_mmu(env, addr, oi, retaddr);
+}
+
+static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return load_helper(env, addr, oi, retaddr, 4, true, false,
+ full_be_ldul_mmu);
+}
+
+tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return full_be_ldul_mmu(env, addr, oi, retaddr);
+}
+
+uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return load_helper(env, addr, oi, retaddr, 8, false, false,
+ helper_le_ldq_mmu);
+}
+
+uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return load_helper(env, addr, oi, retaddr, 8, true, false,
+ helper_be_ldq_mmu);
+}
+
+/*
+ * Provide signed versions of the load routines as well. We can of course
+ * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
+ */
+
+
+tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
+}
+
+tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
+}
+
+tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
+}
+
+tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
+}
+
+tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
+}
+
+/*
+ * Store Helpers
+ */
+
+static inline void __attribute__((always_inline))
+store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
+ TCGMemOpIdx oi, uintptr_t retaddr, size_t size, bool big_endian)
+{
+ uintptr_t mmu_idx = get_mmuidx(oi);
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
+ target_ulong tlb_addr = tlb_addr_write(entry);
+ const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
+ unsigned a_bits = get_alignment_bits(get_memop(oi));
+ void *haddr;
+
+ /* Handle CPU specific unaligned behaviour */
+ if (addr & ((1 << a_bits) - 1)) {
+ cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
+ mmu_idx, retaddr);
+ }
+
+ /* If the TLB entry is for a different page, reload and try again. */
+ if (!tlb_hit(tlb_addr, addr)) {
+ if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
+ addr & TARGET_PAGE_MASK)) {
+ tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE,
+ mmu_idx, retaddr);
+ index = tlb_index(env, mmu_idx, addr);
+ entry = tlb_entry(env, mmu_idx, addr);
+ }
+ tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
+ }
+
+ /* Handle an IO access. */
+ if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+ if ((addr & (size - 1)) != 0) {
+ goto do_unaligned_access;
+ }
+
+ if (tlb_addr & TLB_RECHECK) {
+ /*
+ * This is a TLB_RECHECK access, where the MMU protection
+ * covers a smaller range than a target page, and we must
+ * repeat the MMU check here. This tlb_fill() call might
+ * longjump out if this access should cause a guest exception.
+ */
+ tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE,
+ mmu_idx, retaddr);
+ index = tlb_index(env, mmu_idx, addr);
+ entry = tlb_entry(env, mmu_idx, addr);
+
+ tlb_addr = tlb_addr_write(entry);
+ tlb_addr &= ~TLB_RECHECK;
+ if (!(tlb_addr & ~TARGET_PAGE_MASK)) {
+ /* RAM access */
+ goto do_aligned_access;
+ }
+ }
+
+ io_writex(env, &env->iotlb[mmu_idx][index], mmu_idx,
+ handle_bswap(val, size, big_endian),
+ addr, retaddr, size);
+ return;
+ }
+
+ /* Handle slow unaligned access (it spans two pages or IO). */
+ if (size > 1
+ && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
+ >= TARGET_PAGE_SIZE)) {
+ int i;
+ uintptr_t index2;
+ CPUTLBEntry *entry2;
+ target_ulong page2, tlb_addr2;
+ do_unaligned_access:
+ /*
+ * Ensure the second page is in the TLB. Note that the first page
+ * is already guaranteed to be filled, and that the second page
+ * cannot evict the first.
+ */
+ page2 = (addr + size) & TARGET_PAGE_MASK;
+ index2 = tlb_index(env, mmu_idx, page2);
+ entry2 = tlb_entry(env, mmu_idx, page2);
+ tlb_addr2 = tlb_addr_write(entry2);
+ if (!tlb_hit_page(tlb_addr2, page2)
+ && !victim_tlb_hit(env, mmu_idx, index2, tlb_off,
+ page2 & TARGET_PAGE_MASK)) {
+ tlb_fill(ENV_GET_CPU(env), page2, size, MMU_DATA_STORE,
+ mmu_idx, retaddr);
+ }
+
+ /*
+ * XXX: not efficient, but simple.
+ * This loop must go in the forward direction to avoid issues
+ * with self-modifying code in Windows 64-bit.
+ */
+ for (i = 0; i < size; ++i) {
+ uint8_t val8;
+ if (big_endian) {
+ /* Big-endian extract. */
+ val8 = val >> (((size - 1) * 8) - (i * 8));
+ } else {
+ /* Little-endian extract. */
+ val8 = val >> (i * 8);
+ }
+ helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
+ }
+ return;
+ }
+
+ do_aligned_access:
+ haddr = (void *)((uintptr_t)addr + entry->addend);
+ switch (size) {
+ case 1:
+ stb_p(haddr, val);
+ break;
+ case 2:
+ if (big_endian) {
+ stw_be_p(haddr, val);
+ } else {
+ stw_le_p(haddr, val);
+ }
+ break;
+ case 4:
+ if (big_endian) {
+ stl_be_p(haddr, val);
+ } else {
+ stl_le_p(haddr, val);
+ }
+ break;
+ case 8:
+ if (big_endian) {
+ stq_be_p(haddr, val);
+ } else {
+ stq_le_p(haddr, val);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ break;
+ }
+}
+
+void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ store_helper(env, addr, val, oi, retaddr, 1, false);
+}
+
+void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ store_helper(env, addr, val, oi, retaddr, 2, false);
+}
+
+void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ store_helper(env, addr, val, oi, retaddr, 2, true);
+}
+
+void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ store_helper(env, addr, val, oi, retaddr, 4, false);
+}
+
+void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ store_helper(env, addr, val, oi, retaddr, 4, true);
+}
+
+void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ store_helper(env, addr, val, oi, retaddr, 8, false);
+}
+
+void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ store_helper(env, addr, val, oi, retaddr, 8, true);
+}
/* First set of helpers allows passing in of OI and RETADDR. This makes
them callable from other helpers. */
@@ -1320,20 +1725,81 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
/* Code access functions. */
-#undef MMUSUFFIX
-#define MMUSUFFIX _cmmu
-#undef GETPC
-#define GETPC() ((uintptr_t)0)
-#define SOFTMMU_CODE_ACCESS
+static uint64_t full_ldub_cmmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return load_helper(env, addr, oi, retaddr, 1, false, true,
+ full_ldub_cmmu);
+}
-#define DATA_SIZE 1
-#include "softmmu_template.h"
+uint8_t helper_ret_ldb_cmmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return full_ldub_cmmu(env, addr, oi, retaddr);
+}
-#define DATA_SIZE 2
-#include "softmmu_template.h"
+static uint64_t full_le_lduw_cmmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return load_helper(env, addr, oi, retaddr, 2, false, true,
+ full_le_lduw_cmmu);
+}
-#define DATA_SIZE 4
-#include "softmmu_template.h"
+uint16_t helper_le_ldw_cmmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return full_le_lduw_cmmu(env, addr, oi, retaddr);
+}
-#define DATA_SIZE 8
-#include "softmmu_template.h"
+static uint64_t full_be_lduw_cmmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return load_helper(env, addr, oi, retaddr, 2, true, true,
+ full_be_lduw_cmmu);
+}
+
+uint16_t helper_be_ldw_cmmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return full_be_lduw_cmmu(env, addr, oi, retaddr);
+}
+
+static uint64_t full_le_ldul_cmmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return load_helper(env, addr, oi, retaddr, 4, false, true,
+ full_le_ldul_cmmu);
+}
+
+uint32_t helper_le_ldl_cmmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return full_le_ldul_cmmu(env, addr, oi, retaddr);
+}
+
+static uint64_t full_be_ldul_cmmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return load_helper(env, addr, oi, retaddr, 4, true, true,
+ full_be_ldul_cmmu);
+}
+
+uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return full_be_ldul_cmmu(env, addr, oi, retaddr);
+}
+
+uint64_t helper_le_ldq_cmmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return load_helper(env, addr, oi, retaddr, 8, false, true,
+ helper_le_ldq_cmmu);
+}
+
+uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr)
+{
+ return load_helper(env, addr, oi, retaddr, 8, true, true,
+ helper_be_ldq_cmmu);
+}
diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h
deleted file mode 100644
index e970a8b378..0000000000
--- a/accel/tcg/softmmu_template.h
+++ /dev/null
@@ -1,454 +0,0 @@
-/*
- * Software MMU support
- *
- * Generate helpers used by TCG for qemu_ld/st ops and code load
- * functions.
- *
- * Included from target op helpers and exec.c.
- *
- * Copyright (c) 2003 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-#if DATA_SIZE == 8
-#define SUFFIX q
-#define LSUFFIX q
-#define SDATA_TYPE int64_t
-#define DATA_TYPE uint64_t
-#elif DATA_SIZE == 4
-#define SUFFIX l
-#define LSUFFIX l
-#define SDATA_TYPE int32_t
-#define DATA_TYPE uint32_t
-#elif DATA_SIZE == 2
-#define SUFFIX w
-#define LSUFFIX uw
-#define SDATA_TYPE int16_t
-#define DATA_TYPE uint16_t
-#elif DATA_SIZE == 1
-#define SUFFIX b
-#define LSUFFIX ub
-#define SDATA_TYPE int8_t
-#define DATA_TYPE uint8_t
-#else
-#error unsupported data size
-#endif
-
-
-/* For the benefit of TCG generated code, we want to avoid the complication
- of ABI-specific return type promotion and always return a value extended
- to the register size of the host. This is tcg_target_long, except in the
- case of a 32-bit host and 64-bit data, and for that we always have
- uint64_t. Don't bother with this widened value for SOFTMMU_CODE_ACCESS. */
-#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE == 8
-# define WORD_TYPE DATA_TYPE
-# define USUFFIX SUFFIX
-#else
-# define WORD_TYPE tcg_target_ulong
-# define USUFFIX glue(u, SUFFIX)
-# define SSUFFIX glue(s, SUFFIX)
-#endif
-
-#ifdef SOFTMMU_CODE_ACCESS
-#define READ_ACCESS_TYPE MMU_INST_FETCH
-#define ADDR_READ addr_code
-#else
-#define READ_ACCESS_TYPE MMU_DATA_LOAD
-#define ADDR_READ addr_read
-#endif
-
-#if DATA_SIZE == 8
-# define BSWAP(X) bswap64(X)
-#elif DATA_SIZE == 4
-# define BSWAP(X) bswap32(X)
-#elif DATA_SIZE == 2
-# define BSWAP(X) bswap16(X)
-#else
-# define BSWAP(X) (X)
-#endif
-
-#if DATA_SIZE == 1
-# define helper_le_ld_name glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
-# define helper_be_ld_name helper_le_ld_name
-# define helper_le_lds_name glue(glue(helper_ret_ld, SSUFFIX), MMUSUFFIX)
-# define helper_be_lds_name helper_le_lds_name
-# define helper_le_st_name glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)
-# define helper_be_st_name helper_le_st_name
-#else
-# define helper_le_ld_name glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX)
-# define helper_be_ld_name glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX)
-# define helper_le_lds_name glue(glue(helper_le_ld, SSUFFIX), MMUSUFFIX)
-# define helper_be_lds_name glue(glue(helper_be_ld, SSUFFIX), MMUSUFFIX)
-# define helper_le_st_name glue(glue(helper_le_st, SUFFIX), MMUSUFFIX)
-# define helper_be_st_name glue(glue(helper_be_st, SUFFIX), MMUSUFFIX)
-#endif
-
-#ifndef SOFTMMU_CODE_ACCESS
-static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
- size_t mmu_idx, size_t index,
- target_ulong addr,
- uintptr_t retaddr,
- bool recheck,
- MMUAccessType access_type)
-{
- CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
- return io_readx(env, iotlbentry, mmu_idx, addr, retaddr, recheck,
- access_type, DATA_SIZE);
-}
-#endif
-
-WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
- TCGMemOpIdx oi, uintptr_t retaddr)
-{
- uintptr_t mmu_idx = get_mmuidx(oi);
- uintptr_t index = tlb_index(env, mmu_idx, addr);
- CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
- target_ulong tlb_addr = entry->ADDR_READ;
- unsigned a_bits = get_alignment_bits(get_memop(oi));
- uintptr_t haddr;
- DATA_TYPE res;
-
- if (addr & ((1 << a_bits) - 1)) {
- cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
- mmu_idx, retaddr);
- }
-
- /* If the TLB entry is for a different page, reload and try again. */
- if (!tlb_hit(tlb_addr, addr)) {
- if (!VICTIM_TLB_HIT(ADDR_READ, addr)) {
- tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE,
- mmu_idx, retaddr);
- index = tlb_index(env, mmu_idx, addr);
- entry = tlb_entry(env, mmu_idx, addr);
- }
- tlb_addr = entry->ADDR_READ;
- }
-
- /* Handle an IO access. */
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
- if ((addr & (DATA_SIZE - 1)) != 0) {
- goto do_unaligned_access;
- }
-
- /* ??? Note that the io helpers always read data in the target
- byte ordering. We should push the LE/BE request down into io. */
- res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr,
- tlb_addr & TLB_RECHECK,
- READ_ACCESS_TYPE);
- res = TGT_LE(res);
- return res;
- }
-
- /* Handle slow unaligned access (it spans two pages or IO). */
- if (DATA_SIZE > 1
- && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
- >= TARGET_PAGE_SIZE)) {
- target_ulong addr1, addr2;
- DATA_TYPE res1, res2;
- unsigned shift;
- do_unaligned_access:
- addr1 = addr & ~(DATA_SIZE - 1);
- addr2 = addr1 + DATA_SIZE;
- res1 = helper_le_ld_name(env, addr1, oi, retaddr);
- res2 = helper_le_ld_name(env, addr2, oi, retaddr);
- shift = (addr & (DATA_SIZE - 1)) * 8;
-
- /* Little-endian combine. */
- res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
- return res;
- }
-
- haddr = addr + entry->addend;
-#if DATA_SIZE == 1
- res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr);
-#else
- res = glue(glue(ld, LSUFFIX), _le_p)((uint8_t *)haddr);
-#endif
- return res;
-}
-
-#if DATA_SIZE > 1
-WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
- TCGMemOpIdx oi, uintptr_t retaddr)
-{
- uintptr_t mmu_idx = get_mmuidx(oi);
- uintptr_t index = tlb_index(env, mmu_idx, addr);
- CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
- target_ulong tlb_addr = entry->ADDR_READ;
- unsigned a_bits = get_alignment_bits(get_memop(oi));
- uintptr_t haddr;
- DATA_TYPE res;
-
- if (addr & ((1 << a_bits) - 1)) {
- cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
- mmu_idx, retaddr);
- }
-
- /* If the TLB entry is for a different page, reload and try again. */
- if (!tlb_hit(tlb_addr, addr)) {
- if (!VICTIM_TLB_HIT(ADDR_READ, addr)) {
- tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE,
- mmu_idx, retaddr);
- index = tlb_index(env, mmu_idx, addr);
- entry = tlb_entry(env, mmu_idx, addr);
- }
- tlb_addr = entry->ADDR_READ;
- }
-
- /* Handle an IO access. */
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
- if ((addr & (DATA_SIZE - 1)) != 0) {
- goto do_unaligned_access;
- }
-
- /* ??? Note that the io helpers always read data in the target
- byte ordering. We should push the LE/BE request down into io. */
- res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr,
- tlb_addr & TLB_RECHECK,
- READ_ACCESS_TYPE);
- res = TGT_BE(res);
- return res;
- }
-
- /* Handle slow unaligned access (it spans two pages or IO). */
- if (DATA_SIZE > 1
- && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
- >= TARGET_PAGE_SIZE)) {
- target_ulong addr1, addr2;
- DATA_TYPE res1, res2;
- unsigned shift;
- do_unaligned_access:
- addr1 = addr & ~(DATA_SIZE - 1);
- addr2 = addr1 + DATA_SIZE;
- res1 = helper_be_ld_name(env, addr1, oi, retaddr);
- res2 = helper_be_ld_name(env, addr2, oi, retaddr);
- shift = (addr & (DATA_SIZE - 1)) * 8;
-
- /* Big-endian combine. */
- res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
- return res;
- }
-
- haddr = addr + entry->addend;
- res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr);
- return res;
-}
-#endif /* DATA_SIZE > 1 */
-
-#ifndef SOFTMMU_CODE_ACCESS
-
-/* Provide signed versions of the load routines as well. We can of course
- avoid this for 64-bit data, or for 32-bit data on 32-bit host. */
-#if DATA_SIZE * 8 < TCG_TARGET_REG_BITS
-WORD_TYPE helper_le_lds_name(CPUArchState *env, target_ulong addr,
- TCGMemOpIdx oi, uintptr_t retaddr)
-{
- return (SDATA_TYPE)helper_le_ld_name(env, addr, oi, retaddr);
-}
-
-# if DATA_SIZE > 1
-WORD_TYPE helper_be_lds_name(CPUArchState *env, target_ulong addr,
- TCGMemOpIdx oi, uintptr_t retaddr)
-{
- return (SDATA_TYPE)helper_be_ld_name(env, addr, oi, retaddr);
-}
-# endif
-#endif
-
-static inline void glue(io_write, SUFFIX)(CPUArchState *env,
- size_t mmu_idx, size_t index,
- DATA_TYPE val,
- target_ulong addr,
- uintptr_t retaddr,
- bool recheck)
-{
- CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
- return io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
- recheck, DATA_SIZE);
-}
-
-void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
- TCGMemOpIdx oi, uintptr_t retaddr)
-{
- uintptr_t mmu_idx = get_mmuidx(oi);
- uintptr_t index = tlb_index(env, mmu_idx, addr);
- CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
- target_ulong tlb_addr = tlb_addr_write(entry);
- unsigned a_bits = get_alignment_bits(get_memop(oi));
- uintptr_t haddr;
-
- if (addr & ((1 << a_bits) - 1)) {
- cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
- mmu_idx, retaddr);
- }
-
- /* If the TLB entry is for a different page, reload and try again. */
- if (!tlb_hit(tlb_addr, addr)) {
- if (!VICTIM_TLB_HIT(addr_write, addr)) {
- tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
- mmu_idx, retaddr);
- index = tlb_index(env, mmu_idx, addr);
- entry = tlb_entry(env, mmu_idx, addr);
- }
- tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
- }
-
- /* Handle an IO access. */
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
- if ((addr & (DATA_SIZE - 1)) != 0) {
- goto do_unaligned_access;
- }
-
- /* ??? Note that the io helpers always read data in the target
- byte ordering. We should push the LE/BE request down into io. */
- val = TGT_LE(val);
- glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr,
- retaddr, tlb_addr & TLB_RECHECK);
- return;
- }
-
- /* Handle slow unaligned access (it spans two pages or IO). */
- if (DATA_SIZE > 1
- && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
- >= TARGET_PAGE_SIZE)) {
- int i;
- target_ulong page2;
- CPUTLBEntry *entry2;
- do_unaligned_access:
- /* Ensure the second page is in the TLB. Note that the first page
- is already guaranteed to be filled, and that the second page
- cannot evict the first. */
- page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
- entry2 = tlb_entry(env, mmu_idx, page2);
- if (!tlb_hit_page(tlb_addr_write(entry2), page2)
- && !VICTIM_TLB_HIT(addr_write, page2)) {
- tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE,
- mmu_idx, retaddr);
- }
-
- /* XXX: not efficient, but simple. */
- /* This loop must go in the forward direction to avoid issues
- with self-modifying code in Windows 64-bit. */
- for (i = 0; i < DATA_SIZE; ++i) {
- /* Little-endian extract. */
- uint8_t val8 = val >> (i * 8);
- glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
- oi, retaddr);
- }
- return;
- }
-
- haddr = addr + entry->addend;
-#if DATA_SIZE == 1
- glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
-#else
- glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val);
-#endif
-}
-
-#if DATA_SIZE > 1
-void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
- TCGMemOpIdx oi, uintptr_t retaddr)
-{
- uintptr_t mmu_idx = get_mmuidx(oi);
- uintptr_t index = tlb_index(env, mmu_idx, addr);
- CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
- target_ulong tlb_addr = tlb_addr_write(entry);
- unsigned a_bits = get_alignment_bits(get_memop(oi));
- uintptr_t haddr;
-
- if (addr & ((1 << a_bits) - 1)) {
- cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
- mmu_idx, retaddr);
- }
-
- /* If the TLB entry is for a different page, reload and try again. */
- if (!tlb_hit(tlb_addr, addr)) {
- if (!VICTIM_TLB_HIT(addr_write, addr)) {
- tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
- mmu_idx, retaddr);
- index = tlb_index(env, mmu_idx, addr);
- entry = tlb_entry(env, mmu_idx, addr);
- }
- tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
- }
-
- /* Handle an IO access. */
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
- if ((addr & (DATA_SIZE - 1)) != 0) {
- goto do_unaligned_access;
- }
-
- /* ??? Note that the io helpers always read data in the target
- byte ordering. We should push the LE/BE request down into io. */
- val = TGT_BE(val);
- glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr, retaddr,
- tlb_addr & TLB_RECHECK);
- return;
- }
-
- /* Handle slow unaligned access (it spans two pages or IO). */
- if (DATA_SIZE > 1
- && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
- >= TARGET_PAGE_SIZE)) {
- int i;
- target_ulong page2;
- CPUTLBEntry *entry2;
- do_unaligned_access:
- /* Ensure the second page is in the TLB. Note that the first page
- is already guaranteed to be filled, and that the second page
- cannot evict the first. */
- page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
- entry2 = tlb_entry(env, mmu_idx, page2);
- if (!tlb_hit_page(tlb_addr_write(entry2), page2)
- && !VICTIM_TLB_HIT(addr_write, page2)) {
- tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE,
- mmu_idx, retaddr);
- }
-
- /* XXX: not efficient, but simple */
- /* This loop must go in the forward direction to avoid issues
- with self-modifying code. */
- for (i = 0; i < DATA_SIZE; ++i) {
- /* Big-endian extract. */
- uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8));
- glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
- oi, retaddr);
- }
- return;
- }
-
- haddr = addr + entry->addend;
- glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val);
-}
-#endif /* DATA_SIZE > 1 */
-#endif /* !defined(SOFTMMU_CODE_ACCESS) */
-
-#undef READ_ACCESS_TYPE
-#undef DATA_TYPE
-#undef SUFFIX
-#undef LSUFFIX
-#undef DATA_SIZE
-#undef ADDR_READ
-#undef WORD_TYPE
-#undef SDATA_TYPE
-#undef USUFFIX
-#undef SSUFFIX
-#undef BSWAP
-#undef helper_le_ld_name
-#undef helper_be_ld_name
-#undef helper_le_lds_name
-#undef helper_be_lds_name
-#undef helper_le_st_name
-#undef helper_be_st_name
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index e2c6f24262..0f09e0ef38 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -398,6 +398,54 @@ void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
clear_high(d, oprsz, desc);
}
+void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(int8_t)) {
+ int8_t aa = *(int8_t *)(a + i);
+ *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(int16_t)) {
+ int16_t aa = *(int16_t *)(a + i);
+ *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(int32_t)) {
+ int32_t aa = *(int32_t *)(a + i);
+ *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(int64_t)) {
+ int64_t aa = *(int64_t *)(a + i);
+ *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
+ }
+ clear_high(d, oprsz, desc);
+}
+
void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
@@ -725,6 +773,150 @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
clear_high(d, oprsz, desc);
}
+void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
+ uint8_t sh = *(uint8_t *)(b + i) & 7;
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
+ uint8_t sh = *(uint16_t *)(b + i) & 15;
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
+ uint8_t sh = *(uint32_t *)(b + i) & 31;
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+ uint8_t sh = *(uint64_t *)(b + i) & 63;
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
+ uint8_t sh = *(uint8_t *)(b + i) & 7;
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
+ uint8_t sh = *(uint16_t *)(b + i) & 15;
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
+ uint8_t sh = *(uint32_t *)(b + i) & 31;
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+ uint8_t sh = *(uint64_t *)(b + i) & 63;
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec8)) {
+ uint8_t sh = *(uint8_t *)(b + i) & 7;
+ *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(int16_t)) {
+ uint8_t sh = *(uint16_t *)(b + i) & 15;
+ *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec32)) {
+ uint8_t sh = *(uint32_t *)(b + i) & 31;
+ *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ uint8_t sh = *(uint64_t *)(b + i) & 63;
+ *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
/* If vectors are enabled, the compiler fills in -1 for true.
Otherwise, we must take care of this by hand. */
#ifdef CONFIG_VECTOR16
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index dfe325625c..6d73dc2d65 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -225,6 +225,11 @@ DEF_HELPER_FLAGS_3(gvec_neg16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_neg32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_neg64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_abs8, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_abs16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_abs32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_abs64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_3(gvec_not, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_and, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_or, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -254,6 +259,21 @@ DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_shl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_shl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_shl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_shl64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_shr8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_shr16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_shr32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_shr64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_sar8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sar16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sar32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sar64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)